diff --git a/apps/orchestrator/src/spawner/docker-sandbox.service.spec.ts b/apps/orchestrator/src/spawner/docker-sandbox.service.spec.ts index c7e2de1..49c51cf 100644 --- a/apps/orchestrator/src/spawner/docker-sandbox.service.spec.ts +++ b/apps/orchestrator/src/spawner/docker-sandbox.service.spec.ts @@ -233,19 +233,52 @@ describe("DockerSandboxService", () => { }); describe("removeContainer", () => { - it("should remove a container by ID", async () => { + it("should gracefully stop and remove a container by ID", async () => { const containerId = "container-123"; await service.removeContainer(containerId); expect(mockDocker.getContainer).toHaveBeenCalledWith(containerId); + expect(mockContainer.stop).toHaveBeenCalledWith({ t: 10 }); + expect(mockContainer.remove).toHaveBeenCalledWith({ force: false }); + }); + + it("should fall back to force remove when graceful stop fails", async () => { + const containerId = "container-123"; + + (mockContainer.stop as ReturnType).mockRejectedValueOnce( + new Error("Container already stopped") + ); + + await service.removeContainer(containerId); + + expect(mockContainer.stop).toHaveBeenCalledWith({ t: 10 }); expect(mockContainer.remove).toHaveBeenCalledWith({ force: true }); }); - it("should throw error if container removal fails", async () => { + it("should fall back to force remove when graceful remove fails", async () => { const containerId = "container-123"; - (mockContainer.remove as ReturnType).mockRejectedValue( + (mockContainer.remove as ReturnType) + .mockRejectedValueOnce(new Error("Container still running")) + .mockResolvedValueOnce(undefined); + + await service.removeContainer(containerId); + + expect(mockContainer.stop).toHaveBeenCalledWith({ t: 10 }); + // First call: graceful remove (force: false) - fails + expect(mockContainer.remove).toHaveBeenNthCalledWith(1, { force: false }); + // Second call: force remove (force: true) - succeeds + expect(mockContainer.remove).toHaveBeenNthCalledWith(2, { force: true }); + }); + + it("should throw error if both graceful and force removal fail", async () => { + const containerId = "container-123"; + + (mockContainer.stop as ReturnType).mockRejectedValueOnce( + new Error("Stop failed") + ); + (mockContainer.remove as ReturnType).mockRejectedValueOnce( new Error("Container not found") ); @@ -253,6 +286,31 @@ describe("DockerSandboxService", () => { "Failed to remove container container-123" ); }); + + it("should use configurable graceful stop timeout", async () => { + const customConfigService = { + get: vi.fn((key: string, defaultValue?: unknown) => { + const config: Record = { + "orchestrator.docker.socketPath": "/var/run/docker.sock", + "orchestrator.sandbox.enabled": true, + "orchestrator.sandbox.defaultImage": "node:20-alpine", + "orchestrator.sandbox.defaultMemoryMB": 512, + "orchestrator.sandbox.defaultCpuLimit": 1.0, + "orchestrator.sandbox.networkMode": "bridge", + "orchestrator.sandbox.gracefulStopTimeoutSeconds": 30, + }; + return config[key] !== undefined ? config[key] : defaultValue; + }), + } as unknown as ConfigService; + + const customService = new DockerSandboxService(customConfigService, mockDocker); + const containerId = "container-123"; + + await customService.removeContainer(containerId); + + expect(mockContainer.stop).toHaveBeenCalledWith({ t: 30 }); + expect(mockContainer.remove).toHaveBeenCalledWith({ force: false }); + }); }); describe("getContainerStatus", () => { @@ -280,24 +338,30 @@ describe("DockerSandboxService", () => { }); describe("cleanup", () => { - it("should stop and remove container", async () => { + it("should stop and remove container gracefully", async () => { const containerId = "container-123"; await service.cleanup(containerId); + // cleanup calls stopContainer first, then removeContainer + // stopContainer sends stop({ t: 10 }) + // removeContainer also tries stop({ t: 10 }) then remove({ force: false }) expect(mockContainer.stop).toHaveBeenCalledWith({ t: 10 }); - expect(mockContainer.remove).toHaveBeenCalledWith({ force: true }); + expect(mockContainer.remove).toHaveBeenCalledWith({ force: false }); }); - it("should remove container even if stop fails", async () => { + it("should remove container even if initial stop fails", async () => { const containerId = "container-123"; + // First stop call (from cleanup's stopContainer) fails + // Second stop call (from removeContainer's graceful attempt) also fails (mockContainer.stop as ReturnType).mockRejectedValue( new Error("Container already stopped") ); await service.cleanup(containerId); + // removeContainer falls back to force remove after graceful stop fails expect(mockContainer.remove).toHaveBeenCalledWith({ force: true }); }); diff --git a/apps/orchestrator/src/spawner/docker-sandbox.service.ts b/apps/orchestrator/src/spawner/docker-sandbox.service.ts index d0eabbd..fd52c18 100644 --- a/apps/orchestrator/src/spawner/docker-sandbox.service.ts +++ b/apps/orchestrator/src/spawner/docker-sandbox.service.ts @@ -81,6 +81,7 @@ export class DockerSandboxService { private readonly defaultNetworkMode: string; private readonly envWhitelist: readonly string[]; private readonly defaultSecurityOptions: Required; + private readonly gracefulStopTimeoutSeconds: number; constructor( private readonly configService: ConfigService, @@ -144,6 +145,11 @@ export class DockerSandboxService { noNewPrivileges: configNoNewPrivileges ?? DEFAULT_SECURITY_OPTIONS.noNewPrivileges, }; + this.gracefulStopTimeoutSeconds = this.configService.get( + "orchestrator.sandbox.gracefulStopTimeoutSeconds", + 10 + ); + // Validate default image tag at startup to fail fast on misconfiguration this.validateImageTag(this.defaultImage); @@ -336,15 +342,34 @@ export class DockerSandboxService { } /** - * Remove a Docker container + * Remove a Docker container with graceful shutdown. + * First attempts to gracefully stop the container (SIGTERM with configurable timeout), + * then removes it without force. If graceful stop fails, falls back to force remove (SIGKILL). * @param containerId Container ID to remove */ async removeContainer(containerId: string): Promise { + this.logger.log(`Removing container: ${containerId}`); + const container = this.docker.getContainer(containerId); + + // Try graceful stop first (SIGTERM with timeout), then non-force remove + try { + this.logger.log( + `Attempting graceful stop of container ${containerId} (timeout: ${this.gracefulStopTimeoutSeconds.toString()}s)` + ); + await container.stop({ t: this.gracefulStopTimeoutSeconds }); + await container.remove({ force: false }); + this.logger.log(`Container gracefully stopped and removed: ${containerId}`); + return; + } catch (gracefulError) { + this.logger.warn( + `Graceful stop failed for container ${containerId}, falling back to force remove: ${gracefulError instanceof Error ? gracefulError.message : String(gracefulError)}` + ); + } + + // Fallback: force remove (SIGKILL) try { - this.logger.log(`Removing container: ${containerId}`); - const container = this.docker.getContainer(containerId); await container.remove({ force: true }); - this.logger.log(`Container removed successfully: ${containerId}`); + this.logger.log(`Container force-removed: ${containerId}`); } catch (error) { const enhancedError = error instanceof Error ? error : new Error(String(error)); enhancedError.message = `Failed to remove container ${containerId}: ${enhancedError.message}`;