fix(CQ-ORCH-7): Graceful Docker container shutdown before force remove
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

Replace the always-force container removal (SIGKILL) with a two-phase
approach: first attempt graceful stop (SIGTERM with configurable timeout),
then remove without force. Falls back to force remove (SIGKILL) only if the
graceful path fails, i.e. if either the stop or the non-force remove throws.
The graceful stop timeout is configurable via
orchestrator.sandbox.gracefulStopTimeoutSeconds (default: 10 seconds).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jason Woltje
2026-02-06 14:05:53 -06:00
parent 2b356f6ca2
commit a0062494b7
2 changed files with 99 additions and 10 deletions

View File

@@ -81,6 +81,7 @@ export class DockerSandboxService {
private readonly defaultNetworkMode: string;
private readonly envWhitelist: readonly string[];
private readonly defaultSecurityOptions: Required<DockerSecurityOptions>;
private readonly gracefulStopTimeoutSeconds: number;
constructor(
private readonly configService: ConfigService,
@@ -144,6 +145,11 @@ export class DockerSandboxService {
noNewPrivileges: configNoNewPrivileges ?? DEFAULT_SECURITY_OPTIONS.noNewPrivileges,
};
this.gracefulStopTimeoutSeconds = this.configService.get<number>(
"orchestrator.sandbox.gracefulStopTimeoutSeconds",
10
);
// Validate default image tag at startup to fail fast on misconfiguration
this.validateImageTag(this.defaultImage);
@@ -336,15 +342,34 @@ export class DockerSandboxService {
}
/**
* Remove a Docker container
* Remove a Docker container with graceful shutdown.
* First attempts to gracefully stop the container (SIGTERM with configurable timeout),
* then removes it without force. If either the graceful stop or the non-force removal fails,
* falls back to force remove (SIGKILL).
* @param containerId Container ID to remove
*/
async removeContainer(containerId: string): Promise<void> {
this.logger.log(`Removing container: ${containerId}`);
const container = this.docker.getContainer(containerId);
// Try graceful stop first (SIGTERM with timeout), then non-force remove
try {
this.logger.log(
`Attempting graceful stop of container ${containerId} (timeout: ${this.gracefulStopTimeoutSeconds.toString()}s)`
);
await container.stop({ t: this.gracefulStopTimeoutSeconds });
await container.remove({ force: false });
this.logger.log(`Container gracefully stopped and removed: ${containerId}`);
return;
} catch (gracefulError) {
this.logger.warn(
`Graceful stop failed for container ${containerId}, falling back to force remove: ${gracefulError instanceof Error ? gracefulError.message : String(gracefulError)}`
);
}
// Fallback: force remove (SIGKILL)
try {
this.logger.log(`Removing container: ${containerId}`);
const container = this.docker.getContainer(containerId);
await container.remove({ force: true });
this.logger.log(`Container removed successfully: ${containerId}`);
this.logger.log(`Container force-removed: ${containerId}`);
} catch (error) {
const enhancedError = error instanceof Error ? error : new Error(String(error));
enhancedError.message = `Failed to remove container ${containerId}: ${enhancedError.message}`;