fix(CQ-ORCH-7): Graceful Docker container shutdown before force remove
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Replace the always-force container removal (SIGKILL) with a two-phase approach: first attempt a graceful stop (SIGTERM with a configurable timeout), then remove the container without force. Fall back to a force remove only if the graceful path (stop or non-force remove) fails. The graceful stop timeout is configurable via orchestrator.sandbox.gracefulStopTimeoutSeconds (default: 10 seconds).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -81,6 +81,7 @@ export class DockerSandboxService {
|
||||
private readonly defaultNetworkMode: string;
|
||||
private readonly envWhitelist: readonly string[];
|
||||
private readonly defaultSecurityOptions: Required<DockerSecurityOptions>;
|
||||
private readonly gracefulStopTimeoutSeconds: number;
|
||||
|
||||
constructor(
|
||||
private readonly configService: ConfigService,
|
||||
@@ -144,6 +145,11 @@ export class DockerSandboxService {
|
||||
noNewPrivileges: configNoNewPrivileges ?? DEFAULT_SECURITY_OPTIONS.noNewPrivileges,
|
||||
};
|
||||
|
||||
this.gracefulStopTimeoutSeconds = this.configService.get<number>(
|
||||
"orchestrator.sandbox.gracefulStopTimeoutSeconds",
|
||||
10
|
||||
);
|
||||
|
||||
// Validate default image tag at startup to fail fast on misconfiguration
|
||||
this.validateImageTag(this.defaultImage);
|
||||
|
||||
@@ -336,15 +342,34 @@ export class DockerSandboxService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a Docker container
|
||||
* Remove a Docker container with graceful shutdown.
|
||||
* First attempts to gracefully stop the container (SIGTERM with configurable timeout),
|
||||
* then removes it without force. If graceful stop fails, falls back to force remove (SIGKILL).
|
||||
* @param containerId Container ID to remove
|
||||
*/
|
||||
async removeContainer(containerId: string): Promise<void> {
|
||||
this.logger.log(`Removing container: ${containerId}`);
|
||||
const container = this.docker.getContainer(containerId);
|
||||
|
||||
// Try graceful stop first (SIGTERM with timeout), then non-force remove
|
||||
try {
|
||||
this.logger.log(
|
||||
`Attempting graceful stop of container ${containerId} (timeout: ${this.gracefulStopTimeoutSeconds.toString()}s)`
|
||||
);
|
||||
await container.stop({ t: this.gracefulStopTimeoutSeconds });
|
||||
await container.remove({ force: false });
|
||||
this.logger.log(`Container gracefully stopped and removed: ${containerId}`);
|
||||
return;
|
||||
} catch (gracefulError) {
|
||||
this.logger.warn(
|
||||
`Graceful stop failed for container ${containerId}, falling back to force remove: ${gracefulError instanceof Error ? gracefulError.message : String(gracefulError)}`
|
||||
);
|
||||
}
|
||||
|
||||
// Fallback: force remove (SIGKILL)
|
||||
try {
|
||||
this.logger.log(`Removing container: ${containerId}`);
|
||||
const container = this.docker.getContainer(containerId);
|
||||
await container.remove({ force: true });
|
||||
this.logger.log(`Container removed successfully: ${containerId}`);
|
||||
this.logger.log(`Container force-removed: ${containerId}`);
|
||||
} catch (error) {
|
||||
const enhancedError = error instanceof Error ? error : new Error(String(error));
|
||||
enhancedError.message = `Failed to remove container ${containerId}: ${enhancedError.message}`;
|
||||
|
||||
Reference in New Issue
Block a user