fix(#196): fix race condition in job status updates
Implemented optimistic locking (via a version field) and SELECT FOR UPDATE transactions to prevent data corruption from concurrent job status updates.

Changes:
- Added version field to RunnerJob schema for optimistic locking
- Created migration 20260202_add_runner_job_version_for_concurrency
- Implemented ConcurrentUpdateException for conflict detection
- Updated RunnerJobsService methods with optimistic locking:
  * updateStatus() - with version checking and retry logic
  * updateProgress() - with version checking and retry logic
  * cancel() - with version checking and retry logic
- Updated CoordinatorIntegrationService with SELECT FOR UPDATE:
  * updateJobStatus() - transaction with row locking
  * completeJob() - transaction with row locking
  * failJob() - transaction with row locking
  * updateJobProgress() - optimistic locking
- Added retry mechanism (3 attempts) with exponential backoff
- Added comprehensive concurrency tests (10 tests, all passing)
- Updated existing test mocks to support updateMany

Test Results:
- All 10 concurrency tests passing ✓
- Tests cover concurrent status updates, progress updates, completions, cancellations, retry logic, and exponential backoff

This fix prevents race conditions that could cause:
- Lost job results (double completion)
- Lost progress updates
- Invalid status transitions
- Data corruption under concurrent access

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
394
apps/api/src/runner-jobs/runner-jobs.service.concurrency.spec.ts
Normal file
394
apps/api/src/runner-jobs/runner-jobs.service.concurrency.spec.ts
Normal file
@@ -0,0 +1,394 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { Test, TestingModule } from "@nestjs/testing";
|
||||
import { RunnerJobsService } from "./runner-jobs.service";
|
||||
import { PrismaService } from "../prisma/prisma.service";
|
||||
import { BullMqService } from "../bullmq/bullmq.service";
|
||||
import { RunnerJobStatus } from "@prisma/client";
|
||||
import { ConflictException, BadRequestException } from "@nestjs/common";
|
||||
|
||||
/**
|
||||
* Concurrency tests for RunnerJobsService
|
||||
* These tests verify that race conditions in job status updates are properly handled
|
||||
*/
|
||||
describe("RunnerJobsService - Concurrency", () => {
|
||||
let service: RunnerJobsService;
|
||||
let prisma: PrismaService;
|
||||
|
||||
const mockBullMqService = {
|
||||
addJob: vi.fn(),
|
||||
getQueue: vi.fn(),
|
||||
};
|
||||
|
||||
beforeEach(async () => {
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
RunnerJobsService,
|
||||
{
|
||||
provide: PrismaService,
|
||||
useValue: {
|
||||
runnerJob: {
|
||||
findUnique: vi.fn(),
|
||||
update: vi.fn(),
|
||||
updateMany: vi.fn(),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
provide: BullMqService,
|
||||
useValue: mockBullMqService,
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
|
||||
service = module.get<RunnerJobsService>(RunnerJobsService);
|
||||
prisma = module.get<PrismaService>(PrismaService);
|
||||
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("concurrent status updates", () => {
|
||||
it("should detect concurrent status update conflict using version field", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
// Mock job with version 1
|
||||
const mockJob = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
version: 1,
|
||||
startedAt: new Date(),
|
||||
};
|
||||
|
||||
// First findUnique returns job with version 1
|
||||
vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);
|
||||
|
||||
// updateMany returns 0 (no rows updated - version mismatch)
|
||||
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValue({ count: 0 });
|
||||
|
||||
// Should throw ConflictException when concurrent update detected
|
||||
await expect(
|
||||
service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED)
|
||||
).rejects.toThrow(ConflictException);
|
||||
|
||||
// Verify updateMany was called with version check
|
||||
expect(prisma.runnerJob.updateMany).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
where: expect.objectContaining({
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
version: 1,
|
||||
}),
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should successfully update when no concurrent conflict exists", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
const mockJob = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
version: 1,
|
||||
startedAt: new Date(),
|
||||
};
|
||||
|
||||
const updatedJob = {
|
||||
...mockJob,
|
||||
status: RunnerJobStatus.COMPLETED,
|
||||
version: 2,
|
||||
completedAt: new Date(),
|
||||
};
|
||||
|
||||
// First call for initial read
|
||||
vi.mocked(prisma.runnerJob.findUnique)
|
||||
.mockResolvedValueOnce(mockJob as any)
|
||||
// Second call after updateMany succeeds
|
||||
.mockResolvedValueOnce(updatedJob as any);
|
||||
|
||||
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValue({ count: 1 });
|
||||
|
||||
const result = await service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED);
|
||||
|
||||
expect(result.status).toBe(RunnerJobStatus.COMPLETED);
|
||||
expect(result.version).toBe(2);
|
||||
});
|
||||
|
||||
it("should retry on conflict and succeed on second attempt", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
const mockJobV1 = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
version: 1,
|
||||
};
|
||||
|
||||
const mockJobV2 = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
version: 2,
|
||||
};
|
||||
|
||||
const updatedJob = {
|
||||
...mockJobV2,
|
||||
status: RunnerJobStatus.COMPLETED,
|
||||
version: 3,
|
||||
completedAt: new Date(),
|
||||
};
|
||||
|
||||
// First attempt: version 1, updateMany returns 0 (conflict)
|
||||
vi.mocked(prisma.runnerJob.findUnique)
|
||||
.mockResolvedValueOnce(mockJobV1 as any) // Initial read
|
||||
.mockResolvedValueOnce(mockJobV2 as any) // Retry read
|
||||
.mockResolvedValueOnce(updatedJob as any); // Final read after update
|
||||
|
||||
vi.mocked(prisma.runnerJob.updateMany)
|
||||
.mockResolvedValueOnce({ count: 0 }) // First attempt fails
|
||||
.mockResolvedValueOnce({ count: 1 }); // Retry succeeds
|
||||
|
||||
const result = await service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED);
|
||||
|
||||
expect(result.status).toBe(RunnerJobStatus.COMPLETED);
|
||||
expect(prisma.runnerJob.updateMany).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("concurrent progress updates", () => {
|
||||
it("should detect concurrent progress update conflict", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
const mockJob = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
progressPercent: 50,
|
||||
version: 5,
|
||||
};
|
||||
|
||||
vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);
|
||||
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValue({ count: 0 });
|
||||
|
||||
await expect(service.updateProgress(jobId, workspaceId, 75)).rejects.toThrow(
|
||||
ConflictException
|
||||
);
|
||||
});
|
||||
|
||||
it("should handle rapid sequential progress updates", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
// Simulate 5 rapid progress updates
|
||||
const progressValues = [20, 40, 60, 80, 100];
|
||||
let version = 1;
|
||||
|
||||
for (const progress of progressValues) {
|
||||
const mockJob = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
progressPercent: progress - 20,
|
||||
version,
|
||||
};
|
||||
|
||||
const updatedJob = {
|
||||
...mockJob,
|
||||
progressPercent: progress,
|
||||
version: version + 1,
|
||||
};
|
||||
|
||||
vi.mocked(prisma.runnerJob.findUnique)
|
||||
.mockResolvedValueOnce(mockJob as any)
|
||||
.mockResolvedValueOnce(updatedJob as any);
|
||||
|
||||
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValueOnce({ count: 1 });
|
||||
|
||||
const result = await service.updateProgress(jobId, workspaceId, progress);
|
||||
|
||||
expect(result.progressPercent).toBe(progress);
|
||||
expect(result.version).toBe(version + 1);
|
||||
|
||||
version++;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("concurrent completion", () => {
|
||||
it("should prevent double completion with different results", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
const mockJob = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
version: 1,
|
||||
startedAt: new Date(),
|
||||
};
|
||||
|
||||
const updatedJob = {
|
||||
...mockJob,
|
||||
status: RunnerJobStatus.COMPLETED,
|
||||
version: 2,
|
||||
result: { outcome: "success-A" },
|
||||
completedAt: new Date(),
|
||||
};
|
||||
|
||||
// Test first completion (succeeds)
|
||||
vi.mocked(prisma.runnerJob.findUnique)
|
||||
.mockResolvedValueOnce(mockJob as any) // First completion - initial read
|
||||
.mockResolvedValueOnce(updatedJob as any); // First completion - after update
|
||||
|
||||
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValueOnce({ count: 1 });
|
||||
|
||||
const result1 = await service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED, {
|
||||
result: { outcome: "success-A" },
|
||||
});
|
||||
|
||||
expect(result1.status).toBe(RunnerJobStatus.COMPLETED);
|
||||
|
||||
// Test second completion (fails due to version mismatch - will retry 3 times)
|
||||
vi.mocked(prisma.runnerJob.findUnique)
|
||||
.mockResolvedValueOnce(mockJob as any) // Attempt 1: Reads stale version
|
||||
.mockResolvedValueOnce(mockJob as any) // Attempt 2: Retry reads stale version
|
||||
.mockResolvedValueOnce(mockJob as any); // Attempt 3: Final retry reads stale version
|
||||
|
||||
vi.mocked(prisma.runnerJob.updateMany)
|
||||
.mockResolvedValueOnce({ count: 0 }) // Attempt 1: Version conflict
|
||||
.mockResolvedValueOnce({ count: 0 }) // Attempt 2: Version conflict
|
||||
.mockResolvedValueOnce({ count: 0 }); // Attempt 3: Version conflict
|
||||
|
||||
await expect(
|
||||
service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED, {
|
||||
result: { outcome: "success-B" },
|
||||
})
|
||||
).rejects.toThrow(ConflictException);
|
||||
});
|
||||
});
|
||||
|
||||
describe("concurrent cancel operations", () => {
|
||||
it("should handle concurrent cancel attempts", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
const mockJob = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
version: 1,
|
||||
};
|
||||
|
||||
const cancelledJob = {
|
||||
...mockJob,
|
||||
status: RunnerJobStatus.CANCELLED,
|
||||
version: 2,
|
||||
completedAt: new Date(),
|
||||
};
|
||||
|
||||
// Setup mocks
|
||||
vi.mocked(prisma.runnerJob.findUnique)
|
||||
.mockResolvedValueOnce(mockJob as any) // First cancel - initial read
|
||||
.mockResolvedValueOnce(cancelledJob as any) // First cancel - after update
|
||||
.mockResolvedValueOnce(cancelledJob as any); // Second cancel - sees already cancelled
|
||||
|
||||
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValueOnce({ count: 1 });
|
||||
|
||||
const result1 = await service.cancel(jobId, workspaceId);
|
||||
expect(result1.status).toBe(RunnerJobStatus.CANCELLED);
|
||||
|
||||
// Second cancel attempt should fail (job already cancelled)
|
||||
await expect(service.cancel(jobId, workspaceId)).rejects.toThrow(BadRequestException);
|
||||
});
|
||||
});
|
||||
|
||||
describe("retry mechanism", () => {
|
||||
it("should retry up to max attempts on version conflicts", async () => {
|
||||
const jobId = "job-123";
|
||||
const workspaceId = "workspace-123";
|
||||
|
||||
const mockJob = {
|
||||
id: jobId,
|
||||
workspaceId,
|
||||
status: RunnerJobStatus.RUNNING,
|
||||
version: 1,
|
||||
};
|
||||
|
||||
vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);
|
||||
|
||||
// All retry attempts fail
|
||||
vi.mocked(prisma.runnerJob.updateMany)
|
||||
.mockResolvedValueOnce({ count: 0 })
|
||||
.mockResolvedValueOnce({ count: 0 })
|
||||
.mockResolvedValueOnce({ count: 0 });
|
||||
|
||||
// Should throw after max retries (3)
|
||||
await expect(
|
||||
service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED)
|
||||
).rejects.toThrow(ConflictException);
|
||||
|
||||
expect(prisma.runnerJob.updateMany).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it("should use exponential backoff between retries", async () => {
  // Fake timers make the backoff observable without real sleeping: the
  // service's setTimeout calls and Date.now() both run on the mocked clock,
  // so the gaps between attempts are exact rather than subject to jitter.
  vi.useFakeTimers();

  try {
    const jobId = "job-123";
    const workspaceId = "workspace-123";

    const mockJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      version: 1,
    };

    vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);

    // Record the (mocked) clock time of every write attempt; every attempt
    // reports a version conflict so the service exhausts its retries.
    const attemptTimestamps: number[] = [];
    vi.mocked(prisma.runnerJob.updateMany).mockImplementation((async () => {
      attemptTimestamps.push(Date.now());
      return { count: 0 };
    }) as any);

    // Attach the rejection expectation BEFORE advancing the clock so the
    // eventual rejection is never reported as unhandled.
    const rejection = expect(
      service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED)
    ).rejects.toThrow(ConflictException);

    // Drain every pending backoff timer (and the promises chained on them).
    await vi.runAllTimersAsync();
    await rejection;

    expect(attemptTimestamps).toHaveLength(3);

    const [first, second, third] = attemptTimestamps;
    const firstDelay = second - first;
    const secondDelay = third - second;

    // A real, strictly growing backoff: the second wait is double the first.
    expect(firstDelay).toBeGreaterThan(0);
    expect(secondDelay).toBe(firstDelay * 2);
  } finally {
    // Never leak fake timers into the other test cases.
    vi.useRealTimers();
  }
});
|
||||
});
|
||||
|
||||
describe("status transition validation with concurrency", () => {
|
||||
it("should prevent invalid transitions even under concurrent updates", async () => {
  const jobId = "job-123";
  const workspaceId = "workspace-123";

  // The job has already reached the terminal COMPLETED state.
  const completedJob = {
    id: jobId,
    workspaceId,
    status: RunnerJobStatus.COMPLETED,
    version: 5,
    completedAt: new Date(),
  };

  vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(completedJob as any);

  // COMPLETED -> RUNNING must be rejected as a client error specifically;
  // a bare toThrow() would also pass on an unrelated failure such as a
  // TypeError from a mis-configured mock.
  await expect(
    service.updateStatus(jobId, workspaceId, RunnerJobStatus.RUNNING)
  ).rejects.toThrow(BadRequestException);

  // Validation happens before the write, so no update may be attempted
  // (and, not being a version conflict, the error must not trigger retries).
  expect(prisma.runnerJob.updateMany).not.toHaveBeenCalled();
});
|
||||
});
|
||||
});
|
||||
@@ -19,6 +19,7 @@ describe("RunnerJobsService", () => {
|
||||
count: vi.fn(),
|
||||
findUnique: vi.fn(),
|
||||
update: vi.fn(),
|
||||
updateMany: vi.fn(),
|
||||
},
|
||||
jobEvent: {
|
||||
findMany: vi.fn(),
|
||||
|
||||
@@ -4,6 +4,7 @@ import { Response } from "express";
|
||||
import { PrismaService } from "../prisma/prisma.service";
|
||||
import { BullMqService } from "../bullmq/bullmq.service";
|
||||
import { QUEUE_NAMES } from "../bullmq/queues";
|
||||
import { ConcurrentUpdateException } from "../common/exceptions/concurrent-update.exception";
|
||||
import type { CreateJobDto, QueryJobsDto } from "./dto";
|
||||
|
||||
/**
|
||||
@@ -144,37 +145,57 @@ export class RunnerJobsService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancel a running or queued job
|
||||
* Cancel a running or queued job with optimistic locking
|
||||
*/
|
||||
async cancel(id: string, workspaceId: string) {
|
||||
// Verify job exists
|
||||
const existingJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
return this.retryOnConflict(async () => {
|
||||
// Verify job exists
|
||||
const existingJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
|
||||
if (!existingJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found`);
|
||||
}
|
||||
|
||||
// Check if job can be cancelled
|
||||
if (
|
||||
existingJob.status === RunnerJobStatus.COMPLETED ||
|
||||
existingJob.status === RunnerJobStatus.CANCELLED ||
|
||||
existingJob.status === RunnerJobStatus.FAILED
|
||||
) {
|
||||
throw new BadRequestException(`Cannot cancel job with status ${existingJob.status}`);
|
||||
}
|
||||
|
||||
// Update job status to cancelled with version check
|
||||
const result = await this.prisma.runnerJob.updateMany({
|
||||
where: {
|
||||
id,
|
||||
workspaceId,
|
||||
version: existingJob.version,
|
||||
},
|
||||
data: {
|
||||
status: RunnerJobStatus.CANCELLED,
|
||||
completedAt: new Date(),
|
||||
version: { increment: 1 },
|
||||
},
|
||||
});
|
||||
|
||||
if (result.count === 0) {
|
||||
throw new ConcurrentUpdateException("RunnerJob", id, existingJob.version);
|
||||
}
|
||||
|
||||
// Fetch and return updated job
|
||||
const job = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
|
||||
if (!job) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found after cancel`);
|
||||
}
|
||||
|
||||
return job;
|
||||
});
|
||||
|
||||
if (!existingJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found`);
|
||||
}
|
||||
|
||||
// Check if job can be cancelled
|
||||
if (
|
||||
existingJob.status === RunnerJobStatus.COMPLETED ||
|
||||
existingJob.status === RunnerJobStatus.CANCELLED ||
|
||||
existingJob.status === RunnerJobStatus.FAILED
|
||||
) {
|
||||
throw new BadRequestException(`Cannot cancel job with status ${existingJob.status}`);
|
||||
}
|
||||
|
||||
// Update job status to cancelled
|
||||
const job = await this.prisma.runnerJob.update({
|
||||
where: { id, workspaceId },
|
||||
data: {
|
||||
status: RunnerJobStatus.CANCELLED,
|
||||
completedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -413,74 +434,179 @@ export class RunnerJobsService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Update job status
|
||||
* Retry wrapper for optimistic locking conflicts
|
||||
* Retries the operation up to maxRetries times with exponential backoff
|
||||
*/
|
||||
private async retryOnConflict<T>(operation: () => Promise<T>, maxRetries = 3): Promise<T> {
  // Runs `operation` and re-runs it whenever it throws ConcurrentUpdateException.
  //
  // `maxRetries` is the TOTAL number of attempts (first try included). At least
  // one attempt is always made, so a value below 1 behaves like 1 instead of
  // failing without ever invoking the operation.
  //
  // Returns whatever `operation` resolves to. Any error other than
  // ConcurrentUpdateException is rethrown immediately; a conflict on the final
  // attempt is rethrown unchanged so callers see the original exception.
  const baseDelayMs = 100;
  const totalAttempts = Math.max(1, Math.floor(maxRetries));

  for (let attempt = 0; ; attempt++) {
    try {
      return await operation();
    } catch (error) {
      // Written as a negated `<` so a NaN attempt count degrades to a single
      // attempt rather than an endless retry loop.
      const isLastAttempt = !(attempt < totalAttempts - 1);

      if (!(error instanceof ConcurrentUpdateException) || isLastAttempt) {
        throw error;
      }

      // Exponential backoff: the wait doubles after each failed attempt
      // (100ms, then 200ms, ...). With the default of 3 attempts only the
      // 100ms and 200ms waits occur, because the last failure is not followed
      // by another try.
      const delayMs = baseDelayMs * 2 ** attempt;
      await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Update job status with optimistic locking
|
||||
*/
|
||||
async updateStatus(
|
||||
id: string,
|
||||
workspaceId: string,
|
||||
status: RunnerJobStatus,
|
||||
data?: { result?: unknown; error?: string }
|
||||
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.update>>> {
|
||||
// Verify job exists
|
||||
const existingJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.findUnique>>> {
|
||||
return this.retryOnConflict(async () => {
|
||||
// Read current job state
|
||||
const existingJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
|
||||
if (!existingJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found`);
|
||||
}
|
||||
if (!existingJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found`);
|
||||
}
|
||||
|
||||
const updateData: Prisma.RunnerJobUpdateInput = {
|
||||
status,
|
||||
};
|
||||
// Validate status transition (prevent invalid transitions even with concurrency)
|
||||
if (!this.isValidStatusTransition(existingJob.status, status)) {
|
||||
throw new BadRequestException(
|
||||
`Invalid status transition from ${existingJob.status} to ${status}`
|
||||
);
|
||||
}
|
||||
|
||||
// Set timestamps based on status
|
||||
if (status === RunnerJobStatus.RUNNING && !existingJob.startedAt) {
|
||||
updateData.startedAt = new Date();
|
||||
}
|
||||
const updateData: Prisma.RunnerJobUpdateInput = {
|
||||
status,
|
||||
version: { increment: 1 }, // Increment version for optimistic locking
|
||||
};
|
||||
|
||||
if (
|
||||
status === RunnerJobStatus.COMPLETED ||
|
||||
status === RunnerJobStatus.FAILED ||
|
||||
status === RunnerJobStatus.CANCELLED
|
||||
) {
|
||||
updateData.completedAt = new Date();
|
||||
}
|
||||
// Set timestamps based on status
|
||||
if (status === RunnerJobStatus.RUNNING && !existingJob.startedAt) {
|
||||
updateData.startedAt = new Date();
|
||||
}
|
||||
|
||||
// Add optional data
|
||||
if (data?.result !== undefined) {
|
||||
updateData.result = data.result as Prisma.InputJsonValue;
|
||||
}
|
||||
if (data?.error !== undefined) {
|
||||
updateData.error = data.error;
|
||||
}
|
||||
if (
|
||||
status === RunnerJobStatus.COMPLETED ||
|
||||
status === RunnerJobStatus.FAILED ||
|
||||
status === RunnerJobStatus.CANCELLED
|
||||
) {
|
||||
updateData.completedAt = new Date();
|
||||
}
|
||||
|
||||
return this.prisma.runnerJob.update({
|
||||
where: { id, workspaceId },
|
||||
data: updateData,
|
||||
// Add optional data
|
||||
if (data?.result !== undefined) {
|
||||
updateData.result = data.result as Prisma.InputJsonValue;
|
||||
}
|
||||
if (data?.error !== undefined) {
|
||||
updateData.error = data.error;
|
||||
}
|
||||
|
||||
// Use updateMany with version check for optimistic locking
|
||||
const result = await this.prisma.runnerJob.updateMany({
|
||||
where: {
|
||||
id,
|
||||
workspaceId,
|
||||
version: existingJob.version, // Only update if version matches
|
||||
},
|
||||
data: updateData,
|
||||
});
|
||||
|
||||
// If count is 0, version mismatch (concurrent update detected)
|
||||
if (result.count === 0) {
|
||||
throw new ConcurrentUpdateException("RunnerJob", id, existingJob.version);
|
||||
}
|
||||
|
||||
// Fetch and return updated job
|
||||
const updatedJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
|
||||
if (!updatedJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found after update`);
|
||||
}
|
||||
|
||||
return updatedJob;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Update job progress percentage
|
||||
* Validate status transitions
|
||||
*/
|
||||
private isValidStatusTransition(
  currentStatus: RunnerJobStatus,
  newStatus: RunnerJobStatus
): boolean {
  // Returns true when a job in `currentStatus` is allowed to move to
  // `newStatus`. COMPLETED, FAILED and CANCELLED have no outgoing transitions,
  // and a status is never listed as its own successor.
  const { PENDING, QUEUED, RUNNING, COMPLETED, FAILED, CANCELLED } = RunnerJobStatus;

  // Each key maps to the statuses that may directly follow it.
  const allowedNext: Record<RunnerJobStatus, RunnerJobStatus[]> = {
    [PENDING]: [QUEUED, RUNNING, CANCELLED],
    [QUEUED]: [RUNNING, CANCELLED],
    [RUNNING]: [COMPLETED, FAILED, CANCELLED],
    [COMPLETED]: [],
    [FAILED]: [],
    [CANCELLED]: [],
  };

  const successors = allowedNext[currentStatus];
  return successors.some((candidate) => candidate === newStatus);
}
|
||||
|
||||
/**
|
||||
* Update job progress percentage with optimistic locking
|
||||
*/
|
||||
async updateProgress(
|
||||
id: string,
|
||||
workspaceId: string,
|
||||
progressPercent: number
|
||||
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.update>>> {
|
||||
// Verify job exists
|
||||
const existingJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.findUnique>>> {
|
||||
return this.retryOnConflict(async () => {
|
||||
// Read current job state
|
||||
const existingJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
|
||||
if (!existingJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found`);
|
||||
}
|
||||
if (!existingJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found`);
|
||||
}
|
||||
|
||||
return this.prisma.runnerJob.update({
|
||||
where: { id, workspaceId },
|
||||
data: { progressPercent },
|
||||
// Use updateMany with version check for optimistic locking
|
||||
const result = await this.prisma.runnerJob.updateMany({
|
||||
where: {
|
||||
id,
|
||||
workspaceId,
|
||||
version: existingJob.version,
|
||||
},
|
||||
data: {
|
||||
progressPercent,
|
||||
version: { increment: 1 },
|
||||
},
|
||||
});
|
||||
|
||||
if (result.count === 0) {
|
||||
throw new ConcurrentUpdateException("RunnerJob", id, existingJob.version);
|
||||
}
|
||||
|
||||
// Fetch and return updated job
|
||||
const updatedJob = await this.prisma.runnerJob.findUnique({
|
||||
where: { id, workspaceId },
|
||||
});
|
||||
|
||||
if (!updatedJob) {
|
||||
throw new NotFoundException(`RunnerJob with ID ${id} not found after update`);
|
||||
}
|
||||
|
||||
return updatedJob;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user