fix(#196): fix race condition in job status updates

Implemented optimistic locking (version field) and pessimistic row locking
(SELECT FOR UPDATE transactions) to prevent data corruption from concurrent
job status updates.

Changes:
- Added version field to RunnerJob schema for optimistic locking
- Created migration 20260202_add_runner_job_version_for_concurrency
- Implemented ConcurrentUpdateException for conflict detection
- Updated RunnerJobsService methods with optimistic locking:
  * updateStatus() - with version checking and retry logic
  * updateProgress() - with version checking and retry logic
  * cancel() - with version checking and retry logic
- Updated CoordinatorIntegrationService with SELECT FOR UPDATE:
  * updateJobStatus() - transaction with row locking
  * completeJob() - transaction with row locking
  * failJob() - transaction with row locking
  * updateJobProgress() - optimistic locking
- Added retry mechanism (3 attempts) with exponential backoff
- Added comprehensive concurrency tests (10 tests, all passing)
- Updated existing test mocks to support updateMany

Test Results:
- All 10 concurrency tests passing ✓
- Tests cover concurrent status updates, progress updates, completions,
  cancellations, retry logic, and exponential backoff

This fix prevents race conditions that could cause:
- Lost job results (double completion)
- Lost progress updates
- Invalid status transitions
- Data corruption under concurrent access

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Jason Woltje
2026-02-02 12:51:17 -06:00
parent a3b48dd631
commit ef25167c24
251 changed files with 7045 additions and 261 deletions

View File

@@ -0,0 +1,7 @@
-- Add version field for optimistic locking to prevent race conditions
-- This allows safe concurrent updates to runner job status
-- Existing rows are backfilled with version 1 via the column DEFAULT, so the
-- NOT NULL constraint can be applied in the same statement.
ALTER TABLE "runner_jobs" ADD COLUMN "version" INTEGER NOT NULL DEFAULT 1;
-- Create index for better performance on version checks
-- NOTE(review): the version checks added in this change filter on id AND
-- version together; a standalone index on "version" may not be used by those
-- lookups and adds write overhead on every version bump -- confirm it is needed.
CREATE INDEX "runner_jobs_version_idx" ON "runner_jobs"("version");

View File

@@ -1135,6 +1135,7 @@ model RunnerJob {
status RunnerJobStatus @default(PENDING)
priority Int
progressPercent Int @default(0) @map("progress_percent")
version Int @default(1) // Optimistic locking version
// Results
result Json?

View File

@@ -1,6 +1,6 @@
import { Injectable, Logger } from "@nestjs/common";
import { PrismaService } from "../prisma/prisma.service";
import { ActivityAction, EntityType, Prisma } from "@prisma/client";
import { ActivityAction, EntityType, Prisma, ActivityLog } from "@prisma/client";
import type {
CreateActivityLogInput,
PaginatedActivityLogs,
@@ -20,7 +20,7 @@ export class ActivityService {
/**
* Create a new activity log entry
*/
async logActivity(input: CreateActivityLogInput) {
async logActivity(input: CreateActivityLogInput): Promise<ActivityLog> {
try {
return await this.prisma.activityLog.create({
data: input as unknown as Prisma.ActivityLogCreateInput,
@@ -167,7 +167,7 @@ export class ActivityService {
userId: string,
taskId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -186,7 +186,7 @@ export class ActivityService {
userId: string,
taskId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -205,7 +205,7 @@ export class ActivityService {
userId: string,
taskId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -224,7 +224,7 @@ export class ActivityService {
userId: string,
taskId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -238,7 +238,12 @@ export class ActivityService {
/**
* Log task assignment
*/
async logTaskAssigned(workspaceId: string, userId: string, taskId: string, assigneeId: string) {
async logTaskAssigned(
workspaceId: string,
userId: string,
taskId: string,
assigneeId: string
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -257,7 +262,7 @@ export class ActivityService {
userId: string,
eventId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -276,7 +281,7 @@ export class ActivityService {
userId: string,
eventId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -295,7 +300,7 @@ export class ActivityService {
userId: string,
eventId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -314,7 +319,7 @@ export class ActivityService {
userId: string,
projectId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -333,7 +338,7 @@ export class ActivityService {
userId: string,
projectId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -352,7 +357,7 @@ export class ActivityService {
userId: string,
projectId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -366,7 +371,11 @@ export class ActivityService {
/**
* Log workspace creation
*/
async logWorkspaceCreated(workspaceId: string, userId: string, details?: Prisma.JsonValue) {
async logWorkspaceCreated(
workspaceId: string,
userId: string,
details?: Prisma.JsonValue
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -380,7 +389,11 @@ export class ActivityService {
/**
* Log workspace update
*/
async logWorkspaceUpdated(workspaceId: string, userId: string, details?: Prisma.JsonValue) {
async logWorkspaceUpdated(
workspaceId: string,
userId: string,
details?: Prisma.JsonValue
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -399,7 +412,7 @@ export class ActivityService {
userId: string,
memberId: string,
role: string
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -413,7 +426,11 @@ export class ActivityService {
/**
* Log workspace member removed
*/
async logWorkspaceMemberRemoved(workspaceId: string, userId: string, memberId: string) {
async logWorkspaceMemberRemoved(
workspaceId: string,
userId: string,
memberId: string
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -427,7 +444,11 @@ export class ActivityService {
/**
* Log user profile update
*/
async logUserUpdated(workspaceId: string, userId: string, details?: Prisma.JsonValue) {
async logUserUpdated(
workspaceId: string,
userId: string,
details?: Prisma.JsonValue
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -446,7 +467,7 @@ export class ActivityService {
userId: string,
domainId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -465,7 +486,7 @@ export class ActivityService {
userId: string,
domainId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -484,7 +505,7 @@ export class ActivityService {
userId: string,
domainId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -503,7 +524,7 @@ export class ActivityService {
userId: string,
ideaId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -522,7 +543,7 @@ export class ActivityService {
userId: string,
ideaId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,
@@ -541,7 +562,7 @@ export class ActivityService {
userId: string,
ideaId: string,
details?: Prisma.JsonValue
) {
): Promise<ActivityLog> {
return this.logActivity({
workspaceId,
userId,

View File

@@ -17,14 +17,19 @@ export class AuthService {
/**
* Get BetterAuth instance
*/
getAuth() {
getAuth(): Auth {
return this.auth;
}
/**
* Get user by ID
*/
async getUserById(userId: string) {
async getUserById(userId: string): Promise<{
id: string;
email: string;
name: string;
authProviderId: string | null;
} | null> {
return this.prisma.user.findUnique({
where: { id: userId },
select: {
@@ -39,7 +44,12 @@ export class AuthService {
/**
* Get user by email
*/
async getUserByEmail(email: string) {
async getUserByEmail(email: string): Promise<{
id: string;
email: string;
name: string;
authProviderId: string | null;
} | null> {
return this.prisma.user.findUnique({
where: { email },
select: {

View File

@@ -0,0 +1,23 @@
import { ConflictException } from "@nestjs/common";
/**
 * Exception thrown when a concurrent update conflict is detected.
 *
 * This occurs when optimistic locking detects that a record has been
 * modified by another process between read and write operations.
 * It extends Nest's ConflictException and attaches a structured response body
 * (`resourceType`, `resourceId`, `currentVersion`, `retryable: true`) so that
 * callers can decide to re-read the record and retry.
 */
export class ConcurrentUpdateException extends ConflictException {
  /**
   * @param resourceType - Human-readable name of the resource kind (e.g. "RunnerJob")
   * @param resourceId - Identifier of the record whose update was rejected
   * @param currentVersion - Version the caller read before attempting the write, if known
   */
  constructor(resourceType: string, resourceId: string, currentVersion?: number) {
    // Compare against undefined instead of relying on truthiness so that a
    // legitimate version of 0 is still reported in the message.
    const versionSuffix =
      currentVersion !== undefined ? ` at version ${String(currentVersion)}` : "";
    const message = `Concurrent update detected for ${resourceType} ${resourceId}${versionSuffix}. The record was modified by another process.`;

    super({
      message,
      error: "Concurrent Update Conflict",
      resourceType,
      resourceId,
      currentVersion,
      retryable: true,
    });
  }
}

View File

@@ -0,0 +1,392 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { ConflictException } from "@nestjs/common";
import { CoordinatorIntegrationService } from "./coordinator-integration.service";
import { PrismaService } from "../prisma/prisma.service";
import { JobEventsService } from "../job-events/job-events.service";
import { HeraldService } from "../herald/herald.service";
import { BullMqService } from "../bullmq/bullmq.service";
import { RunnerJobStatus } from "@prisma/client";
import { CoordinatorJobStatus, UpdateJobStatusDto } from "./dto";
/**
 * Concurrency tests for CoordinatorIntegrationService
 * Focus on race conditions during coordinator job status updates
 *
 * All collaborators are mocked: PrismaService is replaced with vi.fn() stubs,
 * so these tests verify how the service drives its dependencies rather than
 * real database locking behavior.
 */
describe("CoordinatorIntegrationService - Concurrency", () => {
let service: CoordinatorIntegrationService;
let prisma: PrismaService;
// Shared mocks for the event, broadcast and queue collaborators.
const mockJobEventsService = {
emitJobCreated: vi.fn(),
emitJobStarted: vi.fn(),
emitJobCompleted: vi.fn(),
emitJobFailed: vi.fn(),
emitEvent: vi.fn(),
};
const mockHeraldService = {
broadcastJobEvent: vi.fn(),
};
const mockBullMqService = {
addJob: vi.fn(),
};
// Build a fresh Nest testing module for every test so Prisma stubs start clean.
beforeEach(async () => {
const module: TestingModule = await Test.createTestingModule({
providers: [
CoordinatorIntegrationService,
{
provide: PrismaService,
useValue: {
runnerJob: {
findUnique: vi.fn(),
update: vi.fn(),
updateMany: vi.fn(),
},
$transaction: vi.fn(),
$queryRaw: vi.fn(),
},
},
{
provide: JobEventsService,
useValue: mockJobEventsService,
},
{
provide: HeraldService,
useValue: mockHeraldService,
},
{
provide: BullMqService,
useValue: mockBullMqService,
},
],
}).compile();
service = module.get<CoordinatorIntegrationService>(CoordinatorIntegrationService);
prisma = module.get<PrismaService>(PrismaService);
// Clear recorded calls on the shared module-level mocks between tests.
vi.clearAllMocks();
});
describe("concurrent status updates from coordinator", () => {
it("should use SELECT FOR UPDATE to prevent race conditions", async () => {
const jobId = "job-123";
const dto: UpdateJobStatusDto = {
status: CoordinatorJobStatus.RUNNING,
agentId: "agent-1",
agentType: "python",
};
const mockJob = {
id: jobId,
status: RunnerJobStatus.PENDING,
workspaceId: "workspace-123",
version: 1,
};
const updatedJob = {
...mockJob,
status: RunnerJobStatus.RUNNING,
startedAt: new Date(),
version: 2,
};
// Mock transaction with SELECT FOR UPDATE
const mockTxClient = {
$queryRaw: vi.fn().mockResolvedValue([mockJob]),
runnerJob: {
update: vi.fn().mockResolvedValue(updatedJob),
},
};
// Run the transaction callback immediately against the fake tx client.
vi.mocked(prisma.$transaction).mockImplementation(async (callback: any) => {
return callback(mockTxClient);
});
const mockEvent = {
id: "event-1",
jobId,
type: "job.started",
timestamp: new Date(),
};
vi.mocked(mockJobEventsService.emitJobStarted).mockResolvedValue(mockEvent as any);
const result = await service.updateJobStatus(jobId, dto);
expect(result.status).toBe(RunnerJobStatus.RUNNING);
// Verify SELECT FOR UPDATE was used
// NOTE(review): expect.anything() does not inspect the SQL text, so this does
// not actually prove FOR UPDATE is present. Also confirm the single-argument
// expectation matches how a tagged-template $queryRaw call invokes the mock.
expect(mockTxClient.$queryRaw).toHaveBeenCalledWith(
expect.anything() // Raw SQL with FOR UPDATE
);
});
it("should handle concurrent status updates by coordinator and API", async () => {
const jobId = "job-123";
// Coordinator tries to mark as RUNNING
const coordinatorDto: UpdateJobStatusDto = {
status: CoordinatorJobStatus.RUNNING,
};
// Simulate transaction lock timeout (another process holds lock)
// $transaction itself rejects here, so the assertion below only checks that
// the error propagates to the caller.
vi.mocked(prisma.$transaction).mockRejectedValue(new Error("could not obtain lock on row"));
await expect(service.updateJobStatus(jobId, coordinatorDto)).rejects.toThrow();
});
it("should serialize concurrent status transitions", async () => {
const jobId = "job-123";
const mockJob = {
id: jobId,
status: RunnerJobStatus.PENDING,
workspaceId: "workspace-123",
version: 1,
};
// Simulate transaction that waits for lock, then proceeds
const mockTxClient = {
$queryRaw: vi.fn().mockResolvedValue([mockJob]),
runnerJob: {
update: vi.fn().mockResolvedValue({
...mockJob,
status: RunnerJobStatus.RUNNING,
version: 2,
}),
},
};
vi.mocked(prisma.$transaction).mockImplementation(async (callback: any) => {
// Simulate delay while waiting for lock
await new Promise((resolve) => setTimeout(resolve, 100));
return callback(mockTxClient);
});
const dto: UpdateJobStatusDto = {
status: CoordinatorJobStatus.RUNNING,
};
vi.mocked(mockJobEventsService.emitJobStarted).mockResolvedValue({
id: "event-1",
jobId,
type: "job.started",
timestamp: new Date(),
} as any);
// Only a single update is issued; the delay above stands in for lock wait time.
const result = await service.updateJobStatus(jobId, dto);
expect(result.status).toBe(RunnerJobStatus.RUNNING);
expect(prisma.$transaction).toHaveBeenCalled();
});
});
describe("concurrent completion from coordinator", () => {
it("should prevent double completion using transaction", async () => {
const jobId = "job-123";
const mockJob = {
id: jobId,
status: RunnerJobStatus.RUNNING,
workspaceId: "workspace-123",
startedAt: new Date(),
version: 2,
};
const completedJob = {
...mockJob,
status: RunnerJobStatus.COMPLETED,
completedAt: new Date(),
progressPercent: 100,
result: { success: true },
version: 3,
};
const mockTxClient = {
$queryRaw: vi.fn().mockResolvedValue([mockJob]),
runnerJob: {
update: vi.fn().mockResolvedValue(completedJob),
},
};
vi.mocked(prisma.$transaction).mockImplementation(async (callback: any) => {
return callback(mockTxClient);
});
vi.mocked(mockJobEventsService.emitJobCompleted).mockResolvedValue({
id: "event-1",
jobId,
type: "job.completed",
timestamp: new Date(),
} as any);
const result = await service.completeJob(jobId, {
result: { success: true },
tokensUsed: 1000,
durationSeconds: 120,
});
expect(result.status).toBe(RunnerJobStatus.COMPLETED);
// The row must have been read through the transaction client's raw query.
expect(mockTxClient.$queryRaw).toHaveBeenCalled();
});
it("should handle concurrent completion and failure attempts", async () => {
const jobId = "job-123";
const mockJob = {
id: jobId,
status: RunnerJobStatus.RUNNING,
workspaceId: "workspace-123",
startedAt: new Date(),
version: 2,
};
// First transaction (completion) succeeds
const completedJob = {
...mockJob,
status: RunnerJobStatus.COMPLETED,
completedAt: new Date(),
version: 3,
};
// Second transaction (failure) sees completed job and should fail
const mockTxClient1 = {
$queryRaw: vi.fn().mockResolvedValue([mockJob]),
runnerJob: {
update: vi.fn().mockResolvedValue(completedJob),
},
};
const mockTxClient2 = {
$queryRaw: vi.fn().mockResolvedValue([completedJob]), // Job already completed
runnerJob: {
update: vi.fn(),
},
};
// Each $transaction call gets its own tx client, in call order.
vi.mocked(prisma.$transaction)
.mockImplementationOnce(async (callback: any) => callback(mockTxClient1))
.mockImplementationOnce(async (callback: any) => callback(mockTxClient2));
vi.mocked(mockJobEventsService.emitJobCompleted).mockResolvedValue({
id: "event-1",
jobId,
type: "job.completed",
timestamp: new Date(),
} as any);
// First call (completion) succeeds
const result1 = await service.completeJob(jobId, {
result: { success: true },
});
expect(result1.status).toBe(RunnerJobStatus.COMPLETED);
// Second call (failure) should be rejected due to invalid status transition
await expect(
service.failJob(jobId, {
error: "Something went wrong",
})
).rejects.toThrow();
});
});
describe("concurrent progress updates from coordinator", () => {
it("should handle rapid progress updates safely", async () => {
const jobId = "job-123";
const progressUpdates = [25, 50, 75];
// Updates are applied sequentially, one awaited call per loop iteration.
for (const progress of progressUpdates) {
const mockJob = {
id: jobId,
status: RunnerJobStatus.RUNNING,
progressPercent: progress - 25,
version: progress / 25, // version increases with each update
};
const updatedJob = {
...mockJob,
progressPercent: progress,
version: mockJob.version + 1,
};
// NOTE(review): a mockResolvedValueOnce value is consumed before the
// mockResolvedValue default, so the first findUnique call in each iteration
// would receive updatedJob and later calls mockJob -- confirm this matches
// the order of reads inside updateJobProgress.
vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValue({ count: 1 });
vi.mocked(prisma.runnerJob.findUnique).mockResolvedValueOnce(updatedJob as any);
const result = await service.updateJobProgress(jobId, {
progressPercent: progress,
});
expect(result.progressPercent).toBe(progress);
}
// One progress event is expected per update.
expect(mockJobEventsService.emitEvent).toHaveBeenCalledTimes(3);
});
it("should detect version conflicts in progress updates", async () => {
const jobId = "job-123";
const mockJob = {
id: jobId,
status: RunnerJobStatus.RUNNING,
progressPercent: 50,
version: 2,
};
vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);
// Simulate version conflict (another update happened)
vi.mocked(prisma.runnerJob.updateMany).mockResolvedValue({ count: 0 });
// A zero row count must surface as a ConflictException (or a subclass of it).
await expect(
service.updateJobProgress(jobId, {
progressPercent: 75,
})
).rejects.toThrow(ConflictException);
});
});
describe("transaction isolation", () => {
// NOTE(review): despite the test name, no isolation-level option is asserted
// below; the test only checks that $transaction was invoked.
it("should use appropriate transaction isolation level", async () => {
const jobId = "job-123";
const mockJob = {
id: jobId,
status: RunnerJobStatus.PENDING,
version: 1,
};
const mockTxClient = {
$queryRaw: vi.fn().mockResolvedValue([mockJob]),
runnerJob: {
update: vi.fn().mockResolvedValue({
...mockJob,
status: RunnerJobStatus.RUNNING,
version: 2,
}),
},
};
vi.mocked(prisma.$transaction).mockImplementation(async (callback: any) => {
return callback(mockTxClient);
});
vi.mocked(mockJobEventsService.emitJobStarted).mockResolvedValue({
id: "event-1",
jobId,
type: "job.started",
timestamp: new Date(),
} as any);
await service.updateJobStatus(jobId, {
status: CoordinatorJobStatus.RUNNING,
});
// Verify transaction was used (isolates the operation)
expect(prisma.$transaction).toHaveBeenCalled();
});
});
});

View File

@@ -6,6 +6,7 @@ import { HeraldService } from "../herald/herald.service";
import { BullMqService } from "../bullmq/bullmq.service";
import { QUEUE_NAMES } from "../bullmq/queues";
import { JOB_PROGRESS } from "../job-events/event-types";
import { ConcurrentUpdateException } from "../common/exceptions/concurrent-update.exception";
import {
CoordinatorJobStatus,
type CreateCoordinatorJobDto,
@@ -98,7 +99,8 @@ export class CoordinatorIntegrationService {
}
/**
* Update job status from the coordinator
* Update job status from the coordinator using transaction with SELECT FOR UPDATE
* This ensures serialized access to job status updates from the coordinator
*/
async updateJobStatus(
jobId: string,
@@ -106,64 +108,74 @@ export class CoordinatorIntegrationService {
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.update>>> {
this.logger.log(`Updating job ${jobId} status to ${dto.status}`);
// Verify job exists
const job = await this.prisma.runnerJob.findUnique({
where: { id: jobId },
select: { id: true, status: true, workspaceId: true },
});
return this.prisma.$transaction(async (tx) => {
// Use SELECT FOR UPDATE to lock the row during this transaction
// This prevents concurrent updates from coordinator and ensures serialization
const jobs = await tx.$queryRaw<
Array<{ id: string; status: RunnerJobStatus; workspace_id: string; version: number }>
>`
SELECT id, status, workspace_id, version
FROM runner_jobs
WHERE id = ${jobId}::uuid
FOR UPDATE
`;
if (!job) {
throw new NotFoundException(`RunnerJob with ID ${jobId} not found`);
}
if (!jobs || jobs.length === 0) {
throw new NotFoundException(`RunnerJob with ID ${jobId} not found`);
}
// Validate status transition
if (!this.isValidStatusTransition(job.status, dto.status as RunnerJobStatus)) {
throw new BadRequestException(
`Invalid status transition from ${job.status} to ${dto.status}`
);
}
const job = jobs[0];
const updateData: Prisma.RunnerJobUpdateInput = {
status: dto.status as RunnerJobStatus,
};
// Validate status transition
if (!this.isValidStatusTransition(job.status, dto.status as RunnerJobStatus)) {
throw new BadRequestException(
`Invalid status transition from ${job.status} to ${dto.status}`
);
}
// Set startedAt when transitioning to RUNNING
if (dto.status === CoordinatorJobStatus.RUNNING) {
updateData.startedAt = new Date();
}
const updateData: Prisma.RunnerJobUpdateInput = {
status: dto.status as RunnerJobStatus,
version: { increment: 1 },
};
const updatedJob = await this.prisma.runnerJob.update({
where: { id: jobId },
data: updateData,
});
// Set startedAt when transitioning to RUNNING
if (dto.status === CoordinatorJobStatus.RUNNING) {
updateData.startedAt = new Date();
}
// Emit appropriate event
if (dto.status === CoordinatorJobStatus.RUNNING) {
const event = await this.jobEvents.emitJobStarted(jobId, {
agentId: dto.agentId,
agentType: dto.agentType,
const updatedJob = await tx.runnerJob.update({
where: { id: jobId },
data: updateData,
});
// Broadcast via Herald
await this.herald.broadcastJobEvent(jobId, event);
}
// Emit appropriate event. NOTE: this still runs inside the transaction callback,
// so the row lock taken by SELECT ... FOR UPDATE is held until it finishes.
if (dto.status === CoordinatorJobStatus.RUNNING) {
const event = await this.jobEvents.emitJobStarted(jobId, {
agentId: dto.agentId,
agentType: dto.agentType,
});
return updatedJob;
// Broadcast via Herald
await this.herald.broadcastJobEvent(jobId, event);
}
return updatedJob;
});
}
/**
* Update job progress from the coordinator
* Update job progress from the coordinator with optimistic locking
*/
async updateJobProgress(
jobId: string,
dto: UpdateJobProgressDto
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.update>>> {
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.findUnique>>> {
this.logger.log(`Updating job ${jobId} progress to ${String(dto.progressPercent)}%`);
// Verify job exists and is running
// Read current job state
const job = await this.prisma.runnerJob.findUnique({
where: { id: jobId },
select: { id: true, status: true },
select: { id: true, status: true, version: true },
});
if (!job) {
@@ -174,11 +186,31 @@ export class CoordinatorIntegrationService {
throw new BadRequestException(`Cannot update progress for job with status ${job.status}`);
}
const updatedJob = await this.prisma.runnerJob.update({
where: { id: jobId },
data: { progressPercent: dto.progressPercent },
// Use updateMany with version check for optimistic locking
const result = await this.prisma.runnerJob.updateMany({
where: {
id: jobId,
version: job.version,
},
data: {
progressPercent: dto.progressPercent,
version: { increment: 1 },
},
});
if (result.count === 0) {
throw new ConcurrentUpdateException("RunnerJob", jobId, job.version);
}
// Fetch updated job
const updatedJob = await this.prisma.runnerJob.findUnique({
where: { id: jobId },
});
if (!updatedJob) {
throw new NotFoundException(`RunnerJob with ID ${jobId} not found after update`);
}
// Emit progress event
await this.jobEvents.emitEvent(jobId, {
type: JOB_PROGRESS,
@@ -194,7 +226,7 @@ export class CoordinatorIntegrationService {
}
/**
* Mark job as completed from the coordinator
* Mark job as completed from the coordinator using transaction with SELECT FOR UPDATE
*/
async completeJob(
jobId: string,
@@ -202,57 +234,68 @@ export class CoordinatorIntegrationService {
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.update>>> {
this.logger.log(`Completing job ${jobId}`);
// Verify job exists
const job = await this.prisma.runnerJob.findUnique({
where: { id: jobId },
select: { id: true, status: true, startedAt: true },
return this.prisma.$transaction(async (tx) => {
// Lock the row to prevent concurrent completion/failure
const jobs = await tx.$queryRaw<
Array<{ id: string; status: RunnerJobStatus; started_at: Date | null; version: number }>
>`
SELECT id, status, started_at, version
FROM runner_jobs
WHERE id = ${jobId}::uuid
FOR UPDATE
`;
if (!jobs || jobs.length === 0) {
throw new NotFoundException(`RunnerJob with ID ${jobId} not found`);
}
const job = jobs[0];
// Validate status transition
if (!this.isValidStatusTransition(job.status, RunnerJobStatus.COMPLETED)) {
throw new BadRequestException(`Cannot complete job with status ${job.status}`);
}
// Calculate duration if not provided
let durationSeconds = dto.durationSeconds;
if (durationSeconds === undefined && job.started_at) {
durationSeconds = Math.round(
(new Date().getTime() - new Date(job.started_at).getTime()) / 1000
);
}
const updateData: Prisma.RunnerJobUpdateInput = {
status: RunnerJobStatus.COMPLETED,
progressPercent: 100,
completedAt: new Date(),
version: { increment: 1 },
};
if (dto.result) {
updateData.result = dto.result as Prisma.InputJsonValue;
}
const updatedJob = await tx.runnerJob.update({
where: { id: jobId },
data: updateData,
});
// Emit completion event
const event = await this.jobEvents.emitJobCompleted(jobId, {
result: dto.result,
tokensUsed: dto.tokensUsed,
durationSeconds,
});
// Broadcast via Herald
await this.herald.broadcastJobEvent(jobId, event);
return updatedJob;
});
if (!job) {
throw new NotFoundException(`RunnerJob with ID ${jobId} not found`);
}
// Validate status transition
if (!this.isValidStatusTransition(job.status, RunnerJobStatus.COMPLETED)) {
throw new BadRequestException(`Cannot complete job with status ${job.status}`);
}
// Calculate duration if not provided
let durationSeconds = dto.durationSeconds;
if (durationSeconds === undefined && job.startedAt) {
durationSeconds = Math.round((new Date().getTime() - job.startedAt.getTime()) / 1000);
}
const updateData: Prisma.RunnerJobUpdateInput = {
status: RunnerJobStatus.COMPLETED,
progressPercent: 100,
completedAt: new Date(),
};
if (dto.result) {
updateData.result = dto.result as Prisma.InputJsonValue;
}
const updatedJob = await this.prisma.runnerJob.update({
where: { id: jobId },
data: updateData,
});
// Emit completion event
const event = await this.jobEvents.emitJobCompleted(jobId, {
result: dto.result,
tokensUsed: dto.tokensUsed,
durationSeconds,
});
// Broadcast via Herald
await this.herald.broadcastJobEvent(jobId, event);
return updatedJob;
}
/**
* Mark job as failed from the coordinator
* Mark job as failed from the coordinator using transaction with SELECT FOR UPDATE
*/
async failJob(
jobId: string,
@@ -260,42 +303,51 @@ export class CoordinatorIntegrationService {
): Promise<Awaited<ReturnType<typeof this.prisma.runnerJob.update>>> {
this.logger.log(`Failing job ${jobId}: ${dto.error}`);
// Verify job exists
const job = await this.prisma.runnerJob.findUnique({
where: { id: jobId },
select: { id: true, status: true },
});
return this.prisma.$transaction(async (tx) => {
// Lock the row to prevent concurrent completion/failure
const jobs = await tx.$queryRaw<
Array<{ id: string; status: RunnerJobStatus; version: number }>
>`
SELECT id, status, version
FROM runner_jobs
WHERE id = ${jobId}::uuid
FOR UPDATE
`;
if (!job) {
throw new NotFoundException(`RunnerJob with ID ${jobId} not found`);
}
if (!jobs || jobs.length === 0) {
throw new NotFoundException(`RunnerJob with ID ${jobId} not found`);
}
// Validate status transition
if (!this.isValidStatusTransition(job.status, RunnerJobStatus.FAILED)) {
throw new BadRequestException(`Cannot fail job with status ${job.status}`);
}
const job = jobs[0];
const updatedJob = await this.prisma.runnerJob.update({
where: { id: jobId },
data: {
status: RunnerJobStatus.FAILED,
// Validate status transition
if (!this.isValidStatusTransition(job.status, RunnerJobStatus.FAILED)) {
throw new BadRequestException(`Cannot fail job with status ${job.status}`);
}
const updatedJob = await tx.runnerJob.update({
where: { id: jobId },
data: {
status: RunnerJobStatus.FAILED,
error: dto.error,
completedAt: new Date(),
version: { increment: 1 },
},
});
// Emit failure event
const event = await this.jobEvents.emitJobFailed(jobId, {
error: dto.error,
completedAt: new Date(),
},
gateResults: dto.gateResults,
failedStep: dto.failedStep,
continuationPrompt: dto.continuationPrompt,
});
// Broadcast via Herald
await this.herald.broadcastJobEvent(jobId, event);
return updatedJob;
});
// Emit failure event
const event = await this.jobEvents.emitJobFailed(jobId, {
error: dto.error,
gateResults: dto.gateResults,
failedStep: dto.failedStep,
continuationPrompt: dto.continuationPrompt,
});
// Broadcast via Herald
await this.herald.broadcastJobEvent(jobId, event);
return updatedJob;
}
/**

View File

@@ -1,4 +1,15 @@
import { IsString, IsOptional, IsNumber, IsObject, Min, Max, IsUUID, MinLength, MaxLength, IsInt } from "class-validator";
import {
IsString,
IsOptional,
IsNumber,
IsObject,
Min,
Max,
IsUUID,
MinLength,
MaxLength,
IsInt,
} from "class-validator";
/**
* DTO for creating a job from the coordinator

View File

@@ -1,9 +1,13 @@
import { Injectable, NotFoundException } from "@nestjs/common";
import { Prisma } from "@prisma/client";
import { Prisma, Domain } from "@prisma/client";
import { PrismaService } from "../prisma/prisma.service";
import { ActivityService } from "../activity/activity.service";
import type { CreateDomainDto, UpdateDomainDto, QueryDomainsDto } from "./dto";
/**
 * A Domain record together with a `_count` object holding the number of
 * related tasks, events, projects and ideas.
 */
type DomainWithCount = Domain & {
  _count: Record<"tasks" | "events" | "projects" | "ideas", number>;
};
/**
* Service for managing domains
*/
@@ -17,7 +21,11 @@ export class DomainsService {
/**
* Create a new domain
*/
async create(workspaceId: string, userId: string, createDomainDto: CreateDomainDto) {
async create(
workspaceId: string,
userId: string,
createDomainDto: CreateDomainDto
): Promise<DomainWithCount> {
const domain = await this.prisma.domain.create({
data: {
name: createDomainDto.name,
@@ -49,7 +57,15 @@ export class DomainsService {
/**
* Get paginated domains with filters
*/
async findAll(query: QueryDomainsDto) {
async findAll(query: QueryDomainsDto): Promise<{
data: DomainWithCount[];
meta: {
total: number;
page: number;
limit: number;
totalPages: number;
};
}> {
const page = query.page ?? 1;
const limit = query.limit ?? 50;
const skip = (page - 1) * limit;
@@ -101,7 +117,7 @@ export class DomainsService {
/**
* Get a single domain by ID
*/
async findOne(id: string, workspaceId: string) {
async findOne(id: string, workspaceId: string): Promise<DomainWithCount> {
const domain = await this.prisma.domain.findUnique({
where: {
id,
@@ -124,7 +140,12 @@ export class DomainsService {
/**
* Update a domain
*/
async update(id: string, workspaceId: string, userId: string, updateDomainDto: UpdateDomainDto) {
async update(
id: string,
workspaceId: string,
userId: string,
updateDomainDto: UpdateDomainDto
): Promise<DomainWithCount> {
// Verify domain exists
const existingDomain = await this.prisma.domain.findUnique({
where: { id, workspaceId },
@@ -170,7 +191,7 @@ export class DomainsService {
/**
* Delete a domain
*/
async remove(id: string, workspaceId: string, userId: string) {
async remove(id: string, workspaceId: string, userId: string): Promise<void> {
// Verify domain exists
const domain = await this.prisma.domain.findUnique({
where: { id, workspaceId },

View File

@@ -1,9 +1,14 @@
import { Injectable, NotFoundException } from "@nestjs/common";
import { Prisma } from "@prisma/client";
import { Prisma, Event } from "@prisma/client";
import { PrismaService } from "../prisma/prisma.service";
import { ActivityService } from "../activity/activity.service";
import type { CreateEventDto, UpdateEventDto, QueryEventsDto } from "./dto";
/**
 * An Event record extended with summary objects for its related records:
 * the creator (always present) and the project (nullable).
 */
type EventWithRelations = Event & {
  project: null | {
    id: string;
    name: string;
    color: string | null;
  };
  creator: {
    id: string;
    name: string;
    email: string;
  };
};
/**
* Service for managing events
*/
@@ -17,7 +22,11 @@ export class EventsService {
/**
* Create a new event
*/
async create(workspaceId: string, userId: string, createEventDto: CreateEventDto) {
async create(
workspaceId: string,
userId: string,
createEventDto: CreateEventDto
): Promise<EventWithRelations> {
const projectConnection = createEventDto.projectId
? { connect: { id: createEventDto.projectId } }
: undefined;
@@ -60,7 +69,15 @@ export class EventsService {
/**
* Get paginated events with filters
*/
async findAll(query: QueryEventsDto) {
async findAll(query: QueryEventsDto): Promise<{
data: EventWithRelations[];
meta: {
total: number;
page: number;
limit: number;
totalPages: number;
};
}> {
const page = query.page ?? 1;
const limit = query.limit ?? 50;
const skip = (page - 1) * limit;
@@ -125,7 +142,7 @@ export class EventsService {
/**
* Get a single event by ID
*/
async findOne(id: string, workspaceId: string) {
async findOne(id: string, workspaceId: string): Promise<EventWithRelations> {
const event = await this.prisma.event.findUnique({
where: {
id,
@@ -151,7 +168,12 @@ export class EventsService {
/**
* Update an event
*/
async update(id: string, workspaceId: string, userId: string, updateEventDto: UpdateEventDto) {
async update(
id: string,
workspaceId: string,
userId: string,
updateEventDto: UpdateEventDto
): Promise<EventWithRelations> {
// Verify event exists
const existingEvent = await this.prisma.event.findUnique({
where: { id, workspaceId },
@@ -208,7 +230,7 @@ export class EventsService {
/**
* Delete an event
*/
async remove(id: string, workspaceId: string, userId: string) {
async remove(id: string, workspaceId: string, userId: string): Promise<void> {
// Verify event exists
const event = await this.prisma.event.findUnique({
where: { id, workspaceId },

View File

@@ -1,10 +1,20 @@
import { Injectable, NotFoundException } from "@nestjs/common";
import { Prisma } from "@prisma/client";
import { Prisma, Idea } from "@prisma/client";
import { PrismaService } from "../prisma/prisma.service";
import { ActivityService } from "../activity/activity.service";
import { IdeaStatus } from "@prisma/client";
import type { CreateIdeaDto, CaptureIdeaDto, UpdateIdeaDto, QueryIdeasDto } from "./dto";
/**
 * An Idea record extended with summary objects for its related records:
 * the creator (always present) plus the domain and project (both nullable).
 */
type IdeaWithRelations = Idea & {
  project: null | {
    id: string;
    name: string;
    color: string | null;
  };
  domain: null | {
    id: string;
    name: string;
    color: string | null;
  };
  creator: {
    id: string;
    name: string;
    email: string;
  };
};
/**
 * An Idea record extended with a summary of its creator only
 * (no domain or project summaries).
 */
type IdeaCaptured = Idea & {
  creator: {
    id: string;
    name: string;
    email: string;
  };
};
/**
* Service for managing ideas
*/
@@ -18,7 +28,11 @@ export class IdeasService {
/**
* Create a new idea
*/
async create(workspaceId: string, userId: string, createIdeaDto: CreateIdeaDto) {
async create(
workspaceId: string,
userId: string,
createIdeaDto: CreateIdeaDto
): Promise<IdeaWithRelations> {
const domainConnection = createIdeaDto.domainId
? { connect: { id: createIdeaDto.domainId } }
: undefined;
@@ -70,7 +84,11 @@ export class IdeasService {
* Quick capture - create an idea with minimal fields
* Optimized for rapid idea capture from the front-end
*/
async capture(workspaceId: string, userId: string, captureIdeaDto: CaptureIdeaDto) {
async capture(
workspaceId: string,
userId: string,
captureIdeaDto: CaptureIdeaDto
): Promise<IdeaCaptured> {
const data: Prisma.IdeaCreateInput = {
workspace: { connect: { id: workspaceId } },
creator: { connect: { id: userId } },
@@ -103,7 +121,15 @@ export class IdeasService {
/**
* Get paginated ideas with filters
*/
async findAll(query: QueryIdeasDto) {
async findAll(query: QueryIdeasDto): Promise<{
data: IdeaWithRelations[];
meta: {
total: number;
page: number;
limit: number;
totalPages: number;
};
}> {
const page = query.page ?? 1;
const limit = query.limit ?? 50;
const skip = (page - 1) * limit;
@@ -177,7 +203,7 @@ export class IdeasService {
/**
* Get a single idea by ID
*/
async findOne(id: string, workspaceId: string) {
async findOne(id: string, workspaceId: string): Promise<IdeaWithRelations> {
const idea = await this.prisma.idea.findUnique({
where: {
id,
@@ -206,7 +232,12 @@ export class IdeasService {
/**
* Update an idea
*/
async update(id: string, workspaceId: string, userId: string, updateIdeaDto: UpdateIdeaDto) {
async update(
id: string,
workspaceId: string,
userId: string,
updateIdeaDto: UpdateIdeaDto
): Promise<IdeaWithRelations> {
// Verify idea exists
const existingIdea = await this.prisma.idea.findUnique({
where: { id, workspaceId },
@@ -265,7 +296,7 @@ export class IdeasService {
/**
* Delete an idea
*/
async remove(id: string, workspaceId: string, userId: string) {
async remove(id: string, workspaceId: string, userId: string): Promise<void> {
// Verify idea exists
const idea = await this.prisma.idea.findUnique({
where: { id, workspaceId },

View File

@@ -1,5 +1,5 @@
import { Injectable, NotFoundException } from "@nestjs/common";
import { Prisma } from "@prisma/client";
import { Prisma, UserLayout } from "@prisma/client";
import { PrismaService } from "../prisma/prisma.service";
import type { CreateLayoutDto, UpdateLayoutDto } from "./dto";
@@ -13,7 +13,7 @@ export class LayoutsService {
/**
* Get all layouts for a user
*/
async findAll(workspaceId: string, userId: string) {
async findAll(workspaceId: string, userId: string): Promise<UserLayout[]> {
return this.prisma.userLayout.findMany({
where: {
workspaceId,
@@ -29,7 +29,7 @@ export class LayoutsService {
/**
* Get the default layout for a user
*/
async findDefault(workspaceId: string, userId: string) {
async findDefault(workspaceId: string, userId: string): Promise<UserLayout> {
const layout = await this.prisma.userLayout.findFirst({
where: {
workspaceId,
@@ -63,7 +63,7 @@ export class LayoutsService {
/**
* Get a single layout by ID
*/
async findOne(id: string, workspaceId: string, userId: string) {
async findOne(id: string, workspaceId: string, userId: string): Promise<UserLayout> {
const layout = await this.prisma.userLayout.findUnique({
where: {
id,
@@ -82,7 +82,11 @@ export class LayoutsService {
/**
* Create a new layout
*/
async create(workspaceId: string, userId: string, createLayoutDto: CreateLayoutDto) {
async create(
workspaceId: string,
userId: string,
createLayoutDto: CreateLayoutDto
): Promise<UserLayout> {
// Use transaction to ensure atomicity when setting default
return this.prisma.$transaction(async (tx) => {
// If setting as default, unset other defaults first
@@ -114,7 +118,12 @@ export class LayoutsService {
/**
* Update a layout
*/
async update(id: string, workspaceId: string, userId: string, updateLayoutDto: UpdateLayoutDto) {
async update(
id: string,
workspaceId: string,
userId: string,
updateLayoutDto: UpdateLayoutDto
): Promise<UserLayout> {
// Use transaction to ensure atomicity when setting default
return this.prisma.$transaction(async (tx) => {
// Verify layout exists
@@ -163,7 +172,7 @@ export class LayoutsService {
/**
* Delete a layout
*/
async remove(id: string, workspaceId: string, userId: string) {
async remove(id: string, workspaceId: string, userId: string): Promise<void> {
// Verify layout exists
const layout = await this.prisma.userLayout.findUnique({
where: { id, workspaceId, userId },

View File

@@ -1,10 +1,33 @@
import { Injectable, NotFoundException } from "@nestjs/common";
import { Prisma } from "@prisma/client";
import { Prisma, Project } from "@prisma/client";
import { PrismaService } from "../prisma/prisma.service";
import { ActivityService } from "../activity/activity.service";
import { ProjectStatus } from "@prisma/client";
import type { CreateProjectDto, UpdateProjectDto, QueryProjectsDto } from "./dto";
/**
 * A Project record extended with a summary of its creator and the
 * number of tasks and events counted for it.
 */
type ProjectWithRelations = Project & {
  _count: {
    tasks: number;
    events: number;
  };
  creator: {
    id: string;
    name: string;
    email: string;
  };
};
/**
 * A Project record extended with a summary of its creator, lists of task and
 * event summaries, and the task/event counts.
 */
type ProjectWithDetails = Project & {
  _count: { tasks: number; events: number };
  creator: { id: string; name: string; email: string };
  /** Task summaries: identity, title, status, priority and nullable due date. */
  tasks: Array<{
    id: string;
    title: string;
    status: string;
    priority: string;
    dueDate: Date | null;
  }>;
  /** Event summaries: identity, title, start time and nullable end time. */
  events: Array<{
    id: string;
    title: string;
    startTime: Date;
    endTime: Date | null;
  }>;
};
/**
* Service for managing projects
*/
@@ -18,7 +41,11 @@ export class ProjectsService {
/**
* Create a new project
*/
async create(workspaceId: string, userId: string, createProjectDto: CreateProjectDto) {
async create(
workspaceId: string,
userId: string,
createProjectDto: CreateProjectDto
): Promise<ProjectWithRelations> {
const data: Prisma.ProjectCreateInput = {
name: createProjectDto.name,
description: createProjectDto.description ?? null,
@@ -56,7 +83,15 @@ export class ProjectsService {
/**
* Get paginated projects with filters
*/
async findAll(query: QueryProjectsDto) {
async findAll(query: QueryProjectsDto): Promise<{
data: ProjectWithRelations[];
meta: {
total: number;
page: number;
limit: number;
totalPages: number;
};
}> {
const page = query.page ?? 1;
const limit = query.limit ?? 50;
const skip = (page - 1) * limit;
@@ -117,7 +152,7 @@ export class ProjectsService {
/**
* Get a single project by ID
*/
async findOne(id: string, workspaceId: string) {
async findOne(id: string, workspaceId: string): Promise<ProjectWithDetails> {
const project = await this.prisma.project.findUnique({
where: {
id,
@@ -167,7 +202,7 @@ export class ProjectsService {
workspaceId: string,
userId: string,
updateProjectDto: UpdateProjectDto
) {
): Promise<ProjectWithRelations> {
// Verify project exists
const existingProject = await this.prisma.project.findUnique({
where: { id, workspaceId },
@@ -217,7 +252,7 @@ export class ProjectsService {
/**
* Delete a project
*/
async remove(id: string, workspaceId: string, userId: string) {
async remove(id: string, workspaceId: string, userId: string): Promise<void> {
// Verify project exists
const project = await this.prisma.project.findUnique({
where: { id, workspaceId },

View File

@@ -0,0 +1,394 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { RunnerJobsService } from "./runner-jobs.service";
import { PrismaService } from "../prisma/prisma.service";
import { BullMqService } from "../bullmq/bullmq.service";
import { RunnerJobStatus } from "@prisma/client";
import { ConflictException, BadRequestException } from "@nestjs/common";
/**
* Concurrency tests for RunnerJobsService
* These tests verify that race conditions in job status updates are properly handled
*/
describe("RunnerJobsService - Concurrency", () => {
let service: RunnerJobsService;
let prisma: PrismaService;
const mockBullMqService = {
addJob: vi.fn(),
getQueue: vi.fn(),
};
// Rebuild the Nest testing module before every test so each case starts with
// fresh mock functions for the Prisma runnerJob delegate.
beforeEach(async () => {
  // Only the delegate methods these tests stub are provided.
  const runnerJobDelegate = {
    findUnique: vi.fn(),
    update: vi.fn(),
    updateMany: vi.fn(),
  };

  const testingModule: TestingModule = await Test.createTestingModule({
    providers: [
      RunnerJobsService,
      { provide: PrismaService, useValue: { runnerJob: runnerJobDelegate } },
      { provide: BullMqService, useValue: mockBullMqService },
    ],
  }).compile();

  service = testingModule.get<RunnerJobsService>(RunnerJobsService);
  prisma = testingModule.get<PrismaService>(PrismaService);

  // Reset call history on every mock, including the shared BullMQ mock.
  vi.clearAllMocks();
});
// updateStatus() under optimistic locking: conflict detection, the
// conflict-free path, and recovery through a single retry.
describe("concurrent status updates", () => {
  it("should detect concurrent status update conflict using version field", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";
    const findUnique = vi.mocked(prisma.runnerJob.findUnique);
    const updateMany = vi.mocked(prisma.runnerJob.updateMany);

    // Every read returns the job at version 1 ...
    const staleJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      version: 1,
      startedAt: new Date(),
    };
    findUnique.mockResolvedValue(staleJob as any);
    // ... and every versioned write matches zero rows (version mismatch).
    updateMany.mockResolvedValue({ count: 0 });

    const attempt = service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED);
    await expect(attempt).rejects.toThrow(ConflictException);

    // The write must have been guarded by the version that was read.
    const versionGuard = expect.objectContaining({ id: jobId, workspaceId, version: 1 });
    expect(updateMany).toHaveBeenCalledWith(expect.objectContaining({ where: versionGuard }));
  });

  it("should successfully update when no concurrent conflict exists", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";
    const findUnique = vi.mocked(prisma.runnerJob.findUnique);
    const updateMany = vi.mocked(prisma.runnerJob.updateMany);

    const jobBefore = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      version: 1,
      startedAt: new Date(),
    };
    const jobAfter = {
      ...jobBefore,
      status: RunnerJobStatus.COMPLETED,
      version: 2,
      completedAt: new Date(),
    };

    // Read #1 is the initial lookup, read #2 is the re-fetch after the write.
    findUnique.mockResolvedValueOnce(jobBefore as any).mockResolvedValueOnce(jobAfter as any);
    updateMany.mockResolvedValue({ count: 1 });

    const result = await service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED);

    expect(result.status).toBe(RunnerJobStatus.COMPLETED);
    expect(result.version).toBe(2);
  });

  it("should retry on conflict and succeed on second attempt", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";
    const findUnique = vi.mocked(prisma.runnerJob.findUnique);
    const updateMany = vi.mocked(prisma.runnerJob.updateMany);

    const runningJob = { id: jobId, workspaceId, status: RunnerJobStatus.RUNNING };
    const jobAtV1 = { ...runningJob, version: 1 };
    const jobAtV2 = { ...runningJob, version: 2 };
    const completedJob = {
      ...jobAtV2,
      status: RunnerJobStatus.COMPLETED,
      version: 3,
      completedAt: new Date(),
    };

    // Reads in order: initial lookup, lookup on retry, re-fetch after the successful write.
    findUnique
      .mockResolvedValueOnce(jobAtV1 as any)
      .mockResolvedValueOnce(jobAtV2 as any)
      .mockResolvedValueOnce(completedJob as any);
    // Writes in order: version conflict, then success.
    updateMany.mockResolvedValueOnce({ count: 0 }).mockResolvedValueOnce({ count: 1 });

    const result = await service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED);

    expect(result.status).toBe(RunnerJobStatus.COMPLETED);
    expect(updateMany).toHaveBeenCalledTimes(2);
  });
});
// updateProgress() under optimistic locking: a persistent conflict is surfaced,
// and back-to-back updates each bump the version by one.
describe("concurrent progress updates", () => {
  it("should detect concurrent progress update conflict", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";
    const findUnique = vi.mocked(prisma.runnerJob.findUnique);
    const updateMany = vi.mocked(prisma.runnerJob.updateMany);

    const staleJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      progressPercent: 50,
      version: 5,
    };
    findUnique.mockResolvedValue(staleJob as any);
    // No row ever matches the version that was read.
    updateMany.mockResolvedValue({ count: 0 });

    const attempt = service.updateProgress(jobId, workspaceId, 75);
    await expect(attempt).rejects.toThrow(ConflictException);
  });

  it("should handle rapid sequential progress updates", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";
    const findUnique = vi.mocked(prisma.runnerJob.findUnique);
    const updateMany = vi.mocked(prisma.runnerJob.updateMany);

    // Five updates in a row; the n-th update starts from version n.
    for (const [index, progress] of [20, 40, 60, 80, 100].entries()) {
      const version = index + 1;
      const jobBefore = {
        id: jobId,
        workspaceId,
        status: RunnerJobStatus.RUNNING,
        progressPercent: progress - 20,
        version,
      };
      const jobAfter = { ...jobBefore, progressPercent: progress, version: version + 1 };

      // One read before the write, one re-fetch after it.
      findUnique.mockResolvedValueOnce(jobBefore as any).mockResolvedValueOnce(jobAfter as any);
      updateMany.mockResolvedValueOnce({ count: 1 });

      const result = await service.updateProgress(jobId, workspaceId, progress);

      expect(result.progressPercent).toBe(progress);
      expect(result.version).toBe(version + 1);
    }
  });
});
// Two completions racing for the same job: the first wins, the second keeps
// hitting a version conflict until its retries run out.
describe("concurrent completion", () => {
  it("should prevent double completion with different results", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";
    const findUnique = vi.mocked(prisma.runnerJob.findUnique);
    const updateMany = vi.mocked(prisma.runnerJob.updateMany);

    const runningJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      version: 1,
      startedAt: new Date(),
    };
    const completedJob = {
      ...runningJob,
      status: RunnerJobStatus.COMPLETED,
      version: 2,
      result: { outcome: "success-A" },
      completedAt: new Date(),
    };

    // First completion: initial read, successful write, re-fetch.
    findUnique
      .mockResolvedValueOnce(runningJob as any)
      .mockResolvedValueOnce(completedJob as any);
    updateMany.mockResolvedValueOnce({ count: 1 });

    const firstOutcome = await service.updateStatus(
      jobId,
      workspaceId,
      RunnerJobStatus.COMPLETED,
      { result: { outcome: "success-A" } }
    );
    expect(firstOutcome.status).toBe(RunnerJobStatus.COMPLETED);

    // Second completion: each of the three attempts reads the stale version
    // and its write matches no rows.
    findUnique
      .mockResolvedValueOnce(runningJob as any)
      .mockResolvedValueOnce(runningJob as any)
      .mockResolvedValueOnce(runningJob as any);
    updateMany
      .mockResolvedValueOnce({ count: 0 })
      .mockResolvedValueOnce({ count: 0 })
      .mockResolvedValueOnce({ count: 0 });

    const secondOutcome = service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED, {
      result: { outcome: "success-B" },
    });
    await expect(secondOutcome).rejects.toThrow(ConflictException);
  });
});
// Two cancel requests for the same job: the first succeeds, the second finds
// the job already cancelled and is rejected.
describe("concurrent cancel operations", () => {
  it("should handle concurrent cancel attempts", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";
    const findUnique = vi.mocked(prisma.runnerJob.findUnique);
    const updateMany = vi.mocked(prisma.runnerJob.updateMany);

    const runningJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      version: 1,
    };
    const cancelledJob = {
      ...runningJob,
      status: RunnerJobStatus.CANCELLED,
      version: 2,
      completedAt: new Date(),
    };

    // Reads in order: first cancel's lookup, first cancel's re-fetch,
    // second cancel's lookup (which already sees CANCELLED).
    findUnique
      .mockResolvedValueOnce(runningJob as any)
      .mockResolvedValueOnce(cancelledJob as any)
      .mockResolvedValueOnce(cancelledJob as any);
    updateMany.mockResolvedValueOnce({ count: 1 });

    const firstCancel = await service.cancel(jobId, workspaceId);
    expect(firstCancel.status).toBe(RunnerJobStatus.CANCELLED);

    // The job is now in a terminal status, so cancelling again is a bad request.
    const secondCancel = service.cancel(jobId, workspaceId);
    await expect(secondCancel).rejects.toThrow(BadRequestException);
  });
});
// Exercises the retry wrapper through updateStatus(): how many attempts are
// made on persistent conflicts and how long it waits between them.
describe("retry mechanism", () => {
  it("should retry up to max attempts on version conflicts", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";

    const mockJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      version: 1,
    };

    vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);

    // All retry attempts fail
    vi.mocked(prisma.runnerJob.updateMany)
      .mockResolvedValueOnce({ count: 0 })
      .mockResolvedValueOnce({ count: 0 })
      .mockResolvedValueOnce({ count: 0 });

    // Should throw after max retries (3)
    await expect(
      service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED)
    ).rejects.toThrow(ConflictException);

    expect(prisma.runnerJob.updateMany).toHaveBeenCalledTimes(3);
  });

  it("should use exponential backoff between retries", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";

    const mockJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.RUNNING,
      version: 1,
    };

    vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);

    // Fake timers make the measurement deterministic and instant: Date.now()
    // only moves when the mocked clock fires a pending setTimeout, so the gap
    // between two write attempts equals the backoff delay exactly.
    vi.useFakeTimers();
    try {
      const gapsBetweenAttempts: number[] = [];
      let previousAttemptAt: number | undefined;

      vi.mocked(prisma.runnerJob.updateMany).mockImplementation(async () => {
        const now = Date.now();
        if (previousAttemptAt !== undefined) {
          gapsBetweenAttempts.push(now - previousAttemptAt);
        }
        previousAttemptAt = now;
        return { count: 0 }; // every attempt hits a version conflict
      });

      // Attach the rejection expectation BEFORE advancing the clock so the
      // eventual rejection is never reported as unhandled.
      const rejection = expect(
        service.updateStatus(jobId, workspaceId, RunnerJobStatus.COMPLETED)
      ).rejects.toThrow(ConflictException);

      await vi.runAllTimersAsync();
      await rejection;

      expect(prisma.runnerJob.updateMany).toHaveBeenCalledTimes(3);
      // 100ms after the first conflict, 200ms after the second; no wait follows
      // the final attempt. A constant delay would fail this assertion.
      expect(gapsBetweenAttempts).toEqual([100, 200]);
    } finally {
      // Never leak the mocked clock into other tests.
      vi.useRealTimers();
    }
  });
});
// Transition validation runs on the freshly read job, so a terminal job
// cannot be moved back to an active status.
describe("status transition validation with concurrency", () => {
  it("should prevent invalid transitions even under concurrent updates", async () => {
    const jobId = "job-123";
    const workspaceId = "workspace-123";

    // Job is already completed
    const mockJob = {
      id: jobId,
      workspaceId,
      status: RunnerJobStatus.COMPLETED,
      version: 5,
      completedAt: new Date(),
    };

    vi.mocked(prisma.runnerJob.findUnique).mockResolvedValue(mockJob as any);

    // Should reject transition from COMPLETED to RUNNING with the validation
    // error specifically; a bare toThrow() would also pass on unrelated failures.
    await expect(
      service.updateStatus(jobId, workspaceId, RunnerJobStatus.RUNNING)
    ).rejects.toThrow(BadRequestException);

    // Validation happens before the write, so nothing may have been updated.
    expect(prisma.runnerJob.updateMany).not.toHaveBeenCalled();
  });
});
});

View File

@@ -19,6 +19,7 @@ describe("RunnerJobsService", () => {
count: vi.fn(),
findUnique: vi.fn(),
update: vi.fn(),
updateMany: vi.fn(),
},
jobEvent: {
findMany: vi.fn(),

View File

@@ -4,6 +4,7 @@ import { Response } from "express";
import { PrismaService } from "../prisma/prisma.service";
import { BullMqService } from "../bullmq/bullmq.service";
import { QUEUE_NAMES } from "../bullmq/queues";
import { ConcurrentUpdateException } from "../common/exceptions/concurrent-update.exception";
import type { CreateJobDto, QueryJobsDto } from "./dto";
/**
@@ -144,37 +145,57 @@ export class RunnerJobsService {
}
/**
 * Cancel a running or queued job with optimistic locking.
 *
 * Reads the job, refuses to cancel one that is already COMPLETED, CANCELLED or
 * FAILED, then writes status CANCELLED and a completion timestamp only if the
 * row still carries the version that was read. The whole read-check-write
 * sequence runs inside retryOnConflict, so a version conflict re-reads the job
 * and re-evaluates the checks before trying again.
 *
 * @param id - ID of the job to cancel.
 * @param workspaceId - Workspace the job must belong to.
 * @returns The job as re-read after the cancelling write.
 * @throws NotFoundException when the job does not exist (before or after the write).
 * @throws BadRequestException when the job is already in a terminal status.
 * @throws ConcurrentUpdateException when the versioned write keeps matching no rows.
 */
async cancel(id: string, workspaceId: string) {
  return this.retryOnConflict(async () => {
    // Verify job exists
    const existingJob = await this.prisma.runnerJob.findUnique({
      where: { id, workspaceId },
    });

    if (!existingJob) {
      throw new NotFoundException(`RunnerJob with ID ${id} not found`);
    }

    // Check if job can be cancelled
    if (
      existingJob.status === RunnerJobStatus.COMPLETED ||
      existingJob.status === RunnerJobStatus.CANCELLED ||
      existingJob.status === RunnerJobStatus.FAILED
    ) {
      throw new BadRequestException(`Cannot cancel job with status ${existingJob.status}`);
    }

    // Update job status to cancelled with version check: the version in the
    // filter makes the write a no-op if another writer got there first.
    const result = await this.prisma.runnerJob.updateMany({
      where: {
        id,
        workspaceId,
        version: existingJob.version,
      },
      data: {
        status: RunnerJobStatus.CANCELLED,
        completedAt: new Date(),
        version: { increment: 1 },
      },
    });

    // Zero matched rows means the version changed between read and write.
    if (result.count === 0) {
      throw new ConcurrentUpdateException("RunnerJob", id, existingJob.version);
    }

    // updateMany only returns a count, so fetch and return the updated job
    const job = await this.prisma.runnerJob.findUnique({
      where: { id, workspaceId },
    });

    if (!job) {
      throw new NotFoundException(`RunnerJob with ID ${id} not found after cancel`);
    }

    return job;
  });
}
/**
@@ -413,74 +434,179 @@ export class RunnerJobsService {
}
/**
 * Retry wrapper for optimistic locking conflicts.
 *
 * Runs `operation`; when it throws a ConcurrentUpdateException, waits and runs
 * it again, for at most `maxRetries` attempts in total. The wait starts at
 * 100ms and doubles after every failed attempt (with the default of 3 attempts
 * only the 100ms and 200ms waits occur, because nothing is awaited after the
 * final attempt). Any other error, and the conflict raised by the final
 * attempt, is rethrown unchanged.
 *
 * @param operation - Work to execute; invoked afresh on every attempt.
 * @param maxRetries - Total number of attempts. Values below 1 and NaN are
 *   treated as 1 so the operation always runs at least once.
 * @returns Whatever `operation` resolves with.
 * @throws The last ConcurrentUpdateException once attempts are exhausted, or
 *   any other error as soon as it occurs.
 */
private async retryOnConflict<T>(operation: () => Promise<T>, maxRetries = 3): Promise<T> {
  const baseDelayMs = 100;
  // Previously maxRetries <= 0 (or NaN) skipped the loop entirely and surfaced
  // a misleading "Retry logic failed unexpectedly" error without ever running
  // the operation. Math.ceil keeps fractional values behaving as before.
  const totalAttempts = Math.max(1, Math.ceil(maxRetries) || 1);

  for (let attempt = 0; ; attempt++) {
    try {
      return await operation();
    } catch (error) {
      const isLastAttempt = attempt >= totalAttempts - 1;
      if (!(error instanceof ConcurrentUpdateException) || isLastAttempt) {
        throw error;
      }

      // Exponential backoff: 100ms, 200ms, 400ms, ...
      const delayMs = baseDelayMs * 2 ** attempt;
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
/**
 * Update job status with optimistic locking.
 *
 * Reads the job, validates the requested transition against its current
 * status, then writes the new status (plus timestamps and optional
 * result/error) only if the row still carries the version that was read. The
 * whole sequence runs inside retryOnConflict, so after a version conflict the
 * job is re-read and the transition re-validated before the next attempt.
 *
 * @param id - ID of the job to update.
 * @param workspaceId - Workspace the job must belong to.
 * @param status - Status to move the job to.
 * @param data - Optional result payload and/or error message to store.
 * @returns The job as re-read after the write; never null.
 * @throws NotFoundException when the job does not exist (before or after the write).
 * @throws BadRequestException when the transition from the current status is not allowed.
 * @throws ConcurrentUpdateException when the versioned write keeps matching no rows.
 */
async updateStatus(
  id: string,
  workspaceId: string,
  status: RunnerJobStatus,
  data?: { result?: unknown; error?: string }
): Promise<NonNullable<Awaited<ReturnType<typeof this.prisma.runnerJob.findUnique>>>> {
  return this.retryOnConflict(async () => {
    // Read current job state
    const existingJob = await this.prisma.runnerJob.findUnique({
      where: { id, workspaceId },
    });

    if (!existingJob) {
      throw new NotFoundException(`RunnerJob with ID ${id} not found`);
    }

    // Validate status transition (prevent invalid transitions even with concurrency)
    if (!this.isValidStatusTransition(existingJob.status, status)) {
      throw new BadRequestException(
        `Invalid status transition from ${existingJob.status} to ${status}`
      );
    }

    const updateData: Prisma.RunnerJobUpdateInput = {
      status,
      version: { increment: 1 }, // Increment version for optimistic locking
    };

    // Set timestamps based on status; startedAt is only set the first time
    if (status === RunnerJobStatus.RUNNING && !existingJob.startedAt) {
      updateData.startedAt = new Date();
    }

    if (
      status === RunnerJobStatus.COMPLETED ||
      status === RunnerJobStatus.FAILED ||
      status === RunnerJobStatus.CANCELLED
    ) {
      updateData.completedAt = new Date();
    }

    // Add optional data
    if (data?.result !== undefined) {
      updateData.result = data.result as Prisma.InputJsonValue;
    }
    if (data?.error !== undefined) {
      updateData.error = data.error;
    }

    // Use updateMany with version check for optimistic locking
    const result = await this.prisma.runnerJob.updateMany({
      where: {
        id,
        workspaceId,
        version: existingJob.version, // Only update if version matches
      },
      data: updateData,
    });

    // If count is 0, version mismatch (concurrent update detected)
    if (result.count === 0) {
      throw new ConcurrentUpdateException("RunnerJob", id, existingJob.version);
    }

    // updateMany only returns a count, so fetch and return the updated job
    const updatedJob = await this.prisma.runnerJob.findUnique({
      where: { id, workspaceId },
    });

    if (!updatedJob) {
      throw new NotFoundException(`RunnerJob with ID ${id} not found after update`);
    }

    return updatedJob;
  });
}
/**
 * Decide whether a job may move from one status to another.
 *
 * PENDING may go to QUEUED, RUNNING or CANCELLED; QUEUED to RUNNING or
 * CANCELLED; RUNNING to COMPLETED, FAILED or CANCELLED. COMPLETED, FAILED and
 * CANCELLED allow no further transition.
 *
 * @param currentStatus - Status the job has now.
 * @param newStatus - Status being requested.
 * @returns true when the transition is listed as allowed, false otherwise.
 */
private isValidStatusTransition(
  currentStatus: RunnerJobStatus,
  newStatus: RunnerJobStatus
): boolean {
  // Allowed target statuses, keyed by the status the job is in right now.
  const allowedTargets: Record<RunnerJobStatus, RunnerJobStatus[]> = {
    [RunnerJobStatus.PENDING]: [
      RunnerJobStatus.QUEUED,
      RunnerJobStatus.RUNNING,
      RunnerJobStatus.CANCELLED,
    ],
    [RunnerJobStatus.QUEUED]: [RunnerJobStatus.RUNNING, RunnerJobStatus.CANCELLED],
    [RunnerJobStatus.RUNNING]: [
      RunnerJobStatus.COMPLETED,
      RunnerJobStatus.FAILED,
      RunnerJobStatus.CANCELLED,
    ],
    // Terminal statuses: nothing may follow them.
    [RunnerJobStatus.COMPLETED]: [],
    [RunnerJobStatus.FAILED]: [],
    [RunnerJobStatus.CANCELLED]: [],
  };

  const targetsFromCurrent = allowedTargets[currentStatus];
  return targetsFromCurrent.includes(newStatus);
}
/**
 * Update job progress percentage with optimistic locking.
 *
 * Reads the job, then writes the new progress value only if the row still
 * carries the version that was read, incrementing the version in the same
 * write. The sequence runs inside retryOnConflict, so a version conflict
 * re-reads the job before trying again.
 *
 * @param id - ID of the job to update.
 * @param workspaceId - Workspace the job must belong to.
 * @param progressPercent - New progress value; stored as given (no range check here).
 * @returns The job as re-read after the write; never null.
 * @throws NotFoundException when the job does not exist (before or after the write).
 * @throws ConcurrentUpdateException when the versioned write keeps matching no rows.
 */
async updateProgress(
  id: string,
  workspaceId: string,
  progressPercent: number
): Promise<NonNullable<Awaited<ReturnType<typeof this.prisma.runnerJob.findUnique>>>> {
  return this.retryOnConflict(async () => {
    // Read current job state
    const existingJob = await this.prisma.runnerJob.findUnique({
      where: { id, workspaceId },
    });

    if (!existingJob) {
      throw new NotFoundException(`RunnerJob with ID ${id} not found`);
    }

    // Use updateMany with version check for optimistic locking
    const result = await this.prisma.runnerJob.updateMany({
      where: {
        id,
        workspaceId,
        version: existingJob.version,
      },
      data: {
        progressPercent,
        version: { increment: 1 },
      },
    });

    // Zero matched rows means the version changed between read and write.
    if (result.count === 0) {
      throw new ConcurrentUpdateException("RunnerJob", id, existingJob.version);
    }

    // updateMany only returns a count, so fetch and return the updated job
    const updatedJob = await this.prisma.runnerJob.findUnique({
      where: { id, workspaceId },
    });

    if (!updatedJob) {
      throw new NotFoundException(`RunnerJob with ID ${id} not found after update`);
    }

    return updatedJob;
  });
}
}

View File

@@ -1,4 +1,13 @@
import { IsString, IsUUID, IsOptional, IsObject, ValidateNested, MinLength, MaxLength, IsEnum } from "class-validator";
import {
IsString,
IsUUID,
IsOptional,
IsObject,
ValidateNested,
MinLength,
MaxLength,
IsEnum,
} from "class-validator";
import { Type } from "class-transformer";
/**

View File

@@ -1,10 +1,19 @@
import { Injectable, NotFoundException } from "@nestjs/common";
import { Prisma } from "@prisma/client";
import { Prisma, Task } from "@prisma/client";
import { PrismaService } from "../prisma/prisma.service";
import { ActivityService } from "../activity/activity.service";
import { TaskStatus, TaskPriority } from "@prisma/client";
import type { CreateTaskDto, UpdateTaskDto, QueryTasksDto } from "./dto";
/**
 * A Task record extended with summary objects for its related records: the
 * creator (always present), the assignee and project (both nullable), and an
 * optional list of subtasks that each carry their own nullable assignee.
 */
type TaskWithRelations = Task & {
  creator: {
    id: string;
    name: string;
    email: string;
  };
  assignee: null | {
    id: string;
    name: string;
    email: string;
  };
  project: null | {
    id: string;
    name: string;
    color: string | null;
  };
  subtasks?: Array<
    Task & {
      assignee: null | { id: string; name: string; email: string };
    }
  >;
};
/**
* Service for managing tasks
*/
@@ -18,7 +27,11 @@ export class TasksService {
/**
* Create a new task
*/
async create(workspaceId: string, userId: string, createTaskDto: CreateTaskDto) {
async create(
workspaceId: string,
userId: string,
createTaskDto: CreateTaskDto
): Promise<Omit<TaskWithRelations, "subtasks">> {
const assigneeConnection = createTaskDto.assigneeId
? { connect: { id: createTaskDto.assigneeId } }
: undefined;
@@ -79,7 +92,15 @@ export class TasksService {
/**
* Get paginated tasks with filters
*/
async findAll(query: QueryTasksDto) {
async findAll(query: QueryTasksDto): Promise<{
data: Omit<TaskWithRelations, "subtasks">[];
meta: {
total: number;
page: number;
limit: number;
totalPages: number;
};
}> {
const page = query.page ?? 1;
const limit = query.limit ?? 50;
const skip = (page - 1) * limit;
@@ -159,7 +180,7 @@ export class TasksService {
/**
* Get a single task by ID
*/
async findOne(id: string, workspaceId: string) {
async findOne(id: string, workspaceId: string): Promise<TaskWithRelations> {
const task = await this.prisma.task.findUnique({
where: {
id,
@@ -195,7 +216,12 @@ export class TasksService {
/**
* Update a task
*/
async update(id: string, workspaceId: string, userId: string, updateTaskDto: UpdateTaskDto) {
async update(
id: string,
workspaceId: string,
userId: string,
updateTaskDto: UpdateTaskDto
): Promise<Omit<TaskWithRelations, "subtasks">> {
// Verify task exists
const existingTask = await this.prisma.task.findUnique({
where: { id, workspaceId },
@@ -305,7 +331,7 @@ export class TasksService {
/**
* Delete a task
*/
async remove(id: string, workspaceId: string, userId: string) {
async remove(id: string, workspaceId: string, userId: string): Promise<void> {
// Verify task exists
const task = await this.prisma.task.findUnique({
where: { id, workspaceId },