Files
stack/apps/orchestrator/src/queue/queue.service.ts

344 lines
9.7 KiB
TypeScript

import { Injectable, OnModuleDestroy, OnModuleInit, Optional, Logger } from "@nestjs/common";
import { ConfigService } from "@nestjs/config";
import { Queue, Worker, Job } from "bullmq";
import { ValkeyService } from "../valkey/valkey.service";
import { AgentSpawnerService } from "../spawner/agent-spawner.service";
import { AgentLifecycleService } from "../spawner/agent-lifecycle.service";
import type { TaskContext } from "../valkey/types";
import type {
QueuedTask,
QueueStats,
AddTaskOptions,
RetryConfig,
TaskProcessingResult,
} from "./types";
/**
 * Queue service for managing the orchestrator task queue with priority and retry logic.
 *
 * Wraps a BullMQ `Queue` (producer side) and `Worker` (consumer side) that share one
 * Valkey connection, and mirrors queue lifecycle changes into Valkey task state, the
 * Valkey event stream and, when the optional spawner/lifecycle services are injected,
 * the agent session state.
 */
@Injectable()
export class QueueService implements OnModuleInit, OnModuleDestroy {
  private readonly logger = new Logger(QueueService.name);
  private queue!: Queue<QueuedTask>;
  private worker!: Worker<QueuedTask, TaskProcessingResult>;
  private readonly queueName: string;
  private readonly retryConfig: RetryConfig;

  /**
   * Reads the queue name and retry configuration from `ConfigService`.
   * The BullMQ queue and worker themselves are created later, in `onModuleInit`.
   */
  constructor(
    private readonly valkeyService: ValkeyService,
    private readonly configService: ConfigService,
    @Optional() private readonly spawnerService?: AgentSpawnerService,
    @Optional() private readonly lifecycleService?: AgentLifecycleService
  ) {
    this.queueName = this.configService.get<string>(
      "orchestrator.queue.name",
      "orchestrator-tasks"
    );
    this.retryConfig = {
      maxRetries: this.configService.get<number>("orchestrator.queue.maxRetries", 3),
      baseDelay: this.configService.get<number>("orchestrator.queue.baseDelay", 1000),
      maxDelay: this.configService.get<number>("orchestrator.queue.maxDelay", 60000),
    };
  }

  /**
   * Creates the BullMQ queue and worker and wires the worker event listeners.
   */
  onModuleInit(): void {
    // Initialize BullMQ with Valkey connection
    const connection = {
      host: this.configService.get<string>("orchestrator.valkey.host", "localhost"),
      port: this.configService.get<number>("orchestrator.valkey.port", 6379),
      password: this.configService.get<string>("orchestrator.valkey.password"),
    };

    // Read retention config
    const completedRetentionAge = this.configService.get<number>(
      "orchestrator.queue.completedRetentionAgeSeconds",
      3600
    );
    const completedRetentionCount = this.configService.get<number>(
      "orchestrator.queue.completedRetentionCount",
      100
    );
    const failedRetentionAge = this.configService.get<number>(
      "orchestrator.queue.failedRetentionAgeSeconds",
      86400
    );
    const failedRetentionCount = this.configService.get<number>(
      "orchestrator.queue.failedRetentionCount",
      1000
    );

    // Create queue
    this.queue = new Queue<QueuedTask>(this.queueName, {
      connection,
      defaultJobOptions: {
        removeOnComplete: {
          age: completedRetentionAge,
          count: completedRetentionCount,
        },
        removeOnFail: {
          age: failedRetentionAge,
          count: failedRetentionCount,
        },
      },
    });

    // Create worker
    this.worker = new Worker<QueuedTask, TaskProcessingResult>(
      this.queueName,
      async (job: Job<QueuedTask>) => {
        return this.processTask(job);
      },
      {
        connection,
        concurrency: this.configService.get<number>("orchestrator.queue.concurrency", 5),
        settings: {
          // Jobs are enqueued with `backoff: { type: "custom" }`. BullMQ can only
          // resolve that type when the worker registers a strategy; without it the
          // retry delay cannot be computed when a job fails.
          backoffStrategy: (attemptsMade: number): number =>
            this.calculateBackoffDelay(
              attemptsMade,
              this.retryConfig.baseDelay,
              this.retryConfig.maxDelay
            ),
        },
      }
    );

    // Setup error handlers
    // BullMQ recommends always attaching an `error` listener to a worker.
    this.worker.on("error", (err) => {
      this.logger.error(`Queue worker error: ${this.formatError(err)}`);
    });

    this.worker.on("failed", (job, err) => {
      if (!job) {
        return;
      }
      // `failed` fires for every failed attempt. Only record a terminal failure:
      // while a retry is still pending the task must not be marked as failed.
      const maxAttempts = job.opts?.attempts ?? 1;
      const retryPending = job.finishedOn === undefined && job.attemptsMade < maxAttempts;
      if (retryPending) {
        return;
      }
      const { taskId } = job.data;
      void this.handleTaskFailure(taskId, err).catch((handlerError: unknown) => {
        this.logger.error(
          `Failed to record failure of task ${taskId}: ${this.formatError(handlerError)}`
        );
      });
    });

    this.worker.on("completed", (job) => {
      const { taskId } = job.data;
      void this.handleTaskCompletion(taskId).catch((handlerError: unknown) => {
        this.logger.error(
          `Failed to record completion of task ${taskId}: ${this.formatError(handlerError)}`
        );
      });
    });
  }

  /**
   * Closes the worker first, then the queue.
   */
  async onModuleDestroy(): Promise<void> {
    // Optional chaining: if initialization never completed there is nothing to close.
    await this.worker?.close();
    await this.queue?.close();
  }

  /**
   * Add task to queue
   *
   * @param taskId - Identifier of the task; used as the BullMQ job name.
   * @param context - Task context stored in the job payload and used to create
   *   the Valkey task state when none exists yet.
   * @param options - Optional priority (1-10, default 5), maxRetries (default from
   *   configuration) and delay (default 0).
   * @throws Error when priority is not an integer between 1 and 10 or maxRetries
   *   is negative.
   */
  async addTask(taskId: string, context: TaskContext, options?: AddTaskOptions): Promise<void> {
    // Validate options
    const priority = options?.priority ?? 5;
    const maxRetries = options?.maxRetries ?? this.retryConfig.maxRetries;
    const delay = options?.delay ?? 0;

    // Number.isInteger also rejects NaN, which would slip through range comparisons.
    if (!Number.isInteger(priority) || priority < 1 || priority > 10) {
      throw new Error("Priority must be between 1 and 10");
    }
    if (Number.isNaN(maxRetries) || maxRetries < 0) {
      throw new Error("maxRetries must be non-negative");
    }

    const queuedTask: QueuedTask = {
      taskId,
      priority,
      retries: 0,
      maxRetries,
      context,
    };

    // Ensure task state exists before queue lifecycle updates.
    const getTaskState = (this.valkeyService as Partial<ValkeyService>).getTaskState;
    const createTask = (this.valkeyService as Partial<ValkeyService>).createTask;
    if (typeof getTaskState === "function" && typeof createTask === "function") {
      const existingTask = await getTaskState.call(this.valkeyService, taskId);
      if (!existingTask) {
        await createTask.call(this.valkeyService, taskId, context);
      }
    }

    // Update task state in Valkey BEFORE enqueueing: once the job is in the queue the
    // worker may pick it up immediately, and a late "pending" write would overwrite
    // the "executing"/"completed" status written by the worker side.
    await this.valkeyService.updateTaskStatus(taskId, "pending");

    // Add to BullMQ queue
    await this.queue.add(taskId, queuedTask, {
      priority: 10 - priority + 1, // BullMQ: lower number = higher priority, so invert
      attempts: maxRetries + 1, // +1 for initial attempt
      backoff: {
        type: "custom", // resolved by the worker's backoffStrategy
      },
      delay,
    });

    // Publish event
    await this.valkeyService.publishEvent({
      type: "task.queued",
      timestamp: new Date().toISOString(),
      taskId,
      data: { priority },
    });
  }

  /**
   * Get queue statistics
   *
   * @returns Job counts per state; `pending` is the sum of waiting and prioritized jobs.
   */
  async getStats(): Promise<QueueStats> {
    const counts = await this.queue.getJobCounts(
      "waiting",
      "prioritized",
      "active",
      "completed",
      "failed",
      "delayed"
    );
    return {
      // Every job is added with a priority, and BullMQ tracks prioritized jobs in
      // their own state, so both states count as pending.
      pending: (counts.waiting ?? 0) + (counts.prioritized ?? 0),
      active: counts.active ?? 0,
      completed: counts.completed ?? 0,
      failed: counts.failed ?? 0,
      delayed: counts.delayed ?? 0,
    };
  }

  /**
   * Calculate exponential backoff delay
   *
   * @param attemptNumber - Exponent applied to the base of 2.
   * @param baseDelay - Delay multiplied by `2 ** attemptNumber`.
   * @param maxDelay - Upper bound for the returned delay.
   * @returns `min(baseDelay * 2 ** attemptNumber, maxDelay)`.
   */
  calculateBackoffDelay(attemptNumber: number, baseDelay: number, maxDelay: number): number {
    const delay = baseDelay * Math.pow(2, attemptNumber);
    return Math.min(delay, maxDelay);
  }

  /**
   * Pause queue processing
   */
  async pause(): Promise<void> {
    await this.queue.pause();
  }

  /**
   * Resume queue processing
   */
  async resume(): Promise<void> {
    await this.queue.resume();
  }

  /**
   * Remove task from queue
   *
   * Looks the job up by id first. Because `addTask` uses the task id as the job
   * *name* (the job id is generated by BullMQ), a miss falls back to scanning the
   * not-yet-started jobs for a payload with a matching `taskId`.
   */
  async removeTask(taskId: string): Promise<void> {
    const directMatch = await this.queue.getJob(taskId);
    if (directMatch) {
      await directMatch.remove();
      return;
    }

    const pendingJobs = await this.queue.getJobs(["waiting", "prioritized", "delayed", "paused"]);
    for (const pendingJob of pendingJobs) {
      if (pendingJob?.data?.taskId === taskId) {
        await pendingJob.remove();
      }
    }
  }

  /**
   * Process task (called by worker)
   *
   * Moves the matching agent session (if any) to running, marks the task as
   * executing and publishes `task.executing`. On error, publishes `task.retry`
   * when attempts remain and rethrows so BullMQ handles the retry/failure.
   */
  private async processTask(job: Job<QueuedTask>): Promise<TaskProcessingResult> {
    const { taskId } = job.data;
    try {
      const session = this.spawnerService?.findAgentSessionByTaskId(taskId);
      const agentId = session?.agentId;
      if (agentId) {
        if (this.lifecycleService) {
          await this.lifecycleService.transitionToRunning(agentId);
        }
        this.spawnerService?.setSessionState(agentId, "running");
      }

      // Update task state to executing
      await this.valkeyService.updateTaskStatus(taskId, "executing", agentId);

      // Publish event
      await this.valkeyService.publishEvent({
        type: "task.executing",
        timestamp: new Date().toISOString(),
        taskId,
        agentId,
        data: {
          attempt: job.attemptsMade + 1,
          dispatchedByQueue: true,
        },
      });

      return {
        success: true,
        metadata: {
          attempt: job.attemptsMade + 1,
          ...(agentId && { agentId }),
        },
      };
    } catch (error) {
      // Handle retry logic
      const shouldRetry = job.attemptsMade < job.data.maxRetries;
      if (shouldRetry) {
        // Calculate backoff delay for next retry
        const delay = this.calculateBackoffDelay(
          job.attemptsMade + 1,
          this.retryConfig.baseDelay,
          this.retryConfig.maxDelay
        );
        try {
          // BullMQ will automatically retry with the backoff
          await job.updateData({
            ...job.data,
            retries: job.attemptsMade + 1,
          });
          await this.valkeyService.publishEvent({
            type: "task.retry",
            timestamp: new Date().toISOString(),
            taskId,
            data: {
              attempt: job.attemptsMade + 1,
              nextDelay: delay,
            },
          });
        } catch (bookkeepingError) {
          // Retry bookkeeping is best-effort; it must not replace the original
          // processing error that is rethrown below.
          this.logger.warn(
            `Failed to record retry for task ${taskId}: ${this.formatError(bookkeepingError)}`
          );
        }
      }
      throw error;
    }
  }

  /**
   * Handle task failure
   *
   * Marks the agent session (if any) and the Valkey task state as failed and
   * publishes `task.failed` with the error message.
   */
  private async handleTaskFailure(taskId: string, error: Error): Promise<void> {
    const session = this.spawnerService?.findAgentSessionByTaskId(taskId);
    if (session) {
      this.spawnerService?.setSessionState(session.agentId, "failed", error.message, new Date());
      if (this.lifecycleService) {
        await this.lifecycleService.transitionToFailed(session.agentId, error.message);
      }
    }
    await this.valkeyService.updateTaskStatus(taskId, "failed", undefined, error.message);
    await this.valkeyService.publishEvent({
      type: "task.failed",
      timestamp: new Date().toISOString(),
      taskId,
      error: error.message,
    });
  }

  /**
   * Handle task completion
   *
   * Marks the agent session (if any) and the Valkey task state as completed and
   * publishes `task.completed`. A missing session is logged as a warning only.
   */
  private async handleTaskCompletion(taskId: string): Promise<void> {
    const session = this.spawnerService?.findAgentSessionByTaskId(taskId);
    if (session) {
      this.spawnerService?.setSessionState(session.agentId, "completed", undefined, new Date());
      if (this.lifecycleService) {
        await this.lifecycleService.transitionToCompleted(session.agentId);
      }
    } else {
      this.logger.warn(
        `Queue completed task ${taskId} but no session was found; using queue-only completion state`
      );
    }
    await this.valkeyService.updateTaskStatus(taskId, "completed");
    await this.valkeyService.publishEvent({
      type: "task.completed",
      timestamp: new Date().toISOString(),
      taskId,
      ...(session && { agentId: session.agentId }),
    });
  }

  /**
   * Renders an unknown thrown value as a log-friendly string.
   */
  private formatError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}