/// Token budget for a single task: the allocation granted to an agent plus the
/// usage recorded against it. `taskId` is unique, so there is at most one
/// budget row per task.
model TokenBudget {
  id          String @id @default(uuid()) @db.Uuid
  taskId      String @unique @map("task_id") @db.Uuid
  workspaceId String @map("workspace_id") @db.Uuid
  agentId     String @map("agent_id")

  // Budget allocation
  allocatedTokens     Int    @map("allocated_tokens")
  estimatedComplexity String @map("estimated_complexity") // "low", "medium", "high", "critical"

  // Usage tracking (running totals; totalTokensUsed = input + output)
  inputTokensUsed  Int @default(0) @map("input_tokens_used")
  outputTokensUsed Int @default(0) @map("output_tokens_used")
  totalTokensUsed  Int @default(0) @map("total_tokens_used")

  // Cost tracking
  estimatedCost Decimal? @map("estimated_cost") @db.Decimal(10, 6)

  // State
  startedAt     DateTime  @default(now()) @map("started_at") @db.Timestamptz
  lastUpdatedAt DateTime  @updatedAt @map("last_updated_at") @db.Timestamptz
  completedAt   DateTime? @map("completed_at") @db.Timestamptz

  // Analysis
  budgetUtilization Float?  @map("budget_utilization") // 0.0 - 1.0
  suspiciousPattern Boolean @default(false) @map("suspicious_pattern")
  suspiciousReason  String? @map("suspicious_reason")

  // No separate @@index([taskId]): the @unique attribute on taskId already
  // creates a unique index, so a second index on the same column only adds
  // write overhead.
  // NOTE(review): if a migration containing the extra index was already
  // generated, regenerate it after this change.
  @@index([workspaceId])
  @@index([suspiciousPattern])
  @@map("token_budgets")
}
"../token-budget/token-budget.module"; /** * Quality Orchestrator Module * Provides quality enforcement for AI agent task completions */ @Module({ + imports: [TokenBudgetModule], providers: [QualityOrchestratorService], exports: [QualityOrchestratorService], }) diff --git a/apps/api/src/quality-orchestrator/quality-orchestrator.service.spec.ts b/apps/api/src/quality-orchestrator/quality-orchestrator.service.spec.ts index a0027b4..af3c518 100644 --- a/apps/api/src/quality-orchestrator/quality-orchestrator.service.spec.ts +++ b/apps/api/src/quality-orchestrator/quality-orchestrator.service.spec.ts @@ -1,6 +1,7 @@ -import { describe, it, expect, beforeEach } from "vitest"; +import { describe, it, expect, beforeEach, vi } from "vitest"; import { Test, TestingModule } from "@nestjs/testing"; import { QualityOrchestratorService } from "./quality-orchestrator.service"; +import { TokenBudgetService } from "../token-budget/token-budget.service"; import type { QualityGate, CompletionClaim, @@ -17,7 +18,15 @@ describe("QualityOrchestratorService", () => { beforeEach(async () => { const module: TestingModule = await Test.createTestingModule({ - providers: [QualityOrchestratorService], + providers: [ + QualityOrchestratorService, + { + provide: TokenBudgetService, + useValue: { + checkSuspiciousDoneClaim: vi.fn().mockResolvedValue({ suspicious: false }), + }, + }, + ], }).compile(); service = module.get(QualityOrchestratorService); diff --git a/apps/api/src/quality-orchestrator/quality-orchestrator.service.ts b/apps/api/src/quality-orchestrator/quality-orchestrator.service.ts index 64f4940..bce70d4 100644 --- a/apps/api/src/quality-orchestrator/quality-orchestrator.service.ts +++ b/apps/api/src/quality-orchestrator/quality-orchestrator.service.ts @@ -8,6 +8,7 @@ import type { CompletionValidation, OrchestrationConfig, } from "./interfaces"; +import { TokenBudgetService } from "../token-budget/token-budget.service"; const execAsync = promisify(exec); @@ -62,6 +63,8 @@ const 
DEFAULT_GATES: QualityGate[] = [ export class QualityOrchestratorService { private readonly logger = new Logger(QualityOrchestratorService.name); + constructor(private readonly tokenBudgetService: TokenBudgetService) {} + /** * Validate a completion claim against quality gates */ @@ -93,10 +96,27 @@ export class QualityOrchestratorService { return gate?.required ?? false; }); + // Check token budget for suspicious patterns + let budgetCheck: { suspicious: boolean; reason?: string } | null = null; + try { + budgetCheck = await this.tokenBudgetService.checkSuspiciousDoneClaim(claim.taskId); + } catch { + // Token budget not found - not an error, just means tracking wasn't enabled + this.logger.debug(`No token budget found for task ${claim.taskId}`); + } + // Determine verdict let verdict: "accepted" | "rejected" | "needs-continuation"; if (allGatesPassed) { - verdict = "accepted"; + // Even if all gates passed, check for suspicious budget patterns + if (budgetCheck?.suspicious) { + verdict = "needs-continuation"; + this.logger.warn( + `Suspicious budget pattern detected for task ${claim.taskId}: ${budgetCheck.reason ?? 
"unknown reason"}` + ); + } else { + verdict = "accepted"; + } } else if (requiredGatesFailed.length > 0) { verdict = "rejected"; } else if (config.strictMode) { @@ -117,6 +137,14 @@ export class QualityOrchestratorService { if (verdict !== "accepted") { result.feedback = this.generateRejectionFeedback(result); result.suggestedActions = this.generateSuggestedActions(gateResults, config); + + // Add budget feedback if suspicious pattern detected + if (budgetCheck?.suspicious && budgetCheck.reason) { + result.feedback += `\n\nToken budget analysis: ${budgetCheck.reason}`; + result.suggestedActions.push( + "Review task completion - significant budget remains or suspicious usage pattern detected" + ); + } } return result; diff --git a/apps/api/src/token-budget/dto/allocate-budget.dto.ts b/apps/api/src/token-budget/dto/allocate-budget.dto.ts new file mode 100644 index 0000000..baa19cc --- /dev/null +++ b/apps/api/src/token-budget/dto/allocate-budget.dto.ts @@ -0,0 +1,25 @@ +import { IsString, IsUUID, IsInt, IsIn, Min } from "class-validator"; +import type { TaskComplexity } from "../interfaces"; + +/** + * DTO for allocating a token budget for a task + */ +export class AllocateBudgetDto { + @IsUUID("4", { message: "taskId must be a valid UUID" }) + taskId!: string; + + @IsUUID("4", { message: "workspaceId must be a valid UUID" }) + workspaceId!: string; + + @IsString({ message: "agentId must be a string" }) + agentId!: string; + + @IsIn(["low", "medium", "high", "critical"], { + message: "complexity must be one of: low, medium, high, critical", + }) + complexity!: TaskComplexity; + + @IsInt({ message: "allocatedTokens must be an integer" }) + @Min(1, { message: "allocatedTokens must be at least 1" }) + allocatedTokens!: number; +} diff --git a/apps/api/src/token-budget/dto/budget-analysis.dto.ts b/apps/api/src/token-budget/dto/budget-analysis.dto.ts new file mode 100644 index 0000000..5bc7b8d --- /dev/null +++ b/apps/api/src/token-budget/dto/budget-analysis.dto.ts @@ 
/**
 * Value object carrying the outcome of a token-budget analysis for one task.
 *
 * The constructor copies every field from the supplied object one-to-one;
 * no validation or derivation happens here.
 */
export class BudgetAnalysisDto {
  taskId: string;
  allocatedTokens: number;
  usedTokens: number;
  remainingTokens: number;
  utilizationPercentage: number;
  suspiciousPattern: boolean;
  suspiciousReason: string | null;
  recommendation: "accept" | "continue" | "review";

  /**
   * @param data - Fully computed analysis values to store on the DTO.
   */
  constructor({
    taskId,
    allocatedTokens,
    usedTokens,
    remainingTokens,
    utilizationPercentage,
    suspiciousPattern,
    suspiciousReason,
    recommendation,
  }: {
    taskId: string;
    allocatedTokens: number;
    usedTokens: number;
    remainingTokens: number;
    utilizationPercentage: number;
    suspiciousPattern: boolean;
    suspiciousReason: string | null;
    recommendation: "accept" | "continue" | "review";
  }) {
    // Assignment order mirrors the declaration order so that property
    // enumeration order on the instance stays the same.
    this.taskId = taskId;
    this.allocatedTokens = allocatedTokens;
    this.usedTokens = usedTokens;
    this.remainingTokens = remainingTokens;
    this.utilizationPercentage = utilizationPercentage;
    this.suspiciousPattern = suspiciousPattern;
    this.suspiciousReason = suspiciousReason;
    this.recommendation = recommendation;
  }
}
/**
 * Task complexity levels for budget allocation
 */
export type TaskComplexity = "low" | "medium" | "high" | "critical";

/**
 * Token budget data structure
 */
export interface TokenBudgetData {
  id: string;
  taskId: string;
  workspaceId: string;
  agentId: string;
  allocatedTokens: number;
  estimatedComplexity: TaskComplexity;
  inputTokensUsed: number;
  outputTokensUsed: number;
  totalTokensUsed: number;
  estimatedCost: number | null;
  startedAt: Date;
  lastUpdatedAt: Date;
  completedAt: Date | null;
  /** Fraction of the allocation consumed (see schema comment: 0.0 - 1.0). */
  budgetUtilization: number | null;
  suspiciousPattern: boolean;
  suspiciousReason: string | null;
}

/**
 * Budget analysis result
 */
export interface BudgetAnalysis {
  taskId: string;
  allocatedTokens: number;
  usedTokens: number;
  remainingTokens: number;
  /** Utilization expressed as a percentage (used / allocated * 100). */
  utilizationPercentage: number;
  suspiciousPattern: boolean;
  suspiciousReason: string | null;
  recommendation: "accept" | "continue" | "review";
}

/**
 * Suspicious pattern detection result
 */
export interface SuspiciousPattern {
  triggered: boolean;
  reason?: string;
  severity: "low" | "medium" | "high";
  recommendation: "accept" | "continue" | "review";
}

/**
 * Complexity-based budget allocation (tokens per task).
 *
 * Typed as a full `Record<TaskComplexity, number>` so that adding a new
 * complexity level without a budget is a compile error.
 */
export const COMPLEXITY_BUDGETS: Readonly<Record<TaskComplexity, number>> = {
  low: 50000, // Simple fixes, typos
  medium: 150000, // Standard features
  high: 350000, // Complex features
  critical: 750000, // Major refactoring
};

/**
 * Token budget thresholds for suspicious pattern detection.
 * All values are fractions of the allocated budget.
 */
export const BUDGET_THRESHOLDS = {
  SUSPICIOUS_REMAINING: 0.2, // more than 20% of the budget still unused = suspicious
  VERY_LOW_UTILIZATION: 0.1, // less than 10% of the budget used = suspicious
  VERY_HIGH_UTILIZATION: 0.95, // more than 95% of the budget used = flagged for review
} as const;
+describe("TokenBudgetService", () => { + let service: TokenBudgetService; + let prisma: PrismaService; + + const mockPrismaService = { + tokenBudget: { + create: vi.fn(), + findUnique: vi.fn(), + update: vi.fn(), + }, + }; + + const mockWorkspaceId = "550e8400-e29b-41d4-a716-446655440001"; + const mockTaskId = "550e8400-e29b-41d4-a716-446655440002"; + const mockAgentId = "test-agent-001"; + + const mockTokenBudget = { + id: "550e8400-e29b-41d4-a716-446655440003", + taskId: mockTaskId, + workspaceId: mockWorkspaceId, + agentId: mockAgentId, + allocatedTokens: 150000, + estimatedComplexity: "medium" as TaskComplexity, + inputTokensUsed: 50000, + outputTokensUsed: 30000, + totalTokensUsed: 80000, + estimatedCost: null, + startedAt: new Date("2026-01-31T10:00:00Z"), + lastUpdatedAt: new Date("2026-01-31T10:30:00Z"), + completedAt: null, + budgetUtilization: 0.533, + suspiciousPattern: false, + suspiciousReason: null, + }; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + providers: [ + TokenBudgetService, + { + provide: PrismaService, + useValue: mockPrismaService, + }, + ], + }).compile(); + + service = module.get(TokenBudgetService); + prisma = module.get(PrismaService); + + vi.clearAllMocks(); + }); + + it("should be defined", () => { + expect(service).toBeDefined(); + }); + + describe("allocateBudget", () => { + it("should allocate budget for a new task", async () => { + const allocateDto = { + taskId: mockTaskId, + workspaceId: mockWorkspaceId, + agentId: mockAgentId, + complexity: "medium" as TaskComplexity, + allocatedTokens: 150000, + }; + + mockPrismaService.tokenBudget.create.mockResolvedValue(mockTokenBudget); + + const result = await service.allocateBudget(allocateDto); + + expect(result).toEqual(mockTokenBudget); + expect(mockPrismaService.tokenBudget.create).toHaveBeenCalledWith({ + data: { + taskId: allocateDto.taskId, + workspaceId: allocateDto.workspaceId, + agentId: allocateDto.agentId, + allocatedTokens: 
allocateDto.allocatedTokens, + estimatedComplexity: allocateDto.complexity, + }, + }); + }); + }); + + describe("updateUsage", () => { + it("should update token usage and recalculate utilization", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget); + + const updatedBudget = { + ...mockTokenBudget, + inputTokensUsed: 60000, + outputTokensUsed: 40000, + totalTokensUsed: 100000, + budgetUtilization: 0.667, + }; + + mockPrismaService.tokenBudget.update.mockResolvedValue(updatedBudget); + + const result = await service.updateUsage(mockTaskId, 10000, 10000); + + expect(result).toEqual(updatedBudget); + expect(mockPrismaService.tokenBudget.findUnique).toHaveBeenCalledWith({ + where: { taskId: mockTaskId }, + }); + expect(mockPrismaService.tokenBudget.update).toHaveBeenCalledWith({ + where: { taskId: mockTaskId }, + data: { + inputTokensUsed: 60000, + outputTokensUsed: 40000, + totalTokensUsed: 100000, + budgetUtilization: expect.closeTo(0.667, 2), + }, + }); + }); + + it("should throw NotFoundException if budget does not exist", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null); + + await expect(service.updateUsage(mockTaskId, 1000, 1000)).rejects.toThrow(NotFoundException); + }); + }); + + describe("analyzeBudget", () => { + it("should analyze budget and detect suspicious pattern for high remaining budget", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget); + + const result = await service.analyzeBudget(mockTaskId); + + expect(result.taskId).toBe(mockTaskId); + expect(result.allocatedTokens).toBe(150000); + expect(result.usedTokens).toBe(80000); + expect(result.remainingTokens).toBe(70000); + expect(result.utilizationPercentage).toBeCloseTo(53.3, 1); + // 46.7% remaining is suspicious (>20% threshold) + expect(result.suspiciousPattern).toBe(true); + expect(result.recommendation).toBe("review"); + }); + + it("should not detect suspicious pattern when 
utilization is high", async () => { + // 85% utilization (15% remaining - below 20% threshold) + const highUtilizationBudget = { + ...mockTokenBudget, + inputTokensUsed: 65000, + outputTokensUsed: 62500, + totalTokensUsed: 127500, + budgetUtilization: 0.85, + }; + + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(highUtilizationBudget); + + const result = await service.analyzeBudget(mockTaskId); + + expect(result.utilizationPercentage).toBeCloseTo(85.0, 1); + expect(result.suspiciousPattern).toBe(false); + expect(result.recommendation).toBe("accept"); + }); + + it("should throw NotFoundException if budget does not exist", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null); + + await expect(service.analyzeBudget(mockTaskId)).rejects.toThrow(NotFoundException); + }); + }); + + describe("checkSuspiciousDoneClaim", () => { + it("should detect suspicious pattern when >20% budget remaining", async () => { + // 30% budget remaining + const budgetWithRemaining = { + ...mockTokenBudget, + inputTokensUsed: 50000, + outputTokensUsed: 55000, + totalTokensUsed: 105000, + budgetUtilization: 0.7, + }; + + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetWithRemaining); + + const result = await service.checkSuspiciousDoneClaim(mockTaskId); + + expect(result.suspicious).toBe(true); + expect(result.reason).toContain("30.0%"); + }); + + it("should not flag as suspicious when <20% budget remaining", async () => { + // 10% budget remaining + const budgetNearlyDone = { + ...mockTokenBudget, + inputTokensUsed: 70000, + outputTokensUsed: 65000, + totalTokensUsed: 135000, + budgetUtilization: 0.9, + }; + + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetNearlyDone); + + const result = await service.checkSuspiciousDoneClaim(mockTaskId); + + expect(result.suspicious).toBe(false); + expect(result.reason).toBeUndefined(); + }); + + it("should detect very low utilization (<10%)", async () => { + // 5% utilization + const 
budgetVeryLowUsage = { + ...mockTokenBudget, + inputTokensUsed: 4000, + outputTokensUsed: 3500, + totalTokensUsed: 7500, + budgetUtilization: 0.05, + }; + + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetVeryLowUsage); + + const result = await service.checkSuspiciousDoneClaim(mockTaskId); + + expect(result.suspicious).toBe(true); + expect(result.reason).toContain("Very low budget utilization"); + }); + }); + + describe("getBudgetUtilization", () => { + it("should return budget utilization percentage", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget); + + const result = await service.getBudgetUtilization(mockTaskId); + + expect(result).toBeCloseTo(53.3, 1); + }); + + it("should throw NotFoundException if budget does not exist", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null); + + await expect(service.getBudgetUtilization(mockTaskId)).rejects.toThrow(NotFoundException); + }); + }); + + describe("markCompleted", () => { + it("should mark budget as completed", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget); + + const completedBudget = { + ...mockTokenBudget, + completedAt: new Date("2026-01-31T11:00:00Z"), + }; + + mockPrismaService.tokenBudget.update.mockResolvedValue(completedBudget); + + await service.markCompleted(mockTaskId); + + expect(mockPrismaService.tokenBudget.update).toHaveBeenCalledWith({ + where: { taskId: mockTaskId }, + data: { + completedAt: expect.any(Date), + }, + }); + }); + + it("should throw NotFoundException if budget does not exist", async () => { + mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null); + + await expect(service.markCompleted(mockTaskId)).rejects.toThrow(NotFoundException); + }); + }); + + describe("getDefaultBudgetForComplexity", () => { + it("should return correct budget for low complexity", () => { + const result = service.getDefaultBudgetForComplexity("low"); + 
import { Injectable, Logger, NotFoundException } from "@nestjs/common";
import { PrismaService } from "../prisma/prisma.service";
import type { TokenBudget } from "@prisma/client";
import type { TaskComplexity, BudgetAnalysis, SuspiciousPattern } from "./interfaces";
import { COMPLEXITY_BUDGETS, BUDGET_THRESHOLDS } from "./interfaces";
import type { AllocateBudgetDto } from "./dto";
import { BudgetAnalysisDto } from "./dto";

/**
 * Token Budget Service
 * Tracks token usage and prevents premature done claims with significant budget remaining
 */
@Injectable()
export class TokenBudgetService {
  private readonly logger = new Logger(TokenBudgetService.name);

  constructor(private readonly prisma: PrismaService) {}

  /**
   * Allocate budget for a new task.
   *
   * @param dto - Task, workspace and agent identifiers plus the allocation.
   * @returns The created budget row.
   */
  async allocateBudget(dto: AllocateBudgetDto): Promise<TokenBudget> {
    this.logger.log(`Allocating ${String(dto.allocatedTokens)} tokens for task ${dto.taskId}`);

    return this.prisma.tokenBudget.create({
      data: {
        taskId: dto.taskId,
        workspaceId: dto.workspaceId,
        agentId: dto.agentId,
        allocatedTokens: dto.allocatedTokens,
        estimatedComplexity: dto.complexity,
      },
    });
  }

  /**
   * Update usage after agent response.
   *
   * Adds the given token counts to the stored running totals and recomputes
   * the utilization fraction.
   *
   * @param taskId - Task whose budget is updated.
   * @param inputTokens - Input tokens consumed by the latest response.
   * @param outputTokens - Output tokens produced by the latest response.
   * @returns The updated budget row.
   * @throws NotFoundException if no budget exists for the task.
   */
  async updateUsage(
    taskId: string,
    inputTokens: number,
    outputTokens: number
  ): Promise<TokenBudget> {
    this.logger.debug(
      `Updating usage for task ${taskId}: +${String(inputTokens)} input, +${String(outputTokens)} output`
    );

    const budget = await this.getBudgetOrThrow(taskId);

    // NOTE(review): this is a read-then-write sequence. Two concurrent updates
    // for the same task can both read the same totals and one increment is
    // lost. If usage can be reported concurrently, consider Prisma's atomic
    // `increment` operation or a transaction.
    const newInputTokens = budget.inputTokensUsed + inputTokens;
    const newOutputTokens = budget.outputTokensUsed + outputTokens;
    const newTotalTokens = newInputTokens + newOutputTokens;

    const utilization = this.utilizationOf({
      totalTokensUsed: newTotalTokens,
      allocatedTokens: budget.allocatedTokens,
    });

    return this.prisma.tokenBudget.update({
      where: { taskId },
      data: {
        inputTokensUsed: newInputTokens,
        outputTokensUsed: newOutputTokens,
        totalTokensUsed: newTotalTokens,
        budgetUtilization: utilization,
      },
    });
  }

  /**
   * Analyze budget for suspicious patterns.
   *
   * @param taskId - Task whose budget is analyzed.
   * @returns Usage figures, whether any pattern triggered, and the
   *          recommendation attached to that pattern ("accept" when none).
   * @throws NotFoundException if no budget exists for the task.
   */
  async analyzeBudget(taskId: string): Promise<BudgetAnalysis> {
    this.logger.debug(`Analyzing budget for task ${taskId}`);

    const budget = await this.getBudgetOrThrow(taskId);

    const usedTokens = budget.totalTokensUsed;
    const allocatedTokens = budget.allocatedTokens;
    // May be negative when the task has used more than its allocation.
    const remainingTokens = allocatedTokens - usedTokens;
    const utilizationPercentage = this.utilizationOf(budget) * 100;

    const pattern = this.detectSuspiciousPattern(budget);

    return new BudgetAnalysisDto({
      taskId,
      allocatedTokens,
      usedTokens,
      remainingTokens,
      utilizationPercentage,
      suspiciousPattern: pattern.triggered,
      suspiciousReason: pattern.reason ?? null,
      // Each pattern carries its own recommendation:
      // high severity -> "continue", other triggered -> "review", none -> "accept".
      recommendation: pattern.recommendation,
    });
  }

  /**
   * Check if done claim is suspicious (>20% budget remaining, or <10% used).
   *
   * Only the premature-completion patterns are reported here. Very high
   * utilization is deliberately NOT treated as a suspicious done claim:
   * usage only ever grows, so a task above that threshold would otherwise be
   * flagged on every subsequent claim and could never be accepted.
   *
   * @param taskId - Task that claimed completion.
   * @returns `suspicious: true` with a human-readable reason when the claim
   *          looks premature, otherwise `suspicious: false`.
   * @throws NotFoundException if no budget exists for the task.
   */
  async checkSuspiciousDoneClaim(
    taskId: string
  ): Promise<{ suspicious: boolean; reason?: string }> {
    this.logger.debug(`Checking done claim for task ${taskId}`);

    const budget = await this.getBudgetOrThrow(taskId);
    const pattern = this.detectPrematureCompletion(budget);

    if (!pattern) {
      return { suspicious: false };
    }

    // Omit the key entirely (rather than setting it to undefined) when there
    // is no reason text.
    return pattern.reason === undefined
      ? { suspicious: true }
      : { suspicious: true, reason: pattern.reason };
  }

  /**
   * Get budget utilization percentage.
   *
   * @param taskId - Task whose utilization is requested.
   * @returns Used tokens as a percentage of allocated tokens.
   * @throws NotFoundException if no budget exists for the task.
   */
  async getBudgetUtilization(taskId: string): Promise<number> {
    const budget = await this.getBudgetOrThrow(taskId);

    return this.utilizationOf(budget) * 100;
  }

  /**
   * Mark task as completed by stamping `completedAt` with the current time.
   *
   * @param taskId - Task whose budget is closed.
   * @throws NotFoundException if no budget exists for the task.
   */
  async markCompleted(taskId: string): Promise<void> {
    this.logger.log(`Marking budget as completed for task ${taskId}`);

    await this.getBudgetOrThrow(taskId);

    await this.prisma.tokenBudget.update({
      where: { taskId },
      data: {
        completedAt: new Date(),
      },
    });
  }

  /**
   * Get complexity-based budget allocation
   */
  getDefaultBudgetForComplexity(complexity: TaskComplexity): number {
    return COMPLEXITY_BUDGETS[complexity];
  }

  /**
   * Load the budget row for a task or fail with a 404-style error.
   *
   * @throws NotFoundException if no budget exists for the task.
   */
  private async getBudgetOrThrow(taskId: string): Promise<TokenBudget> {
    const budget = await this.prisma.tokenBudget.findUnique({
      where: { taskId },
    });

    if (!budget) {
      throw new NotFoundException(`Token budget not found for task ${taskId}`);
    }

    return budget;
  }

  /**
   * Fraction of the allocation that has been consumed.
   *
   * A non-positive allocation has nothing left to spend, so it is reported as
   * fully utilized (1) instead of producing NaN/Infinity from a division by
   * zero, which would poison comparisons and the stored utilization value.
   */
  private utilizationOf(budget: Pick<TokenBudget, "totalTokensUsed" | "allocatedTokens">): number {
    if (budget.allocatedTokens <= 0) {
      return 1;
    }

    return budget.totalTokensUsed / budget.allocatedTokens;
  }

  /**
   * Detect the patterns that suggest a task stopped too early.
   *
   * @returns The matching pattern, or `undefined` when enough of the budget
   *          has been used.
   */
  private detectPrematureCompletion(budget: TokenBudget): SuspiciousPattern | undefined {
    const utilization = this.utilizationOf(budget);

    // Pattern 1: Very low utilization (<10%)
    if (utilization < BUDGET_THRESHOLDS.VERY_LOW_UTILIZATION) {
      return {
        triggered: true,
        reason: `Very low budget utilization (${(utilization * 100).toFixed(1)}%). This suggests minimal work was performed.`,
        severity: "high",
        recommendation: "continue",
      };
    }

    // Pattern 2: Done claimed with >20% budget remaining
    if (utilization < 1 - BUDGET_THRESHOLDS.SUSPICIOUS_REMAINING) {
      const remainingPercentage = (1 - utilization) * 100;
      return {
        triggered: true,
        reason: `Task claimed done with ${remainingPercentage.toFixed(1)}% budget remaining (${String(budget.allocatedTokens - budget.totalTokensUsed)} tokens). This may indicate premature completion.`,
        severity: "medium",
        recommendation: "review",
      };
    }

    return undefined;
  }

  /**
   * Detect suspicious patterns in budget usage.
   *
   * Checks the premature-completion patterns first, then very high
   * utilization; the first match wins.
   */
  private detectSuspiciousPattern(budget: TokenBudget): SuspiciousPattern {
    const premature = this.detectPrematureCompletion(budget);
    if (premature) {
      return premature;
    }

    const utilization = this.utilizationOf(budget);

    // Pattern 3: Extremely high utilization (>95%) - might indicate inefficiency
    if (utilization > BUDGET_THRESHOLDS.VERY_HIGH_UTILIZATION) {
      return {
        triggered: true,
        reason: `Very high budget utilization (${(utilization * 100).toFixed(1)}%). Task may need more budget or review for efficiency.`,
        severity: "low",
        recommendation: "review",
      };
    }

    return {
      triggered: false,
      severity: "low",
      recommendation: "accept",
    };
  }
}
| +| -------------------------- | ------------------------------------------- | ---------------------------------------- | ------------------ | +| **Primary Problem** | Agents claim "done" prematurely | Agents pause at 95% context | Different | +| **Coordinator Type** | Non-AI (TypeScript/NestJS) | Non-AI (Python/Node.js) | ✅ Overlap | +| **Quality Gates** | BuildGate, LintGate, TestGate, CoverageGate | Mechanical gates (lint, typecheck, test) | ✅ Overlap | +| **Agent Scope** | Single agent per issue | Multi-agent orchestration | Different | +| **Context Management** | Not addressed | Core feature (80% compact, 95% rotate) | Different | +| **Model Assignment** | Not addressed | Agent profiles + difficulty matching | Different | +| **Issue Sizing** | Not addressed | 50% rule, epic decomposition | Different | +| **Implementation Status** | Full TypeScript code | Python pseudocode + PoC plan | Different | +| **Forced Continuation** | Yes (rejection loop) | No (preventive via context mgmt) | Different approach | +| **Non-negotiable Quality** | Yes | Yes | ✅ Overlap | + +--- + +## Unique to Document A (Existing Mosaic Stack Pattern) + +### 1. **Premature Completion Problem** + +- **Problem:** Agents claim work is "done" when tests fail, files are missing, or requirements are incomplete +- **Root cause:** Agent interprets partial completion as success +- **Example:** Agent implements feature, tests fail, agent says "done" anyway + +### 2. **Rejection Loop & Forced Continuation** + +```typescript +// CompletionVerificationEngine +if (!allGatesPassed) { + return this.forcedContinuationService.generateContinuationPrompt({ + failedGates, + tone: "non-negotiable", + }); +} +``` + +**Key innovation:** When agent claims "done" but gates fail, coordinator injects prompt forcing continuation: + +``` +COMPLETION REJECTED. 
The following quality gates have failed: +- Build Gate: Compilation errors detected +- Test Gate: 3/15 tests failing + +You must continue working until ALL quality gates pass. +This is not optional. Do not claim completion until gates pass. +``` + +### 3. **State Machine for Completion Claims** + +``` +Agent Working → Claims Done → Run Gates → Pass/Reject + ↓ + Reject → Force Continue → Agent Working +``` + +### 4. **TypeScript/NestJS Implementation** + +- Full production-ready service code +- QualityOrchestrator service +- Gate interfaces and implementations +- Dependency injection architecture + +### 5. **CompletionVerificationEngine** + +- Intercepts agent completion claims +- Runs all gates synchronously +- Blocks "done" status until gates pass + +--- + +## Unique to Document B (New Quality-Rails Orchestration) + +### 1. **Context Exhaustion Problem** + +- **Problem:** AI orchestrators pause at 95% context usage, losing autonomy +- **Root cause:** Linear context growth without compaction +- **Example:** M4 session completed 11 issues, paused at 95%, required manual restart + +### 2. **50% Rule for Issue Sizing** + +``` +Issue context estimate MUST NOT exceed 50% of target agent's context limit. + +Example: +- Sonnet agent: 200K context limit +- Maximum issue estimate: 100K tokens +- Reasoning: Leaves 100K for system prompts, conversation, safety buffer +``` + +### 3. **Agent Profiles & Model Assignment** + +```python +AGENT_PROFILES = { + 'opus': { + 'context_limit': 200000, + 'cost_per_mtok': 15.00, + 'capabilities': ['high', 'medium', 'low'] + }, + 'sonnet': { + 'context_limit': 200000, + 'cost_per_mtok': 3.00, + 'capabilities': ['medium', 'low'] + }, + 'glm': { + 'context_limit': 128000, + 'cost_per_mtok': 0.00, # Self-hosted + 'capabilities': ['medium', 'low'] + } +} +``` + +**Assignment logic:** Choose cheapest capable agent based on: + +- Estimated context usage +- Difficulty level +- Agent capabilities + +### 4. 
**Context Monitoring & Session Rotation** + +```python +def monitor_agent_context(agent_id: str) -> ContextAction: + usage = get_context_usage(agent_id) + + if usage > 0.95: + return ContextAction.ROTATE_SESSION # Start fresh agent + elif usage > 0.80: + return ContextAction.COMPACT # Summarize completed work + else: + return ContextAction.CONTINUE # Keep working +``` + +### 5. **Context Estimation Formula** + +```python +def estimate_context(issue: Issue) -> int: + base = ( + issue.files_to_modify * 7000 + # Average file size + issue.implementation_complexity * 20000 + # Code writing + issue.test_requirements * 10000 + # Test writing + issue.documentation * 3000 # Docs + ) + + buffer = base * 1.3 # 30% safety margin + return int(buffer) +``` + +### 6. **Epic Decomposition Workflow** + +``` +User creates Epic → Coordinator analyzes scope → Decomposes into sub-issues + ↓ + Each issue ≤ 50% agent context limit + ↓ + Assigns metadata: estimated_context, difficulty +``` + +### 7. **Multi-Model Support** + +- Supports Opus, Sonnet, Haiku, GLM, MiniMax, Cogito +- Cost optimization through model selection +- Self-hosted model preference when capable + +### 8. **Proactive Context Management** + +- Prevents context exhaustion BEFORE it happens +- No manual intervention needed +- Maintains autonomy through entire queue + +--- + +## Overlaps (Both Documents) + +### 1. **Non-AI Coordinator Pattern** ✅ + +Both use deterministic code (not AI) as the orchestrator: + +- **Doc A:** TypeScript/NestJS service +- **Doc B:** Python/Node.js coordinator +- **Rationale:** Avoid AI orchestrator context limits and inconsistency + +### 2. 
**Mechanical Quality Gates** ✅ + +Both enforce quality through automated checks: + +**Doc A gates:** + +- BuildGate (compilation) +- LintGate (code style) +- TestGate (unit/integration tests) +- CoverageGate (test coverage threshold) + +**Doc B gates:** + +- lint (code quality) +- typecheck (type safety) +- test (functionality) +- coverage (same as Doc A) + +### 3. **Programmatic Enforcement** ✅ + +Both prevent agent from bypassing quality: + +- **Doc A:** Rejection loop blocks completion until gates pass +- **Doc B:** Coordinator enforces gates before allowing next issue +- **Shared principle:** Quality is a requirement, not a suggestion + +### 4. **Non-Negotiable Quality Standards** ✅ + +Both use firm language about quality requirements: + +- **Doc A:** "This is not optional. Do not claim completion until gates pass." +- **Doc B:** "Quality gates are mechanical blockers, not suggestions." + +### 5. **State Management** ✅ + +Both track work state programmatically: + +- **Doc A:** Agent state machine (working → claimed done → verified → actual done) +- **Doc B:** Issue state in tracking system (pending → in-progress → gate-check → completed) + +### 6. **Validation Before Progression** ✅ + +Both prevent moving forward with broken code: + +- **Doc A:** Cannot claim "done" until gates pass +- **Doc B:** Cannot start next issue until current issue passes gates + +--- + +## Complementary Nature + +These documents solve **different problems in the same architectural pattern**: + +### Document A (Existing): Quality Enforcement + +**Problem:** "How do we prevent an agent from claiming work is done when it's not?" +**Solution:** Rejection loop with forced continuation +**Scope:** Single agent working on single issue +**Lifecycle stage:** Task completion verification + +### Document B (New): Orchestration at Scale + +**Problem:** "How do we manage multiple agents working through dozens of issues without context exhaustion?" 
+**Solution:** Proactive context management + intelligent agent assignment +**Scope:** Multi-agent orchestration across entire milestone +**Lifecycle stage:** Agent selection, session management, queue progression + +### Together They Form: + +``` +┌─────────────────────────────────────────────────────────┐ +│ Non-AI Coordinator (Document B) │ +│ - Monitors context usage across all agents │ +│ - Assigns issues based on context estimates │ +│ - Rotates agents at 95% context │ +│ - Enforces 50% rule during issue creation │ +└─────────────────────────┬───────────────────────────────┘ + │ + ┌─────────────────┼─────────────────┐ + ▼ ▼ ▼ + Agent 1 Agent 2 Agent 3 + Issue #42 Issue #57 Issue #89 + │ │ │ + └─────────────────┴─────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────┐ + │ Quality Orchestrator (Document A) │ + │ - Intercepts completion claims │ + │ - Runs quality gates │ + │ - Forces continuation if gates fail │ + │ - Only allows "done" when gates pass │ + └─────────────────────────────────────────────────┘ +``` + +**Document B (new)** manages the **agent lifecycle and orchestration**. +**Document A (existing)** manages the **quality enforcement per agent**. 
+ +--- + +## Integration Recommendations + +### Option 1: Merge into Single Document (Recommended) + +**Reason:** They're parts of the same system + +**Structure:** + +```markdown +# Non-AI Coordinator Pattern Architecture + +## Part 1: Multi-Agent Orchestration (from Doc B) + +- Context management +- Agent assignment +- Session rotation +- 50% rule +- Epic decomposition + +## Part 2: Quality Enforcement (from Doc A) + +- Premature completion problem +- Quality gates +- Rejection loop +- Forced continuation +- CompletionVerificationEngine + +## Part 3: Implementation + +- TypeScript/NestJS orchestrator (from Doc A) +- Python coordinator enhancements (from Doc B) +- Integration points +``` + +### Option 2: Keep Separate, Create Integration Doc + +**Reason:** Different audiences (orchestration vs quality enforcement) + +**Documents:** + +1. `orchestration-architecture.md` (Doc B) - For understanding multi-agent coordination +2. `quality-enforcement-architecture.md` (Doc A) - For understanding quality gates +3. `non-ai-coordinator-integration.md` (NEW) - How they work together + +### Option 3: Hierarchical Documentation + +**Reason:** Layers of abstraction + +``` +non-ai-coordinator-pattern.md (Overview) +├── orchestration-layer.md (Doc B content) +└── quality-layer.md (Doc A content) +``` + +--- + +## Action Items + +Based on this overlap analysis, we recommend: + +1. **Merge the documents** into a comprehensive architecture guide + - Use Doc A's problem statement for quality enforcement + - Use Doc B's problem statement for context exhaustion + - Show how both problems require a non-AI coordinator + - Integrate TypeScript implementation with context monitoring + +2. **Update Mosaic Stack issue #140** + - Current: "Document Non-AI Coordinator Pattern Architecture" + - Expand scope: Include both quality enforcement AND orchestration + - Reference both problem spaces (L-015 + context exhaustion) + +3. 
**Create unified PoC plan** + - Phase 1: Context monitoring (from Doc B) + - Phase 2: Agent assignment logic (from Doc B) + - Phase 3: Quality gate integration (from Doc A) + - Phase 4: Forced continuation (from Doc A) + +4. **Preserve unique innovations from each** + - Doc A: Rejection loop, forced continuation prompts + - Doc B: 50% rule, agent profiles, context estimation formula + +--- + +## Conclusion + +**These documents are highly complementary, not duplicative.** + +- **~20% overlap:** Both use non-AI coordinator, mechanical gates, non-negotiable quality +- **~80% unique value:** Doc A solves premature completion, Doc B solves context exhaustion + +**Best path forward:** Merge into a single comprehensive architecture document that addresses both problems within the unified non-AI coordinator pattern. + +The pattern is: + +1. Non-AI coordinator assigns issues based on context estimates (Doc B) +2. Agent works on issue +3. Quality gates enforce completion standards (Doc A) +4. Context monitoring prevents exhaustion (Doc B) +5. Forced continuation prevents premature "done" (Doc A) +6. Next issue assigned when ready (Doc B) + +Together they create a **robust, autonomous, quality-enforcing orchestration system** that scales beyond single-agent, single-issue scenarios. + +--- + +**Next Steps:** + +1. User review of this analysis +2. Decision on integration approach (Option 1, 2, or 3) +3. Update Mosaic Stack documentation accordingly +4. Proceed with PoC implementation