docs: Add overlap analysis for non-AI coordinator patterns

Detailed comparison showing:
- Existing doc addresses L-015 (premature completion)
- New doc addresses context exhaustion (multi-issue orchestration)
- ~20% overlap (both use non-AI coordinator, mechanical gates)
- 80% complementary (different problems, different solutions)

Recommends merging into comprehensive document (already done).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-31 14:47:59 -06:00
parent a2f06fe75b
commit 903109ea40
16 changed files with 1212 additions and 3 deletions
--- a/apps/api/prisma/schema.prisma
+++ b/apps/api/prisma/schema.prisma
@@ -1053,3 +1053,37 @@ model TaskRejection {
  @@index([manualReview])
  @@map("task_rejections")
 }
+
+/// Token budget for a single task: the allocation granted to it and the
+/// running token usage recorded against that allocation.
+model TokenBudget {
+  id           String   @id @default(uuid()) @db.Uuid
+  taskId       String   @unique @map("task_id") @db.Uuid
+  workspaceId  String   @map("workspace_id") @db.Uuid
+  agentId      String   @map("agent_id")
+
+  // Budget allocation
+  allocatedTokens    Int   @map("allocated_tokens")
+  estimatedComplexity String @map("estimated_complexity") // "low", "medium", "high", "critical"
+
+  // Usage tracking (running totals; total is written as input + output)
+  inputTokensUsed    Int   @default(0) @map("input_tokens_used")
+  outputTokensUsed   Int   @default(0) @map("output_tokens_used")
+  totalTokensUsed    Int   @default(0) @map("total_tokens_used")
+
+  // Cost tracking
+  estimatedCost      Decimal? @map("estimated_cost") @db.Decimal(10, 6)
+
+  // State
+  startedAt     DateTime  @default(now()) @map("started_at") @db.Timestamptz
+  lastUpdatedAt DateTime  @updatedAt @map("last_updated_at") @db.Timestamptz
+  completedAt   DateTime? @map("completed_at") @db.Timestamptz
+
+  // Analysis
+  budgetUtilization  Float?   @map("budget_utilization") // used / allocated; normally 0.0 - 1.0, not clamped
+  suspiciousPattern  Boolean  @default(false) @map("suspicious_pattern")
+  suspiciousReason   String?  @map("suspicious_reason")
+
+  // taskId needs no @@index: its @unique constraint is already backed by a unique index.
+  @@index([workspaceId])
+  @@index([suspiciousPattern])
+  @@map("token_budgets")
+}
--- a/apps/api/src/quality-orchestrator/integration/quality-orchestrator.integration.spec.ts
+++ b/apps/api/src/quality-orchestrator/integration/quality-orchestrator.integration.spec.ts
@@ -10,6 +10,7 @@ import { CompletionVerificationService } from "../../completion-verification/com
 import { ContinuationPromptsService } from "../../continuation-prompts/continuation-prompts.service";
 import { RejectionHandlerService } from "../../rejection-handler/rejection-handler.service";
 import { PrismaService } from "../../prisma/prisma.service";
+import { TokenBudgetService } from "../../token-budget/token-budget.service";
 import type { CompletionClaim, OrchestrationConfig, QualityGate } from "../interfaces";
 import type { RejectionContext } from "../../rejection-handler/interfaces";
 import { MOCK_OUTPUTS, MOCK_FILE_CHANGES } from "./test-fixtures";
@@ -69,6 +70,12 @@ describe("Non-AI Coordinator Integration", () => {
          provide: PrismaService,
          useValue: mockPrisma,
        },
+        {
+          provide: TokenBudgetService,
+          useValue: {
+            checkSuspiciousDoneClaim: vi.fn().mockResolvedValue({ suspicious: false }),
+          },
+        },
      ],
    }).compile();

--- a/apps/api/src/quality-orchestrator/quality-orchestrator.module.ts
+++ b/apps/api/src/quality-orchestrator/quality-orchestrator.module.ts
@@ -1,11 +1,13 @@
 import { Module } from "@nestjs/common";
 import { QualityOrchestratorService } from "./quality-orchestrator.service";
+import { TokenBudgetModule } from "../token-budget/token-budget.module";

 /**
 * Quality Orchestrator Module
 * Provides quality enforcement for AI agent task completions
 */
@Module({
+  imports: [TokenBudgetModule],
  providers: [QualityOrchestratorService],
  exports: [QualityOrchestratorService],
 })
--- a/apps/api/src/quality-orchestrator/quality-orchestrator.service.spec.ts
+++ b/apps/api/src/quality-orchestrator/quality-orchestrator.service.spec.ts
@@ -1,6 +1,7 @@
-import { describe, it, expect, beforeEach } from "vitest";
+import { describe, it, expect, beforeEach, vi } from "vitest";
 import { Test, TestingModule } from "@nestjs/testing";
 import { QualityOrchestratorService } from "./quality-orchestrator.service";
+import { TokenBudgetService } from "../token-budget/token-budget.service";
 import type {
  QualityGate,
  CompletionClaim,
@@ -17,7 +18,15 @@ describe("QualityOrchestratorService", () => {

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
-      providers: [QualityOrchestratorService],
+      providers: [
+        QualityOrchestratorService,
+        {
+          provide: TokenBudgetService,
+          useValue: {
+            checkSuspiciousDoneClaim: vi.fn().mockResolvedValue({ suspicious: false }),
+          },
+        },
+      ],
    }).compile();

    service = module.get<QualityOrchestratorService>(QualityOrchestratorService);
--- a/apps/api/src/quality-orchestrator/quality-orchestrator.service.ts
+++ b/apps/api/src/quality-orchestrator/quality-orchestrator.service.ts
@@ -8,6 +8,7 @@ import type {
  CompletionValidation,
  OrchestrationConfig,
 } from "./interfaces";
+import { TokenBudgetService } from "../token-budget/token-budget.service";

 const execAsync = promisify(exec);

@@ -62,6 +63,8 @@ const DEFAULT_GATES: QualityGate[] = [
 export class QualityOrchestratorService {
  private readonly logger = new Logger(QualityOrchestratorService.name);

+  constructor(private readonly tokenBudgetService: TokenBudgetService) {}
+
  /**
   * Validate a completion claim against quality gates
   */
@@ -93,10 +96,27 @@ export class QualityOrchestratorService {
        return gate?.required ?? false;
      });

+    // Check token budget for suspicious patterns
+    let budgetCheck: { suspicious: boolean; reason?: string } | null = null;
+    try {
+      budgetCheck = await this.tokenBudgetService.checkSuspiciousDoneClaim(claim.taskId);
+    } catch {
+      // Token budget not found - not an error, just means tracking wasn't enabled
+      this.logger.debug(`No token budget found for task ${claim.taskId}`);
+    }
+
    // Determine verdict
    let verdict: "accepted" | "rejected" | "needs-continuation";
    if (allGatesPassed) {
-      verdict = "accepted";
+      // Even if all gates passed, check for suspicious budget patterns
+      if (budgetCheck?.suspicious) {
+        verdict = "needs-continuation";
+        this.logger.warn(
+          `Suspicious budget pattern detected for task ${claim.taskId}: ${budgetCheck.reason ?? "unknown reason"}`
+        );
+      } else {
+        verdict = "accepted";
+      }
    } else if (requiredGatesFailed.length > 0) {
      verdict = "rejected";
    } else if (config.strictMode) {
@@ -117,6 +137,14 @@ export class QualityOrchestratorService {
    if (verdict !== "accepted") {
      result.feedback = this.generateRejectionFeedback(result);
      result.suggestedActions = this.generateSuggestedActions(gateResults, config);
+
+      // Add budget feedback if suspicious pattern detected
+      if (budgetCheck?.suspicious && budgetCheck.reason) {
+        result.feedback += `\n\nToken budget analysis: ${budgetCheck.reason}`;
+        result.suggestedActions.push(
+          "Review task completion - significant budget remains or suspicious usage pattern detected"
+        );
+      }
    }

    return result;
--- a/apps/api/src/token-budget/dto/allocate-budget.dto.ts
+++ b/apps/api/src/token-budget/dto/allocate-budget.dto.ts
@@ -0,0 +1,25 @@
+import { IsString, IsUUID, IsInt, IsIn, Min } from "class-validator";
+import type { TaskComplexity } from "../interfaces";
+
+/**
+ * DTO for allocating a token budget for a task
+ *
+ * Fields are validated with class-validator decorators; none of them is
+ * marked optional.
+ */
+export class AllocateBudgetDto {
+  @IsUUID("4", { message: "taskId must be a valid UUID" })
+  taskId!: string; // UUID v4 of the task the budget belongs to
+
+  @IsUUID("4", { message: "workspaceId must be a valid UUID" })
+  workspaceId!: string; // UUID v4 of the owning workspace
+
+  @IsString({ message: "agentId must be a string" })
+  agentId!: string; // only checked to be a string; no format or non-empty check
+
+  @IsIn(["low", "medium", "high", "critical"], {
+    message: "complexity must be one of: low, medium, high, critical",
+  })
+  complexity!: TaskComplexity; // must be one of the four literals listed above
+
+  @IsInt({ message: "allocatedTokens must be an integer" })
+  @Min(1, { message: "allocatedTokens must be at least 1" })
+  allocatedTokens!: number; // integer >= 1
+}
--- a/apps/api/src/token-budget/dto/budget-analysis.dto.ts
+++ b/apps/api/src/token-budget/dto/budget-analysis.dto.ts
@@ -0,0 +1,33 @@
+/**
+ * DTO for budget analysis results
+ *
+ * Plain value object: the constructor copies every field of `data` onto the
+ * instance without validation or conversion.
+ */
+export class BudgetAnalysisDto {
+  taskId: string;
+  allocatedTokens: number;
+  usedTokens: number;
+  remainingTokens: number; // TokenBudgetService.analyzeBudget passes allocated - used
+  utilizationPercentage: number; // TokenBudgetService.analyzeBudget passes a 0-100 scale value
+  suspiciousPattern: boolean;
+  suspiciousReason: string | null; // null when no reason is available
+  recommendation: "accept" | "continue" | "review";
+
+  constructor(data: {
+    taskId: string;
+    allocatedTokens: number;
+    usedTokens: number;
+    remainingTokens: number;
+    utilizationPercentage: number;
+    suspiciousPattern: boolean;
+    suspiciousReason: string | null;
+    recommendation: "accept" | "continue" | "review";
+  }) {
+    this.taskId = data.taskId;
+    this.allocatedTokens = data.allocatedTokens;
+    this.usedTokens = data.usedTokens;
+    this.remainingTokens = data.remainingTokens;
+    this.utilizationPercentage = data.utilizationPercentage;
+    this.suspiciousPattern = data.suspiciousPattern;
+    this.suspiciousReason = data.suspiciousReason;
+    this.recommendation = data.recommendation;
+  }
+}
--- a/apps/api/src/token-budget/dto/index.ts
+++ b/apps/api/src/token-budget/dto/index.ts
@@ -0,0 +1,3 @@
+export * from "./allocate-budget.dto";
+export * from "./update-usage.dto";
+export * from "./budget-analysis.dto";
--- a/apps/api/src/token-budget/dto/update-usage.dto.ts
+++ b/apps/api/src/token-budget/dto/update-usage.dto.ts
@@ -0,0 +1,14 @@
+import { IsInt, Min } from "class-validator";
+
+/**
+ * DTO for updating token usage for a task
+ *
+ * Both counts must be non-negative integers.
+ * NOTE(review): TokenBudgetService.updateUsage adds its inputTokens and
+ * outputTokens arguments to the stored totals; presumably these fields carry
+ * the same per-response increments rather than absolute totals - confirm at
+ * the call site.
+ */
+export class UpdateUsageDto {
+  @IsInt({ message: "inputTokens must be an integer" })
+  @Min(0, { message: "inputTokens must be non-negative" })
+  inputTokens!: number; // integer >= 0
+
+  @IsInt({ message: "outputTokens must be an integer" })
+  @Min(0, { message: "outputTokens must be non-negative" })
+  outputTokens!: number; // integer >= 0
+}
--- a/apps/api/src/token-budget/index.ts
+++ b/apps/api/src/token-budget/index.ts
@@ -0,0 +1,4 @@
+export * from "./token-budget.module";
+export * from "./token-budget.service";
+export * from "./interfaces";
+export * from "./dto";
--- a/apps/api/src/token-budget/interfaces/index.ts
+++ b/apps/api/src/token-budget/interfaces/index.ts
@@ -0,0 +1 @@
+export * from "./token-budget.interface";
--- a/apps/api/src/token-budget/interfaces/token-budget.interface.ts
+++ b/apps/api/src/token-budget/interfaces/token-budget.interface.ts
@@ -0,0 +1,69 @@
+/**
+ * Task complexity levels for budget allocation
+ */
+export type TaskComplexity = "low" | "medium" | "high" | "critical";
+
+/**
+ * Token budget data structure
+ *
+ * Field names mirror the TokenBudget model in the Prisma schema.
+ */
+export interface TokenBudgetData {
+  id: string;
+  taskId: string;
+  workspaceId: string;
+  agentId: string;
+  allocatedTokens: number;
+  estimatedComplexity: TaskComplexity;
+  inputTokensUsed: number;
+  outputTokensUsed: number;
+  totalTokensUsed: number; // written as input + output by TokenBudgetService.updateUsage
+  estimatedCost: number | null; // NOTE(review): the schema column is Decimal(10, 6); confirm where it is converted to number
+  startedAt: Date;
+  lastUpdatedAt: Date;
+  completedAt: Date | null;
+  budgetUtilization: number | null; // used / allocated as a fraction (not a percentage); null until first written
+  suspiciousPattern: boolean;
+  suspiciousReason: string | null;
+}
+
+/**
+ * Budget analysis result
+ *
+ * Shape returned by TokenBudgetService.analyzeBudget.
+ */
+export interface BudgetAnalysis {
+  taskId: string;
+  allocatedTokens: number;
+  usedTokens: number;
+  remainingTokens: number; // allocated - used; negative when usage exceeds the allocation
+  utilizationPercentage: number; // (used / allocated) * 100, i.e. a 0-100 scale value
+  suspiciousPattern: boolean;
+  suspiciousReason: string | null;
+  recommendation: "accept" | "continue" | "review";
+}
+
+/**
+ * Suspicious pattern detection result
+ *
+ * NOTE(review): TokenBudgetService.detectSuspiciousPattern declares this same
+ * shape inline as its return type instead of referencing this interface -
+ * confirm whether the two are meant to stay in sync.
+ */
+export interface SuspiciousPattern {
+  triggered: boolean; // true when one of the detection patterns matched
+  reason?: string; // human-readable explanation; absent when nothing triggered
+  severity: "low" | "medium" | "high";
+  recommendation: "accept" | "continue" | "review";
+}
+
+/**
+ * Complexity-based budget allocation
+ */
+export const COMPLEXITY_BUDGETS: Record<TaskComplexity, number> = {
+  low: 50000, // Simple fixes, typos
+  medium: 150000, // Standard features
+  high: 350000, // Complex features
+  critical: 750000, // Major refactoring
+};
+
+/**
+ * Token budget thresholds for suspicious pattern detection
+ *
+ * All values are fractions of the allocated budget (0-1 scale).
+ *
+ * NOTE(review): earlier comments on these keys described some thresholds as
+ * applying only when quality gates are failing, but
+ * TokenBudgetService.detectSuspiciousPattern compares utilization against
+ * them without any gate information - confirm which behavior is intended.
+ */
+export const BUDGET_THRESHOLDS = {
+  SUSPICIOUS_REMAINING: 0.2, // flagged when more than 20% of the budget is still unused
+  VERY_LOW_UTILIZATION: 0.1, // <10% utilization = suspicious
+  VERY_HIGH_UTILIZATION: 0.95, // flagged when more than 95% of the budget has been used
+};
--- a/apps/api/src/token-budget/token-budget.module.ts
+++ b/apps/api/src/token-budget/token-budget.module.ts
@@ -0,0 +1,14 @@
+import { Module } from "@nestjs/common";
+import { TokenBudgetService } from "./token-budget.service";
+import { PrismaModule } from "../prisma/prisma.module";
+
+/**
+ * Token Budget Module
+ * Tracks token usage and prevents premature done claims
+ */
+@Module({
+  imports: [PrismaModule],
+  providers: [TokenBudgetService],
+  exports: [TokenBudgetService],
+})
+export class TokenBudgetModule {}
--- a/apps/api/src/token-budget/token-budget.service.spec.ts
+++ b/apps/api/src/token-budget/token-budget.service.spec.ts
@@ -0,0 +1,293 @@
+import { describe, it, expect, beforeEach, vi } from "vitest";
+import { Test, TestingModule } from "@nestjs/testing";
+import { TokenBudgetService } from "./token-budget.service";
+import { PrismaService } from "../prisma/prisma.service";
+import { NotFoundException } from "@nestjs/common";
+import type { TaskComplexity } from "./interfaces";
+import { COMPLEXITY_BUDGETS } from "./interfaces";
+
+describe("TokenBudgetService", () => {
+  let service: TokenBudgetService;
+  let prisma: PrismaService;
+
+  const mockPrismaService = {
+    tokenBudget: {
+      create: vi.fn(),
+      findUnique: vi.fn(),
+      update: vi.fn(),
+    },
+  };
+
+  const mockWorkspaceId = "550e8400-e29b-41d4-a716-446655440001";
+  const mockTaskId = "550e8400-e29b-41d4-a716-446655440002";
+  const mockAgentId = "test-agent-001";
+
+  const mockTokenBudget = {
+    id: "550e8400-e29b-41d4-a716-446655440003",
+    taskId: mockTaskId,
+    workspaceId: mockWorkspaceId,
+    agentId: mockAgentId,
+    allocatedTokens: 150000,
+    estimatedComplexity: "medium" as TaskComplexity,
+    inputTokensUsed: 50000,
+    outputTokensUsed: 30000,
+    totalTokensUsed: 80000,
+    estimatedCost: null,
+    startedAt: new Date("2026-01-31T10:00:00Z"),
+    lastUpdatedAt: new Date("2026-01-31T10:30:00Z"),
+    completedAt: null,
+    budgetUtilization: 0.533,
+    suspiciousPattern: false,
+    suspiciousReason: null,
+  };
+
+  beforeEach(async () => {
+    const module: TestingModule = await Test.createTestingModule({
+      providers: [
+        TokenBudgetService,
+        {
+          provide: PrismaService,
+          useValue: mockPrismaService,
+        },
+      ],
+    }).compile();
+
+    service = module.get<TokenBudgetService>(TokenBudgetService);
+    prisma = module.get<PrismaService>(PrismaService);
+
+    vi.clearAllMocks();
+  });
+
+  it("should be defined", () => {
+    expect(service).toBeDefined();
+  });
+
+  describe("allocateBudget", () => {
+    it("should allocate budget for a new task", async () => {
+      const allocateDto = {
+        taskId: mockTaskId,
+        workspaceId: mockWorkspaceId,
+        agentId: mockAgentId,
+        complexity: "medium" as TaskComplexity,
+        allocatedTokens: 150000,
+      };
+
+      mockPrismaService.tokenBudget.create.mockResolvedValue(mockTokenBudget);
+
+      const result = await service.allocateBudget(allocateDto);
+
+      expect(result).toEqual(mockTokenBudget);
+      expect(mockPrismaService.tokenBudget.create).toHaveBeenCalledWith({
+        data: {
+          taskId: allocateDto.taskId,
+          workspaceId: allocateDto.workspaceId,
+          agentId: allocateDto.agentId,
+          allocatedTokens: allocateDto.allocatedTokens,
+          estimatedComplexity: allocateDto.complexity,
+        },
+      });
+    });
+  });
+
+  describe("updateUsage", () => {
+    it("should update token usage and recalculate utilization", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget);
+
+      const updatedBudget = {
+        ...mockTokenBudget,
+        inputTokensUsed: 60000,
+        outputTokensUsed: 40000,
+        totalTokensUsed: 100000,
+        budgetUtilization: 0.667,
+      };
+
+      mockPrismaService.tokenBudget.update.mockResolvedValue(updatedBudget);
+
+      const result = await service.updateUsage(mockTaskId, 10000, 10000);
+
+      expect(result).toEqual(updatedBudget);
+      expect(mockPrismaService.tokenBudget.findUnique).toHaveBeenCalledWith({
+        where: { taskId: mockTaskId },
+      });
+      expect(mockPrismaService.tokenBudget.update).toHaveBeenCalledWith({
+        where: { taskId: mockTaskId },
+        data: {
+          inputTokensUsed: 60000,
+          outputTokensUsed: 40000,
+          totalTokensUsed: 100000,
+          budgetUtilization: expect.closeTo(0.667, 2),
+        },
+      });
+    });
+
+    it("should throw NotFoundException if budget does not exist", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);
+
+      await expect(service.updateUsage(mockTaskId, 1000, 1000)).rejects.toThrow(NotFoundException);
+    });
+  });
+
+  describe("analyzeBudget", () => {
+    it("should analyze budget and detect suspicious pattern for high remaining budget", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget);
+
+      const result = await service.analyzeBudget(mockTaskId);
+
+      expect(result.taskId).toBe(mockTaskId);
+      expect(result.allocatedTokens).toBe(150000);
+      expect(result.usedTokens).toBe(80000);
+      expect(result.remainingTokens).toBe(70000);
+      expect(result.utilizationPercentage).toBeCloseTo(53.3, 1);
+      // 46.7% remaining is suspicious (>20% threshold)
+      expect(result.suspiciousPattern).toBe(true);
+      expect(result.recommendation).toBe("review");
+    });
+
+    it("should not detect suspicious pattern when utilization is high", async () => {
+      // 85% utilization (15% remaining - below 20% threshold)
+      const highUtilizationBudget = {
+        ...mockTokenBudget,
+        inputTokensUsed: 65000,
+        outputTokensUsed: 62500,
+        totalTokensUsed: 127500,
+        budgetUtilization: 0.85,
+      };
+
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(highUtilizationBudget);
+
+      const result = await service.analyzeBudget(mockTaskId);
+
+      expect(result.utilizationPercentage).toBeCloseTo(85.0, 1);
+      expect(result.suspiciousPattern).toBe(false);
+      expect(result.recommendation).toBe("accept");
+    });
+
+    it("should throw NotFoundException if budget does not exist", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);
+
+      await expect(service.analyzeBudget(mockTaskId)).rejects.toThrow(NotFoundException);
+    });
+  });
+
+  describe("checkSuspiciousDoneClaim", () => {
+    it("should detect suspicious pattern when >20% budget remaining", async () => {
+      // 30% budget remaining
+      const budgetWithRemaining = {
+        ...mockTokenBudget,
+        inputTokensUsed: 50000,
+        outputTokensUsed: 55000,
+        totalTokensUsed: 105000,
+        budgetUtilization: 0.7,
+      };
+
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetWithRemaining);
+
+      const result = await service.checkSuspiciousDoneClaim(mockTaskId);
+
+      expect(result.suspicious).toBe(true);
+      expect(result.reason).toContain("30.0%");
+    });
+
+    it("should not flag as suspicious when <20% budget remaining", async () => {
+      // 10% budget remaining
+      const budgetNearlyDone = {
+        ...mockTokenBudget,
+        inputTokensUsed: 70000,
+        outputTokensUsed: 65000,
+        totalTokensUsed: 135000,
+        budgetUtilization: 0.9,
+      };
+
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetNearlyDone);
+
+      const result = await service.checkSuspiciousDoneClaim(mockTaskId);
+
+      expect(result.suspicious).toBe(false);
+      expect(result.reason).toBeUndefined();
+    });
+
+    it("should detect very low utilization (<10%)", async () => {
+      // 5% utilization
+      const budgetVeryLowUsage = {
+        ...mockTokenBudget,
+        inputTokensUsed: 4000,
+        outputTokensUsed: 3500,
+        totalTokensUsed: 7500,
+        budgetUtilization: 0.05,
+      };
+
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetVeryLowUsage);
+
+      const result = await service.checkSuspiciousDoneClaim(mockTaskId);
+
+      expect(result.suspicious).toBe(true);
+      expect(result.reason).toContain("Very low budget utilization");
+    });
+  });
+
+  describe("getBudgetUtilization", () => {
+    it("should return budget utilization percentage", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget);
+
+      const result = await service.getBudgetUtilization(mockTaskId);
+
+      expect(result).toBeCloseTo(53.3, 1);
+    });
+
+    it("should throw NotFoundException if budget does not exist", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);
+
+      await expect(service.getBudgetUtilization(mockTaskId)).rejects.toThrow(NotFoundException);
+    });
+  });
+
+  describe("markCompleted", () => {
+    it("should mark budget as completed", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget);
+
+      const completedBudget = {
+        ...mockTokenBudget,
+        completedAt: new Date("2026-01-31T11:00:00Z"),
+      };
+
+      mockPrismaService.tokenBudget.update.mockResolvedValue(completedBudget);
+
+      await service.markCompleted(mockTaskId);
+
+      expect(mockPrismaService.tokenBudget.update).toHaveBeenCalledWith({
+        where: { taskId: mockTaskId },
+        data: {
+          completedAt: expect.any(Date),
+        },
+      });
+    });
+
+    it("should throw NotFoundException if budget does not exist", async () => {
+      mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);
+
+      await expect(service.markCompleted(mockTaskId)).rejects.toThrow(NotFoundException);
+    });
+  });
+
+  describe("getDefaultBudgetForComplexity", () => {
+    // One generated case per complexity tier; `%s` expands to the tier name so
+    // each test title reads "should return correct budget for <tier> complexity".
+    const tiers: TaskComplexity[] = ["low", "medium", "high", "critical"];
+
+    it.each(tiers)("should return correct budget for %s complexity", (tier) => {
+      const result = service.getDefaultBudgetForComplexity(tier);
+      expect(result).toBe(COMPLEXITY_BUDGETS[tier]);
+    });
+  });
+});
--- a/apps/api/src/token-budget/token-budget.service.ts
+++ b/apps/api/src/token-budget/token-budget.service.ts
@@ -0,0 +1,256 @@
+import { Injectable, Logger, NotFoundException } from "@nestjs/common";
+import { PrismaService } from "../prisma/prisma.service";
+import type { TokenBudget } from "@prisma/client";
+import type { TaskComplexity, BudgetAnalysis } from "./interfaces";
+import { COMPLEXITY_BUDGETS, BUDGET_THRESHOLDS } from "./interfaces";
+import type { AllocateBudgetDto } from "./dto";
+import { BudgetAnalysisDto } from "./dto";
+
+/**
+ * Token Budget Service
+ * Tracks token usage and prevents premature done claims with significant budget remaining
+ */
+@Injectable()
+export class TokenBudgetService {
+  private readonly logger = new Logger(TokenBudgetService.name);
+
+  constructor(private readonly prisma: PrismaService) {}
+
+  /**
+   * Create the token budget row for a task.
+   *
+   * Only the identifiers, the allocation and the complexity tier are written;
+   * the usage counters are not set here and fall back to the schema defaults.
+   *
+   * @param dto - Task, workspace and agent identifiers plus the allocation and
+   *   complexity tier to persist.
+   * @returns The row returned by the Prisma create call.
+   */
+  async allocateBudget(dto: AllocateBudgetDto): Promise<TokenBudget> {
+    const { taskId, workspaceId, agentId, allocatedTokens, complexity } = dto;
+
+    this.logger.log(`Allocating ${String(allocatedTokens)} tokens for task ${taskId}`);
+
+    // The DTO's `complexity` is stored under the `estimatedComplexity` field.
+    return this.prisma.tokenBudget.create({
+      data: {
+        taskId,
+        workspaceId,
+        agentId,
+        allocatedTokens,
+        estimatedComplexity: complexity,
+      },
+    });
+  }
+
+  /**
+   * Update usage after agent response
+   *
+   * Adds the given token counts to the stored input/output totals, recomputes
+   * the combined total and the utilization fraction (total / allocated), and
+   * writes all four values back in a single update call.
+   *
+   * NOTE(review): the row is read with findUnique and written with a separate
+   * update; nothing visible here serializes concurrent calls for the same
+   * task, so overlapping calls could overwrite each other's totals - confirm
+   * whether callers guarantee one update at a time.
+   *
+   * @param taskId - Task whose budget row is updated.
+   * @param inputTokens - Input tokens to add to the running total.
+   * @param outputTokens - Output tokens to add to the running total.
+   * @returns The updated budget row.
+   * @throws NotFoundException when no budget row exists for the task.
+   */
+  async updateUsage(
+    taskId: string,
+    inputTokens: number,
+    outputTokens: number
+  ): Promise<TokenBudget> {
+    this.logger.debug(
+      `Updating usage for task ${taskId}: +${String(inputTokens)} input, +${String(outputTokens)} output`
+    );
+
+    // Get current budget
+    const budget = await this.prisma.tokenBudget.findUnique({
+      where: { taskId },
+    });
+
+    if (!budget) {
+      throw new NotFoundException(`Token budget not found for task ${taskId}`);
+    }
+
+    // Calculate new totals (arguments are increments on top of the stored values)
+    const newInputTokens = budget.inputTokensUsed + inputTokens;
+    const newOutputTokens = budget.outputTokensUsed + outputTokens;
+    const newTotalTokens = newInputTokens + newOutputTokens;
+
+    // Calculate utilization as a fraction of the allocation; not clamped, so it exceeds 1 when over budget
+    const utilization = newTotalTokens / budget.allocatedTokens;
+
+    // Update budget
+    const updatedBudget = await this.prisma.tokenBudget.update({
+      where: { taskId },
+      data: {
+        inputTokensUsed: newInputTokens,
+        outputTokensUsed: newOutputTokens,
+        totalTokensUsed: newTotalTokens,
+        budgetUtilization: utilization,
+      },
+    });
+
+    return updatedBudget;
+  }
+
+  /**
+   * Analyze budget for suspicious patterns
+   *
+   * Loads the task's budget row, derives used/remaining tokens and the
+   * utilization percentage (0-100 scale), and runs suspicious-pattern
+   * detection on the row.
+   *
+   * @param taskId - Task whose budget is analyzed.
+   * @returns Usage figures plus the detection outcome and recommendation.
+   * @throws NotFoundException when no budget row exists for the task.
+   */
+  async analyzeBudget(taskId: string): Promise<BudgetAnalysis> {
+    this.logger.debug(`Analyzing budget for task ${taskId}`);
+
+    const budget = await this.prisma.tokenBudget.findUnique({
+      where: { taskId },
+    });
+
+    if (!budget) {
+      throw new NotFoundException(`Token budget not found for task ${taskId}`);
+    }
+
+    const { allocatedTokens, totalTokensUsed: usedTokens } = budget;
+
+    // detectSuspiciousPattern already pairs every outcome with a recommendation
+    // ("continue" for high severity, "review" for the other triggered patterns,
+    // "accept" when nothing triggers). Use it directly rather than re-deriving
+    // the same mapping from severity here, so the two cannot drift apart.
+    const pattern = this.detectSuspiciousPattern(budget);
+
+    return new BudgetAnalysisDto({
+      taskId,
+      allocatedTokens,
+      usedTokens,
+      remainingTokens: allocatedTokens - usedTokens,
+      utilizationPercentage: (usedTokens / allocatedTokens) * 100,
+      suspiciousPattern: pattern.triggered,
+      suspiciousReason: pattern.reason ?? null,
+      recommendation: pattern.recommendation,
+    });
+  }
+
+  /**
+   * Check if done claim is suspicious (>20% budget remaining)
+   *
+   * A claim is reported as suspicious when detection finds very low
+   * utilization or a large unused share of the budget. The high-utilization
+   * pattern is deliberately NOT reported here: a nearly spent budget is not a
+   * sign of premature completion, and flagging it would push a task that has
+   * passed its quality gates back into continuation with almost no budget left.
+   *
+   * @param taskId - Task whose done claim is being checked.
+   * @returns `suspicious: true` (with `reason` when one is available) for a
+   *   suspicious claim, otherwise `suspicious: false` with no `reason` key.
+   * @throws NotFoundException when no budget row exists for the task.
+   */
+  async checkSuspiciousDoneClaim(
+    taskId: string
+  ): Promise<{ suspicious: boolean; reason?: string }> {
+    this.logger.debug(`Checking done claim for task ${taskId}`);
+
+    const budget = await this.prisma.tokenBudget.findUnique({
+      where: { taskId },
+    });
+
+    if (!budget) {
+      throw new NotFoundException(`Token budget not found for task ${taskId}`);
+    }
+
+    const pattern = this.detectSuspiciousPattern(budget);
+
+    // Among triggered patterns, severity "low" is produced only by the
+    // high-utilization check, which signals inefficiency rather than a
+    // premature "done" claim, so it does not make the claim suspicious.
+    if (!pattern.triggered || pattern.severity === "low") {
+      return { suspicious: false };
+    }
+
+    // Leave `reason` off entirely (instead of setting it to undefined) when
+    // the pattern carries no explanation.
+    return pattern.reason
+      ? { suspicious: true, reason: pattern.reason }
+      : { suspicious: true };
+  }
+
+  /**
+   * Report how much of a task's token allocation has been consumed.
+   *
+   * @param taskId - Task whose budget row is read.
+   * @returns Used tokens as a percentage of allocated tokens (0-100 scale,
+   *   not a 0-1 fraction).
+   * @throws NotFoundException when no budget row exists for the task.
+   */
+  async getBudgetUtilization(taskId: string): Promise<number> {
+    const record = await this.prisma.tokenBudget.findUnique({ where: { taskId } });
+
+    if (record) {
+      const { totalTokensUsed, allocatedTokens } = record;
+      return (totalTokensUsed / allocatedTokens) * 100;
+    }
+
+    throw new NotFoundException(`Token budget not found for task ${taskId}`);
+  }
+
+  /**
+   * Stamp a task's budget row with the current time as its completion time.
+   *
+   * @param taskId - Task whose budget row is marked completed.
+   * @throws NotFoundException when no budget row exists for the task.
+   */
+  async markCompleted(taskId: string): Promise<void> {
+    this.logger.log(`Marking budget as completed for task ${taskId}`);
+
+    const where = { taskId };
+
+    // Look the row up first so a missing budget surfaces as NotFoundException.
+    const existing = await this.prisma.tokenBudget.findUnique({ where });
+    if (!existing) {
+      throw new NotFoundException(`Token budget not found for task ${taskId}`);
+    }
+
+    await this.prisma.tokenBudget.update({
+      where,
+      data: { completedAt: new Date() },
+    });
+  }
+
+  /**
+   * Get complexity-based budget allocation
+   *
+   * @param complexity - Complexity tier to look up.
+   * @returns The token count listed for that tier in COMPLEXITY_BUDGETS.
+   */
+  getDefaultBudgetForComplexity(complexity: TaskComplexity): number {
+    return COMPLEXITY_BUDGETS[complexity];
+  }
+
+  /**
+   * Classify a budget row's usage against the BUDGET_THRESHOLDS values.
+   *
+   * Checks run in a fixed order and the first match wins:
+   *  1. utilization below VERY_LOW_UTILIZATION -> high severity, "continue"
+   *  2. utilization below (1 - SUSPICIOUS_REMAINING) -> medium severity, "review"
+   *  3. utilization above VERY_HIGH_UTILIZATION -> low severity, "review"
+   * Anything else is reported as not triggered with recommendation "accept".
+   *
+   * @param budget - Budget row; only totalTokensUsed and allocatedTokens are read.
+   * @returns Whether a pattern matched, with its reason, severity and recommendation.
+   * @private
+   */
+  private detectSuspiciousPattern(budget: TokenBudget): {
+    triggered: boolean;
+    reason?: string;
+    severity: "low" | "medium" | "high";
+    recommendation: "accept" | "continue" | "review";
+  } {
+    const { totalTokensUsed: used, allocatedTokens: allocated } = budget;
+    const utilization = used / allocated;
+    const usedPct = (utilization * 100).toFixed(1);
+
+    // Pattern 1: barely any of the budget was spent.
+    if (utilization < BUDGET_THRESHOLDS.VERY_LOW_UTILIZATION) {
+      return {
+        triggered: true,
+        reason: `Very low budget utilization (${usedPct}%). This suggests minimal work was performed.`,
+        severity: "high",
+        recommendation: "continue",
+      };
+    }
+
+    // Pattern 2: a large share of the budget is still unused.
+    if (utilization < 1 - BUDGET_THRESHOLDS.SUSPICIOUS_REMAINING) {
+      const remainingPct = ((1 - utilization) * 100).toFixed(1);
+      const remainingTokens = String(allocated - used);
+      return {
+        triggered: true,
+        reason: `Task claimed done with ${remainingPct}% budget remaining (${remainingTokens} tokens). This may indicate premature completion.`,
+        severity: "medium",
+        recommendation: "review",
+      };
+    }
+
+    // Pattern 3: almost all (or more than all) of the budget was spent.
+    if (utilization > BUDGET_THRESHOLDS.VERY_HIGH_UTILIZATION) {
+      return {
+        triggered: true,
+        reason: `Very high budget utilization (${usedPct}%). Task may need more budget or review for efficiency.`,
+        severity: "low",
+        recommendation: "review",
+      };
+    }
+
+    // Nothing matched: usage sits between the remaining-budget and high-utilization thresholds.
+    return {
+      triggered: false,
+      severity: "low",
+      recommendation: "accept",
+    };
+  }
+}
--- a/docs/3-architecture/non-ai-coordinator-overlap-analysis.md
+++ b/docs/3-architecture/non-ai-coordinator-overlap-analysis.md
@@ -0,0 +1,417 @@
+# Non-AI Coordinator Pattern - Overlap Analysis
+
+**Date:** 2026-01-31
+**Purpose:** Identify overlaps and differences between two complementary architecture documents
+
+---
+
+## Documents Compared
+
+### Document A: Mosaic Stack Non-AI Coordinator Pattern
+
+**Location:** `/home/jwoltje/src/mosaic-stack/docs/3-architecture/non-ai-coordinator-pattern.md`
+**Length:** 903 lines
+**Problem Space:** L-015 Agent Premature Completion
+**Focus:** Single-agent quality enforcement
+
+### Document B: Quality-Rails Orchestration Architecture
+
+**Location:** `/home/jwoltje/src/jarvis-brain/docs/work/quality-rails-orchestration-architecture.md`
+**Length:** ~600 lines
+**Problem Space:** Context exhaustion in multi-issue orchestration
+**Focus:** Multi-agent lifecycle management at scale
+
+---
+
+## Summary Table
+
+| Aspect                     | Document A (Existing)                       | Document B (New)                         | Overlap?           |
+| -------------------------- | ------------------------------------------- | ---------------------------------------- | ------------------ |
+| **Primary Problem**        | Agents claim "done" prematurely             | Agents pause at 95% context              | Different          |
+| **Coordinator Type**       | Non-AI (TypeScript/NestJS)                  | Non-AI (Python/Node.js)                  | ✅ Overlap         |
+| **Quality Gates**          | BuildGate, LintGate, TestGate, CoverageGate | Mechanical gates (lint, typecheck, test) | ✅ Overlap         |
+| **Agent Scope**            | Single agent per issue                      | Multi-agent orchestration                | Different          |
+| **Context Management**     | Not addressed                               | Core feature (80% compact, 95% rotate)   | Different          |
+| **Model Assignment**       | Not addressed                               | Agent profiles + difficulty matching     | Different          |
+| **Issue Sizing**           | Not addressed                               | 50% rule, epic decomposition             | Different          |
+| **Implementation Status**  | Full TypeScript code                        | Python pseudocode + PoC plan             | Different          |
+| **Forced Continuation**    | Yes (rejection loop)                        | No (preventive via context mgmt)         | Different approach |
+| **Non-negotiable Quality** | Yes                                         | Yes                                      | ✅ Overlap         |
+
+---
+
+## Unique to Document A (Existing Mosaic Stack Pattern)
+
+### 1. **Premature Completion Problem**
+
+- **Problem:** Agents claim work is "done" when tests fail, files are missing, or requirements are incomplete
+- **Root cause:** Agent interprets partial completion as success
+- **Example:** Agent implements feature, tests fail, agent says "done" anyway
+
+### 2. **Rejection Loop & Forced Continuation**
+
+```typescript
+// CompletionVerificationEngine
+if (!allGatesPassed) {
+  return this.forcedContinuationService.generateContinuationPrompt({
+    failedGates,
+    tone: "non-negotiable",
+  });
+}
+```
+
+**Key innovation:** When an agent claims "done" but gates fail, the coordinator injects a prompt that forces continuation:
+
+```
+COMPLETION REJECTED. The following quality gates have failed:
+- Build Gate: Compilation errors detected
+- Test Gate: 3/15 tests failing
+
+You must continue working until ALL quality gates pass.
+This is not optional. Do not claim completion until gates pass.
+```
+
+### 3. **State Machine for Completion Claims**
+
+```
+Agent Working → Claims Done → Run Gates → Pass/Reject
+                                   ↓
+                              Reject → Force Continue → Agent Working
+```
+
+### 4. **TypeScript/NestJS Implementation**
+
+- Full production-ready service code
+- QualityOrchestrator service
+- Gate interfaces and implementations
+- Dependency injection architecture
+
+### 5. **CompletionVerificationEngine**
+
+- Intercepts agent completion claims
+- Runs all gates synchronously
+- Blocks "done" status until gates pass
+
+---
+
+## Unique to Document B (New Quality-Rails Orchestration)
+
+### 1. **Context Exhaustion Problem**
+
+- **Problem:** AI orchestrators pause at 95% context usage, losing autonomy
+- **Root cause:** Linear context growth without compaction
+- **Example:** M4 session completed 11 issues, paused at 95%, required manual restart
+
+### 2. **50% Rule for Issue Sizing**
+
+```
+Issue context estimate MUST NOT exceed 50% of target agent's context limit.
+
+Example:
+- Sonnet agent: 200K context limit
+- Maximum issue estimate: 100K tokens
+- Reasoning: Leaves 100K for system prompts, conversation, safety buffer
+```
+
+### 3. **Agent Profiles & Model Assignment**
+
+```python
+AGENT_PROFILES = {
+    'opus': {
+        'context_limit': 200000,
+        'cost_per_mtok': 15.00,
+        'capabilities': ['high', 'medium', 'low']
+    },
+    'sonnet': {
+        'context_limit': 200000,
+        'cost_per_mtok': 3.00,
+        'capabilities': ['medium', 'low']
+    },
+    'glm': {
+        'context_limit': 128000,
+        'cost_per_mtok': 0.00,  # Self-hosted
+        'capabilities': ['medium', 'low']
+    }
+}
+```
+
+**Assignment logic:** Choose cheapest capable agent based on:
+
+- Estimated context usage
+- Difficulty level
+- Agent capabilities
+
+### 4. **Context Monitoring & Session Rotation**
+
+```python
+def monitor_agent_context(agent_id: str) -> ContextAction:
+    usage = get_context_usage(agent_id)
+
+    if usage > 0.95:
+        return ContextAction.ROTATE_SESSION  # Start fresh agent
+    elif usage > 0.80:
+        return ContextAction.COMPACT  # Summarize completed work
+    else:
+        return ContextAction.CONTINUE  # Keep working
+```
+
+### 5. **Context Estimation Formula**
+
+```python
+def estimate_context(issue: Issue) -> int:
+    base = (
+        issue.files_to_modify * 7000 +  # Average file size
+        issue.implementation_complexity * 20000 +  # Code writing
+        issue.test_requirements * 10000 +  # Test writing
+        issue.documentation * 3000  # Docs
+    )
+
+    buffer = base * 1.3  # 30% safety margin
+    return int(buffer)
+```
+
+### 6. **Epic Decomposition Workflow**
+
+```
+User creates Epic → Coordinator analyzes scope → Decomposes into sub-issues
+                                                        ↓
+                                        Each issue ≤ 50% agent context limit
+                                                        ↓
+                                        Assigns metadata: estimated_context, difficulty
+```
+
+### 7. **Multi-Model Support**
+
+- Supports Opus, Sonnet, Haiku, GLM, MiniMax, Cogito
+- Cost optimization through model selection
+- Self-hosted model preference when capable
+
+### 8. **Proactive Context Management**
+
+- Prevents context exhaustion BEFORE it happens
+- No manual intervention needed
+- Maintains autonomy through entire queue
+
+---
+
+## Overlaps (Both Documents)
+
+### 1. **Non-AI Coordinator Pattern** ✅
+
+Both use deterministic code (not AI) as the orchestrator:
+
+- **Doc A:** TypeScript/NestJS service
+- **Doc B:** Python/Node.js coordinator
+- **Rationale:** Avoid AI orchestrator context limits and inconsistency
+
+### 2. **Mechanical Quality Gates** ✅
+
+Both enforce quality through automated checks:
+
+**Doc A gates:**
+
+- BuildGate (compilation)
+- LintGate (code style)
+- TestGate (unit/integration tests)
+- CoverageGate (test coverage threshold)
+
+**Doc B gates:**
+
+- lint (code quality)
+- typecheck (type safety)
+- test (functionality)
+- coverage (same as Doc A)
+
+### 3. **Programmatic Enforcement** ✅
+
+Both prevent agents from bypassing quality checks:
+
+- **Doc A:** Rejection loop blocks completion until gates pass
+- **Doc B:** Coordinator enforces gates before allowing next issue
+- **Shared principle:** Quality is a requirement, not a suggestion
+
+### 4. **Non-Negotiable Quality Standards** ✅
+
+Both use firm language about quality requirements:
+
+- **Doc A:** "This is not optional. Do not claim completion until gates pass."
+- **Doc B:** "Quality gates are mechanical blockers, not suggestions."
+
+### 5. **State Management** ✅
+
+Both track work state programmatically:
+
+- **Doc A:** Agent state machine (working → claimed done → verified → actual done)
+- **Doc B:** Issue state in tracking system (pending → in-progress → gate-check → completed)
+
+### 6. **Validation Before Progression** ✅
+
+Both prevent moving forward with broken code:
+
+- **Doc A:** Cannot claim "done" until gates pass
+- **Doc B:** Cannot start next issue until current issue passes gates
+
+---
+
+## Complementary Nature
+
+These documents solve **different problems in the same architectural pattern**:
+
+### Document A (Existing): Quality Enforcement
+
+**Problem:** "How do we prevent an agent from claiming work is done when it's not?"
+**Solution:** Rejection loop with forced continuation
+**Scope:** Single agent working on single issue
+**Lifecycle stage:** Task completion verification
+
+### Document B (New): Orchestration at Scale
+
+**Problem:** "How do we manage multiple agents working through dozens of issues without context exhaustion?"
+**Solution:** Proactive context management + intelligent agent assignment
+**Scope:** Multi-agent orchestration across entire milestone
+**Lifecycle stage:** Agent selection, session management, queue progression
+
+### Together They Form:
+
+```
+┌─────────────────────────────────────────────────────────┐
+│         Non-AI Coordinator (Document B)                 │
+│  - Monitors context usage across all agents             │
+│  - Assigns issues based on context estimates            │
+│  - Rotates agents at 95% context                        │
+│  - Enforces 50% rule during issue creation              │
+└─────────────────────────┬───────────────────────────────┘
+                          │
+        ┌─────────────────┼─────────────────┐
+        ▼                 ▼                 ▼
+   Agent 1           Agent 2           Agent 3
+   Issue #42         Issue #57         Issue #89
+        │                 │                 │
+        └─────────────────┴─────────────────┘
+                          │
+                          ▼
+        ┌─────────────────────────────────────────────────┐
+        │   Quality Orchestrator (Document A)             │
+        │   - Intercepts completion claims                │
+        │   - Runs quality gates                          │
+        │   - Forces continuation if gates fail           │
+        │   - Only allows "done" when gates pass          │
+        └─────────────────────────────────────────────────┘
+```
+
+**Document B (new)** manages the **agent lifecycle and orchestration**.
+**Document A (existing)** manages the **quality enforcement per agent**.
+
+---
+
+## Integration Recommendations
+
+### Option 1: Merge into Single Document (Recommended)
+
+**Reason:** They're parts of the same system
+
+**Structure:**
+
+```markdown
+# Non-AI Coordinator Pattern Architecture
+
+## Part 1: Multi-Agent Orchestration (from Doc B)
+
+- Context management
+- Agent assignment
+- Session rotation
+- 50% rule
+- Epic decomposition
+
+## Part 2: Quality Enforcement (from Doc A)
+
+- Premature completion problem
+- Quality gates
+- Rejection loop
+- Forced continuation
+- CompletionVerificationEngine
+
+## Part 3: Implementation
+
+- TypeScript/NestJS orchestrator (from Doc A)
+- Python coordinator enhancements (from Doc B)
+- Integration points
+```
+
+### Option 2: Keep Separate, Create Integration Doc
+
+**Reason:** Different audiences (orchestration vs quality enforcement)
+
+**Documents:**
+
+1. `orchestration-architecture.md` (Doc B) - For understanding multi-agent coordination
+2. `quality-enforcement-architecture.md` (Doc A) - For understanding quality gates
+3. `non-ai-coordinator-integration.md` (NEW) - How they work together
+
+### Option 3: Hierarchical Documentation
+
+**Reason:** Layers of abstraction
+
+```
+non-ai-coordinator-pattern.md (Overview)
+├── orchestration-layer.md (Doc B content)
+└── quality-layer.md (Doc A content)
+```
+
+---
+
+## Action Items
+
+Based on this overlap analysis, the recommended actions are:
+
+1. **Merge the documents** into a comprehensive architecture guide
+   - Use Doc A's problem statement for quality enforcement
+   - Use Doc B's problem statement for context exhaustion
+   - Show how both problems require a non-AI coordinator
+   - Integrate the TypeScript implementation with context monitoring
+
+2. **Update Mosaic Stack issue #140**
+   - Current: "Document Non-AI Coordinator Pattern Architecture"
+   - Expand scope: Include both quality enforcement AND orchestration
+   - Reference both problem spaces (L-015 + context exhaustion)
+
+3. **Create unified PoC plan**
+   - Phase 1: Context monitoring (from Doc B)
+   - Phase 2: Agent assignment logic (from Doc B)
+   - Phase 3: Quality gate integration (from Doc A)
+   - Phase 4: Forced continuation (from Doc A)
+
+4. **Preserve unique innovations from each**
+   - Doc A: Rejection loop, forced continuation prompts
+   - Doc B: 50% rule, agent profiles, context estimation formula
+
+---
+
+## Conclusion
+
+**These documents are highly complementary, not duplicative.**
+
+- **~20% overlap:** Both use a non-AI coordinator, mechanical gates, and non-negotiable quality
+- **~80% unique value:** Doc A solves premature completion; Doc B solves context exhaustion
+
+**Best path forward:** Merge into a single comprehensive architecture document that addresses both problems within the unified non-AI coordinator pattern.
+
+The pattern is:
+
+1. Non-AI coordinator assigns issues based on context estimates (Doc B)
+2. Agent works on issue
+3. Quality gates enforce completion standards (Doc A)
+4. Context monitoring prevents exhaustion (Doc B)
+5. Forced continuation prevents premature "done" (Doc A)
+6. Next issue assigned when ready (Doc B)
+
+Together they create a **robust, autonomous, quality-enforcing orchestration system** that scales beyond single-agent, single-issue scenarios.
+
+---
+
+**Next Steps:**
+
+1. User review of this analysis
+2. Decision on integration approach (Option 1, 2, or 3)
+3. Update Mosaic Stack documentation accordingly
+4. Proceed with PoC implementation