docs: Add overlap analysis for non-AI coordinator patterns
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Detailed comparison showing:
- Existing doc addresses L-015 (premature completion)
- New doc addresses context exhaustion (multi-issue orchestration)
- ~20% overlap (both use non-AI coordinator, mechanical gates)
- ~80% complementary (different problems, different solutions)

Recommends merging into a comprehensive document (already done).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1053,3 +1053,37 @@ model TaskRejection {
|
|||||||
@@index([manualReview])
|
@@index([manualReview])
|
||||||
@@map("task_rejections")
|
@@map("task_rejections")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Per-task token budget: the allocation, running usage counters, timestamps,
// and the stored result of suspicious-pattern analysis.
// Rows live in the "token_budgets" table (see @@map at the bottom).
model TokenBudget {
|
||||||
|
id String @id @default(uuid()) @db.Uuid
|
||||||
|
// Unique: at most one budget row per task. TokenBudgetService looks rows up by this column.
taskId String @unique @map("task_id") @db.Uuid
|
||||||
|
workspaceId String @map("workspace_id") @db.Uuid
|
||||||
|
// Plain string column (no @db.Uuid), unlike the other identifiers above.
agentId String @map("agent_id")
|
||||||
|
|
||||||
|
// Budget allocation
|
||||||
|
allocatedTokens Int @map("allocated_tokens")
|
||||||
|
estimatedComplexity String @map("estimated_complexity") // "low", "medium", "high", "critical"
|
||||||
|
|
||||||
|
// Usage tracking
|
||||||
|
inputTokensUsed Int @default(0) @map("input_tokens_used")
|
||||||
|
outputTokensUsed Int @default(0) @map("output_tokens_used")
|
||||||
|
// TokenBudgetService.updateUsage writes this as inputTokensUsed + outputTokensUsed.
totalTokensUsed Int @default(0) @map("total_tokens_used")
|
||||||
|
|
||||||
|
// Cost tracking
|
||||||
|
// NOTE(review): allocateBudget and updateUsage do not write this column - confirm where it is populated.
estimatedCost Decimal? @map("estimated_cost") @db.Decimal(10, 6)
|
||||||
|
|
||||||
|
// State
|
||||||
|
startedAt DateTime @default(now()) @map("started_at") @db.Timestamptz
|
||||||
|
lastUpdatedAt DateTime @updatedAt @map("last_updated_at") @db.Timestamptz
|
||||||
|
// Null until the budget is marked completed.
completedAt DateTime? @map("completed_at") @db.Timestamptz
|
||||||
|
|
||||||
|
// Analysis
|
||||||
|
// Stored by updateUsage as totalTokensUsed / allocatedTokens. Null until the first usage update.
// NOTE(review): the ratio exceeds 1.0 when usage overruns the allocation, so the range note on this line is not enforced.
budgetUtilization Float? @map("budget_utilization") // 0.0 - 1.0
|
||||||
|
suspiciousPattern Boolean @default(false) @map("suspicious_pattern")
|
||||||
|
suspiciousReason String? @map("suspicious_reason")
|
||||||
|
|
||||||
|
// NOTE(review): taskId is already @unique, which is backed by its own index, so this
// extra index looks redundant. Confirm before removing; dropping it needs a migration.
@@index([taskId])
|
||||||
|
@@index([workspaceId])
|
||||||
|
@@index([suspiciousPattern])
|
||||||
|
@@map("token_budgets")
|
||||||
|
}
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import { CompletionVerificationService } from "../../completion-verification/com
|
|||||||
import { ContinuationPromptsService } from "../../continuation-prompts/continuation-prompts.service";
|
import { ContinuationPromptsService } from "../../continuation-prompts/continuation-prompts.service";
|
||||||
import { RejectionHandlerService } from "../../rejection-handler/rejection-handler.service";
|
import { RejectionHandlerService } from "../../rejection-handler/rejection-handler.service";
|
||||||
import { PrismaService } from "../../prisma/prisma.service";
|
import { PrismaService } from "../../prisma/prisma.service";
|
||||||
|
import { TokenBudgetService } from "../../token-budget/token-budget.service";
|
||||||
import type { CompletionClaim, OrchestrationConfig, QualityGate } from "../interfaces";
|
import type { CompletionClaim, OrchestrationConfig, QualityGate } from "../interfaces";
|
||||||
import type { RejectionContext } from "../../rejection-handler/interfaces";
|
import type { RejectionContext } from "../../rejection-handler/interfaces";
|
||||||
import { MOCK_OUTPUTS, MOCK_FILE_CHANGES } from "./test-fixtures";
|
import { MOCK_OUTPUTS, MOCK_FILE_CHANGES } from "./test-fixtures";
|
||||||
@@ -69,6 +70,12 @@ describe("Non-AI Coordinator Integration", () => {
|
|||||||
provide: PrismaService,
|
provide: PrismaService,
|
||||||
useValue: mockPrisma,
|
useValue: mockPrisma,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
provide: TokenBudgetService,
|
||||||
|
useValue: {
|
||||||
|
checkSuspiciousDoneClaim: vi.fn().mockResolvedValue({ suspicious: false }),
|
||||||
|
},
|
||||||
|
},
|
||||||
],
|
],
|
||||||
}).compile();
|
}).compile();
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
import { Module } from "@nestjs/common";
|
import { Module } from "@nestjs/common";
|
||||||
import { QualityOrchestratorService } from "./quality-orchestrator.service";
|
import { QualityOrchestratorService } from "./quality-orchestrator.service";
|
||||||
|
import { TokenBudgetModule } from "../token-budget/token-budget.module";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Quality Orchestrator Module
|
* Quality Orchestrator Module
|
||||||
* Provides quality enforcement for AI agent task completions
|
* Provides quality enforcement for AI agent task completions
|
||||||
*/
|
*/
|
||||||
@Module({
|
@Module({
|
||||||
|
imports: [TokenBudgetModule],
|
||||||
providers: [QualityOrchestratorService],
|
providers: [QualityOrchestratorService],
|
||||||
exports: [QualityOrchestratorService],
|
exports: [QualityOrchestratorService],
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { describe, it, expect, beforeEach } from "vitest";
|
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||||
import { Test, TestingModule } from "@nestjs/testing";
|
import { Test, TestingModule } from "@nestjs/testing";
|
||||||
import { QualityOrchestratorService } from "./quality-orchestrator.service";
|
import { QualityOrchestratorService } from "./quality-orchestrator.service";
|
||||||
|
import { TokenBudgetService } from "../token-budget/token-budget.service";
|
||||||
import type {
|
import type {
|
||||||
QualityGate,
|
QualityGate,
|
||||||
CompletionClaim,
|
CompletionClaim,
|
||||||
@@ -17,7 +18,15 @@ describe("QualityOrchestratorService", () => {
|
|||||||
|
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
const module: TestingModule = await Test.createTestingModule({
|
const module: TestingModule = await Test.createTestingModule({
|
||||||
providers: [QualityOrchestratorService],
|
providers: [
|
||||||
|
QualityOrchestratorService,
|
||||||
|
{
|
||||||
|
provide: TokenBudgetService,
|
||||||
|
useValue: {
|
||||||
|
checkSuspiciousDoneClaim: vi.fn().mockResolvedValue({ suspicious: false }),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
}).compile();
|
}).compile();
|
||||||
|
|
||||||
service = module.get<QualityOrchestratorService>(QualityOrchestratorService);
|
service = module.get<QualityOrchestratorService>(QualityOrchestratorService);
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import type {
|
|||||||
CompletionValidation,
|
CompletionValidation,
|
||||||
OrchestrationConfig,
|
OrchestrationConfig,
|
||||||
} from "./interfaces";
|
} from "./interfaces";
|
||||||
|
import { TokenBudgetService } from "../token-budget/token-budget.service";
|
||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
@@ -62,6 +63,8 @@ const DEFAULT_GATES: QualityGate[] = [
|
|||||||
export class QualityOrchestratorService {
|
export class QualityOrchestratorService {
|
||||||
private readonly logger = new Logger(QualityOrchestratorService.name);
|
private readonly logger = new Logger(QualityOrchestratorService.name);
|
||||||
|
|
||||||
|
constructor(private readonly tokenBudgetService: TokenBudgetService) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validate a completion claim against quality gates
|
* Validate a completion claim against quality gates
|
||||||
*/
|
*/
|
||||||
@@ -93,10 +96,27 @@ export class QualityOrchestratorService {
|
|||||||
return gate?.required ?? false;
|
return gate?.required ?? false;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Check token budget for suspicious patterns
|
||||||
|
let budgetCheck: { suspicious: boolean; reason?: string } | null = null;
|
||||||
|
try {
|
||||||
|
budgetCheck = await this.tokenBudgetService.checkSuspiciousDoneClaim(claim.taskId);
|
||||||
|
} catch {
|
||||||
|
// Token budget not found - not an error, just means tracking wasn't enabled
|
||||||
|
this.logger.debug(`No token budget found for task ${claim.taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
// Determine verdict
|
// Determine verdict
|
||||||
let verdict: "accepted" | "rejected" | "needs-continuation";
|
let verdict: "accepted" | "rejected" | "needs-continuation";
|
||||||
if (allGatesPassed) {
|
if (allGatesPassed) {
|
||||||
verdict = "accepted";
|
// Even if all gates passed, check for suspicious budget patterns
|
||||||
|
if (budgetCheck?.suspicious) {
|
||||||
|
verdict = "needs-continuation";
|
||||||
|
this.logger.warn(
|
||||||
|
`Suspicious budget pattern detected for task ${claim.taskId}: ${budgetCheck.reason ?? "unknown reason"}`
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
verdict = "accepted";
|
||||||
|
}
|
||||||
} else if (requiredGatesFailed.length > 0) {
|
} else if (requiredGatesFailed.length > 0) {
|
||||||
verdict = "rejected";
|
verdict = "rejected";
|
||||||
} else if (config.strictMode) {
|
} else if (config.strictMode) {
|
||||||
@@ -117,6 +137,14 @@ export class QualityOrchestratorService {
|
|||||||
if (verdict !== "accepted") {
|
if (verdict !== "accepted") {
|
||||||
result.feedback = this.generateRejectionFeedback(result);
|
result.feedback = this.generateRejectionFeedback(result);
|
||||||
result.suggestedActions = this.generateSuggestedActions(gateResults, config);
|
result.suggestedActions = this.generateSuggestedActions(gateResults, config);
|
||||||
|
|
||||||
|
// Add budget feedback if suspicious pattern detected
|
||||||
|
if (budgetCheck?.suspicious && budgetCheck.reason) {
|
||||||
|
result.feedback += `\n\nToken budget analysis: ${budgetCheck.reason}`;
|
||||||
|
result.suggestedActions.push(
|
||||||
|
"Review task completion - significant budget remains or suspicious usage pattern detected"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|||||||
25
apps/api/src/token-budget/dto/allocate-budget.dto.ts
Normal file
25
apps/api/src/token-budget/dto/allocate-budget.dto.ts
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import { IsString, IsUUID, IsInt, IsIn, Min } from "class-validator";
|
||||||
|
import type { TaskComplexity } from "../interfaces";
|
||||||
|
|
||||||
|
/** Complexity labels accepted for the `complexity` field of AllocateBudgetDto. */
const ALLOWED_COMPLEXITIES: TaskComplexity[] = ["low", "medium", "high", "critical"];

/**
 * Payload describing the token budget to allocate for a single task.
 *
 * Each field carries class-validator decorators; the `message` option on a
 * decorator is the text reported when that check fails.
 */
export class AllocateBudgetDto {
  /** Task the budget is allocated for; must be a version-4 UUID. */
  @IsUUID("4", { message: "taskId must be a valid UUID" })
  taskId!: string;

  /** Workspace the task belongs to; must be a version-4 UUID. */
  @IsUUID("4", { message: "workspaceId must be a valid UUID" })
  workspaceId!: string;

  /** Identifier of the agent working on the task; any string is accepted. */
  @IsString({ message: "agentId must be a string" })
  agentId!: string;

  /** Estimated complexity of the task; one of the four allowed labels. */
  @IsIn(ALLOWED_COMPLEXITIES, {
    message: "complexity must be one of: low, medium, high, critical",
  })
  complexity!: TaskComplexity;

  /** Number of tokens granted to the task; a whole number of at least 1. */
  @IsInt({ message: "allocatedTokens must be an integer" })
  @Min(1, { message: "allocatedTokens must be at least 1" })
  allocatedTokens!: number;
}
|
||||||
33
apps/api/src/token-budget/dto/budget-analysis.dto.ts
Normal file
33
apps/api/src/token-budget/dto/budget-analysis.dto.ts
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
/**
 * Value object carrying the outcome of a token budget analysis.
 *
 * The constructor copies each of the eight fields from the supplied object;
 * it performs no validation and no derivation.
 */
export class BudgetAnalysisDto {
  taskId: string;
  allocatedTokens: number;
  usedTokens: number;
  remainingTokens: number;
  utilizationPercentage: number;
  suspiciousPattern: boolean;
  suspiciousReason: string | null;
  recommendation: "accept" | "continue" | "review";

  /**
   * @param data - Field values to copy onto the new instance, one per property.
   */
  constructor(data: {
    taskId: string;
    allocatedTokens: number;
    usedTokens: number;
    remainingTokens: number;
    utilizationPercentage: number;
    suspiciousPattern: boolean;
    suspiciousReason: string | null;
    recommendation: "accept" | "continue" | "review";
  }) {
    const {
      taskId,
      allocatedTokens,
      usedTokens,
      remainingTokens,
      utilizationPercentage,
      suspiciousPattern,
      suspiciousReason,
      recommendation,
    } = data;

    this.taskId = taskId;
    this.allocatedTokens = allocatedTokens;
    this.usedTokens = usedTokens;
    this.remainingTokens = remainingTokens;
    this.utilizationPercentage = utilizationPercentage;
    this.suspiciousPattern = suspiciousPattern;
    this.suspiciousReason = suspiciousReason;
    this.recommendation = recommendation;
  }
}
|
||||||
3
apps/api/src/token-budget/dto/index.ts
Normal file
3
apps/api/src/token-budget/dto/index.ts
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
export * from "./allocate-budget.dto";
|
||||||
|
export * from "./update-usage.dto";
|
||||||
|
export * from "./budget-analysis.dto";
|
||||||
14
apps/api/src/token-budget/dto/update-usage.dto.ts
Normal file
14
apps/api/src/token-budget/dto/update-usage.dto.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import { IsInt, Min } from "class-validator";
|
||||||
|
|
||||||
|
/**
 * DTO for updating token usage for a task.
 *
 * Both counts must be non-negative integers; the class-validator decorators
 * below enforce this and supply the failure messages.
 *
 * NOTE(review): presumably these are amounts to add to the running counters,
 * matching TokenBudgetService.updateUsage - confirm against the caller.
 */
|
||||||
|
export class UpdateUsageDto {
|
||||||
|
// Input-token count: integer, >= 0.
@IsInt({ message: "inputTokens must be an integer" })
|
||||||
|
@Min(0, { message: "inputTokens must be non-negative" })
|
||||||
|
inputTokens!: number;
|
||||||
|
|
||||||
|
// Output-token count: integer, >= 0.
@IsInt({ message: "outputTokens must be an integer" })
|
||||||
|
@Min(0, { message: "outputTokens must be non-negative" })
|
||||||
|
outputTokens!: number;
|
||||||
|
}
|
||||||
4
apps/api/src/token-budget/index.ts
Normal file
4
apps/api/src/token-budget/index.ts
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
export * from "./token-budget.module";
|
||||||
|
export * from "./token-budget.service";
|
||||||
|
export * from "./interfaces";
|
||||||
|
export * from "./dto";
|
||||||
1
apps/api/src/token-budget/interfaces/index.ts
Normal file
1
apps/api/src/token-budget/interfaces/index.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export * from "./token-budget.interface";
|
||||||
@@ -0,0 +1,69 @@
|
|||||||
|
/**
 * Task complexity levels for budget allocation.
 *
 * The same four labels are the keys of COMPLEXITY_BUDGETS and the values
 * accepted for the `complexity` field of AllocateBudgetDto.
 */
|
||||||
|
export type TaskComplexity = "low" | "medium" | "high" | "critical";
|
||||||
|
|
||||||
|
/**
 * Token budget data structure.
 *
 * Field names mirror the columns of the Prisma `TokenBudget` model: one row
 * per task holding the allocation, running usage counters, timestamps and
 * the stored suspicious-pattern flags.
 */
|
||||||
|
export interface TokenBudgetData {
|
||||||
|
// Primary key of the budget row.
id: string;
|
||||||
|
// Task the budget belongs to (unique per budget row in the schema).
taskId: string;
|
||||||
|
workspaceId: string;
|
||||||
|
agentId: string;
|
||||||
|
// Total tokens granted to the task.
allocatedTokens: number;
|
||||||
|
estimatedComplexity: TaskComplexity;
|
||||||
|
// Running counters; the schema defaults all three to 0.
inputTokensUsed: number;
|
||||||
|
outputTokensUsed: number;
|
||||||
|
// Sum of the input and output counters as written by updateUsage.
totalTokensUsed: number;
|
||||||
|
// NOTE(review): the schema column is a nullable Decimal; the conversion to number is not visible here.
estimatedCost: number | null;
|
||||||
|
startedAt: Date;
|
||||||
|
lastUpdatedAt: Date;
|
||||||
|
// Null until the budget has been marked completed.
completedAt: Date | null;
|
||||||
|
// Ratio totalTokensUsed / allocatedTokens as stored by updateUsage; null before the first update.
budgetUtilization: number | null;
|
||||||
|
suspiciousPattern: boolean;
|
||||||
|
suspiciousReason: string | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Budget analysis result, as produced by TokenBudgetService.analyzeBudget.
 */
|
||||||
|
export interface BudgetAnalysis {
|
||||||
|
taskId: string;
|
||||||
|
allocatedTokens: number;
|
||||||
|
// Taken from the budget's totalTokensUsed.
usedTokens: number;
|
||||||
|
// allocatedTokens - usedTokens.
remainingTokens: number;
|
||||||
|
// (usedTokens / allocatedTokens) * 100, i.e. on a 0-100 scale rather than 0-1.
utilizationPercentage: number;
|
||||||
|
// True when suspicious-pattern detection triggered for this budget.
suspiciousPattern: boolean;
|
||||||
|
// Reason reported by the detection, or null when none was given.
suspiciousReason: string | null;
|
||||||
|
// "accept" when nothing triggered; "continue" for a high-severity pattern; "review" for any other triggered pattern.
recommendation: "accept" | "continue" | "review";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Suspicious pattern detection result.
 *
 * TokenBudgetService.analyzeBudget reads `triggered`, `severity` and `reason`
 * from this shape when building its analysis.
 */
|
||||||
|
export interface SuspiciousPattern {
|
||||||
|
// Whether any suspicious pattern was detected.
triggered: boolean;
|
||||||
|
// Explanation of the detected pattern; optional.
reason?: string;
|
||||||
|
// analyzeBudget maps "high" to a "continue" recommendation and the other levels to "review".
severity: "low" | "medium" | "high";
|
||||||
|
// NOTE(review): analyzeBudget derives its own recommendation from severity and does not read this field - confirm who consumes it.
recommendation: "accept" | "continue" | "review";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Default token allocation for each task complexity level.
 *
 * Typed as read-only: this is a shared, module-level lookup table and
 * consumers are only expected to read from it.
 */
export const COMPLEXITY_BUDGETS: Readonly<Record<TaskComplexity, number>> = {
  low: 50000, // Simple fixes, typos
  medium: 150000, // Standard features
  high: 350000, // Complex features
  critical: 750000, // Major refactoring
};
|
||||||
|
|
||||||
|
/**
 * Token budget thresholds for suspicious pattern detection.
 *
 * All values are fractions of the allocated budget (0-1 scale). Declared
 * `as const` so the shared thresholds are read-only for consumers.
 */
export const BUDGET_THRESHOLDS = {
  // More than 20% of the budget still remaining when a task claims to be done is flagged.
  SUSPICIOUS_REMAINING: 0.2,
  // Less than 10% of the budget used is flagged as very low utilization.
  VERY_LOW_UTILIZATION: 0.1,
  // More than 95% of the budget used.
  // NOTE(review): originally described as suspicious when gates are also failing; confirm against the detection code.
  VERY_HIGH_UTILIZATION: 0.95,
} as const;
|
||||||
14
apps/api/src/token-budget/token-budget.module.ts
Normal file
14
apps/api/src/token-budget/token-budget.module.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import { Module } from "@nestjs/common";
|
||||||
|
import { TokenBudgetService } from "./token-budget.service";
|
||||||
|
import { PrismaModule } from "../prisma/prisma.module";
|
||||||
|
|
||||||
|
/**
 * Token Budget Module
 * Tracks token usage and prevents premature done claims.
 *
 * Imports PrismaModule because TokenBudgetService takes PrismaService in its
 * constructor, and exports TokenBudgetService so that importing modules
 * (e.g. the quality orchestrator module) can inject it.
 */
|
||||||
|
@Module({
|
||||||
|
imports: [PrismaModule],
|
||||||
|
providers: [TokenBudgetService],
|
||||||
|
exports: [TokenBudgetService],
|
||||||
|
})
|
||||||
|
export class TokenBudgetModule {}
|
||||||
293
apps/api/src/token-budget/token-budget.service.spec.ts
Normal file
293
apps/api/src/token-budget/token-budget.service.spec.ts
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||||
|
import { Test, TestingModule } from "@nestjs/testing";
|
||||||
|
import { TokenBudgetService } from "./token-budget.service";
|
||||||
|
import { PrismaService } from "../prisma/prisma.service";
|
||||||
|
import { NotFoundException } from "@nestjs/common";
|
||||||
|
import type { TaskComplexity } from "./interfaces";
|
||||||
|
import { COMPLEXITY_BUDGETS } from "./interfaces";
|
||||||
|
|
||||||
|
describe("TokenBudgetService", () => {
|
||||||
|
let service: TokenBudgetService;
|
||||||
|
let prisma: PrismaService;
|
||||||
|
|
||||||
|
const mockPrismaService = {
|
||||||
|
tokenBudget: {
|
||||||
|
create: vi.fn(),
|
||||||
|
findUnique: vi.fn(),
|
||||||
|
update: vi.fn(),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const mockWorkspaceId = "550e8400-e29b-41d4-a716-446655440001";
|
||||||
|
const mockTaskId = "550e8400-e29b-41d4-a716-446655440002";
|
||||||
|
const mockAgentId = "test-agent-001";
|
||||||
|
|
||||||
|
const mockTokenBudget = {
|
||||||
|
id: "550e8400-e29b-41d4-a716-446655440003",
|
||||||
|
taskId: mockTaskId,
|
||||||
|
workspaceId: mockWorkspaceId,
|
||||||
|
agentId: mockAgentId,
|
||||||
|
allocatedTokens: 150000,
|
||||||
|
estimatedComplexity: "medium" as TaskComplexity,
|
||||||
|
inputTokensUsed: 50000,
|
||||||
|
outputTokensUsed: 30000,
|
||||||
|
totalTokensUsed: 80000,
|
||||||
|
estimatedCost: null,
|
||||||
|
startedAt: new Date("2026-01-31T10:00:00Z"),
|
||||||
|
lastUpdatedAt: new Date("2026-01-31T10:30:00Z"),
|
||||||
|
completedAt: null,
|
||||||
|
budgetUtilization: 0.533,
|
||||||
|
suspiciousPattern: false,
|
||||||
|
suspiciousReason: null,
|
||||||
|
};
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
const module: TestingModule = await Test.createTestingModule({
|
||||||
|
providers: [
|
||||||
|
TokenBudgetService,
|
||||||
|
{
|
||||||
|
provide: PrismaService,
|
||||||
|
useValue: mockPrismaService,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}).compile();
|
||||||
|
|
||||||
|
service = module.get<TokenBudgetService>(TokenBudgetService);
|
||||||
|
prisma = module.get<PrismaService>(PrismaService);
|
||||||
|
|
||||||
|
vi.clearAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should be defined", () => {
|
||||||
|
expect(service).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
// allocateBudget: the DTO is forwarded to prisma.tokenBudget.create, with
// `complexity` stored under `estimatedComplexity`.
describe("allocateBudget", () => {
  it("should allocate budget for a new task", async () => {
    const request = {
      taskId: mockTaskId,
      workspaceId: mockWorkspaceId,
      agentId: mockAgentId,
      complexity: "medium" as TaskComplexity,
      allocatedTokens: 150000,
    };
    const budgetTable = mockPrismaService.tokenBudget;
    budgetTable.create.mockResolvedValue(mockTokenBudget);

    const created = await service.allocateBudget(request);

    expect(created).toEqual(mockTokenBudget);
    expect(budgetTable.create).toHaveBeenCalledWith({
      data: {
        taskId: request.taskId,
        workspaceId: request.workspaceId,
        agentId: request.agentId,
        allocatedTokens: request.allocatedTokens,
        estimatedComplexity: request.complexity,
      },
    });
  });
});
|
||||||
|
|
||||||
|
// updateUsage: new counts are added to the stored counters and the
// utilization ratio is recomputed before the row is written back.
describe("updateUsage", () => {
  it("should update token usage and recalculate utilization", async () => {
    const budgetTable = mockPrismaService.tokenBudget;
    budgetTable.findUnique.mockResolvedValue(mockTokenBudget);

    // Row handed back by the mocked update(): 10k more input and output tokens.
    const rowAfterUpdate = {
      ...mockTokenBudget,
      inputTokensUsed: 60000,
      outputTokensUsed: 40000,
      totalTokensUsed: 100000,
      budgetUtilization: 0.667,
    };
    budgetTable.update.mockResolvedValue(rowAfterUpdate);

    const outcome = await service.updateUsage(mockTaskId, 10000, 10000);

    expect(outcome).toEqual(rowAfterUpdate);
    expect(budgetTable.findUnique).toHaveBeenCalledWith({
      where: { taskId: mockTaskId },
    });
    expect(budgetTable.update).toHaveBeenCalledWith({
      where: { taskId: mockTaskId },
      data: {
        inputTokensUsed: 60000,
        outputTokensUsed: 40000,
        totalTokensUsed: 100000,
        budgetUtilization: expect.closeTo(0.667, 2),
      },
    });
  });

  it("should throw NotFoundException if budget does not exist", async () => {
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);

    const attempt = service.updateUsage(mockTaskId, 1000, 1000);

    await expect(attempt).rejects.toThrow(NotFoundException);
  });
});
|
||||||
|
|
||||||
|
// analyzeBudget: derives remaining tokens, utilization (0-100 scale) and a
// recommendation from the stored budget row.
describe("analyzeBudget", () => {
  it("should analyze budget and detect suspicious pattern for high remaining budget", async () => {
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget);

    const analysis = await service.analyzeBudget(mockTaskId);

    expect(analysis.taskId).toBe(mockTaskId);
    expect(analysis.allocatedTokens).toBe(150000);
    expect(analysis.usedTokens).toBe(80000);
    expect(analysis.remainingTokens).toBe(70000);
    expect(analysis.utilizationPercentage).toBeCloseTo(53.3, 1);
    // 46.7% of the budget is left, which is above the 20% threshold.
    expect(analysis.suspiciousPattern).toBe(true);
    expect(analysis.recommendation).toBe("review");
  });

  it("should not detect suspicious pattern when utilization is high", async () => {
    // 127,500 of 150,000 tokens used: 85% utilization, 15% remaining (under the 20% threshold).
    const inputTokensUsed = 65000;
    const outputTokensUsed = 62500;
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue({
      ...mockTokenBudget,
      inputTokensUsed,
      outputTokensUsed,
      totalTokensUsed: 127500,
      budgetUtilization: 0.85,
    });

    const analysis = await service.analyzeBudget(mockTaskId);

    expect(analysis.utilizationPercentage).toBeCloseTo(85.0, 1);
    expect(analysis.suspiciousPattern).toBe(false);
    expect(analysis.recommendation).toBe("accept");
  });

  it("should throw NotFoundException if budget does not exist", async () => {
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);

    const attempt = service.analyzeBudget(mockTaskId);

    await expect(attempt).rejects.toThrow(NotFoundException);
  });
});
|
||||||
|
|
||||||
|
// checkSuspiciousDoneClaim: flags done-claims made with a large share of the
// budget unused, or with very little of it used at all.
describe("checkSuspiciousDoneClaim", () => {
  // Builds a budget row with the given usage; the total is the sum of both counters.
  const budgetWithUsage = (inputTokensUsed: number, outputTokensUsed: number, budgetUtilization: number) => ({
    ...mockTokenBudget,
    inputTokensUsed,
    outputTokensUsed,
    totalTokensUsed: inputTokensUsed + outputTokensUsed,
    budgetUtilization,
  });

  it("should detect suspicious pattern when >20% budget remaining", async () => {
    // 105,000 of 150,000 used: 30% of the budget remains.
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetWithUsage(50000, 55000, 0.7));

    const verdict = await service.checkSuspiciousDoneClaim(mockTaskId);

    expect(verdict.suspicious).toBe(true);
    expect(verdict.reason).toContain("30.0%");
  });

  it("should not flag as suspicious when <20% budget remaining", async () => {
    // 135,000 of 150,000 used: 10% of the budget remains.
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetWithUsage(70000, 65000, 0.9));

    const verdict = await service.checkSuspiciousDoneClaim(mockTaskId);

    expect(verdict.suspicious).toBe(false);
    expect(verdict.reason).toBeUndefined();
  });

  it("should detect very low utilization (<10%)", async () => {
    // 7,500 of 150,000 used: 5% utilization.
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(budgetWithUsage(4000, 3500, 0.05));

    const verdict = await service.checkSuspiciousDoneClaim(mockTaskId);

    expect(verdict.suspicious).toBe(true);
    expect(verdict.reason).toContain("Very low budget utilization");
  });
});
|
||||||
|
|
||||||
|
// getBudgetUtilization: reports used / allocated on a 0-100 scale.
describe("getBudgetUtilization", () => {
  it("should return budget utilization percentage", async () => {
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(mockTokenBudget);

    const percentage = await service.getBudgetUtilization(mockTaskId);

    // 80,000 of 150,000 tokens used.
    expect(percentage).toBeCloseTo(53.3, 1);
  });

  it("should throw NotFoundException if budget does not exist", async () => {
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);

    const attempt = service.getBudgetUtilization(mockTaskId);

    await expect(attempt).rejects.toThrow(NotFoundException);
  });
});
|
||||||
|
|
||||||
|
// markCompleted: stamps completedAt on the task's budget row.
describe("markCompleted", () => {
  it("should mark budget as completed", async () => {
    const budgetTable = mockPrismaService.tokenBudget;
    budgetTable.findUnique.mockResolvedValue(mockTokenBudget);
    budgetTable.update.mockResolvedValue({
      ...mockTokenBudget,
      completedAt: new Date("2026-01-31T11:00:00Z"),
    });

    await service.markCompleted(mockTaskId);

    // Only the fact that a Date was written is asserted, not its value.
    expect(budgetTable.update).toHaveBeenCalledWith({
      where: { taskId: mockTaskId },
      data: { completedAt: expect.any(Date) },
    });
  });

  it("should throw NotFoundException if budget does not exist", async () => {
    mockPrismaService.tokenBudget.findUnique.mockResolvedValue(null);

    const attempt = service.markCompleted(mockTaskId);

    await expect(attempt).rejects.toThrow(NotFoundException);
  });
});
|
||||||
|
|
||||||
|
// getDefaultBudgetForComplexity: each level maps to its COMPLEXITY_BUDGETS entry.
describe("getDefaultBudgetForComplexity", () => {
  const levels: TaskComplexity[] = ["low", "medium", "high", "critical"];

  // One test per level, registered in the same order and under the same titles.
  for (const level of levels) {
    it(`should return correct budget for ${level} complexity`, () => {
      const defaultBudget = service.getDefaultBudgetForComplexity(level);

      expect(defaultBudget).toBe(COMPLEXITY_BUDGETS[level]);
    });
  }
});
|
||||||
|
});
|
||||||
256
apps/api/src/token-budget/token-budget.service.ts
Normal file
256
apps/api/src/token-budget/token-budget.service.ts
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
import { Injectable, Logger, NotFoundException } from "@nestjs/common";
|
||||||
|
import { PrismaService } from "../prisma/prisma.service";
|
||||||
|
import type { TokenBudget } from "@prisma/client";
|
||||||
|
import type { TaskComplexity, BudgetAnalysis } from "./interfaces";
|
||||||
|
import { COMPLEXITY_BUDGETS, BUDGET_THRESHOLDS } from "./interfaces";
|
||||||
|
import type { AllocateBudgetDto } from "./dto";
|
||||||
|
import { BudgetAnalysisDto } from "./dto";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Token Budget Service
|
||||||
|
* Tracks token usage and prevents premature done claims with significant budget remaining
|
||||||
|
*/
|
||||||
|
@Injectable()
|
||||||
|
export class TokenBudgetService {
|
||||||
|
private readonly logger = new Logger(TokenBudgetService.name);
|
||||||
|
|
||||||
|
constructor(private readonly prisma: PrismaService) {}
|
||||||
|
|
||||||
|
/**
 * Create the token budget row for a task.
 *
 * Logs the allocation, then inserts a row built from the DTO; the DTO's
 * `complexity` is stored as `estimatedComplexity`.
 *
 * @param dto - Task, workspace and agent identifiers plus the allocation and complexity.
 * @returns The budget row created by Prisma.
 */
async allocateBudget(dto: AllocateBudgetDto): Promise<TokenBudget> {
  const { taskId, workspaceId, agentId, allocatedTokens, complexity } = dto;

  this.logger.log(`Allocating ${String(allocatedTokens)} tokens for task ${taskId}`);

  return this.prisma.tokenBudget.create({
    data: {
      taskId,
      workspaceId,
      agentId,
      allocatedTokens,
      estimatedComplexity: complexity,
    },
  });
}
|
||||||
|
|
||||||
|
/**
 * Add newly consumed tokens to a task's budget and refresh its utilization.
 *
 * Reads the current row, adds `inputTokens` / `outputTokens` to the stored
 * counters, recomputes the total and the utilization ratio, and writes all
 * four values back in a single update.
 *
 * @param taskId - Task whose budget row is updated.
 * @param inputTokens - Input tokens to add to the running input counter.
 * @param outputTokens - Output tokens to add to the running output counter.
 * @returns The updated budget row.
 * @throws NotFoundException when no budget row exists for `taskId`.
 */
async updateUsage(
  taskId: string,
  inputTokens: number,
  outputTokens: number
): Promise<TokenBudget> {
  this.logger.debug(
    `Updating usage for task ${taskId}: +${String(inputTokens)} input, +${String(outputTokens)} output`
  );

  // Get current budget
  const budget = await this.prisma.tokenBudget.findUnique({
    where: { taskId },
  });

  if (!budget) {
    throw new NotFoundException(`Token budget not found for task ${taskId}`);
  }

  // Calculate new totals.
  // NOTE(review): this is a read-modify-write on absolute values, so two
  // concurrent calls for the same task can overwrite each other's increment.
  // Prisma's atomic `increment` would avoid that but changes the update payload.
  const newInputTokens = budget.inputTokensUsed + inputTokens;
  const newOutputTokens = budget.outputTokensUsed + outputTokens;
  const newTotalTokens = newInputTokens + newOutputTokens;

  // Calculate utilization. A zero (or negative) allocation would otherwise
  // produce Infinity or NaN, which must not be stored as the ratio.
  const utilization =
    budget.allocatedTokens > 0 ? newTotalTokens / budget.allocatedTokens : 0;

  // Update budget
  const updatedBudget = await this.prisma.tokenBudget.update({
    where: { taskId },
    data: {
      inputTokensUsed: newInputTokens,
      outputTokensUsed: newOutputTokens,
      totalTokensUsed: newTotalTokens,
      budgetUtilization: utilization,
    },
  });

  return updatedBudget;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze budget for suspicious patterns
|
||||||
|
*/
|
||||||
|
async analyzeBudget(taskId: string): Promise<BudgetAnalysis> {
|
||||||
|
this.logger.debug(`Analyzing budget for task ${taskId}`);
|
||||||
|
|
||||||
|
const budget = await this.prisma.tokenBudget.findUnique({
|
||||||
|
where: { taskId },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!budget) {
|
||||||
|
throw new NotFoundException(`Token budget not found for task ${taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const usedTokens = budget.totalTokensUsed;
|
||||||
|
const allocatedTokens = budget.allocatedTokens;
|
||||||
|
const remainingTokens = allocatedTokens - usedTokens;
|
||||||
|
const utilizationPercentage = (usedTokens / allocatedTokens) * 100;
|
||||||
|
|
||||||
|
// Detect suspicious patterns
|
||||||
|
const suspiciousPattern = this.detectSuspiciousPattern(budget);
|
||||||
|
|
||||||
|
// Determine recommendation
|
||||||
|
let recommendation: "accept" | "continue" | "review";
|
||||||
|
if (suspiciousPattern.triggered) {
|
||||||
|
if (suspiciousPattern.severity === "high") {
|
||||||
|
recommendation = "continue";
|
||||||
|
} else {
|
||||||
|
recommendation = "review";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
recommendation = "accept";
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BudgetAnalysisDto({
|
||||||
|
taskId,
|
||||||
|
allocatedTokens,
|
||||||
|
usedTokens,
|
||||||
|
remainingTokens,
|
||||||
|
utilizationPercentage,
|
||||||
|
suspiciousPattern: suspiciousPattern.triggered,
|
||||||
|
suspiciousReason: suspiciousPattern.reason ?? null,
|
||||||
|
recommendation,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if done claim is suspicious (>20% budget remaining)
|
||||||
|
*/
|
||||||
|
async checkSuspiciousDoneClaim(
|
||||||
|
taskId: string
|
||||||
|
): Promise<{ suspicious: boolean; reason?: string }> {
|
||||||
|
this.logger.debug(`Checking done claim for task ${taskId}`);
|
||||||
|
|
||||||
|
const budget = await this.prisma.tokenBudget.findUnique({
|
||||||
|
where: { taskId },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!budget) {
|
||||||
|
throw new NotFoundException(`Token budget not found for task ${taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const suspiciousPattern = this.detectSuspiciousPattern(budget);
|
||||||
|
|
||||||
|
if (suspiciousPattern.triggered && suspiciousPattern.reason) {
|
||||||
|
return {
|
||||||
|
suspicious: true,
|
||||||
|
reason: suspiciousPattern.reason,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (suspiciousPattern.triggered) {
|
||||||
|
return {
|
||||||
|
suspicious: true,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return { suspicious: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get budget utilization percentage
|
||||||
|
*/
|
||||||
|
async getBudgetUtilization(taskId: string): Promise<number> {
|
||||||
|
const budget = await this.prisma.tokenBudget.findUnique({
|
||||||
|
where: { taskId },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!budget) {
|
||||||
|
throw new NotFoundException(`Token budget not found for task ${taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const utilizationPercentage = (budget.totalTokensUsed / budget.allocatedTokens) * 100;
|
||||||
|
|
||||||
|
return utilizationPercentage;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark task as completed
|
||||||
|
*/
|
||||||
|
async markCompleted(taskId: string): Promise<void> {
|
||||||
|
this.logger.log(`Marking budget as completed for task ${taskId}`);
|
||||||
|
|
||||||
|
const budget = await this.prisma.tokenBudget.findUnique({
|
||||||
|
where: { taskId },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!budget) {
|
||||||
|
throw new NotFoundException(`Token budget not found for task ${taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.prisma.tokenBudget.update({
|
||||||
|
where: { taskId },
|
||||||
|
data: {
|
||||||
|
completedAt: new Date(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get complexity-based budget allocation
|
||||||
|
*/
|
||||||
|
getDefaultBudgetForComplexity(complexity: TaskComplexity): number {
|
||||||
|
return COMPLEXITY_BUDGETS[complexity];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect suspicious patterns in budget usage
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
private detectSuspiciousPattern(budget: TokenBudget): {
|
||||||
|
triggered: boolean;
|
||||||
|
reason?: string;
|
||||||
|
severity: "low" | "medium" | "high";
|
||||||
|
recommendation: "accept" | "continue" | "review";
|
||||||
|
} {
|
||||||
|
const utilization = budget.totalTokensUsed / budget.allocatedTokens;
|
||||||
|
const remainingPercentage = (1 - utilization) * 100;
|
||||||
|
|
||||||
|
// Pattern 1: Very low utilization (<10%)
|
||||||
|
if (utilization < BUDGET_THRESHOLDS.VERY_LOW_UTILIZATION) {
|
||||||
|
return {
|
||||||
|
triggered: true,
|
||||||
|
reason: `Very low budget utilization (${(utilization * 100).toFixed(1)}%). This suggests minimal work was performed.`,
|
||||||
|
severity: "high",
|
||||||
|
recommendation: "continue",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pattern 2: Done claimed with >20% budget remaining
|
||||||
|
if (utilization < 1 - BUDGET_THRESHOLDS.SUSPICIOUS_REMAINING) {
|
||||||
|
return {
|
||||||
|
triggered: true,
|
||||||
|
reason: `Task claimed done with ${remainingPercentage.toFixed(1)}% budget remaining (${String(budget.allocatedTokens - budget.totalTokensUsed)} tokens). This may indicate premature completion.`,
|
||||||
|
severity: "medium",
|
||||||
|
recommendation: "review",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pattern 3: Extremely high utilization (>95%) - might indicate inefficiency
|
||||||
|
if (utilization > BUDGET_THRESHOLDS.VERY_HIGH_UTILIZATION) {
|
||||||
|
return {
|
||||||
|
triggered: true,
|
||||||
|
reason: `Very high budget utilization (${(utilization * 100).toFixed(1)}%). Task may need more budget or review for efficiency.`,
|
||||||
|
severity: "low",
|
||||||
|
recommendation: "review",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
triggered: false,
|
||||||
|
severity: "low",
|
||||||
|
recommendation: "accept",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
417
docs/3-architecture/non-ai-coordinator-overlap-analysis.md
Normal file
417
docs/3-architecture/non-ai-coordinator-overlap-analysis.md
Normal file
@@ -0,0 +1,417 @@
|
|||||||
|
# Non-AI Coordinator Pattern - Overlap Analysis
|
||||||
|
|
||||||
|
**Date:** 2026-01-31
|
||||||
|
**Purpose:** Identify overlaps and differences between two complementary architecture documents
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Documents Compared
|
||||||
|
|
||||||
|
### Document A: Mosaic Stack Non-AI Coordinator Pattern
|
||||||
|
|
||||||
|
**Location:** `/home/jwoltje/src/mosaic-stack/docs/3-architecture/non-ai-coordinator-pattern.md`
|
||||||
|
**Length:** 903 lines
|
||||||
|
**Problem Space:** L-015 Agent Premature Completion
|
||||||
|
**Focus:** Single-agent quality enforcement
|
||||||
|
|
||||||
|
### Document B: Quality-Rails Orchestration Architecture
|
||||||
|
|
||||||
|
**Location:** `/home/jwoltje/src/jarvis-brain/docs/work/quality-rails-orchestration-architecture.md`
|
||||||
|
**Length:** ~600 lines
|
||||||
|
**Problem Space:** Context exhaustion in multi-issue orchestration
|
||||||
|
**Focus:** Multi-agent lifecycle management at scale
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Summary Table
|
||||||
|
|
||||||
|
| Aspect | Document A (Existing) | Document B (New) | Overlap? |
|
||||||
|
| -------------------------- | ------------------------------------------- | ---------------------------------------- | ------------------ |
|
||||||
|
| **Primary Problem** | Agents claim "done" prematurely | Agents pause at 95% context | Different |
|
||||||
|
| **Coordinator Type** | Non-AI (TypeScript/NestJS) | Non-AI (Python/Node.js) | ✅ Overlap |
|
||||||
|
| **Quality Gates** | BuildGate, LintGate, TestGate, CoverageGate | Mechanical gates (lint, typecheck, test) | ✅ Overlap |
|
||||||
|
| **Agent Scope** | Single agent per issue | Multi-agent orchestration | Different |
|
||||||
|
| **Context Management** | Not addressed | Core feature (80% compact, 95% rotate) | Different |
|
||||||
|
| **Model Assignment** | Not addressed | Agent profiles + difficulty matching | Different |
|
||||||
|
| **Issue Sizing** | Not addressed | 50% rule, epic decomposition | Different |
|
||||||
|
| **Implementation Status** | Full TypeScript code | Python pseudocode + PoC plan | Different |
|
||||||
|
| **Forced Continuation** | Yes (rejection loop) | No (preventive via context mgmt) | Different approach |
|
||||||
|
| **Non-negotiable Quality** | Yes | Yes | ✅ Overlap |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Unique to Document A (Existing Mosaic Stack Pattern)
|
||||||
|
|
||||||
|
### 1. **Premature Completion Problem**
|
||||||
|
|
||||||
|
- **Problem:** Agents claim work is "done" when tests fail, files are missing, or requirements are incomplete
|
||||||
|
- **Root cause:** Agent interprets partial completion as success
|
||||||
|
- **Example:** Agent implements feature, tests fail, agent says "done" anyway
|
||||||
|
|
||||||
|
### 2. **Rejection Loop & Forced Continuation**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// CompletionVerificationEngine
|
||||||
|
if (!allGatesPassed) {
|
||||||
|
return this.forcedContinuationService.generateContinuationPrompt({
|
||||||
|
failedGates,
|
||||||
|
tone: "non-negotiable",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key innovation:** When an agent claims "done" but gates fail, the coordinator injects a prompt that forces continuation:
|
||||||
|
|
||||||
|
```
|
||||||
|
COMPLETION REJECTED. The following quality gates have failed:
|
||||||
|
- Build Gate: Compilation errors detected
|
||||||
|
- Test Gate: 3/15 tests failing
|
||||||
|
|
||||||
|
You must continue working until ALL quality gates pass.
|
||||||
|
This is not optional. Do not claim completion until gates pass.
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. **State Machine for Completion Claims**
|
||||||
|
|
||||||
|
```
|
||||||
|
Agent Working → Claims Done → Run Gates → Pass/Reject
|
||||||
|
↓
|
||||||
|
Reject → Force Continue → Agent Working
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. **TypeScript/NestJS Implementation**
|
||||||
|
|
||||||
|
- Full production-ready service code
|
||||||
|
- QualityOrchestrator service
|
||||||
|
- Gate interfaces and implementations
|
||||||
|
- Dependency injection architecture
|
||||||
|
|
||||||
|
### 5. **CompletionVerificationEngine**
|
||||||
|
|
||||||
|
- Intercepts agent completion claims
|
||||||
|
- Runs all gates synchronously
|
||||||
|
- Blocks "done" status until gates pass
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Unique to Document B (New Quality-Rails Orchestration)
|
||||||
|
|
||||||
|
### 1. **Context Exhaustion Problem**
|
||||||
|
|
||||||
|
- **Problem:** AI orchestrators pause at 95% context usage, losing autonomy
|
||||||
|
- **Root cause:** Linear context growth without compaction
|
||||||
|
- **Example:** M4 session completed 11 issues, paused at 95%, required manual restart
|
||||||
|
|
||||||
|
### 2. **50% Rule for Issue Sizing**
|
||||||
|
|
||||||
|
```
|
||||||
|
Issue context estimate MUST NOT exceed 50% of target agent's context limit.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- Sonnet agent: 200K context limit
|
||||||
|
- Maximum issue estimate: 100K tokens
|
||||||
|
- Reasoning: Leaves 100K for system prompts, conversation, safety buffer
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. **Agent Profiles & Model Assignment**
|
||||||
|
|
||||||
|
```python
|
||||||
|
AGENT_PROFILES = {
|
||||||
|
'opus': {
|
||||||
|
'context_limit': 200000,
|
||||||
|
'cost_per_mtok': 15.00,
|
||||||
|
'capabilities': ['high', 'medium', 'low']
|
||||||
|
},
|
||||||
|
'sonnet': {
|
||||||
|
'context_limit': 200000,
|
||||||
|
'cost_per_mtok': 3.00,
|
||||||
|
'capabilities': ['medium', 'low']
|
||||||
|
},
|
||||||
|
'glm': {
|
||||||
|
'context_limit': 128000,
|
||||||
|
'cost_per_mtok': 0.00, # Self-hosted
|
||||||
|
'capabilities': ['medium', 'low']
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Assignment logic:** Choose cheapest capable agent based on:
|
||||||
|
|
||||||
|
- Estimated context usage
|
||||||
|
- Difficulty level
|
||||||
|
- Agent capabilities
|
||||||
|
|
||||||
|
### 4. **Context Monitoring & Session Rotation**
|
||||||
|
|
||||||
|
```python
|
||||||
|
def monitor_agent_context(agent_id: str) -> ContextAction:
|
||||||
|
usage = get_context_usage(agent_id)
|
||||||
|
|
||||||
|
if usage > 0.95:
|
||||||
|
return ContextAction.ROTATE_SESSION # Start fresh agent
|
||||||
|
elif usage > 0.80:
|
||||||
|
return ContextAction.COMPACT # Summarize completed work
|
||||||
|
else:
|
||||||
|
return ContextAction.CONTINUE # Keep working
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. **Context Estimation Formula**
|
||||||
|
|
||||||
|
```python
|
||||||
|
def estimate_context(issue: Issue) -> int:
|
||||||
|
base = (
|
||||||
|
issue.files_to_modify * 7000 + # Average file size
|
||||||
|
issue.implementation_complexity * 20000 + # Code writing
|
||||||
|
issue.test_requirements * 10000 + # Test writing
|
||||||
|
issue.documentation * 3000 # Docs
|
||||||
|
)
|
||||||
|
|
||||||
|
buffer = base * 1.3 # 30% safety margin
|
||||||
|
return int(buffer)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. **Epic Decomposition Workflow**
|
||||||
|
|
||||||
|
```
|
||||||
|
User creates Epic → Coordinator analyzes scope → Decomposes into sub-issues
|
||||||
|
↓
|
||||||
|
Each issue ≤ 50% agent context limit
|
||||||
|
↓
|
||||||
|
Assigns metadata: estimated_context, difficulty
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. **Multi-Model Support**
|
||||||
|
|
||||||
|
- Supports Opus, Sonnet, Haiku, GLM, MiniMax, Cogito
|
||||||
|
- Cost optimization through model selection
|
||||||
|
- Self-hosted model preference when capable
|
||||||
|
|
||||||
|
### 8. **Proactive Context Management**
|
||||||
|
|
||||||
|
- Prevents context exhaustion BEFORE it happens
|
||||||
|
- No manual intervention needed
|
||||||
|
- Maintains autonomy through entire queue
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overlaps (Both Documents)
|
||||||
|
|
||||||
|
### 1. **Non-AI Coordinator Pattern** ✅
|
||||||
|
|
||||||
|
Both use deterministic code (not AI) as the orchestrator:
|
||||||
|
|
||||||
|
- **Doc A:** TypeScript/NestJS service
|
||||||
|
- **Doc B:** Python/Node.js coordinator
|
||||||
|
- **Rationale:** Avoid AI orchestrator context limits and inconsistency
|
||||||
|
|
||||||
|
### 2. **Mechanical Quality Gates** ✅
|
||||||
|
|
||||||
|
Both enforce quality through automated checks:
|
||||||
|
|
||||||
|
**Doc A gates:**
|
||||||
|
|
||||||
|
- BuildGate (compilation)
|
||||||
|
- LintGate (code style)
|
||||||
|
- TestGate (unit/integration tests)
|
||||||
|
- CoverageGate (test coverage threshold)
|
||||||
|
|
||||||
|
**Doc B gates:**
|
||||||
|
|
||||||
|
- lint (code quality)
|
||||||
|
- typecheck (type safety)
|
||||||
|
- test (functionality)
|
||||||
|
- coverage (same as Doc A)
|
||||||
|
|
||||||
|
### 3. **Programmatic Enforcement** ✅
|
||||||
|
|
||||||
|
Both prevent agents from bypassing quality checks:
|
||||||
|
|
||||||
|
- **Doc A:** Rejection loop blocks completion until gates pass
|
||||||
|
- **Doc B:** Coordinator enforces gates before allowing next issue
|
||||||
|
- **Shared principle:** Quality is a requirement, not a suggestion
|
||||||
|
|
||||||
|
### 4. **Non-Negotiable Quality Standards** ✅
|
||||||
|
|
||||||
|
Both use firm language about quality requirements:
|
||||||
|
|
||||||
|
- **Doc A:** "This is not optional. Do not claim completion until gates pass."
|
||||||
|
- **Doc B:** "Quality gates are mechanical blockers, not suggestions."
|
||||||
|
|
||||||
|
### 5. **State Management** ✅
|
||||||
|
|
||||||
|
Both track work state programmatically:
|
||||||
|
|
||||||
|
- **Doc A:** Agent state machine (working → claimed done → verified → actual done)
|
||||||
|
- **Doc B:** Issue state in tracking system (pending → in-progress → gate-check → completed)
|
||||||
|
|
||||||
|
### 6. **Validation Before Progression** ✅
|
||||||
|
|
||||||
|
Both prevent moving forward with broken code:
|
||||||
|
|
||||||
|
- **Doc A:** Cannot claim "done" until gates pass
|
||||||
|
- **Doc B:** Cannot start next issue until current issue passes gates
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Complementary Nature
|
||||||
|
|
||||||
|
These documents solve **different problems in the same architectural pattern**:
|
||||||
|
|
||||||
|
### Document A (Existing): Quality Enforcement
|
||||||
|
|
||||||
|
**Problem:** "How do we prevent an agent from claiming work is done when it's not?"
|
||||||
|
**Solution:** Rejection loop with forced continuation
|
||||||
|
**Scope:** Single agent working on single issue
|
||||||
|
**Lifecycle stage:** Task completion verification
|
||||||
|
|
||||||
|
### Document B (New): Orchestration at Scale
|
||||||
|
|
||||||
|
**Problem:** "How do we manage multiple agents working through dozens of issues without context exhaustion?"
|
||||||
|
**Solution:** Proactive context management + intelligent agent assignment
|
||||||
|
**Scope:** Multi-agent orchestration across entire milestone
|
||||||
|
**Lifecycle stage:** Agent selection, session management, queue progression
|
||||||
|
|
||||||
|
### Together They Form:
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────┐
|
||||||
|
│ Non-AI Coordinator (Document B) │
|
||||||
|
│ - Monitors context usage across all agents │
|
||||||
|
│ - Assigns issues based on context estimates │
|
||||||
|
│ - Rotates agents at 95% context │
|
||||||
|
│ - Enforces 50% rule during issue creation │
|
||||||
|
└─────────────────────────┬───────────────────────────────┘
|
||||||
|
│
|
||||||
|
┌─────────────────┼─────────────────┐
|
||||||
|
▼ ▼ ▼
|
||||||
|
Agent 1 Agent 2 Agent 3
|
||||||
|
Issue #42 Issue #57 Issue #89
|
||||||
|
│ │ │
|
||||||
|
└─────────────────┴─────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────┐
|
||||||
|
│ Quality Orchestrator (Document A) │
|
||||||
|
│ - Intercepts completion claims │
|
||||||
|
│ - Runs quality gates │
|
||||||
|
│ - Forces continuation if gates fail │
|
||||||
|
│ - Only allows "done" when gates pass │
|
||||||
|
└─────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
**Document B (new)** manages the **agent lifecycle and orchestration**.
|
||||||
|
**Document A (existing)** manages the **quality enforcement per agent**.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Integration Recommendations
|
||||||
|
|
||||||
|
### Option 1: Merge into Single Document (Recommended)
|
||||||
|
|
||||||
|
**Reason:** They're parts of the same system
|
||||||
|
|
||||||
|
**Structure:**
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
# Non-AI Coordinator Pattern Architecture
|
||||||
|
|
||||||
|
## Part 1: Multi-Agent Orchestration (from Doc B)
|
||||||
|
|
||||||
|
- Context management
|
||||||
|
- Agent assignment
|
||||||
|
- Session rotation
|
||||||
|
- 50% rule
|
||||||
|
- Epic decomposition
|
||||||
|
|
||||||
|
## Part 2: Quality Enforcement (from Doc A)
|
||||||
|
|
||||||
|
- Premature completion problem
|
||||||
|
- Quality gates
|
||||||
|
- Rejection loop
|
||||||
|
- Forced continuation
|
||||||
|
- CompletionVerificationEngine
|
||||||
|
|
||||||
|
## Part 3: Implementation
|
||||||
|
|
||||||
|
- TypeScript/NestJS orchestrator (from Doc A)
|
||||||
|
- Python coordinator enhancements (from Doc B)
|
||||||
|
- Integration points
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option 2: Keep Separate, Create Integration Doc
|
||||||
|
|
||||||
|
**Reason:** Different audiences (orchestration vs quality enforcement)
|
||||||
|
|
||||||
|
**Documents:**
|
||||||
|
|
||||||
|
1. `orchestration-architecture.md` (Doc B) - For understanding multi-agent coordination
|
||||||
|
2. `quality-enforcement-architecture.md` (Doc A) - For understanding quality gates
|
||||||
|
3. `non-ai-coordinator-integration.md` (NEW) - How they work together
|
||||||
|
|
||||||
|
### Option 3: Hierarchical Documentation
|
||||||
|
|
||||||
|
**Reason:** Layers of abstraction
|
||||||
|
|
||||||
|
```
|
||||||
|
non-ai-coordinator-pattern.md (Overview)
|
||||||
|
├── orchestration-layer.md (Doc B content)
|
||||||
|
└── quality-layer.md (Doc A content)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Action Items
|
||||||
|
|
||||||
|
Based on the overlap analysis, we recommend:
|
||||||
|
|
||||||
|
1. **Merge the documents** into a comprehensive architecture guide
|
||||||
|
- Use Doc A's problem statement for quality enforcement
|
||||||
|
- Use Doc B's problem statement for context exhaustion
|
||||||
|
- Show how both problems require non-AI coordinator
|
||||||
|
- Integrate TypeScript implementation with context monitoring
|
||||||
|
|
||||||
|
2. **Update Mosaic Stack issue #140**
|
||||||
|
- Current: "Document Non-AI Coordinator Pattern Architecture"
|
||||||
|
- Expand scope: Include both quality enforcement AND orchestration
|
||||||
|
- Reference both problem spaces (L-015 + context exhaustion)
|
||||||
|
|
||||||
|
3. **Create unified PoC plan**
|
||||||
|
- Phase 1: Context monitoring (from Doc B)
|
||||||
|
- Phase 2: Agent assignment logic (from Doc B)
|
||||||
|
- Phase 3: Quality gate integration (from Doc A)
|
||||||
|
- Phase 4: Forced continuation (from Doc A)
|
||||||
|
|
||||||
|
4. **Preserve unique innovations from each**
|
||||||
|
- Doc A: Rejection loop, forced continuation prompts
|
||||||
|
- Doc B: 50% rule, agent profiles, context estimation formula
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
**These documents are highly complementary, not duplicative.**
|
||||||
|
|
||||||
|
- **~20% overlap:** Both use non-AI coordinator, mechanical gates, non-negotiable quality
|
||||||
|
- **~80% unique value:** Doc A solves premature completion, Doc B solves context exhaustion
|
||||||
|
|
||||||
|
**Best path forward:** Merge into a single comprehensive architecture document that addresses both problems within the unified non-AI coordinator pattern.
|
||||||
|
|
||||||
|
The pattern is:
|
||||||
|
|
||||||
|
1. Non-AI coordinator assigns issues based on context estimates (Doc B)
|
||||||
|
2. Agent works on issue
|
||||||
|
3. Quality gates enforce completion standards (Doc A)
|
||||||
|
4. Context monitoring prevents exhaustion (Doc B)
|
||||||
|
5. Forced continuation prevents premature "done" (Doc A)
|
||||||
|
6. Next issue assigned when ready (Doc B)
|
||||||
|
|
||||||
|
Together they create a **robust, autonomous, quality-enforcing orchestration system** that scales beyond single-agent, single-issue scenarios.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Next Steps:**
|
||||||
|
|
||||||
|
1. User review of this analysis
|
||||||
|
2. Decision on integration approach (Option 1, 2, or 3)
|
||||||
|
3. Update Mosaic Stack documentation accordingly
|
||||||
|
4. Proceed with PoC implementation
|
||||||
Reference in New Issue
Block a user