From bed440dc36d6adc8b821f47434725435e8ccb8d5 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Wed, 4 Feb 2026 07:50:22 -0600 Subject: [PATCH] docs(m6): Add Usage Budget Management section Add comprehensive usage budget management design to M6 orchestration architecture. FEATURES: - Real-time usage tracking across agents - Budget allocation per task/milestone/project - Usage projection and burn rate calculation - Throttling decisions to prevent budget exhaustion - Model tier optimization (Haiku/Sonnet/Opus) - Pre-commit usage validation DATA MODEL: - usage_budgets table (allocated/consumed/remaining) - agent_usage_logs table (per-agent tracking) - Valkey keys for real-time state BUDGET CHECKPOINTS: 1. Task assignment - can afford this task? 2. Agent spawn - verify budget headroom 3. Checkpoint intervals - periodic compliance 4. Pre-commit validation - usage efficiency PRIORITY: MVP (M6 Phase 3) for basic tracking, Phase 5 for advanced projection and optimization. Co-Authored-By: Claude Sonnet 4.5 --- docs/design/agent-orchestration.md | 337 +++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) diff --git a/docs/design/agent-orchestration.md b/docs/design/agent-orchestration.md index db32709..4cd44df 100644 --- a/docs/design/agent-orchestration.md +++ b/docs/design/agent-orchestration.md @@ -908,6 +908,343 @@ export class CoordinatorService { --- +## Usage Budget Management + +**Version:** 1.0 +**Date:** 2026-02-04 +**Status:** Required for MVP + +### Problem Statement + +Autonomous agents using Claude Code can consume significant API tokens without proper governance. Without real-time usage tracking and budgeting, projects risk: + +1. **Cost overruns** — Agents exceed budget before milestone completion +2. **Service disruption** — Hit API rate limits mid-task +3. **Unpredictable momentum** — Can't estimate project velocity +4. 
**Budget exhaustion** — Agents consume entire monthly budget in days + +### Requirements + +The orchestration layer must provide: + +- **Real-time usage tracking** — Current token usage across all active agents +- **Budget allocation** — Pre-allocate budgets per task/milestone/project +- **Usage projection** — Estimate remaining work vs remaining budget +- **Throttling decisions** — Pause/slow agents approaching limits +- **Cost optimization** — Route tasks to appropriate model tiers (Haiku/Sonnet/Opus) + +### Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Usage Budget Manager Service │ +│ ┌───────────────────┐ ┌────────────────────────────┐ │ +│ │ Usage Tracker │ │ Budget Allocator │ │ +│ │ - Query API │ │ - Per-task budgets │ │ +│ │ - Real-time sum │ │ - Per-milestone budgets │ │ +│ │ - Agent rollup │ │ - Global limits │ │ +│ └────────┬──────────┘ └──────────┬─────────────────┘ │ +│ │ │ │ +│ ┌────────▼────────────────────────▼─────────────────┐ │ +│ │ Projection Engine │ │ +│ │ - Estimate remaining work │ │ +│ │ - Calculate burn rate │ │ +│ │ - Predict budget exhaustion │ │ +│ │ - Recommend throttle/pause │ │ +│ └───────────────────────────┬───────────────────────┘ │ +└────────────────────────────────┼─────────────────────────┘ + │ + ┌────────────┼────────────┐ + │ │ │ + ┌───────▼──────┐ ┌──▼──────┐ ┌──▼────────────┐ + │Queue Manager │ │Agent Mgr│ │ Coordinator │ + │- Check budget│ │- Spawn │ │ - Pre-commit │ + │ before queue│ │ check │ │ validation │ + └──────────────┘ └─────────┘ └───────────────┘ +``` + +### Budget Check Points + +**1. 
Task Assignment** (Queue Manager) + +```typescript +async canAffordTask(taskId: string): Promise { + const task = await getTask(taskId); + const currentUsage = await usageTracker.getCurrentUsage(); + const budget = await budgetAllocator.getTaskBudget(taskId); + + const projectedCost = estimateTaskCost(task); + const remaining = budget.limit - currentUsage.total; + + if (projectedCost > remaining) { + return { + canProceed: false, + reason: 'Insufficient budget', + recommendation: 'Pause or reallocate budget', + }; + } + + return { canProceed: true }; +} +``` + +**2. Agent Spawn** (Agent Manager) + +Before spawning a Claude Code agent, verify budget headroom: + +```typescript +async spawnAgent(config: AgentConfig): Promise { + const budgetCheck = await usageBudgetManager.canAffordTask(config.taskId); + + if (!budgetCheck.canProceed) { + throw new InsufficientBudgetError(budgetCheck.reason); + } + + // Proceed with spawn + const agent = await claudeCode.spawn(config); + + // Track agent for usage rollup + await usageTracker.registerAgent(agent.id, config.taskId); + + return agent; +} +``` + +**3. Checkpoint Intervals** (Coordinator) + +During task execution, periodically verify budget compliance: + +```typescript +async checkpointBudgetCompliance(taskId: string): Promise { + const usage = await usageTracker.getTaskUsage(taskId); + const budget = await budgetAllocator.getTaskBudget(taskId); + + const percentUsed = (usage.current / budget.allocated) * 100; + + if (percentUsed > 90) { + await coordinator.sendWarning(taskId, 'Approaching budget limit'); + } + + if (percentUsed > 100) { + await coordinator.pauseTask(taskId, 'Budget exceeded'); + await notifyUser(taskId, 'Task paused: budget exhausted'); + } +} +``` + +**4. 
Pre-commit Validation** (Quality Gates) + +Before committing work, verify usage is reasonable: + +```typescript +async validateUsageEfficiency(taskId: string): Promise { + const usage = await usageTracker.getTaskUsage(taskId); + const linesChanged = await git.getChangedLines(taskId); + const testsAdded = await git.getTestFiles(taskId); + + // Cost per line heuristic (adjust based on learnings) + const expectedTokensPerLine = 150; // baseline + TDD overhead + const expectedUsage = linesChanged * expectedTokensPerLine; + + const efficiency = expectedUsage / usage.current; + + if (efficiency < 0.5) { + return { + valid: false, + reason: 'Usage appears inefficient', + recommendation: 'Review agent logs for token waste', + }; + } + + return { valid: true }; +} +``` + +### Data Model + +#### `usage_budgets` Table + +```sql +CREATE TABLE usage_budgets ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + workspace_id UUID NOT NULL REFERENCES workspaces(id), + + -- Scope: 'global', 'project', 'milestone', 'task' + scope VARCHAR(20) NOT NULL, + scope_id VARCHAR(100), -- project_id, milestone_id, or task_id + + -- Budget limits (tokens) + allocated BIGINT NOT NULL, + consumed BIGINT NOT NULL DEFAULT 0, + remaining BIGINT GENERATED ALWAYS AS (allocated - consumed) STORED, + + -- Tracking + period_start TIMESTAMPTZ NOT NULL, + period_end TIMESTAMPTZ NOT NULL, + + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX idx_usage_budgets_scope ON usage_budgets(scope, scope_id); +CREATE INDEX idx_usage_budgets_workspace ON usage_budgets(workspace_id); +``` + +#### `agent_usage_logs` Table + +```sql +CREATE TABLE agent_usage_logs ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + workspace_id UUID NOT NULL REFERENCES workspaces(id), + agent_session_id UUID NOT NULL, + task_id UUID REFERENCES agent_tasks(id), + + -- Usage details + input_tokens BIGINT NOT NULL, + output_tokens BIGINT NOT NULL, + total_tokens BIGINT NOT 
NULL, + model VARCHAR(100) NOT NULL, -- 'claude-sonnet-4', 'claude-haiku-3.5', etc. + + -- Cost tracking + estimated_cost_usd DECIMAL(10, 6), + + -- Context + operation VARCHAR(100), -- 'task_execution', 'quality_review', etc. + + logged_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX idx_agent_usage_task ON agent_usage_logs(task_id); +CREATE INDEX idx_agent_usage_session ON agent_usage_logs(agent_session_id); +CREATE INDEX idx_agent_usage_workspace ON agent_usage_logs(workspace_id); +``` + +### Valkey/Redis Keys + +``` +# Current usage (real-time) +usage:current:{workspace_id} -> HASH + { + "total_tokens": "1234567", + "total_cost_usd": "12.34", + "last_updated": "2026-02-04T10:30:00Z" + } + +# Per-task usage +usage:task:{task_id} -> HASH + { + "allocated": "50000", + "consumed": "23456", + "remaining": "26544", + "agents": ["agent-1", "agent-2"] + } + +# Budget alerts +usage:alerts:{workspace_id} -> LIST + [ + { + "task_id": "task-123", + "level": "warning", + "percent_used": 92, + "message": "Task approaching budget limit" + } + ] +``` + +### Cost Estimation Formulas + +Based on autonomous execution learnings. The overhead factors below are multiplied together, so the estimate is roughly 1.8x the baseline (1.2 × 1.3 × 1.15 ≈ 1.79), not the +65% that summing the individual percentages would suggest: + +```typescript +function estimateTaskCost(task: Task): number { + const baselineTokens = task.estimatedComplexity * 1000; // tokens per complexity point + + // Overhead factors + const tddOverhead = 1.2; // +20% for test writing + const baselineBuffer = 1.3; // +30% general buffer + const phaseBuffer = 1.15; // +15% phase-specific uncertainty + + const estimated = baselineTokens * tddOverhead * baselineBuffer * phaseBuffer; + + return Math.ceil(estimated); +} +``` + +### Model Tier Optimization + +Route tasks to appropriate model tiers for cost efficiency: + +| Model | Cost/MTok (input) | Cost/MTok (output) | Use Case | +| ---------------- | ----------------- | ------------------ | ------------------------------------ | +| Claude Haiku 3.5 | $0.80 | $4.00 | Simple CRUD, boilerplate, linting | +| Claude Sonnet 4 | $3.00 | $15.00 | Standard
development, refactoring | +| Claude Opus 4 | $15.00 | $75.00 | Complex architecture, critical fixes | + +**Routing logic:** + +```typescript +function selectModel(task: Task): ModelTier { + if (task.priority === "critical" || task.complexity > 8) { + return "opus"; + } + + if (task.type === "boilerplate" || task.estimatedTokens < 10000) { + return "haiku"; + } + + return "sonnet"; // default +} +``` + +### Projection Algorithm + +Predict budget exhaustion from the trailing 24-hour burn rate. When no tokens were consumed in that window the burn rate is zero and no exhaustion date can be projected; implementations must handle that case explicitly rather than divide by zero: + +```typescript +async function projectBudgetExhaustion(workspaceId: string): Promise { + const usage = await usageTracker.getCurrentUsage(workspaceId); + const budget = await budgetAllocator.getGlobalBudget(workspaceId); + + const dailyBurnRate = usage.tokensLast24h; + const daysRemaining = budget.remaining / dailyBurnRate; + + const exhaustionDate = new Date(); + exhaustionDate.setDate(exhaustionDate.getDate() + daysRemaining); + + return { + remaining_tokens: budget.remaining, + daily_burn_rate: dailyBurnRate, + days_until_exhaustion: daysRemaining, + projected_exhaustion_date: exhaustionDate, + recommendation: daysRemaining < 7 ? "THROTTLE" : "CONTINUE", + }; +} +``` + +### Implementation Priority + +**MVP (M6 Phase 3):** + +- ✅ Basic usage tracking (log tokens per task) +- ✅ Simple budget checks (can afford this task?) +- ✅ Alert on budget exceeded + +**Post-MVP (M6 Phase 5):** + +- Projection engine (when will budget run out?) +- Model tier optimization (Haiku/Sonnet/Opus routing) +- Historical analysis (actual vs estimated) +- Budget reallocation (move budget between projects) + +### Success Metrics + +- **Budget accuracy**: Estimated vs actual within 20% +- **Cost optimization**: 40%+ savings from model tier routing +- **No surprise exhaustion**: Zero instances of unexpected budget depletion +- **Steady momentum**: Projects maintain velocity without budget interruptions + +--- + ## Implementation Phases ### Phase 1: Foundation (Week 1-2)