feat(#329): Add usage budget management and cost governance

Implement BudgetService for tracking and enforcing agent usage limits:
- Daily token limit tracking (default 10M tokens)
- Per-agent token limit enforcement (default 2M tokens)
- Maximum concurrent agent cap (default 10)
- Task duration limits (default 120 minutes)
- Hard/soft limit enforcement modes
- Real-time usage summaries with budget status (within_budget/approaching_limit/at_limit/exceeded)
- Per-agent usage breakdown with percentage calculations

Includes BudgetModule for NestJS DI and 23 unit tests.

Fixes #329

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 13:00:26 -06:00
parent 06fa8f7402
commit 22dc964503
4 changed files with 580 additions and 0 deletions
--- a/apps/orchestrator/src/budget/budget.module.ts
+++ b/apps/orchestrator/src/budget/budget.module.ts
@@ -0,0 +1,10 @@
+import { Module } from "@nestjs/common";
+import { ConfigModule } from "@nestjs/config";
+import { BudgetService } from "./budget.service";
+
+@Module({
+  imports: [ConfigModule], // supplies the ConfigService that BudgetService's constructor injects
+  providers: [BudgetService],
+  exports: [BudgetService], // lets modules that import BudgetModule inject BudgetService
+})
+export class BudgetModule {}
--- a/apps/orchestrator/src/budget/budget.service.spec.ts
+++ b/apps/orchestrator/src/budget/budget.service.spec.ts
@@ -0,0 +1,296 @@
+/**
+ * BudgetService Unit Tests
+ *
+ * Tests usage budget tracking, enforcement, and reporting.
+ * Covers issue #329 (ORCH-135)
+ */
+import { describe, it, expect, beforeEach, vi } from "vitest";
+import { BudgetService } from "./budget.service";
+import { ConfigService } from "@nestjs/config";
+
+describe("BudgetService", () => {
+  /**
+   * Build a service backed by a mock config that returns only the given
+   * overrides. Keys that are not listed resolve to undefined, so the service
+   * falls back to its built-in defaults for them.
+   */
+  const createService = (overrides: Record<string, unknown> = {}): BudgetService => {
+    const config = { get: vi.fn((key: string) => overrides[key]) };
+    return new BudgetService(config as unknown as ConfigService);
+  };
+
+  let service: BudgetService;
+
+  beforeEach(() => {
+    // Fresh instance per test: usage records and the active-agent count are instance state.
+    service = createService();
+  });
+
+  describe("initialization", () => {
+    it("should initialize with default budget values", () => {
+      const budget = service.getBudget();
+      expect(budget.dailyTokenLimit).toBe(10_000_000);
+      expect(budget.perAgentTokenLimit).toBe(2_000_000);
+      expect(budget.maxConcurrentAgents).toBe(10);
+      expect(budget.maxTaskDurationMinutes).toBe(120);
+      expect(budget.enforceHardLimits).toBe(false);
+    });
+
+    it("should use config values when provided", () => {
+      const customService = createService({
+        "orchestrator.budget.dailyTokenLimit": 5_000_000,
+        "orchestrator.budget.perAgentTokenLimit": 1_000_000,
+        "orchestrator.budget.maxConcurrentAgents": 5,
+        "orchestrator.budget.maxTaskDurationMinutes": 60,
+        "orchestrator.budget.enforceHardLimits": true,
+      });
+      const budget = customService.getBudget();
+
+      expect(budget.dailyTokenLimit).toBe(5_000_000);
+      expect(budget.perAgentTokenLimit).toBe(1_000_000);
+      expect(budget.maxConcurrentAgents).toBe(5);
+      expect(budget.maxTaskDurationMinutes).toBe(60);
+      expect(budget.enforceHardLimits).toBe(true);
+    });
+  });
+
+  describe("recordUsage", () => {
+    it("should record token usage", () => {
+      service.recordUsage("agent-1", "task-1", 1000, 500);
+
+      const summary = service.getUsageSummary();
+      expect(summary.dailyTokensUsed).toBe(1500);
+    });
+
+    it("should accumulate usage across multiple records", () => {
+      service.recordUsage("agent-1", "task-1", 1000, 500);
+      service.recordUsage("agent-1", "task-1", 2000, 1000);
+
+      const summary = service.getUsageSummary();
+      expect(summary.dailyTokensUsed).toBe(4500);
+    });
+
+    it("should track usage per agent", () => {
+      service.recordUsage("agent-1", "task-1", 1000, 500);
+      service.recordUsage("agent-2", "task-2", 3000, 1500);
+
+      const summary = service.getUsageSummary();
+      expect(summary.agentUsage).toHaveLength(2);
+
+      const agent1 = summary.agentUsage.find((a) => a.agentId === "agent-1");
+      const agent2 = summary.agentUsage.find((a) => a.agentId === "agent-2");
+
+      expect(agent1?.totalTokens).toBe(1500);
+      expect(agent2?.totalTokens).toBe(4500);
+    });
+  });
+
+  describe("canSpawnAgent", () => {
+    /** Report `count` agent starts to the default service. */
+    const startAgents = (count: number): void => {
+      for (let i = 0; i < count; i++) {
+        service.agentStarted();
+      }
+    };
+
+    it("should allow spawning when under limits", () => {
+      const result = service.canSpawnAgent();
+      expect(result.allowed).toBe(true);
+    });
+
+    it("should block spawning when at max concurrent agents", () => {
+      startAgents(10);
+
+      const result = service.canSpawnAgent();
+      expect(result.allowed).toBe(false);
+      expect(result.reason).toContain("Maximum concurrent agents");
+    });
+
+    it("should allow spawning after agent stops", () => {
+      startAgents(10);
+
+      expect(service.canSpawnAgent().allowed).toBe(false);
+
+      service.agentStopped();
+
+      expect(service.canSpawnAgent().allowed).toBe(true);
+    });
+
+    it("should block spawning when daily budget exceeded with hard limits", () => {
+      const strictService = createService({
+        "orchestrator.budget.dailyTokenLimit": 1000,
+        "orchestrator.budget.enforceHardLimits": true,
+      });
+      strictService.recordUsage("agent-1", "task-1", 800, 300);
+
+      const result = strictService.canSpawnAgent();
+      expect(result.allowed).toBe(false);
+      expect(result.reason).toContain("Daily token budget exceeded");
+    });
+
+    it("should allow spawning when over budget without hard limits", () => {
+      service.recordUsage("agent-1", "task-1", 5_000_000, 5_000_000);
+
+      const result = service.canSpawnAgent();
+      expect(result.allowed).toBe(true);
+    });
+  });
+
+  describe("isAgentOverBudget", () => {
+    it("should return false when agent is within budget", () => {
+      service.recordUsage("agent-1", "task-1", 100_000, 50_000);
+
+      const result = service.isAgentOverBudget("agent-1");
+      expect(result.overBudget).toBe(false);
+      expect(result.totalTokens).toBe(150_000);
+    });
+
+    it("should return true when agent exceeds per-agent limit", () => {
+      // Exactly at the default 2M limit: reaching the limit already counts as over budget.
+      service.recordUsage("agent-1", "task-1", 1_000_000, 1_000_000);
+
+      const result = service.isAgentOverBudget("agent-1");
+      expect(result.overBudget).toBe(true);
+      expect(result.totalTokens).toBe(2_000_000);
+    });
+
+    it("should return false for unknown agent", () => {
+      const result = service.isAgentOverBudget("non-existent");
+      expect(result.overBudget).toBe(false);
+      expect(result.totalTokens).toBe(0);
+    });
+  });
+
+  describe("agentStarted / agentStopped", () => {
+    it("should track active agent count", () => {
+      service.agentStarted();
+      service.agentStarted();
+      service.agentStarted();
+
+      const summary = service.getUsageSummary();
+      expect(summary.activeAgents).toBe(3);
+    });
+
+    it("should decrement active count on stop", () => {
+      service.agentStarted();
+      service.agentStarted();
+      service.agentStopped();
+
+      const summary = service.getUsageSummary();
+      expect(summary.activeAgents).toBe(1);
+    });
+
+    it("should not go below zero", () => {
+      service.agentStopped();
+      service.agentStopped();
+
+      const summary = service.getUsageSummary();
+      expect(summary.activeAgents).toBe(0);
+    });
+  });
+
+  describe("getUsageSummary", () => {
+    /** Service with a 10,000-token daily limit, so token counts map directly onto percentages. */
+    const createSmallBudgetService = (): BudgetService =>
+      createService({ "orchestrator.budget.dailyTokenLimit": 10_000 });
+
+    it("should return complete summary with no usage", () => {
+      const summary = service.getUsageSummary();
+
+      expect(summary.dailyTokensUsed).toBe(0);
+      expect(summary.dailyTokenLimit).toBe(10_000_000);
+      expect(summary.dailyUsagePercent).toBe(0);
+      expect(summary.agentUsage).toHaveLength(0);
+      expect(summary.activeAgents).toBe(0);
+      expect(summary.maxConcurrentAgents).toBe(10);
+      expect(summary.budgetStatus).toBe("within_budget");
+    });
+
+    it("should calculate usage percentage correctly", () => {
+      const customService = createSmallBudgetService();
+      customService.recordUsage("agent-1", "task-1", 5000, 0);
+
+      const summary = customService.getUsageSummary();
+      expect(summary.dailyUsagePercent).toBe(50);
+    });
+
+    it("should report 'approaching_limit' at 80%", () => {
+      const customService = createSmallBudgetService();
+      // Exactly 80%: pins the inclusive lower bound of the bucket.
+      customService.recordUsage("agent-1", "task-1", 8000, 0);
+
+      const summary = customService.getUsageSummary();
+      expect(summary.budgetStatus).toBe("approaching_limit");
+    });
+
+    it("should report 'at_limit' at 95%", () => {
+      const customService = createSmallBudgetService();
+      // Exactly 95%: pins the inclusive lower bound of the bucket.
+      customService.recordUsage("agent-1", "task-1", 9500, 0);
+
+      const summary = customService.getUsageSummary();
+      expect(summary.budgetStatus).toBe("at_limit");
+    });
+
+    it("should report 'exceeded' over 100%", () => {
+      const customService = createSmallBudgetService();
+      customService.recordUsage("agent-1", "task-1", 11_000, 0);
+
+      const summary = customService.getUsageSummary();
+      expect(summary.budgetStatus).toBe("exceeded");
+    });
+
+    it("should calculate per-agent usage percentage", () => {
+      service.recordUsage("agent-1", "task-1", 500_000, 500_000);
+
+      const summary = service.getUsageSummary();
+      const agent = summary.agentUsage.find((a) => a.agentId === "agent-1");
+
+      expect(agent?.usagePercent).toBe(50);
+    });
+  });
+
+  describe("getBudget", () => {
+    it("should return a copy of the budget", () => {
+      const budget1 = service.getBudget();
+      const budget2 = service.getBudget();
+
+      expect(budget1).toEqual(budget2);
+      expect(budget1).not.toBe(budget2); // Different reference
+    });
+  });
+});
--- a/apps/orchestrator/src/budget/budget.service.ts
+++ b/apps/orchestrator/src/budget/budget.service.ts
@@ -0,0 +1,205 @@
+/**
+ * Usage Budget Management Service
+ *
+ * Tracks token usage per agent and enforces budget limits.
+ * Provides real-time usage summaries and budget status checks.
+ */
+import { Injectable, Logger } from "@nestjs/common";
+import { ConfigService } from "@nestjs/config";
+import type {
+  UsageBudget,
+  UsageRecord,
+  UsageSummary,
+  AgentUsageSummary,
+  BudgetStatus,
+} from "./budget.types";
+import { DEFAULT_BUDGET } from "./budget.types";
+
+/**
+ * In-memory tracker for agent token usage and budget limits.
+ *
+ * Limits are resolved once, at construction, from `orchestrator.budget.*`
+ * config keys with `DEFAULT_BUDGET` as the fallback. Usage records are kept
+ * only in this instance's array: they are not persisted and never pruned.
+ */
+@Injectable()
+export class BudgetService {
+  private readonly logger = new Logger(BudgetService.name);
+  /** Effective limits, resolved once in the constructor. */
+  private readonly budget: UsageBudget;
+  /** Every accepted usage record, in insertion order. */
+  private readonly records: UsageRecord[] = [];
+  /** Agents reported as started and not yet stopped; never negative. */
+  private activeAgentCount = 0;
+
+  /**
+   * Resolve each limit from config, falling back to `DEFAULT_BUDGET` for any
+   * key whose config value is null or undefined, then log the effective limits.
+   */
+  constructor(private readonly configService: ConfigService) {
+    this.budget = {
+      dailyTokenLimit:
+        this.configService.get<number>("orchestrator.budget.dailyTokenLimit") ??
+        DEFAULT_BUDGET.dailyTokenLimit,
+      perAgentTokenLimit:
+        this.configService.get<number>("orchestrator.budget.perAgentTokenLimit") ??
+        DEFAULT_BUDGET.perAgentTokenLimit,
+      maxConcurrentAgents:
+        this.configService.get<number>("orchestrator.budget.maxConcurrentAgents") ??
+        DEFAULT_BUDGET.maxConcurrentAgents,
+      maxTaskDurationMinutes:
+        this.configService.get<number>("orchestrator.budget.maxTaskDurationMinutes") ??
+        DEFAULT_BUDGET.maxTaskDurationMinutes,
+      enforceHardLimits:
+        this.configService.get<boolean>("orchestrator.budget.enforceHardLimits") ??
+        DEFAULT_BUDGET.enforceHardLimits,
+    };
+
+    this.logger.log(
+      `BudgetService initialized: daily=${String(this.budget.dailyTokenLimit)} tokens, ` +
+        `perAgent=${String(this.budget.perAgentTokenLimit)} tokens, ` +
+        `maxConcurrent=${String(this.budget.maxConcurrentAgents)}`
+    );
+  }
+
+  /**
+   * Record token usage for an agent.
+   *
+   * A record with a negative or non-finite count is rejected with a warning
+   * instead of being stored: a single NaN would turn every later total into
+   * NaN, and because `NaN >= limit` is false the budget checks would silently
+   * stop firing.
+   *
+   * @param agentId - Agent that consumed the tokens
+   * @param taskId - Task the agent was working on
+   * @param inputTokens - Input tokens consumed; must be finite and >= 0
+   * @param outputTokens - Output tokens consumed; must be finite and >= 0
+   */
+  recordUsage(agentId: string, taskId: string, inputTokens: number, outputTokens: number): void {
+    if (!this.isValidTokenCount(inputTokens) || !this.isValidTokenCount(outputTokens)) {
+      this.logger.warn(
+        `Ignoring invalid usage record: agent=${agentId} input=${String(inputTokens)} output=${String(outputTokens)}`
+      );
+      return;
+    }
+
+    const record: UsageRecord = {
+      agentId,
+      taskId,
+      inputTokens,
+      outputTokens,
+      timestamp: new Date().toISOString(),
+    };
+
+    this.records.push(record);
+
+    this.logger.debug(
+      `Usage recorded: agent=${agentId} input=${String(inputTokens)} output=${String(outputTokens)}`
+    );
+  }
+
+  /**
+   * Check if an agent can be spawned (concurrency and budget check).
+   *
+   * The concurrency cap always applies. The daily token limit blocks spawning
+   * only when `enforceHardLimits` is on.
+   *
+   * @returns `allowed: true`, or `allowed: false` with a human-readable `reason`
+   */
+  canSpawnAgent(): { allowed: boolean; reason?: string } {
+    if (this.activeAgentCount >= this.budget.maxConcurrentAgents) {
+      return {
+        allowed: false,
+        reason: `Maximum concurrent agents reached (${String(this.budget.maxConcurrentAgents)})`,
+      };
+    }
+
+    // The daily total is a scan over all records, so compute it only when it can matter.
+    if (this.budget.enforceHardLimits) {
+      const dailyUsed = this.getDailyTokensUsed();
+      if (dailyUsed >= this.budget.dailyTokenLimit) {
+        return {
+          allowed: false,
+          reason: `Daily token budget exceeded (${String(dailyUsed)}/${String(this.budget.dailyTokenLimit)})`,
+        };
+      }
+    }
+
+    return { allowed: true };
+  }
+
+  /**
+   * Check if an agent has reached the per-agent token limit.
+   *
+   * The total covers every record stored for the agent, regardless of task or
+   * date. Reaching the limit exactly already counts as over budget.
+   *
+   * @param agentId - Agent to check; an unknown agent has a total of 0
+   * @returns The over-budget flag and the agent's total (input + output) tokens
+   */
+  isAgentOverBudget(agentId: string): { overBudget: boolean; totalTokens: number } {
+    const agentRecords = this.records.filter((r) => r.agentId === agentId);
+    const totalTokens = agentRecords.reduce((sum, r) => sum + r.inputTokens + r.outputTokens, 0);
+
+    return {
+      overBudget: totalTokens >= this.budget.perAgentTokenLimit,
+      totalTokens,
+    };
+  }
+
+  /**
+   * Notify that an agent has started (increment active count)
+   */
+  agentStarted(): void {
+    this.activeAgentCount++;
+  }
+
+  /**
+   * Notify that an agent has stopped (decrement active count).
+   * Extra stop notifications are absorbed: the count is clamped at zero.
+   */
+  agentStopped(): void {
+    this.activeAgentCount = Math.max(0, this.activeAgentCount - 1);
+  }
+
+  /**
+   * Get comprehensive usage summary.
+   *
+   * `dailyUsagePercent` is rounded to two decimals for reporting, while
+   * `budgetStatus` is derived from the unrounded value. A daily limit that is
+   * not positive yields 0%.
+   */
+  getUsageSummary(): UsageSummary {
+    const dailyTokensUsed = this.getDailyTokensUsed();
+    const dailyUsagePercent =
+      this.budget.dailyTokenLimit > 0 ? (dailyTokensUsed / this.budget.dailyTokenLimit) * 100 : 0;
+
+    return {
+      dailyTokensUsed,
+      dailyTokenLimit: this.budget.dailyTokenLimit,
+      dailyUsagePercent: Math.round(dailyUsagePercent * 100) / 100,
+      agentUsage: this.getAgentUsageSummaries(),
+      activeAgents: this.activeAgentCount,
+      maxConcurrentAgents: this.budget.maxConcurrentAgents,
+      budgetStatus: this.getBudgetStatus(dailyUsagePercent),
+    };
+  }
+
+  /**
+   * Get the configured budget as a shallow copy, so callers cannot change the
+   * service's own limits through the returned object.
+   */
+  getBudget(): UsageBudget {
+    return { ...this.budget };
+  }
+
+  /**
+   * Whether a token count is usable: a finite number that is not negative.
+   */
+  private isValidTokenCount(value: number): boolean {
+    return Number.isFinite(value) && value >= 0;
+  }
+
+  /**
+   * Get total tokens (input + output) recorded today.
+   *
+   * "Today" starts at midnight in the process's local time zone. Timestamps
+   * are compared as strings, which orders them chronologically because both
+   * sides come from `Date#toISOString`.
+   */
+  private getDailyTokensUsed(): number {
+    const todayStart = new Date();
+    todayStart.setHours(0, 0, 0, 0);
+    const todayIso = todayStart.toISOString();
+
+    return this.records
+      .filter((r) => r.timestamp >= todayIso)
+      .reduce((sum, r) => sum + r.inputTokens + r.outputTokens, 0);
+  }
+
+  /**
+   * Get per-agent usage summaries, one per agent, in order of first appearance.
+   *
+   * Totals cover all stored records, not only today's. Each summary carries
+   * the task id of the agent's first record; later task ids are not tracked.
+   */
+  private getAgentUsageSummaries(): AgentUsageSummary[] {
+    const agentMap = new Map<string, { taskId: string; input: number; output: number }>();
+
+    for (const record of this.records) {
+      const existing = agentMap.get(record.agentId);
+      if (existing) {
+        existing.input += record.inputTokens;
+        existing.output += record.outputTokens;
+      } else {
+        agentMap.set(record.agentId, {
+          taskId: record.taskId,
+          input: record.inputTokens,
+          output: record.outputTokens,
+        });
+      }
+    }
+
+    return Array.from(agentMap.entries()).map(([agentId, data]) => {
+      const totalTokens = data.input + data.output;
+      // Scale by 10000 before rounding, then divide by 100: a percentage with two decimals.
+      const usagePercent =
+        this.budget.perAgentTokenLimit > 0
+          ? Math.round((totalTokens / this.budget.perAgentTokenLimit) * 10000) / 100
+          : 0;
+
+      return {
+        agentId,
+        taskId: data.taskId,
+        inputTokens: data.input,
+        outputTokens: data.output,
+        totalTokens,
+        usagePercent,
+      };
+    });
+  }
+
+  /**
+   * Determine overall budget status from the daily usage percentage.
+   * Thresholds are inclusive and checked from most to least severe.
+   */
+  private getBudgetStatus(dailyUsagePercent: number): BudgetStatus {
+    if (dailyUsagePercent >= 100) return "exceeded";
+    if (dailyUsagePercent >= 95) return "at_limit";
+    if (dailyUsagePercent >= 80) return "approaching_limit";
+    return "within_budget";
+  }
+}
--- a/apps/orchestrator/src/budget/budget.types.ts
+++ b/apps/orchestrator/src/budget/budget.types.ts
@@ -0,0 +1,69 @@
+/**
+ * Usage Budget Management types
+ *
+ * Defines types for tracking and enforcing agent usage budgets
+ * including token limits, a concurrent-agent cap, and a task-duration limit.
+ */
+
+export interface UsageBudget {
+  /** Limit on tokens (input + output) recorded across all agents since the start of the current day */
+  dailyTokenLimit: number;
+  /** Token limit per agent; BudgetService compares it with the agent's total over all of its records */
+  perAgentTokenLimit: number;
+  /** Maximum number of agents that may be active at the same time */
+  maxConcurrentAgents: number;
+  /** Maximum task duration in minutes. NOTE(review): BudgetService only stores this value; confirm where it is enforced */
+  maxTaskDurationMinutes: number;
+  /** When true, BudgetService.canSpawnAgent refuses new agents once the daily token limit is reached */
+  enforceHardLimits: boolean;
+}
+
+export interface UsageRecord {
+  /** Agent that consumed tokens */
+  agentId: string;
+  /** Task being worked on */
+  taskId: string;
+  /** Number of input tokens used */
+  inputTokens: number;
+  /** Number of output tokens used */
+  outputTokens: number;
+  /** When the usage was recorded, as an ISO 8601 string (BudgetService fills it with Date#toISOString) */
+  timestamp: string;
+}
+
+export interface UsageSummary {
+  /** Total tokens (input + output) recorded since the start of the current day */
+  dailyTokensUsed: number;
+  /** Daily token limit */
+  dailyTokenLimit: number;
+  /** Percentage of the daily limit used, rounded to two decimals; 0 when the limit is not positive */
+  dailyUsagePercent: number;
+  /** Per-agent usage breakdown, built from all stored records rather than only today's */
+  agentUsage: AgentUsageSummary[];
+  /** Number of currently active agents */
+  activeAgents: number;
+  /** Maximum concurrent agents allowed */
+  maxConcurrentAgents: number;
+  /** Status bucket for daily usage: >= 80% approaching_limit, >= 95% at_limit, >= 100% exceeded */
+  budgetStatus: BudgetStatus;
+}
+
+export interface AgentUsageSummary {
+  agentId: string;
+  taskId: string; // task id of the agent's first record; later task ids are not tracked
+  inputTokens: number; // sum of input tokens over the agent's records
+  outputTokens: number; // sum of output tokens over the agent's records
+  totalTokens: number; // inputTokens + outputTokens
+  /** Percentage of the per-agent limit used, rounded to two decimals; 0 when the limit is not positive */
+  usagePercent: number;
+}
+
+export type BudgetStatus = "within_budget" | "approaching_limit" | "at_limit" | "exceeded"; // daily usage: below 80%, >= 80%, >= 95%, >= 100%
+
+export const DEFAULT_BUDGET: UsageBudget = { // fallback for any budget key the config leaves unset
+  dailyTokenLimit: 10_000_000, // tokens per day, all agents combined
+  perAgentTokenLimit: 2_000_000, // tokens per agent
+  maxConcurrentAgents: 10,
+  maxTaskDurationMinutes: 120, // minutes
+  enforceHardLimits: false, // the daily limit does not block spawning unless this is enabled
+};