From 77da12a5eeceb2797608447a230750069daf10f3 Mon Sep 17 00:00:00 2001
From: Jason Woltje
Date: Fri, 13 Mar 2026 03:40:07 +0000
Subject: [PATCH] =?UTF-8?q?test:=20verify=20Phase=202=20=E2=80=94=20routin?=
 =?UTF-8?q?g=20+=20coord=20tests=20(P2-007)=20(#79)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jason Woltje
Co-committed-by: Jason Woltje
---
Notes (not part of the commit message; everything between the three-dash
line above and the first "diff --git" line is ignored when applying):

- apps/gateway/src/agent/__tests__/routing.service.test.ts (new): vitest
  suite that drives RoutingService.route() and RoutingService.rank() through
  a mocked provider service whose listAvailableModels() returns three fixture
  models (two anthropic, one ollama).
- docs/TASKS.md: P2-006 and FIX-01 move to "done" (PR #78); P2-007 moves
  from "not-started" to "in-progress".
- packages/coord/src/__tests__/tasks-file.test.ts (new): vitest suite for
  parseTasksFile() and writeTasksFile(), including legacy status mapping and
  a parse -> write -> parse round trip.
- NOTE(review): the From / Co-authored-by / Co-committed-by headers carry no
  e-mail address in this copy; presumably stripped in transit. Restore them
  before feeding this file to git am.
- NOTE(review): runs of spaces appear to have been collapsed in this copy.
  Code indentation below is reconstructed as 2 spaces; Markdown table rows
  are kept exactly as received. If the docs/TASKS.md hunks fail to apply,
  check the column padding against the tree.

 .../agent/__tests__/routing.service.test.ts   | 138 ++++++++++++++++++
 docs/TASKS.md                                 |   6 +-
 .../coord/src/__tests__/tasks-file.test.ts    | 102 +++++++++++++
 3 files changed, 243 insertions(+), 3 deletions(-)
 create mode 100644 apps/gateway/src/agent/__tests__/routing.service.test.ts
 create mode 100644 packages/coord/src/__tests__/tasks-file.test.ts

diff --git a/apps/gateway/src/agent/__tests__/routing.service.test.ts b/apps/gateway/src/agent/__tests__/routing.service.test.ts
new file mode 100644
index 0000000..0960777
--- /dev/null
+++ b/apps/gateway/src/agent/__tests__/routing.service.test.ts
@@ -0,0 +1,138 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { RoutingService } from '../routing.service.js';
+import type { ModelInfo } from '@mosaic/types';
+
+const mockModels: ModelInfo[] = [
+  {
+    id: 'claude-3-haiku',
+    provider: 'anthropic',
+    name: 'Claude 3 Haiku',
+    reasoning: false,
+    contextWindow: 200_000,
+    maxTokens: 4096,
+    inputTypes: ['text', 'image'],
+    cost: { input: 0.25, output: 1.25, cacheRead: 0.03, cacheWrite: 0.3 },
+  },
+  {
+    id: 'claude-3-sonnet',
+    provider: 'anthropic',
+    name: 'Claude 3 Sonnet',
+    reasoning: true,
+    contextWindow: 200_000,
+    maxTokens: 8192,
+    inputTypes: ['text', 'image'],
+    cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
+  },
+  {
+    id: 'llama3.2',
+    provider: 'ollama',
+    name: 'Llama 3.2',
+    reasoning: false,
+    contextWindow: 128_000,
+    maxTokens: 4096,
+    inputTypes: ['text'],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+  },
+];
+
+function createMockProviderService() {
+  return {
+    listAvailableModels: vi.fn().mockReturnValue(mockModels),
+    findModel: vi.fn(),
+    getDefaultModel: vi.fn(),
+    getRegistry: vi.fn(),
+    listProviders: vi.fn(),
+    registerCustomProvider: vi.fn(),
+  };
+}
+
+describe('RoutingService', () => {
+  let routingService: RoutingService;
+  let mockProviderService: ReturnType<typeof createMockProviderService>;
+
+  beforeEach(() => {
+    mockProviderService = createMockProviderService();
+    routingService = new RoutingService(mockProviderService as never);
+  });
+
+  it('returns a model when no criteria specified', () => {
+    const result = routingService.route();
+    expect(result).not.toBeNull();
+    expect(result!.provider).toBeDefined();
+    expect(result!.modelId).toBeDefined();
+    expect(result!.score).toBeGreaterThan(0);
+  });
+
+  it('returns null when no models available', () => {
+    mockProviderService.listAvailableModels.mockReturnValue([]);
+    const result = routingService.route();
+    expect(result).toBeNull();
+  });
+
+  it('selects preferred model when specified', () => {
+    const result = routingService.route({
+      preferredProvider: 'anthropic',
+      preferredModel: 'claude-3-sonnet',
+    });
+    expect(result).not.toBeNull();
+    expect(result!.provider).toBe('anthropic');
+    expect(result!.modelId).toBe('claude-3-sonnet');
+    expect(result!.score).toBe(100);
+  });
+
+  it('disqualifies models without reasoning when required', () => {
+    const result = routingService.route({ requireReasoning: true });
+    expect(result).not.toBeNull();
+    expect(result!.modelId).toBe('claude-3-sonnet');
+  });
+
+  it('disqualifies models without image input when required', () => {
+    const result = routingService.route({ requireImageInput: true });
+    expect(result).not.toBeNull();
+    // Llama doesn't support images, should be excluded
+    expect(result!.provider).toBe('anthropic');
+  });
+
+  it('respects minimum context window', () => {
+    const result = routingService.route({ minContextWindow: 150_000 });
+    expect(result).not.toBeNull();
+    // Only anthropic models have 200k context
+    expect(result!.provider).toBe('anthropic');
+  });
+
+  it('favors cheap models when costTier is cheap', () => {
+    const result = routingService.route({ costTier: 'cheap' });
+    expect(result).not.toBeNull();
+    // Ollama (free) and Haiku ($0.25/M) are cheap
+    expect(['ollama', 'anthropic'].includes(result!.provider)).toBe(true);
+    if (result!.provider === 'anthropic') {
+      expect(result!.modelId).toBe('claude-3-haiku');
+    }
+  });
+
+  it('ranks all models and returns sorted results', () => {
+    const ranked = routingService.rank({ taskType: 'coding' });
+    expect(ranked.length).toBeGreaterThan(0);
+    // Should be sorted by score descending
+    for (let i = 1; i < ranked.length; i++) {
+      expect(ranked[i]!.score).toBeLessThanOrEqual(ranked[i - 1]!.score);
+    }
+  });
+
+  it('gives reasoning bonus for coding tasks', () => {
+    const ranked = routingService.rank({ taskType: 'coding' });
+    const sonnet = ranked.find((r) => r.modelId === 'claude-3-sonnet');
+    const haiku = ranked.find((r) => r.modelId === 'claude-3-haiku');
+    expect(sonnet).toBeDefined();
+    expect(haiku).toBeDefined();
+    // Sonnet (reasoning) should score higher for coding than haiku (no reasoning)
+    expect(sonnet!.score).toBeGreaterThan(haiku!.score);
+  });
+
+  it('prefers specified provider', () => {
+    const ranked = routingService.rank({ preferredProvider: 'ollama' });
+    const ollamaModel = ranked.find((r) => r.provider === 'ollama');
+    expect(ollamaModel).toBeDefined();
+    expect(ollamaModel!.reasoning).toContain('preferred provider');
+  });
+});
diff --git a/docs/TASKS.md b/docs/TASKS.md
index b1354e6..3e07e31 100644
--- a/docs/TASKS.md
+++ b/docs/TASKS.md
@@ -27,8 +27,8 @@
 | P2-003 | done | Phase 2 | Agent routing engine — cost/capability matrix | #75 | #21 |
 | P2-004 | done | Phase 2 | Tool registration — brain, queue, memory tools | #76 | #22 |
 | P2-005 | done | Phase 2 | @mosaic/coord — migrate from v0, gateway integration | #77 | #23 |
-| P2-006 | in-progress | Phase 2 | Agent session management — tmux + monitoring | — | #24 |
-| P2-007 | not-started | Phase 2 | Verify Phase 2 — multi-provider routing works | — | #25 |
+| P2-006 | done | Phase 2 | Agent session management — tmux + monitoring | #78 | #24 |
+| P2-007 | in-progress | Phase 2 | Verify Phase 2 — multi-provider routing works | — | #25 |
 | P3-001 | not-started | Phase 3 | apps/web scaffold — Next.js 16 + BetterAuth + Tailwind | — | #26 |
 | P3-002 | not-started | Phase 3 | Auth pages — login, registration, SSO redirect | — | #27 |
 | P3-003 | not-started | Phase 3 | Chat UI — conversations, messages, streaming | — | #28 |
@@ -63,6 +63,6 @@
 | P7-006 | not-started | Phase 7 | Documentation — user guide, admin guide, dev guide | — | #57 |
 | P7-007 | not-started | Phase 7 | Bare-metal deployment docs + .env.example | — | #58 |
 | P7-008 | not-started | Phase 7 | Beta release gate — v0.1.0 tag | — | #59 |
-| FIX-01 | not-started | Backlog | Call piSession.dispose() in AgentService.destroySession | — | #62 |
+| FIX-01 | done | Backlog | Call piSession.dispose() in AgentService.destroySession | #78 | #62 |
 | FIX-02 | not-started | Backlog | TUI agent:end — fix React state updater side-effect | — | #63 |
 | FIX-03 | not-started | Backlog | Agent session — cwd sandbox, system prompt, tool restrictions | — | #64 |
diff --git a/packages/coord/src/__tests__/tasks-file.test.ts b/packages/coord/src/__tests__/tasks-file.test.ts
new file mode 100644
index 0000000..baeb126
--- /dev/null
+++ b/packages/coord/src/__tests__/tasks-file.test.ts
@@ -0,0 +1,102 @@
+import { describe, it, expect } from 'vitest';
+import { parseTasksFile, writeTasksFile } from '../tasks-file.js';
+import type { MissionTask } from '../types.js';
+
+const SAMPLE_TASKS_MD = `# Tasks — MVP
+
+> Single-writer: orchestrator only. Workers read but never modify.
+
+| id | status | milestone | description | pr | notes |
+| ------ | ----------- | --------- | ------------------------------------------ | --- | ----- |
+| P0-001 | done | Phase 0 | Scaffold monorepo | #60 | #1 |
+| P0-002 | done | Phase 0 | @mosaic/types — migrate and extend | #65 | #2 |
+| P1-001 | in-progress | Phase 1 | apps/gateway scaffold | #61 | #10 |
+| P2-001 | not-started | Phase 2 | @mosaic/agent — Pi SDK integration | — | #19 |
+| P2-002 | blocked | Phase 2 | Multi-provider support | — | #20 |
+`;
+
+describe('parseTasksFile', () => {
+  it('parses a valid TASKS.md into MissionTask[]', () => {
+    const tasks = parseTasksFile(SAMPLE_TASKS_MD);
+    expect(tasks).toHaveLength(5);
+  });
+
+  it('extracts task IDs correctly', () => {
+    const tasks = parseTasksFile(SAMPLE_TASKS_MD);
+    expect(tasks.map((t) => t.id)).toEqual(['P0-001', 'P0-002', 'P1-001', 'P2-001', 'P2-002']);
+  });
+
+  it('extracts statuses correctly', () => {
+    const tasks = parseTasksFile(SAMPLE_TASKS_MD);
+    expect(tasks.map((t) => t.status)).toEqual([
+      'done',
+      'done',
+      'in-progress',
+      'not-started',
+      'blocked',
+    ]);
+  });
+
+  it('extracts milestones correctly', () => {
+    const tasks = parseTasksFile(SAMPLE_TASKS_MD);
+    expect(tasks[0]!.milestone).toBe('Phase 0');
+    expect(tasks[2]!.milestone).toBe('Phase 1');
+    expect(tasks[3]!.milestone).toBe('Phase 2');
+  });
+
+  it('extracts PR references', () => {
+    const tasks = parseTasksFile(SAMPLE_TASKS_MD);
+    expect(tasks[0]!.pr).toBe('#60');
+    expect(tasks[3]!.pr).toBe('—');
+  });
+
+  it('returns empty array for empty content', () => {
+    expect(parseTasksFile('')).toEqual([]);
+    expect(parseTasksFile('# No table here')).toEqual([]);
+  });
+
+  it('handles legacy status values', () => {
+    const content = `| id | status | description |
+|----|--------|-------------|
+| T1 | completed | Task one |
+| T2 | pending | Task two |
+| T3 | failed | Task three |
+`;
+    const tasks = parseTasksFile(content);
+    expect(tasks[0]!.status).toBe('done');
+    expect(tasks[1]!.status).toBe('not-started');
+    expect(tasks[2]!.status).toBe('blocked');
+  });
+});
+
+describe('writeTasksFile', () => {
+  it('generates valid markdown table', () => {
+    const tasks: MissionTask[] = [
+      {
+        id: 'T-001',
+        title: 'Test task',
+        status: 'done',
+        dependencies: [],
+        milestone: 'Phase 1',
+        pr: '#42',
+        notes: '#1',
+      },
+    ];
+
+    const output = writeTasksFile(tasks);
+    expect(output).toContain('| T-001 | done | Phase 1 | Test task | #42 | #1 |');
+    expect(output).toContain('# Tasks');
+  });
+
+  it('roundtrips parse/write', () => {
+    const tasks = parseTasksFile(SAMPLE_TASKS_MD);
+    const output = writeTasksFile(tasks);
+    const reparsed = parseTasksFile(output);
+    expect(reparsed).toHaveLength(tasks.length);
+    for (let i = 0; i < tasks.length; i++) {
+      expect(reparsed[i]!.id).toBe(tasks[i]!.id);
+      expect(reparsed[i]!.status).toBe(tasks[i]!.status);
+      expect(reparsed[i]!.title).toBe(tasks[i]!.title);
+    }
+  });
+});