From 7bf2ad1f920af02686b2d56901d49a0a6347cd1a Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 22 Mar 2026 19:48:06 -0500 Subject: [PATCH] feat(M4-007,M4-008,M4-012): wire routing engine into ChatGateway, add /model override and transparency M4-012: In ChatGateway.handleMessage(), call routingEngine.resolve() before creating a new agent session when no explicit provider/model is supplied. The routing decision's provider/model are passed to AgentService.createSession(). M4-007: Add sticky per-session /model override via ChatGateway.setModelOverride(). When active, the routing engine is bypassed entirely. CommandExecutorService handleModel() is updated to set/clear the override. /model clear resets to automatic routing. /model with no args shows the current override or usage hint. M4-008: Include routingDecision in the session:info socket event. Add RoutingDecisionInfo to SessionInfoPayload in @mosaic/types. useSocket() tracks the routing decision in state and the BottomBar TUI component displays a "Routed: <model> (<reason>)" line when a routing decision is present. 
Co-Authored-By: Claude Sonnet 4.6 --- apps/gateway/src/chat/chat.gateway.ts | 86 ++++++++++++++++++- packages/cli/src/tui/app.tsx | 1 + .../cli/src/tui/components/bottom-bar.tsx | 13 +++ packages/cli/src/tui/hooks/use-socket.ts | 9 ++ packages/types/src/chat/events.ts | 10 +++ packages/types/src/chat/index.ts | 1 + 6 files changed, 116 insertions(+), 4 deletions(-) diff --git a/apps/gateway/src/chat/chat.gateway.ts b/apps/gateway/src/chat/chat.gateway.ts index 0795e85..7e24b12 100644 --- a/apps/gateway/src/chat/chat.gateway.ts +++ b/apps/gateway/src/chat/chat.gateway.ts @@ -13,12 +13,18 @@ import { Server, Socket } from 'socket.io'; import type { AgentSessionEvent } from '@mariozechner/pi-coding-agent'; import type { Auth } from '@mosaic/auth'; import type { Brain } from '@mosaic/brain'; -import type { SetThinkingPayload, SlashCommandPayload, SystemReloadPayload } from '@mosaic/types'; +import type { + SetThinkingPayload, + SlashCommandPayload, + SystemReloadPayload, + RoutingDecisionInfo, +} from '@mosaic/types'; import { AgentService, type ConversationHistoryMessage } from '../agent/agent.service.js'; import { AUTH } from '../auth/auth.tokens.js'; import { BRAIN } from '../brain/brain.tokens.js'; import { CommandRegistryService } from '../commands/command-registry.service.js'; import { CommandExecutorService } from '../commands/command-executor.service.js'; +import { RoutingEngineService } from '../agent/routing/routing-engine.service.js'; import { v4 as uuid } from 'uuid'; import { ChatSocketMessageDto } from './chat.dto.js'; import { validateSocketSession } from './chat.gateway-auth.js'; @@ -33,8 +39,16 @@ interface ClientSession { toolCalls: Array<{ toolCallId: string; toolName: string; args: unknown; isError: boolean }>; /** Tool calls in-flight (started but not ended yet). 
*/ pendingToolCalls: Map; + /** Last routing decision made for this session (M4-008) */ + lastRoutingDecision?: RoutingDecisionInfo; } +/** + * Per-conversation model overrides set via /model command (M4-007). + * Keyed by conversationId, value is the model name to use. + */ +const modelOverrides = new Map(); + @WebSocketGateway({ cors: { origin: process.env['GATEWAY_CORS_ORIGIN'] ?? 'http://localhost:3000', @@ -54,6 +68,7 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa @Inject(BRAIN) private readonly brain: Brain, @Inject(CommandRegistryService) private readonly commandRegistry: CommandRegistryService, @Inject(CommandExecutorService) private readonly commandExecutor: CommandExecutorService, + @Inject(RoutingEngineService) private readonly routingEngine: RoutingEngineService, ) {} afterInit(): void { @@ -97,15 +112,50 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa this.logger.log(`Message from ${client.id} in conversation ${conversationId}`); // Ensure agent session exists for this conversation + let sessionRoutingDecision: RoutingDecisionInfo | undefined; try { let agentSession = this.agentService.getSession(conversationId); if (!agentSession) { // When resuming an existing conversation, load prior messages to inject as context (M1-004) const conversationHistory = await this.loadConversationHistory(conversationId, userId); + // Determine provider/model via routing engine or per-session /model override (M4-012 / M4-007) + let resolvedProvider = data.provider; + let resolvedModelId = data.modelId; + + const modelOverride = modelOverrides.get(conversationId); + if (modelOverride) { + // /model override bypasses routing engine (M4-007) + resolvedModelId = modelOverride; + this.logger.log( + `Using /model override "${modelOverride}" for conversation=${conversationId}`, + ); + } else if (!resolvedProvider && !resolvedModelId) { + // No explicit provider/model from client — use routing engine 
(M4-012) + try { + const routingDecision = await this.routingEngine.resolve(data.content, userId); + resolvedProvider = routingDecision.provider; + resolvedModelId = routingDecision.model; + sessionRoutingDecision = { + model: routingDecision.model, + provider: routingDecision.provider, + ruleName: routingDecision.ruleName, + reason: routingDecision.reason, + }; + this.logger.log( + `Routing decision for conversation=${conversationId}: ${routingDecision.provider}/${routingDecision.model} (rule="${routingDecision.ruleName}")`, + ); + } catch (routingErr) { + this.logger.warn( + `Routing engine failed for conversation=${conversationId}, using defaults`, + routingErr instanceof Error ? routingErr.message : String(routingErr), + ); + } + } + agentSession = await this.agentService.createSession(conversationId, { - provider: data.provider, - modelId: data.modelId, + provider: resolvedProvider, + modelId: resolvedModelId, agentConfigId: data.agentId, userId, conversationHistory: conversationHistory.length > 0 ? conversationHistory : undefined, @@ -167,18 +217,23 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa this.relayEvent(client, conversationId, event); }); + // Preserve routing decision from the existing client session if we didn't get a new one + const prevClientSession = this.clientSessions.get(client.id); + const routingDecisionToStore = sessionRoutingDecision ?? 
prevClientSession?.lastRoutingDecision; + this.clientSessions.set(client.id, { conversationId, cleanup, assistantText: '', toolCalls: [], pendingToolCalls: new Map(), + lastRoutingDecision: routingDecisionToStore, }); // Track channel connection this.agentService.addChannel(conversationId, `websocket:${client.id}`); - // Send session info so the client knows the model/provider + // Send session info so the client knows the model/provider (M4-008: include routing decision) { const agentSession = this.agentService.getSession(conversationId); if (agentSession) { @@ -189,6 +244,7 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa modelId: agentSession.modelId, thinkingLevel: piSession.thinkingLevel, availableThinkingLevels: piSession.getAvailableThinkingLevels(), + ...(routingDecisionToStore ? { routingDecision: routingDecisionToStore } : {}), }); } } @@ -263,6 +319,28 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa this.logger.log('Broadcasted system:reload to all connected clients'); } + /** + * Set a per-conversation model override (M4-007). + * When set, the routing engine is bypassed and the specified model is used. + * Pass null to clear the override and resume automatic routing. + */ + setModelOverride(conversationId: string, modelName: string | null): void { + if (modelName) { + modelOverrides.set(conversationId, modelName); + this.logger.log(`Model override set: conversation=${conversationId} model="${modelName}"`); + } else { + modelOverrides.delete(conversationId); + this.logger.log(`Model override cleared: conversation=${conversationId}`); + } + } + + /** + * Return the active model override for a conversation, or undefined if none. + */ + getModelOverride(conversationId: string): string | undefined { + return modelOverrides.get(conversationId); + } + /** * Ensure a conversation record exists in the DB. 
* Creates it if absent — safe to call concurrently since a duplicate insert diff --git a/packages/cli/src/tui/app.tsx b/packages/cli/src/tui/app.tsx index 2c556a8..d81cab6 100644 --- a/packages/cli/src/tui/app.tsx +++ b/packages/cli/src/tui/app.tsx @@ -403,6 +403,7 @@ export function TuiApp({ providerName={socket.providerName} thinkingLevel={socket.thinkingLevel} conversationId={socket.conversationId} + routingDecision={socket.routingDecision} /> ); diff --git a/packages/cli/src/tui/components/bottom-bar.tsx b/packages/cli/src/tui/components/bottom-bar.tsx index 53c80fe..f875501 100644 --- a/packages/cli/src/tui/components/bottom-bar.tsx +++ b/packages/cli/src/tui/components/bottom-bar.tsx @@ -1,5 +1,6 @@ import React from 'react'; import { Box, Text } from 'ink'; +import type { RoutingDecisionInfo } from '@mosaic/types'; import type { TokenUsage } from '../hooks/use-socket.js'; import type { GitInfo } from '../hooks/use-git-info.js'; @@ -12,6 +13,8 @@ export interface BottomBarProps { providerName: string | null; thinkingLevel: string; conversationId: string | undefined; + /** Routing decision info for transparency display (M4-008) */ + routingDecision?: RoutingDecisionInfo | null; } function formatTokens(n: number): string { @@ -38,6 +41,7 @@ export function BottomBar({ providerName, thinkingLevel, conversationId, + routingDecision, }: BottomBarProps) { const gatewayStatus = connected ? 'Connected' : connecting ? 'Connecting…' : 'Disconnected'; const gatewayColor = connected ? 'green' : connecting ? 
'yellow' : 'red'; @@ -120,6 +124,15 @@ export function BottomBar({ + + {/* Line 4: routing transparency (M4-008) — only shown when a routing decision is available */} + {routingDecision && ( + + + Routed: {routingDecision.model} ({routingDecision.reason}) + + + )} ); } diff --git a/packages/cli/src/tui/hooks/use-socket.ts b/packages/cli/src/tui/hooks/use-socket.ts index 08ca792..0635e01 100644 --- a/packages/cli/src/tui/hooks/use-socket.ts +++ b/packages/cli/src/tui/hooks/use-socket.ts @@ -14,6 +14,7 @@ import type { CommandManifestPayload, SlashCommandResultPayload, SystemReloadPayload, + RoutingDecisionInfo, } from '@mosaic/types'; import { commandRegistry } from '../commands/index.js'; @@ -66,6 +67,8 @@ export interface UseSocketReturn { providerName: string | null; thinkingLevel: string; availableThinkingLevels: string[]; + /** Last routing decision received from the gateway (M4-008) */ + routingDecision: RoutingDecisionInfo | null; sendMessage: (content: string) => void; addSystemMessage: (content: string) => void; setThinkingLevel: (level: string) => void; @@ -109,6 +112,7 @@ export function useSocket(opts: UseSocketOptions): UseSocketReturn { const [providerName, setProviderName] = useState(null); const [thinkingLevel, setThinkingLevelState] = useState('off'); const [availableThinkingLevels, setAvailableThinkingLevels] = useState([]); + const [routingDecision, setRoutingDecision] = useState(null); const [connectionError, setConnectionError] = useState(null); const socketRef = useRef(null); @@ -154,6 +158,10 @@ export function useSocket(opts: UseSocketOptions): UseSocketReturn { setModelName(data.modelId); setThinkingLevelState(data.thinkingLevel); setAvailableThinkingLevels(data.availableThinkingLevels); + // Update routing decision if provided (M4-008) + if (data.routingDecision) { + setRoutingDecision(data.routingDecision); + } }); socket.on('agent:start', () => { @@ -319,6 +327,7 @@ export function useSocket(opts: UseSocketOptions): UseSocketReturn { 
providerName, thinkingLevel, availableThinkingLevels, + routingDecision, sendMessage, addSystemMessage, setThinkingLevel, diff --git a/packages/types/src/chat/events.ts b/packages/types/src/chat/events.ts index 5d6bcf2..313bb6c 100644 --- a/packages/types/src/chat/events.ts +++ b/packages/types/src/chat/events.ts @@ -74,6 +74,14 @@ export interface ChatMessagePayload { agentId?: string; } +/** Routing decision summary included in session:info for transparency */ +export interface RoutingDecisionInfo { + model: string; + provider: string; + ruleName: string; + reason: string; +} + /** Session info pushed when session is created or model changes */ export interface SessionInfoPayload { conversationId: string; @@ -81,6 +89,8 @@ export interface SessionInfoPayload { modelId: string; thinkingLevel: string; availableThinkingLevels: string[]; + /** Present when automatic routing determined the model for this session */ + routingDecision?: RoutingDecisionInfo; } /** Client request to change thinking level */ diff --git a/packages/types/src/chat/index.ts b/packages/types/src/chat/index.ts index 7d039a7..a8440ad 100644 --- a/packages/types/src/chat/index.ts +++ b/packages/types/src/chat/index.ts @@ -9,6 +9,7 @@ export type { ToolEndPayload, SessionUsagePayload, SessionInfoPayload, + RoutingDecisionInfo, SetThinkingPayload, ErrorPayload, ChatMessagePayload,