feat(gateway): load conversation history on session resume (M1-004, M1-005)

When a session is created for a conversation that already has messages in the DB, prior messages are loaded via ConversationsRepo.findMessages() and injected into the agent session as a system prompt addition so the agent has full context of the prior exchange. Context window management (M1-005): if the full history would exceed 80% of the model's context window (token counts are estimated at ~4 chars/token), the oldest messages are replaced by a simple extractive summary and only the most recent messages are kept verbatim. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 15:46:44 -05:00
parent 02ff3b3256
commit c6d66d2d58
2 changed files with 153 additions and 3 deletions
--- a/apps/gateway/src/agent/agent.service.ts
+++ b/apps/gateway/src/agent/agent.service.ts
@@ -28,6 +28,13 @@ import { SystemOverrideService } from '../preferences/system-override.service.js
 import { PreferencesService } from '../preferences/preferences.service.js';
 import { SessionGCService } from '../gc/session-gc.service.js';
 /**
  * A single message from DB conversation history, used for context injection.
  *
  * Instances are passed to the agent service through
  * `AgentSessionOptions.conversationHistory` and rendered into a transcript
  * that becomes part of the system prompt.
  */
 export interface ConversationHistoryMessage {
  /** Who authored the message; rendered as a "User", "Assistant" or "System" label in the transcript. */
  role: 'user' | 'assistant' | 'system';
  /** Message text, inserted into the transcript as-is. */
  content: string;
  /** Creation timestamp of the message. */
  createdAt: Date;
 }
 export interface AgentSessionOptions {
  provider?: string;
  modelId?: string;
@@ -60,6 +67,12 @@ export interface AgentSessionOptions {
  agentConfigId?: string;
  /** ID of the user who owns this session. Used for preferences and system override lookups. */
  userId?: string;
  /**
   * Prior conversation messages to inject as context when resuming a session.
   * These messages are formatted and appended to the system prompt so the
   * agent is aware of what was discussed in previous sessions.
   */
  conversationHistory?: ConversationHistoryMessage[];
 }
 export interface AgentSession {
@@ -244,8 +257,20 @@ export class AgentService implements OnModuleDestroy {
    // Build system prompt: platform prompt + skill additions appended
    const platformPrompt =
      mergedOptions?.systemPrompt ?? process.env['AGENT_SYSTEM_PROMPT'] ?? undefined;
-    const appendSystemPrompt =
+
-      promptAdditions.length > 0 ? promptAdditions.join('\n\n') : undefined;
+    // Format conversation history for context injection (M1-004 / M1-005)
    const historyPromptSection = mergedOptions?.conversationHistory?.length
      ? this.buildHistoryPromptSection(
          mergedOptions.conversationHistory,
          model?.contextWindow ?? 8192,
          sessionId,
        )
      : undefined;
    const appendParts: string[] = [];
    if (promptAdditions.length > 0) appendParts.push(promptAdditions.join('\n\n'));
    if (historyPromptSection) appendParts.push(historyPromptSection);
    const appendSystemPrompt = appendParts.length > 0 ? appendParts.join('\n\n') : undefined;
    // Construct a resource loader that injects the configured system prompt
    const resourceLoader = new DefaultResourceLoader({
@@ -313,6 +338,92 @@ export class AgentService implements OnModuleDestroy {
    return session;
  }
  /**
   * Approximate the number of tokens a string occupies.
   *
   * Uses a coarse heuristic of four characters per token, rounded up, so any
   * non-empty string counts as at least one token and the empty string as zero.
   *
   * @param text - The text to measure.
   * @returns The estimated token count.
   */
  private estimateTokens(text: string): number {
    const charsPerToken = 4;
    const charCount = text.length;
    return Math.ceil(charCount / charsPerToken);
  }
  /**
   * Build the conversation-history section that is injected into the system prompt
   * when a session is resumed. Implements M1-004 (history loading) and M1-005
   * (context window management).
   *
   * All token counts are estimates produced by `estimateTokens`. The history budget
   * is 80% of `contextWindow`:
   *
   * - If the whole transcript fits the budget it is returned verbatim under the
   *   history header.
   * - Otherwise 20% of the budget is reserved for an extractive summary of the oldest
   *   messages (the first 120 characters of each summarized user message, no LLM
   *   required) and the remaining 80% is filled with the most recent messages, kept
   *   verbatim. The topic list is cut off once the reserve is used up, so the summary
   *   cannot outgrow its share of the budget.
   *
   * @param history - Prior messages, treated as ordered oldest to newest.
   * @param contextWindow - Context window size of the model, in tokens.
   * @param sessionId - Session identifier; used only to tag log lines.
   * @returns Markdown text that starts with the history header.
   */
  private buildHistoryPromptSection(
    history: ConversationHistoryMessage[],
    contextWindow: number,
    sessionId: string,
  ): string {
    const TOKEN_BUDGET = Math.floor(contextWindow * 0.8);
    const HISTORY_HEADER = '## Conversation History (resumed session)\n\n';

    const formatMessage = (msg: ConversationHistoryMessage): string => {
      const roleLabel =
        msg.role === 'user' ? 'User' : msg.role === 'assistant' ? 'Assistant' : 'System';
      return `**${roleLabel}:** ${msg.content}`;
    };

    const formatted = history.map((msg) => formatMessage(msg));
    const fullHistory = formatted.join('\n\n');
    const fullTokens = this.estimateTokens(HISTORY_HEADER + fullHistory);

    if (fullTokens <= TOKEN_BUDGET) {
      this.logger.debug(
        `Session ${sessionId}: injecting full history (${history.length} msgs, ~${fullTokens} tokens)`,
      );
      return HISTORY_HEADER + fullHistory;
    }

    // History exceeds budget — summarize oldest messages, keep recent verbatim
    this.logger.log(
      `Session ${sessionId}: history (~${fullTokens} tokens) exceeds ${TOKEN_BUDGET} token budget; summarizing oldest messages`,
    );

    // Reserve 20% of the budget for the header + summary, rest for verbatim messages
    const SUMMARY_RESERVE = Math.floor(TOKEN_BUDGET * 0.2);
    const verbatimBudget = TOKEN_BUDGET - SUMMARY_RESERVE;

    // Walk backwards from the newest message, keeping messages verbatim until the
    // next one would overflow the verbatim budget. Everything before the cut index
    // is summarized instead.
    let verbatimTokens = 0;
    let verbatimCutIndex = history.length;
    for (let i = history.length - 1; i >= 0; i--) {
      const t = this.estimateTokens(formatted[i] ?? '');
      if (verbatimTokens + t > verbatimBudget) break;
      verbatimTokens += t;
      verbatimCutIndex = i;
    }

    const summarizedMessages = history.slice(0, verbatimCutIndex);
    const verbatimCount = history.length - verbatimCutIndex;
    // Reuse the already formatted messages instead of formatting them a second time.
    const verbatimSection = formatted.slice(verbatimCutIndex).join('\n\n');

    let summaryText = '';
    if (summarizedMessages.length > 0) {
      const summaryPrefix =
        `**Previous conversation summary** (${summarizedMessages.length} messages omitted for brevity):\n` +
        'Topics discussed: ';

      // Collect topics in chronological order until the summary reserve is used up.
      // Once one topic does not fit, all later ones are dropped as well so the list
      // stays a contiguous prefix of the summarized conversation.
      const topics: string[] = [];
      let omittedTopics = 0;
      let summaryTokens = this.estimateTokens(HISTORY_HEADER + summaryPrefix);
      for (const msg of summarizedMessages) {
        if (msg.role !== 'user') continue;
        const topic = msg.content.slice(0, 120).replace(/\n/g, ' ');
        const topicTokens = this.estimateTokens(`${topic}; `);
        if (omittedTopics > 0 || summaryTokens + topicTokens > SUMMARY_RESERVE) {
          omittedTopics++;
          continue;
        }
        summaryTokens += topicTokens;
        topics.push(topic);
      }

      let topicList = topics.join('; ');
      if (omittedTopics > 0) {
        // Short marker so the agent knows the topic list is incomplete.
        topicList += `${topicList ? '; ' : ''}... (+${omittedTopics} more)`;
      }
      summaryText = summaryPrefix + (topicList || '(no user messages in summarized portion)');
    }

    // HISTORY_HEADER already ends with a blank line, so only the sections that follow
    // it are joined; this keeps the spacing identical to the full-history path.
    const sections = [summaryText, verbatimSection].filter((section) => section.length > 0);
    const result = HISTORY_HEADER + sections.join('\n\n');

    this.logger.log(
      `Session ${sessionId}: summarized ${summarizedMessages.length} messages, kept ${verbatimCount} verbatim (~${this.estimateTokens(result)} tokens)`,
    );
    return result;
  }
  private resolveModel(options?: AgentSessionOptions) {
    if (!options?.provider && !options?.modelId) {
      return this.providerService.getDefaultModel() ?? null;
--- a/apps/gateway/src/chat/chat.gateway.ts
+++ b/apps/gateway/src/chat/chat.gateway.ts
@@ -14,7 +14,7 @@ import type { AgentSessionEvent } from '@mariozechner/pi-coding-agent';
 import type { Auth } from '@mosaic/auth';
 import type { Brain } from '@mosaic/brain';
 import type { SetThinkingPayload, SlashCommandPayload, SystemReloadPayload } from '@mosaic/types';
-import { AgentService } from '../agent/agent.service.js';
+import { AgentService, type ConversationHistoryMessage } from '../agent/agent.service.js';
 import { AUTH } from '../auth/auth.tokens.js';
 import { BRAIN } from '../brain/brain.tokens.js';
 import { CommandRegistryService } from '../commands/command-registry.service.js';
@@ -100,12 +100,22 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
    try {
      let agentSession = this.agentService.getSession(conversationId);
      if (!agentSession) {
        // When resuming an existing conversation, load prior messages to inject as context (M1-004)
        const conversationHistory = await this.loadConversationHistory(conversationId, userId);
        agentSession = await this.agentService.createSession(conversationId, {
          provider: data.provider,
          modelId: data.modelId,
          agentConfigId: data.agentId,
          userId,
          conversationHistory: conversationHistory.length > 0 ? conversationHistory : undefined,
        });
        if (conversationHistory.length > 0) {
          this.logger.log(
            `Loaded ${conversationHistory.length} prior messages for conversation=${conversationId}`,
          );
        }
      }
    } catch (err) {
      this.logger.error(
@@ -275,6 +285,35 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
    }
  }
  /**
   * Fetch the stored messages of a conversation so they can be injected as context
   * when its agent session is re-created (M1-004).
   *
   * Best-effort: resolves to an empty array when `userId` is falsy, when the lookup
   * returns no messages, or when the lookup throws (the failure is logged and not
   * rethrown).
   * NOTE(review): a conversation not owned by the user is presumably also reported
   * as "no messages" by `findMessages` — confirm against the repository implementation.
   *
   * @param conversationId - Conversation whose messages are loaded.
   * @param userId - Requesting user; forwarded to the message lookup.
   * @returns The prior messages reduced to role, content and creation time.
   */
  private async loadConversationHistory(
    conversationId: string,
    userId: string | undefined,
  ): Promise<ConversationHistoryMessage[]> {
    // Without a user there is nothing to look up.
    if (!userId) {
      return [];
    }

    try {
      const rows = await this.brain.conversations.findMessages(conversationId, userId);
      // Mapping an empty result already yields an empty array, so no special case is needed.
      return rows.map((row) => ({
        role: row.role as 'user' | 'assistant' | 'system',
        content: row.content,
        createdAt: row.createdAt,
      }));
    } catch (err) {
      const detail = err instanceof Error ? err.stack : String(err);
      this.logger.error(
        `Failed to load conversation history for conversation=${conversationId}`,
        detail,
      );
      return [];
    }
  }
  private relayEvent(client: Socket, conversationId: string, event: AgentSessionEvent): void {
    if (!client.connected) {
      this.logger.warn(