feat(gateway): load conversation history on session resume (M1-004, M1-005)
When a session is created for a conversation that already has messages in the DB, prior messages are loaded via ConversationsRepo.findMessages() and injected into the agent session as a system prompt addition so the agent has full context of the prior exchange. Context window management (M1-005): if the full history would exceed 80% of the model's context window (estimated at ~4 chars/token), older messages are summarized as a simple extractive summary and only the most recent messages are kept verbatim. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,13 @@ import { SystemOverrideService } from '../preferences/system-override.service.js
|
||||
import { PreferencesService } from '../preferences/preferences.service.js';
|
||||
import { SessionGCService } from '../gc/session-gc.service.js';
|
||||
|
||||
/** A single message from DB conversation history, used for context injection. */
export interface ConversationHistoryMessage {
  /** Author of the message in the original exchange. */
  role: 'user' | 'assistant' | 'system';
  /** Message text as persisted in the conversations store. */
  content: string;
  /** Persistence timestamp; callers appear to supply messages in chronological order — confirm. */
  createdAt: Date;
}
|
||||
|
||||
export interface AgentSessionOptions {
|
||||
provider?: string;
|
||||
modelId?: string;
|
||||
@@ -60,6 +67,12 @@ export interface AgentSessionOptions {
|
||||
agentConfigId?: string;
|
||||
/** ID of the user who owns this session. Used for preferences and system override lookups. */
|
||||
userId?: string;
|
||||
/**
|
||||
* Prior conversation messages to inject as context when resuming a session.
|
||||
* These messages are formatted and prepended to the system prompt so the
|
||||
* agent is aware of what was discussed in previous sessions.
|
||||
*/
|
||||
conversationHistory?: ConversationHistoryMessage[];
|
||||
}
|
||||
|
||||
export interface AgentSession {
|
||||
@@ -244,8 +257,20 @@ export class AgentService implements OnModuleDestroy {
|
||||
// Build system prompt: platform prompt + skill additions appended
|
||||
const platformPrompt =
|
||||
mergedOptions?.systemPrompt ?? process.env['AGENT_SYSTEM_PROMPT'] ?? undefined;
|
||||
const appendSystemPrompt =
|
||||
promptAdditions.length > 0 ? promptAdditions.join('\n\n') : undefined;
|
||||
|
||||
// Format conversation history for context injection (M1-004 / M1-005)
|
||||
const historyPromptSection = mergedOptions?.conversationHistory?.length
|
||||
? this.buildHistoryPromptSection(
|
||||
mergedOptions.conversationHistory,
|
||||
model?.contextWindow ?? 8192,
|
||||
sessionId,
|
||||
)
|
||||
: undefined;
|
||||
|
||||
const appendParts: string[] = [];
|
||||
if (promptAdditions.length > 0) appendParts.push(promptAdditions.join('\n\n'));
|
||||
if (historyPromptSection) appendParts.push(historyPromptSection);
|
||||
const appendSystemPrompt = appendParts.length > 0 ? appendParts.join('\n\n') : undefined;
|
||||
|
||||
// Construct a resource loader that injects the configured system prompt
|
||||
const resourceLoader = new DefaultResourceLoader({
|
||||
@@ -313,6 +338,92 @@ export class AgentService implements OnModuleDestroy {
|
||||
return session;
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate token count for a string using a rough 4-chars-per-token heuristic.
|
||||
*/
|
||||
private estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a conversation history section for injection into the system prompt.
|
||||
* Implements M1-004 (history loading) and M1-005 (context window management).
|
||||
*
|
||||
* - Formats messages as a readable conversation transcript.
|
||||
* - If the full history exceeds 80% of the model's context window, older messages
|
||||
* are summarized and only the most recent messages are kept verbatim.
|
||||
* - Summarization is a simple extractive approach (no LLM required).
|
||||
*/
|
||||
private buildHistoryPromptSection(
|
||||
history: ConversationHistoryMessage[],
|
||||
contextWindow: number,
|
||||
sessionId: string,
|
||||
): string {
|
||||
const TOKEN_BUDGET = Math.floor(contextWindow * 0.8);
|
||||
const HISTORY_HEADER = '## Conversation History (resumed session)\n\n';
|
||||
|
||||
const formatMessage = (msg: ConversationHistoryMessage): string => {
|
||||
const roleLabel =
|
||||
msg.role === 'user' ? 'User' : msg.role === 'assistant' ? 'Assistant' : 'System';
|
||||
return `**${roleLabel}:** ${msg.content}`;
|
||||
};
|
||||
|
||||
const formatted = history.map((msg) => formatMessage(msg));
|
||||
const fullHistory = formatted.join('\n\n');
|
||||
const fullTokens = this.estimateTokens(HISTORY_HEADER + fullHistory);
|
||||
|
||||
if (fullTokens <= TOKEN_BUDGET) {
|
||||
this.logger.debug(
|
||||
`Session ${sessionId}: injecting full history (${history.length} msgs, ~${fullTokens} tokens)`,
|
||||
);
|
||||
return HISTORY_HEADER + fullHistory;
|
||||
}
|
||||
|
||||
// History exceeds budget — summarize oldest messages, keep recent verbatim
|
||||
this.logger.log(
|
||||
`Session ${sessionId}: history (~${fullTokens} tokens) exceeds ${TOKEN_BUDGET} token budget; summarizing oldest messages`,
|
||||
);
|
||||
|
||||
// Reserve 20% of the budget for the summary prefix, rest for verbatim messages
|
||||
const SUMMARY_RESERVE = Math.floor(TOKEN_BUDGET * 0.2);
|
||||
const verbatimBudget = TOKEN_BUDGET - SUMMARY_RESERVE;
|
||||
|
||||
let verbatimTokens = 0;
|
||||
let verbatimCutIndex = history.length;
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
const t = this.estimateTokens(formatted[i]!);
|
||||
if (verbatimTokens + t > verbatimBudget) break;
|
||||
verbatimTokens += t;
|
||||
verbatimCutIndex = i;
|
||||
}
|
||||
|
||||
const summarizedMessages = history.slice(0, verbatimCutIndex);
|
||||
const verbatimMessages = history.slice(verbatimCutIndex);
|
||||
|
||||
let summaryText = '';
|
||||
if (summarizedMessages.length > 0) {
|
||||
const topics = summarizedMessages
|
||||
.filter((m) => m.role === 'user')
|
||||
.map((m) => m.content.slice(0, 120).replace(/\n/g, ' '))
|
||||
.join('; ');
|
||||
summaryText =
|
||||
`**Previous conversation summary** (${summarizedMessages.length} messages omitted for brevity):\n` +
|
||||
`Topics discussed: ${topics || '(no user messages in summarized portion)'}`;
|
||||
}
|
||||
|
||||
const verbatimSection = verbatimMessages.map((m) => formatMessage(m)).join('\n\n');
|
||||
|
||||
const parts: string[] = [HISTORY_HEADER];
|
||||
if (summaryText) parts.push(summaryText);
|
||||
if (verbatimSection) parts.push(verbatimSection);
|
||||
|
||||
const result = parts.join('\n\n');
|
||||
this.logger.log(
|
||||
`Session ${sessionId}: summarized ${summarizedMessages.length} messages, kept ${verbatimMessages.length} verbatim (~${this.estimateTokens(result)} tokens)`,
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
private resolveModel(options?: AgentSessionOptions) {
|
||||
if (!options?.provider && !options?.modelId) {
|
||||
return this.providerService.getDefaultModel() ?? null;
|
||||
|
||||
@@ -14,7 +14,7 @@ import type { AgentSessionEvent } from '@mariozechner/pi-coding-agent';
|
||||
import type { Auth } from '@mosaic/auth';
|
||||
import type { Brain } from '@mosaic/brain';
|
||||
import type { SetThinkingPayload, SlashCommandPayload, SystemReloadPayload } from '@mosaic/types';
|
||||
import { AgentService } from '../agent/agent.service.js';
|
||||
import { AgentService, type ConversationHistoryMessage } from '../agent/agent.service.js';
|
||||
import { AUTH } from '../auth/auth.tokens.js';
|
||||
import { BRAIN } from '../brain/brain.tokens.js';
|
||||
import { CommandRegistryService } from '../commands/command-registry.service.js';
|
||||
@@ -100,12 +100,22 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
|
||||
try {
|
||||
let agentSession = this.agentService.getSession(conversationId);
|
||||
if (!agentSession) {
|
||||
// When resuming an existing conversation, load prior messages to inject as context (M1-004)
|
||||
const conversationHistory = await this.loadConversationHistory(conversationId, userId);
|
||||
|
||||
agentSession = await this.agentService.createSession(conversationId, {
|
||||
provider: data.provider,
|
||||
modelId: data.modelId,
|
||||
agentConfigId: data.agentId,
|
||||
userId,
|
||||
conversationHistory: conversationHistory.length > 0 ? conversationHistory : undefined,
|
||||
});
|
||||
|
||||
if (conversationHistory.length > 0) {
|
||||
this.logger.log(
|
||||
`Loaded ${conversationHistory.length} prior messages for conversation=${conversationId}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
@@ -275,6 +285,35 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load prior conversation messages from DB for context injection on session resume (M1-004).
|
||||
* Returns an empty array when no history exists, the conversation is not owned by the user,
|
||||
* or userId is not provided.
|
||||
*/
|
||||
private async loadConversationHistory(
|
||||
conversationId: string,
|
||||
userId: string | undefined,
|
||||
): Promise<ConversationHistoryMessage[]> {
|
||||
if (!userId) return [];
|
||||
|
||||
try {
|
||||
const messages = await this.brain.conversations.findMessages(conversationId, userId);
|
||||
if (messages.length === 0) return [];
|
||||
|
||||
return messages.map((msg) => ({
|
||||
role: msg.role as 'user' | 'assistant' | 'system',
|
||||
content: msg.content,
|
||||
createdAt: msg.createdAt,
|
||||
}));
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
`Failed to load conversation history for conversation=${conversationId}`,
|
||||
err instanceof Error ? err.stack : String(err),
|
||||
);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
private relayEvent(client: Socket, conversationId: string, event: AgentSessionEvent): void {
|
||||
if (!client.connected) {
|
||||
this.logger.warn(
|
||||
|
||||
Reference in New Issue
Block a user