feat(gateway): load conversation history on session resume (M1-004, M1-005)
When a session is created for a conversation that already has messages in the DB, prior messages are loaded via ConversationsRepo.findMessages() and injected into the agent session as a system prompt addition so the agent has full context of the prior exchange. Context window management (M1-005): if the full history would exceed 80% of the model's context window (estimated at ~4 chars/token), older messages are summarized as a simple extractive summary and only the most recent messages are kept verbatim. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,13 @@ import { SystemOverrideService } from '../preferences/system-override.service.js
|
||||
import { PreferencesService } from '../preferences/preferences.service.js';
|
||||
import { SessionGCService } from '../gc/session-gc.service.js';
|
||||
|
||||
/** A single message from DB conversation history, used for context injection. */
export interface ConversationHistoryMessage {
  /** Author of the message in the original exchange. */
  role: 'user' | 'assistant' | 'system';
  /** Message text as persisted in the conversations store. */
  content: string;
  /** Persistence timestamp; callers appear to supply messages in chronological order — confirm. */
  createdAt: Date;
}
|
||||
|
||||
export interface AgentSessionOptions {
|
||||
provider?: string;
|
||||
modelId?: string;
|
||||
@@ -60,6 +67,12 @@ export interface AgentSessionOptions {
|
||||
agentConfigId?: string;
|
||||
/** ID of the user who owns this session. Used for preferences and system override lookups. */
|
||||
userId?: string;
|
||||
/**
|
||||
* Prior conversation messages to inject as context when resuming a session.
|
||||
* These messages are formatted and prepended to the system prompt so the
|
||||
* agent is aware of what was discussed in previous sessions.
|
||||
*/
|
||||
conversationHistory?: ConversationHistoryMessage[];
|
||||
}
|
||||
|
||||
export interface AgentSession {
|
||||
@@ -244,8 +257,20 @@ export class AgentService implements OnModuleDestroy {
|
||||
// Build system prompt: platform prompt + skill additions appended
|
||||
const platformPrompt =
|
||||
mergedOptions?.systemPrompt ?? process.env['AGENT_SYSTEM_PROMPT'] ?? undefined;
|
||||
const appendSystemPrompt =
|
||||
promptAdditions.length > 0 ? promptAdditions.join('\n\n') : undefined;
|
||||
|
||||
// Format conversation history for context injection (M1-004 / M1-005)
|
||||
const historyPromptSection = mergedOptions?.conversationHistory?.length
|
||||
? this.buildHistoryPromptSection(
|
||||
mergedOptions.conversationHistory,
|
||||
model?.contextWindow ?? 8192,
|
||||
sessionId,
|
||||
)
|
||||
: undefined;
|
||||
|
||||
const appendParts: string[] = [];
|
||||
if (promptAdditions.length > 0) appendParts.push(promptAdditions.join('\n\n'));
|
||||
if (historyPromptSection) appendParts.push(historyPromptSection);
|
||||
const appendSystemPrompt = appendParts.length > 0 ? appendParts.join('\n\n') : undefined;
|
||||
|
||||
// Construct a resource loader that injects the configured system prompt
|
||||
const resourceLoader = new DefaultResourceLoader({
|
||||
@@ -313,6 +338,92 @@ export class AgentService implements OnModuleDestroy {
|
||||
return session;
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate token count for a string using a rough 4-chars-per-token heuristic.
|
||||
*/
|
||||
private estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a conversation history section for injection into the system prompt.
|
||||
* Implements M1-004 (history loading) and M1-005 (context window management).
|
||||
*
|
||||
* - Formats messages as a readable conversation transcript.
|
||||
* - If the full history exceeds 80% of the model's context window, older messages
|
||||
* are summarized and only the most recent messages are kept verbatim.
|
||||
* - Summarization is a simple extractive approach (no LLM required).
|
||||
*/
|
||||
private buildHistoryPromptSection(
|
||||
history: ConversationHistoryMessage[],
|
||||
contextWindow: number,
|
||||
sessionId: string,
|
||||
): string {
|
||||
const TOKEN_BUDGET = Math.floor(contextWindow * 0.8);
|
||||
const HISTORY_HEADER = '## Conversation History (resumed session)\n\n';
|
||||
|
||||
const formatMessage = (msg: ConversationHistoryMessage): string => {
|
||||
const roleLabel =
|
||||
msg.role === 'user' ? 'User' : msg.role === 'assistant' ? 'Assistant' : 'System';
|
||||
return `**${roleLabel}:** ${msg.content}`;
|
||||
};
|
||||
|
||||
const formatted = history.map((msg) => formatMessage(msg));
|
||||
const fullHistory = formatted.join('\n\n');
|
||||
const fullTokens = this.estimateTokens(HISTORY_HEADER + fullHistory);
|
||||
|
||||
if (fullTokens <= TOKEN_BUDGET) {
|
||||
this.logger.debug(
|
||||
`Session ${sessionId}: injecting full history (${history.length} msgs, ~${fullTokens} tokens)`,
|
||||
);
|
||||
return HISTORY_HEADER + fullHistory;
|
||||
}
|
||||
|
||||
// History exceeds budget — summarize oldest messages, keep recent verbatim
|
||||
this.logger.log(
|
||||
`Session ${sessionId}: history (~${fullTokens} tokens) exceeds ${TOKEN_BUDGET} token budget; summarizing oldest messages`,
|
||||
);
|
||||
|
||||
// Reserve 20% of the budget for the summary prefix, rest for verbatim messages
|
||||
const SUMMARY_RESERVE = Math.floor(TOKEN_BUDGET * 0.2);
|
||||
const verbatimBudget = TOKEN_BUDGET - SUMMARY_RESERVE;
|
||||
|
||||
let verbatimTokens = 0;
|
||||
let verbatimCutIndex = history.length;
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
const t = this.estimateTokens(formatted[i]!);
|
||||
if (verbatimTokens + t > verbatimBudget) break;
|
||||
verbatimTokens += t;
|
||||
verbatimCutIndex = i;
|
||||
}
|
||||
|
||||
const summarizedMessages = history.slice(0, verbatimCutIndex);
|
||||
const verbatimMessages = history.slice(verbatimCutIndex);
|
||||
|
||||
let summaryText = '';
|
||||
if (summarizedMessages.length > 0) {
|
||||
const topics = summarizedMessages
|
||||
.filter((m) => m.role === 'user')
|
||||
.map((m) => m.content.slice(0, 120).replace(/\n/g, ' '))
|
||||
.join('; ');
|
||||
summaryText =
|
||||
`**Previous conversation summary** (${summarizedMessages.length} messages omitted for brevity):\n` +
|
||||
`Topics discussed: ${topics || '(no user messages in summarized portion)'}`;
|
||||
}
|
||||
|
||||
const verbatimSection = verbatimMessages.map((m) => formatMessage(m)).join('\n\n');
|
||||
|
||||
const parts: string[] = [HISTORY_HEADER];
|
||||
if (summaryText) parts.push(summaryText);
|
||||
if (verbatimSection) parts.push(verbatimSection);
|
||||
|
||||
const result = parts.join('\n\n');
|
||||
this.logger.log(
|
||||
`Session ${sessionId}: summarized ${summarizedMessages.length} messages, kept ${verbatimMessages.length} verbatim (~${this.estimateTokens(result)} tokens)`,
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
private resolveModel(options?: AgentSessionOptions) {
|
||||
if (!options?.provider && !options?.modelId) {
|
||||
return this.providerService.getDefaultModel() ?? null;
|
||||
|
||||
@@ -14,7 +14,7 @@ import type { AgentSessionEvent } from '@mariozechner/pi-coding-agent';
|
||||
import type { Auth } from '@mosaic/auth';
|
||||
import type { Brain } from '@mosaic/brain';
|
||||
import type { SetThinkingPayload, SlashCommandPayload, SystemReloadPayload } from '@mosaic/types';
|
||||
import { AgentService } from '../agent/agent.service.js';
|
||||
import { AgentService, type ConversationHistoryMessage } from '../agent/agent.service.js';
|
||||
import { AUTH } from '../auth/auth.tokens.js';
|
||||
import { BRAIN } from '../brain/brain.tokens.js';
|
||||
import { CommandRegistryService } from '../commands/command-registry.service.js';
|
||||
@@ -100,12 +100,22 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
|
||||
try {
|
||||
let agentSession = this.agentService.getSession(conversationId);
|
||||
if (!agentSession) {
|
||||
// When resuming an existing conversation, load prior messages to inject as context (M1-004)
|
||||
const conversationHistory = await this.loadConversationHistory(conversationId, userId);
|
||||
|
||||
agentSession = await this.agentService.createSession(conversationId, {
|
||||
provider: data.provider,
|
||||
modelId: data.modelId,
|
||||
agentConfigId: data.agentId,
|
||||
userId,
|
||||
conversationHistory: conversationHistory.length > 0 ? conversationHistory : undefined,
|
||||
});
|
||||
|
||||
if (conversationHistory.length > 0) {
|
||||
this.logger.log(
|
||||
`Loaded ${conversationHistory.length} prior messages for conversation=${conversationId}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
@@ -275,6 +285,35 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load prior conversation messages from DB for context injection on session resume (M1-004).
|
||||
* Returns an empty array when no history exists, the conversation is not owned by the user,
|
||||
* or userId is not provided.
|
||||
*/
|
||||
private async loadConversationHistory(
|
||||
conversationId: string,
|
||||
userId: string | undefined,
|
||||
): Promise<ConversationHistoryMessage[]> {
|
||||
if (!userId) return [];
|
||||
|
||||
try {
|
||||
const messages = await this.brain.conversations.findMessages(conversationId, userId);
|
||||
if (messages.length === 0) return [];
|
||||
|
||||
return messages.map((msg) => ({
|
||||
role: msg.role as 'user' | 'assistant' | 'system',
|
||||
content: msg.content,
|
||||
createdAt: msg.createdAt,
|
||||
}));
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
`Failed to load conversation history for conversation=${conversationId}`,
|
||||
err instanceof Error ? err.stack : String(err),
|
||||
);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
private relayEvent(client: Socket, conversationId: string, event: AgentSessionEvent): void {
|
||||
if (!client.connected) {
|
||||
this.logger.warn(
|
||||
|
||||
Reference in New Issue
Block a user