feat(gateway): load conversation history on session resume (M1-004, M1-005)
When a session is created for a conversation that already has messages in the DB, prior messages are loaded via ConversationsRepo.findMessages() and injected into the agent session as a system prompt addition so the agent has full context of the prior exchange. Context window management (M1-005): if the full history would exceed 80% of the model's context window (estimated at ~4 chars/token), older messages are summarized as a simple extractive summary and only the most recent messages are kept verbatim. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,13 @@ import { SystemOverrideService } from '../preferences/system-override.service.js
|
|||||||
import { PreferencesService } from '../preferences/preferences.service.js';
|
import { PreferencesService } from '../preferences/preferences.service.js';
|
||||||
import { SessionGCService } from '../gc/session-gc.service.js';
|
import { SessionGCService } from '../gc/session-gc.service.js';
|
||||||
|
|
||||||
|
/**
 * A single message from DB conversation history, used for context injection
 * when an agent session is resumed.
 */
export interface ConversationHistoryMessage {
  /** Who authored the message; determines the speaker label in the injected transcript. */
  role: 'user' | 'assistant' | 'system';
  /** Raw message text as stored. */
  content: string;
  /** Timestamp of the stored message. */
  createdAt: Date;
}
|
||||||
|
|
||||||
export interface AgentSessionOptions {
|
export interface AgentSessionOptions {
|
||||||
provider?: string;
|
provider?: string;
|
||||||
modelId?: string;
|
modelId?: string;
|
||||||
@@ -60,6 +67,12 @@ export interface AgentSessionOptions {
|
|||||||
agentConfigId?: string;
|
agentConfigId?: string;
|
||||||
/** ID of the user who owns this session. Used for preferences and system override lookups. */
|
/** ID of the user who owns this session. Used for preferences and system override lookups. */
|
||||||
userId?: string;
|
userId?: string;
|
||||||
|
/**
|
||||||
|
* Prior conversation messages to inject as context when resuming a session.
|
||||||
|
* These messages are formatted and appended to the system prompt so the
|
||||||
|
* agent is aware of what was discussed in previous sessions.
|
||||||
|
*/
|
||||||
|
conversationHistory?: ConversationHistoryMessage[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface AgentSession {
|
export interface AgentSession {
|
||||||
@@ -244,8 +257,20 @@ export class AgentService implements OnModuleDestroy {
|
|||||||
// Build system prompt: platform prompt + skill additions appended
|
// Build system prompt: platform prompt + skill additions appended
|
||||||
const platformPrompt =
|
const platformPrompt =
|
||||||
mergedOptions?.systemPrompt ?? process.env['AGENT_SYSTEM_PROMPT'] ?? undefined;
|
mergedOptions?.systemPrompt ?? process.env['AGENT_SYSTEM_PROMPT'] ?? undefined;
|
||||||
const appendSystemPrompt =
|
|
||||||
promptAdditions.length > 0 ? promptAdditions.join('\n\n') : undefined;
|
// Format conversation history for context injection (M1-004 / M1-005)
|
||||||
|
const historyPromptSection = mergedOptions?.conversationHistory?.length
|
||||||
|
? this.buildHistoryPromptSection(
|
||||||
|
mergedOptions.conversationHistory,
|
||||||
|
model?.contextWindow ?? 8192,
|
||||||
|
sessionId,
|
||||||
|
)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
|
const appendParts: string[] = [];
|
||||||
|
if (promptAdditions.length > 0) appendParts.push(promptAdditions.join('\n\n'));
|
||||||
|
if (historyPromptSection) appendParts.push(historyPromptSection);
|
||||||
|
const appendSystemPrompt = appendParts.length > 0 ? appendParts.join('\n\n') : undefined;
|
||||||
|
|
||||||
// Construct a resource loader that injects the configured system prompt
|
// Construct a resource loader that injects the configured system prompt
|
||||||
const resourceLoader = new DefaultResourceLoader({
|
const resourceLoader = new DefaultResourceLoader({
|
||||||
@@ -313,6 +338,92 @@ export class AgentService implements OnModuleDestroy {
|
|||||||
return session;
|
return session;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Estimate the token count of a string using a rough 4-chars-per-token heuristic.
 *
 * The length is measured in UTF-16 code units (`String.prototype.length`), so this
 * is only an approximation and not tied to any specific tokenizer.
 *
 * @param text Text to measure.
 * @returns `ceil(text.length / 4)`; `0` for an empty string.
 */
private estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}
|
||||||
|
|
||||||
|
/**
 * Build the conversation-history section that is added to the system prompt when a
 * session is resumed (M1-004), keeping it inside a token budget (M1-005).
 *
 * The budget is 80% of `contextWindow`, measured with `estimateTokens()`.
 *
 * - If the whole transcript fits, it is returned verbatim under the header.
 * - Otherwise the newest messages are kept verbatim (up to 80% of the budget, header
 *   included) and all older messages are replaced by an extractive summary built from
 *   the first 120 characters of each older *user* message. The summary is capped at
 *   the remaining 20% of the budget, so the overall result stays within the budget.
 * - If not even the newest message fits the verbatim budget, a truncated copy of it is
 *   kept so the agent never loses the most recent turn entirely.
 *
 * No LLM call is involved; summarization is purely string-based. For extremely small
 * context windows the fixed header/summary wording may still exceed the budget.
 *
 * @param history Prior messages; the last element is treated as the most recent.
 * @param contextWindow Context window size of the model, in tokens.
 * @param sessionId Session identifier, used only in log output.
 * @returns Markdown text starting with the history header.
 */
private buildHistoryPromptSection(
  history: ConversationHistoryMessage[],
  contextWindow: number,
  sessionId: string,
): string {
  const TOKEN_BUDGET = Math.floor(contextWindow * 0.8);
  const HISTORY_HEADER = '## Conversation History (resumed session)\n\n';
  const SEPARATOR = '\n\n';
  // Mirrors the 4-chars-per-token heuristic of estimateTokens(); used to convert a
  // token allowance back into a character allowance when truncating.
  const CHARS_PER_TOKEN = 4;
  const TOPIC_MAX_CHARS = 120;
  const ELLIPSIS = '...';
  const TRUNCATION_MARKER = ' [truncated]';

  const formatMessage = (msg: ConversationHistoryMessage): string => {
    const roleLabel =
      msg.role === 'user' ? 'User' : msg.role === 'assistant' ? 'Assistant' : 'System';
    return `**${roleLabel}:** ${msg.content}`;
  };

  // Cut `text` to at most `maxChars` UTF-16 code units without leaving a dangling
  // high surrogate (which would render as a broken character).
  const truncate = (text: string, maxChars: number): string => {
    if (maxChars <= 0) return '';
    if (text.length <= maxChars) return text;
    const lastUnit = text.charCodeAt(maxChars - 1);
    const splitsSurrogatePair = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return text.slice(0, splitsSurrogatePair ? maxChars - 1 : maxChars);
  };

  const formatted = history.map((msg) => formatMessage(msg));
  const fullHistory = formatted.join(SEPARATOR);
  const fullTokens = this.estimateTokens(HISTORY_HEADER + fullHistory);

  if (fullTokens <= TOKEN_BUDGET) {
    this.logger.debug(
      `Session ${sessionId}: injecting full history (${history.length} msgs, ~${fullTokens} tokens)`,
    );
    return HISTORY_HEADER + fullHistory;
  }

  // History exceeds budget — summarize oldest messages, keep recent verbatim
  this.logger.log(
    `Session ${sessionId}: history (~${fullTokens} tokens) exceeds ${TOKEN_BUDGET} token budget; summarizing oldest messages`,
  );

  // Reserve 20% of the budget for the summary prefix; the rest, minus the header,
  // is available for verbatim messages.
  const SUMMARY_RESERVE = Math.floor(TOKEN_BUDGET * 0.2);
  const separatorTokens = this.estimateTokens(SEPARATOR);
  const verbatimBudget = Math.max(
    0,
    TOKEN_BUDGET - SUMMARY_RESERVE - this.estimateTokens(HISTORY_HEADER),
  );

  // Walk backwards from the newest message, taking messages while they still fit.
  // Each message is charged one separator so the joined output is accounted for.
  let verbatimTokens = 0;
  let verbatimCutIndex = history.length;
  for (let i = formatted.length - 1; i >= 0; i--) {
    const cost = this.estimateTokens(formatted[i] ?? '') + separatorTokens;
    if (verbatimTokens + cost > verbatimBudget) break;
    verbatimTokens += cost;
    verbatimCutIndex = i;
  }

  let verbatimParts = formatted.slice(verbatimCutIndex);

  // Nothing fit: the newest message alone is larger than the verbatim budget.
  // Keep a truncated copy rather than dropping the latest turn completely.
  const newest = formatted[formatted.length - 1];
  if (verbatimParts.length === 0 && newest !== undefined) {
    const room =
      (verbatimBudget - separatorTokens) * CHARS_PER_TOKEN - TRUNCATION_MARKER.length;
    if (room > 0) {
      verbatimParts = [truncate(newest, room) + TRUNCATION_MARKER];
      verbatimCutIndex = history.length - 1;
    }
  }

  const summarizedMessages = history.slice(0, verbatimCutIndex);

  let summaryText = '';
  if (summarizedMessages.length > 0) {
    const summaryIntro =
      `**Previous conversation summary** (${summarizedMessages.length} messages omitted for brevity):\n` +
      'Topics discussed: ';
    const allTopics = summarizedMessages
      .filter((m) => m.role === 'user')
      .map((m) => truncate(m.content, TOPIC_MAX_CHARS).replace(/\r?\n/g, ' '))
      .join('; ');

    // Cap the topic list so the summary cannot outgrow its reserved share.
    const topicChars =
      (SUMMARY_RESERVE - separatorTokens) * CHARS_PER_TOKEN - summaryIntro.length;
    let topics = allTopics;
    if (allTopics.length > 0 && allTopics.length > topicChars) {
      topics = truncate(allTopics, topicChars - ELLIPSIS.length) + ELLIPSIS;
    }

    summaryText = summaryIntro + (topics || '(no user messages in summarized portion)');
  }

  // The header already ends with a blank line, so only the body parts are joined;
  // this keeps the layout identical to the full-history path.
  const body = [summaryText, ...verbatimParts]
    .filter((part) => part.length > 0)
    .join(SEPARATOR);
  const result = HISTORY_HEADER + body;

  this.logger.log(
    `Session ${sessionId}: summarized ${summarizedMessages.length} messages, kept ${verbatimParts.length} verbatim (~${this.estimateTokens(result)} tokens)`,
  );
  return result;
}
|
||||||
|
|
||||||
private resolveModel(options?: AgentSessionOptions) {
|
private resolveModel(options?: AgentSessionOptions) {
|
||||||
if (!options?.provider && !options?.modelId) {
|
if (!options?.provider && !options?.modelId) {
|
||||||
return this.providerService.getDefaultModel() ?? null;
|
return this.providerService.getDefaultModel() ?? null;
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import type { AgentSessionEvent } from '@mariozechner/pi-coding-agent';
|
|||||||
import type { Auth } from '@mosaic/auth';
|
import type { Auth } from '@mosaic/auth';
|
||||||
import type { Brain } from '@mosaic/brain';
|
import type { Brain } from '@mosaic/brain';
|
||||||
import type { SetThinkingPayload, SlashCommandPayload, SystemReloadPayload } from '@mosaic/types';
|
import type { SetThinkingPayload, SlashCommandPayload, SystemReloadPayload } from '@mosaic/types';
|
||||||
import { AgentService } from '../agent/agent.service.js';
|
import { AgentService, type ConversationHistoryMessage } from '../agent/agent.service.js';
|
||||||
import { AUTH } from '../auth/auth.tokens.js';
|
import { AUTH } from '../auth/auth.tokens.js';
|
||||||
import { BRAIN } from '../brain/brain.tokens.js';
|
import { BRAIN } from '../brain/brain.tokens.js';
|
||||||
import { CommandRegistryService } from '../commands/command-registry.service.js';
|
import { CommandRegistryService } from '../commands/command-registry.service.js';
|
||||||
@@ -100,12 +100,22 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
|
|||||||
try {
|
try {
|
||||||
let agentSession = this.agentService.getSession(conversationId);
|
let agentSession = this.agentService.getSession(conversationId);
|
||||||
if (!agentSession) {
|
if (!agentSession) {
|
||||||
|
// When resuming an existing conversation, load prior messages to inject as context (M1-004)
|
||||||
|
const conversationHistory = await this.loadConversationHistory(conversationId, userId);
|
||||||
|
|
||||||
agentSession = await this.agentService.createSession(conversationId, {
|
agentSession = await this.agentService.createSession(conversationId, {
|
||||||
provider: data.provider,
|
provider: data.provider,
|
||||||
modelId: data.modelId,
|
modelId: data.modelId,
|
||||||
agentConfigId: data.agentId,
|
agentConfigId: data.agentId,
|
||||||
userId,
|
userId,
|
||||||
|
conversationHistory: conversationHistory.length > 0 ? conversationHistory : undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (conversationHistory.length > 0) {
|
||||||
|
this.logger.log(
|
||||||
|
`Loaded ${conversationHistory.length} prior messages for conversation=${conversationId}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
this.logger.error(
|
this.logger.error(
|
||||||
@@ -275,6 +285,35 @@ export class ChatGateway implements OnGatewayInit, OnGatewayConnection, OnGatewa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Load prior conversation messages from the DB for context injection on session
 * resume (M1-004).
 *
 * Best-effort: any failure is logged and an empty array is returned so that session
 * creation can proceed without history.
 *
 * Returns an empty array when `userId` is not provided, when no messages are found,
 * or when the lookup throws. NOTE(review): ownership scoping is presumably enforced
 * by `findMessages(conversationId, userId)` — confirm against the repository.
 *
 * Messages whose stored role is not `user`, `assistant` or `system` are skipped
 * (and counted in a warning) instead of being force-cast, because downstream
 * formatting would otherwise label them as "System".
 *
 * @param conversationId Conversation whose messages should be loaded.
 * @param userId Requesting user; when undefined no lookup is performed.
 * @returns Messages in the order returned by the repository.
 */
private async loadConversationHistory(
  conversationId: string,
  userId: string | undefined,
): Promise<ConversationHistoryMessage[]> {
  if (!userId) return [];

  const isHistoryRole = (role: unknown): role is ConversationHistoryMessage['role'] =>
    role === 'user' || role === 'assistant' || role === 'system';

  try {
    const messages = await this.brain.conversations.findMessages(conversationId, userId);

    const history: ConversationHistoryMessage[] = [];
    let skipped = 0;
    for (const msg of messages) {
      const role: unknown = msg.role;
      if (!isHistoryRole(role)) {
        skipped += 1;
        continue;
      }
      history.push({
        role,
        content: msg.content,
        createdAt: msg.createdAt,
      });
    }

    if (skipped > 0) {
      this.logger.warn(
        `Skipped ${skipped} message(s) with unsupported role while loading history for conversation=${conversationId}`,
      );
    }

    return history;
  } catch (err) {
    this.logger.error(
      `Failed to load conversation history for conversation=${conversationId}`,
      err instanceof Error ? err.stack : String(err),
    );
    return [];
  }
}
|
||||||
|
|
||||||
private relayEvent(client: Socket, conversationId: string, event: AgentSessionEvent): void {
|
private relayEvent(client: Socket, conversationId: string, event: AgentSessionEvent): void {
|
||||||
if (!client.connected) {
|
if (!client.connected) {
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
|
|||||||
Reference in New Issue
Block a user