feat(gateway): load conversation history on session resume (M1-004, M1-005) (#301)
Some checks failed
ci/woodpecker/push/ci Pipeline failed

Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
This commit was merged in pull request #301.
This commit is contained in:
2026-03-21 21:00:13 +00:00
committed by jason.woltje
parent ad06e00f99
commit eddcca7533
2 changed files with 153 additions and 3 deletions

View File

@@ -28,6 +28,13 @@ import { SystemOverrideService } from '../preferences/system-override.service.js
import { PreferencesService } from '../preferences/preferences.service.js';
import { SessionGCService } from '../gc/session-gc.service.js';
/**
 * A single message from DB conversation history, used for context injection.
 *
 * Arrays of these are supplied through `AgentSessionOptions.conversationHistory`
 * and rendered into a transcript section of the system prompt when a session
 * is resumed.
 */
export interface ConversationHistoryMessage {
/** Author of the message; selects the `User` / `Assistant` / `System` label used in the transcript. */
role: 'user' | 'assistant' | 'system';
/** Message text, placed after the role label when the transcript is rendered. */
content: string;
/** Timestamp of the message — presumably its creation time in the DB; confirm against the code that loads history. */
createdAt: Date;
}
export interface AgentSessionOptions {
provider?: string;
modelId?: string;
@@ -60,6 +67,12 @@ export interface AgentSessionOptions {
agentConfigId?: string;
/** ID of the user who owns this session. Used for preferences and system override lookups. */
userId?: string;
/**
* Prior conversation messages to inject as context when resuming a session.
* These messages are formatted into a history section that is appended to the
* system prompt (after any skill prompt additions) so the agent is aware of
* what was discussed in previous sessions.
*/
conversationHistory?: ConversationHistoryMessage[];
}
export interface AgentSession {
@@ -244,8 +257,20 @@ export class AgentService implements OnModuleDestroy {
// Build system prompt: platform prompt + skill additions appended
const platformPrompt =
mergedOptions?.systemPrompt ?? process.env['AGENT_SYSTEM_PROMPT'] ?? undefined;
const appendSystemPrompt =
promptAdditions.length > 0 ? promptAdditions.join('\n\n') : undefined;
// Format conversation history for context injection (M1-004 / M1-005)
const historyPromptSection = mergedOptions?.conversationHistory?.length
? this.buildHistoryPromptSection(
mergedOptions.conversationHistory,
model?.contextWindow ?? 8192,
sessionId,
)
: undefined;
const appendParts: string[] = [];
if (promptAdditions.length > 0) appendParts.push(promptAdditions.join('\n\n'));
if (historyPromptSection) appendParts.push(historyPromptSection);
const appendSystemPrompt = appendParts.length > 0 ? appendParts.join('\n\n') : undefined;
// Construct a resource loader that injects the configured system prompt
const resourceLoader = new DefaultResourceLoader({
@@ -313,6 +338,92 @@ export class AgentService implements OnModuleDestroy {
return session;
}
/**
 * Approximate the number of tokens a piece of text will occupy.
 *
 * Applies a coarse four-characters-per-token rule and rounds up, so no
 * tokenizer is needed. Length is measured with `String.prototype.length`
 * (UTF-16 code units).
 */
private estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
/**
 * Build a conversation history section for injection into the system prompt.
 * Implements M1-004 (history loading) and M1-005 (context window management).
 *
 * - Formats messages as a readable conversation transcript under a fixed header.
 * - If the header plus full transcript fits within 80% of the model's context
 *   window (per the `estimateTokens` heuristic), it is returned unchanged.
 * - Otherwise the newest messages are kept verbatim, using up to 80% of that
 *   budget, and all older messages are replaced by an extractive summary
 *   (no LLM required) listing the first 120 characters of each older user
 *   message.
 * - The summary's topic list is capped to the remaining 20% of the budget.
 *   When it does not fit, the newest topics are kept and a note records how
 *   many earlier topics were dropped, so the section cannot grow without bound.
 *
 * Known limitation: if the newest message alone is larger than the verbatim
 * budget, no message is kept verbatim and only the summary is returned.
 *
 * @param history Messages to render. The end of the array is treated as the
 *   most recent part of the conversation.
 * @param contextWindow Model context window size in tokens.
 * @param sessionId Session identifier, used only in log output.
 * @returns The header followed by the transcript and/or summary.
 */
private buildHistoryPromptSection(
  history: ConversationHistoryMessage[],
  contextWindow: number,
  sessionId: string,
): string {
  const TOKEN_BUDGET = Math.floor(contextWindow * 0.8);
  const HISTORY_HEADER = '## Conversation History (resumed session)\n\n';
  const SECTION_SEPARATOR = '\n\n';
  // Tokens held back for the "(N earlier topics omitted)" note when topics are dropped.
  const OMISSION_NOTE_ALLOWANCE = 16;

  const formatMessage = (msg: ConversationHistoryMessage): string => {
    const roleLabel =
      msg.role === 'user' ? 'User' : msg.role === 'assistant' ? 'Assistant' : 'System';
    return `**${roleLabel}:** ${msg.content}`;
  };

  const formatted = history.map((msg) => formatMessage(msg));
  const fullHistory = formatted.join(SECTION_SEPARATOR);
  const fullTokens = this.estimateTokens(HISTORY_HEADER + fullHistory);

  if (fullTokens <= TOKEN_BUDGET) {
    this.logger.debug(
      `Session ${sessionId}: injecting full history (${history.length} msgs, ~${fullTokens} tokens)`,
    );
    return HISTORY_HEADER + fullHistory;
  }

  // History exceeds budget — summarize oldest messages, keep recent verbatim
  this.logger.log(
    `Session ${sessionId}: history (~${fullTokens} tokens) exceeds ${TOKEN_BUDGET} token budget; summarizing oldest messages`,
  );

  // Reserve 20% of the budget for the summary prefix, rest for verbatim messages
  const SUMMARY_RESERVE = Math.floor(TOKEN_BUDGET * 0.2);
  const verbatimBudget = TOKEN_BUDGET - SUMMARY_RESERVE;

  // Walk backwards from the newest message, keeping a contiguous tail that fits.
  let verbatimTokens = 0;
  let verbatimCutIndex = history.length;
  for (let i = history.length - 1; i >= 0; i--) {
    const t = this.estimateTokens(formatted[i] ?? '');
    if (verbatimTokens + t > verbatimBudget) break;
    verbatimTokens += t;
    verbatimCutIndex = i;
  }

  const summarizedMessages = history.slice(0, verbatimCutIndex);
  const verbatimCount = history.length - verbatimCutIndex;
  // Reuse the already-formatted strings instead of formatting the tail again.
  const verbatimSection = formatted.slice(verbatimCutIndex).join(SECTION_SEPARATOR);

  let summaryText = '';
  if (summarizedMessages.length > 0) {
    const summaryIntro =
      `**Previous conversation summary** (${summarizedMessages.length} messages omitted for brevity):\n` +
      'Topics discussed: ';

    const snippets = summarizedMessages
      .filter((m) => m.role === 'user')
      .map((m) => m.content.slice(0, 120).replace(/\n/g, ' '));

    // Enforce the summary reserve: without a cap, a long history produces a
    // topic list that by itself overflows the budget this method is enforcing.
    let topicTokensLeft =
      SUMMARY_RESERVE -
      this.estimateTokens(HISTORY_HEADER + summaryIntro) -
      OMISSION_NOTE_ALLOWANCE;

    // Prefer the most recent topics; collect newest-first, then restore order.
    const keptNewestFirst: string[] = [];
    for (const snippet of [...snippets].reverse()) {
      const cost = this.estimateTokens(`${snippet}; `);
      if (cost > topicTokensLeft) break;
      topicTokensLeft -= cost;
      keptNewestFirst.push(snippet);
    }

    const droppedTopics = snippets.length - keptNewestFirst.length;
    const topicParts = keptNewestFirst.reverse();
    if (droppedTopics > 0) {
      topicParts.unshift(`(${droppedTopics} earlier topics omitted)`);
    }
    const topics = topicParts.join('; ');

    summaryText = summaryIntro + (topics || '(no user messages in summarized portion)');
  }

  // HISTORY_HEADER already ends with a blank line, so it is concatenated rather
  // than joined; this matches the spacing of the full-history path above.
  const sections = [summaryText, verbatimSection].filter((section) => section.length > 0);
  const result = HISTORY_HEADER + sections.join(SECTION_SEPARATOR);

  this.logger.log(
    `Session ${sessionId}: summarized ${summarizedMessages.length} messages, kept ${verbatimCount} verbatim (~${this.estimateTokens(result)} tokens)`,
  );

  return result;
}
private resolveModel(options?: AgentSessionOptions) {
if (!options?.provider && !options?.modelId) {
return this.providerService.getDefaultModel() ?? null;