// NOTE(review): repository-viewer residue captured with this file; not part of the source.
//   CI status: ci/woodpecker/push/ci — pipeline failed
//   Co-authored-by: Jason Woltje <jason@diversecanvas.com>
//   Co-committed-by: Jason Woltje <jason@diversecanvas.com>
//   378 lines · 12 KiB · TypeScript
/**
 * M5-008: Session hardening verification tests.
 *
 * Verifies:
 * 1. /model command switches model → session:info reflects updated modelId
 * 2. /agent command switches agent config → system prompt / agentName changes
 * 3. Session resume binds to a conversation (history injected via conversationHistory option)
 * 4. Session metrics track token usage and message count correctly
 */
|
|
|
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import type {
  AgentSession,
  AgentSessionOptions,
  ConversationHistoryMessage,
} from '../agent/agent.service.js';
import type { SessionInfoDto, SessionMetrics, SessionTokenMetrics } from '../agent/session.dto.js';
|
|
|
|
// ---------------------------------------------------------------------------
// Helpers — minimal AgentSession fixture
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Builds a `SessionMetrics` fixture with every counter at zero and
 * `lastActivityAt` stamped with the current time as an ISO string.
 *
 * @param overrides - Fields that replace the defaults. The merge is shallow:
 *   supplying `tokens` swaps the whole token object, not individual counters.
 * @returns A new metrics object; the default `tokens` object is created per call.
 */
function makeMetrics(overrides?: Partial<SessionMetrics>): SessionMetrics {
  return {
    tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    modelSwitches: 0,
    messageCount: 0,
    lastActivityAt: new Date().toISOString(),
    // Spread last so caller-supplied fields win over the defaults above.
    ...overrides,
  };
}
|
|
|
|
/**
 * Builds a minimal `AgentSession` fixture for these tests.
 *
 * Defaults: id `session-001`, provider `anthropic`, an empty object cast to the
 * `piSession` type, empty listener/channel sets, a `vi.fn()` mock for
 * `unsubscribe`, and zeroed metrics from `makeMetrics()`.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 * @returns A new session object; sets, arrays and metrics are created per call,
 *   so two fixtures never share mutable state unless an override supplies it.
 */
function makeSession(overrides?: Partial<AgentSession>): AgentSession {
  return {
    id: 'session-001',
    provider: 'anthropic',
    modelId: 'claude-3-5-sonnet-20241022',
    // The tests in this file never touch piSession, so an empty stub is enough.
    piSession: {} as AgentSession['piSession'],
    listeners: new Set(),
    unsubscribe: vi.fn(),
    createdAt: Date.now(),
    promptCount: 0,
    channels: new Set(),
    skillPromptAdditions: [],
    sandboxDir: '/tmp',
    allowedTools: null,
    metrics: makeMetrics(),
    // Spread last so caller-supplied fields win over the defaults above.
    ...overrides,
  };
}
|
|
|
|
/**
 * Projects an `AgentSession` onto the `SessionInfoDto` shape used for
 * `session:info`.
 *
 * - `agentName` is included only when the session has a truthy `agentName`.
 * - `createdAt` is converted from the session's numeric timestamp to an ISO string.
 * - `channels` is copied from the session's set into an array.
 * - `durationMs` is the time elapsed since `createdAt`, measured at call time.
 * - `metrics` is a shallow copy: top-level counters are detached, but the nested
 *   `tokens` object is still the same reference as on the session.
 *
 * @param session - Session to describe.
 * @returns A new DTO object.
 */
function sessionToInfo(session: AgentSession): SessionInfoDto {
  return {
    id: session.id,
    provider: session.provider,
    modelId: session.modelId,
    // Omit the key entirely (rather than emitting `undefined`) when no agent is set.
    ...(session.agentName ? { agentName: session.agentName } : {}),
    createdAt: new Date(session.createdAt).toISOString(),
    promptCount: session.promptCount,
    channels: Array.from(session.channels),
    durationMs: Date.now() - session.createdAt,
    metrics: { ...session.metrics },
  };
}
|
|
|
|
// ---------------------------------------------------------------------------
// Replicated AgentService methods (tested in isolation without full DI setup)
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Switches the session to a different model.
 *
 * Mutates `session` in place: sets `modelId`, increments
 * `metrics.modelSwitches` by one, and stamps `metrics.lastActivityAt` with the
 * current time. The counter is bumped on every call, including when the new
 * id equals the current one.
 *
 * @param session - Session to mutate.
 * @param modelId - Model id to switch to.
 */
function updateSessionModel(session: AgentSession, modelId: string): void {
  session.modelId = modelId;
  session.metrics.modelSwitches += 1;
  session.metrics.lastActivityAt = new Date().toISOString();
}
|
|
|
|
/**
 * Applies an agent configuration to the session.
 *
 * Always overwrites `agentConfigId` and `agentName`. When `modelId` is truthy
 * the model is switched through `updateSessionModel`, which also counts as a
 * model switch in the metrics; an omitted, `undefined`, or empty `modelId`
 * leaves the session's model and metrics untouched.
 *
 * @param session - Session to mutate.
 * @param agentConfigId - Identifier of the agent configuration being applied.
 * @param agentName - Display name of the agent.
 * @param modelId - Optional model id carried by the agent configuration.
 */
function applyAgentConfig(
  session: AgentSession,
  agentConfigId: string,
  agentName: string,
  modelId?: string,
): void {
  session.agentConfigId = agentConfigId;
  session.agentName = agentName;
  if (modelId) {
    updateSessionModel(session, modelId);
  }
}
|
|
|
|
/**
 * Adds one turn's token usage to the session's running totals.
 *
 * Each field of `tokens` is added to the matching field of
 * `session.metrics.tokens`. `total` is accumulated from `tokens.total` as
 * given; it is not recomputed from the other fields. `lastActivityAt` is then
 * stamped with the current time.
 *
 * @param session - Session whose metrics are mutated in place.
 * @param tokens - Token counts to add.
 */
function recordTokenUsage(session: AgentSession, tokens: SessionTokenMetrics): void {
  session.metrics.tokens.input += tokens.input;
  session.metrics.tokens.output += tokens.output;
  session.metrics.tokens.cacheRead += tokens.cacheRead;
  session.metrics.tokens.cacheWrite += tokens.cacheWrite;
  session.metrics.tokens.total += tokens.total;
  session.metrics.lastActivityAt = new Date().toISOString();
}
|
|
|
|
/**
 * Counts one message against the session.
 *
 * Increments `metrics.messageCount` by one and stamps
 * `metrics.lastActivityAt` with the current time.
 *
 * @param session - Session whose metrics are mutated in place.
 */
function recordMessage(session: AgentSession): void {
  session.metrics.messageCount += 1;
  session.metrics.lastActivityAt = new Date().toISOString();
}
|
|
|
|
// ---------------------------------------------------------------------------
// 1. /model command — switches model → session:info updated
// ---------------------------------------------------------------------------
|
|
|
|
describe('/model command — model switch reflected in session:info', () => {
  let session: AgentSession;

  // Every test starts from a fresh fixture with zeroed metrics.
  beforeEach(() => {
    session = makeSession();
  });

  // Restore the real clock after every test, including ones that throw,
  // so a mocked Date never leaks into later tests.
  afterEach(() => {
    vi.useRealTimers();
  });

  it('updates modelId when /model is called with a model name', () => {
    updateSessionModel(session, 'claude-opus-4-5-20251001');

    expect(session.modelId).toBe('claude-opus-4-5-20251001');
  });

  it('increments modelSwitches metric after /model command', () => {
    expect(session.metrics.modelSwitches).toBe(0);

    updateSessionModel(session, 'gpt-4o');
    expect(session.metrics.modelSwitches).toBe(1);

    // Switching back to the original model still counts as a switch.
    updateSessionModel(session, 'claude-3-5-sonnet-20241022');
    expect(session.metrics.modelSwitches).toBe(2);
  });

  it('session:info DTO reflects the new modelId after switch', () => {
    updateSessionModel(session, 'claude-haiku-3-5-20251001');

    const info = sessionToInfo(session);

    expect(info.modelId).toBe('claude-haiku-3-5-20251001');
    expect(info.metrics.modelSwitches).toBe(1);
  });

  it('lastActivityAt is updated after model switch', () => {
    const before = session.metrics.lastActivityAt;
    // Move the mocked clock 1ms past "now" so the new ISO timestamp must differ
    // from the one stamped when the fixture was created.
    vi.setSystemTime(Date.now() + 1);
    updateSessionModel(session, 'new-model');

    expect(session.metrics.lastActivityAt).not.toBe(before);
  });
});
|
|
|
|
// ---------------------------------------------------------------------------
// 2. /agent command — switches agent config → system prompt / agentName updated
// ---------------------------------------------------------------------------
|
|
|
|
describe('/agent command — agent config applied to session', () => {
  let session: AgentSession;

  // Every test starts from a fresh fixture with no agent applied.
  beforeEach(() => {
    session = makeSession();
  });

  it('sets agentConfigId and agentName on the session', () => {
    applyAgentConfig(session, 'agent-uuid-001', 'CodeReviewer');

    expect(session.agentConfigId).toBe('agent-uuid-001');
    expect(session.agentName).toBe('CodeReviewer');
  });

  it('also updates modelId when agent config carries a model', () => {
    applyAgentConfig(session, 'agent-uuid-002', 'DataAnalyst', 'gpt-4o-mini');

    expect(session.agentName).toBe('DataAnalyst');
    expect(session.modelId).toBe('gpt-4o-mini');
    // The model change is routed through updateSessionModel, so it is counted.
    expect(session.metrics.modelSwitches).toBe(1);
  });

  it('does NOT update modelId when agent config has no model', () => {
    const originalModel = session.modelId;
    applyAgentConfig(session, 'agent-uuid-003', 'Planner', undefined);

    expect(session.modelId).toBe(originalModel);
    expect(session.metrics.modelSwitches).toBe(0);
  });

  it('session:info DTO reflects agentName after /agent switch', () => {
    applyAgentConfig(session, 'agent-uuid-004', 'DevBot');

    const info = sessionToInfo(session);

    expect(info.agentName).toBe('DevBot');
  });

  it('multiple /agent calls update to the latest agent', () => {
    applyAgentConfig(session, 'agent-001', 'FirstAgent');
    applyAgentConfig(session, 'agent-002', 'SecondAgent');

    expect(session.agentConfigId).toBe('agent-002');
    expect(session.agentName).toBe('SecondAgent');
  });
});
|
|
|
|
// ---------------------------------------------------------------------------
// 3. Session resume — binds to conversation via conversationHistory
// ---------------------------------------------------------------------------
|
|
|
|
describe('Session resume — binds to conversation', () => {
  it('conversationHistory option is preserved in session options', () => {
    const history: ConversationHistoryMessage[] = [
      {
        role: 'user',
        content: 'Hello, what is TypeScript?',
        createdAt: new Date('2026-01-01T00:01:00Z'),
      },
      {
        role: 'assistant',
        content: 'TypeScript is a typed superset of JavaScript.',
        createdAt: new Date('2026-01-01T00:01:05Z'),
      },
    ];

    const options: AgentSessionOptions = {
      conversationHistory: history,
      provider: 'anthropic',
      modelId: 'claude-3-5-sonnet-20241022',
    };

    expect(options.conversationHistory).toHaveLength(2);
    // Optional chaining instead of `!`: a missing entry yields `undefined`,
    // which fails the matcher with a readable diff instead of a TypeError.
    expect(options.conversationHistory?.[0]?.role).toBe('user');
    expect(options.conversationHistory?.[1]?.role).toBe('assistant');
  });

  it('session with conversationHistory option carries the conversation binding', () => {
    const CONV_ID = 'conv-resume-001';
    const history: ConversationHistoryMessage[] = [
      { role: 'user', content: 'Prior question', createdAt: new Date('2026-01-01T00:01:00Z') },
    ];

    // Simulate what ChatGateway does: pass conversationId + history to createSession
    const options: AgentSessionOptions = {
      conversationHistory: history,
    };

    // The session ID is the conversationId in the gateway
    const session = makeSession({ id: CONV_ID });

    expect(session.id).toBe(CONV_ID);
    expect(options.conversationHistory).toHaveLength(1);
  });

  it('empty conversationHistory is valid (new conversation)', () => {
    const options: AgentSessionOptions = {
      conversationHistory: [],
    };

    expect(options.conversationHistory).toHaveLength(0);
  });

  it('resumed session preserves all message roles', () => {
    const history: ConversationHistoryMessage[] = [
      { role: 'system', content: 'You are a helpful assistant.', createdAt: new Date() },
      { role: 'user', content: 'Question 1', createdAt: new Date() },
      { role: 'assistant', content: 'Answer 1', createdAt: new Date() },
      { role: 'user', content: 'Question 2', createdAt: new Date() },
    ];

    const roles = history.map((m) => m.role);
    expect(roles).toEqual(['system', 'user', 'assistant', 'user']);
  });
});
|
|
|
|
// ---------------------------------------------------------------------------
// 4. Session metrics — token usage and message count
// ---------------------------------------------------------------------------
|
|
|
|
describe('Session metrics — token usage and message count', () => {
  let session: AgentSession;

  // Every test starts from a fresh fixture with zeroed metrics.
  beforeEach(() => {
    session = makeSession();
  });

  // Restore the real clock after every test, including ones that throw,
  // so a mocked Date never leaks into later tests.
  afterEach(() => {
    vi.useRealTimers();
  });

  it('starts with zero metrics', () => {
    expect(session.metrics.tokens.input).toBe(0);
    expect(session.metrics.tokens.output).toBe(0);
    expect(session.metrics.tokens.total).toBe(0);
    expect(session.metrics.messageCount).toBe(0);
    expect(session.metrics.modelSwitches).toBe(0);
  });

  it('accumulates token usage across multiple turns', () => {
    recordTokenUsage(session, {
      input: 100,
      output: 50,
      cacheRead: 0,
      cacheWrite: 0,
      total: 150,
    });
    recordTokenUsage(session, {
      input: 200,
      output: 80,
      cacheRead: 10,
      cacheWrite: 5,
      total: 295,
    });

    expect(session.metrics.tokens.input).toBe(300);
    expect(session.metrics.tokens.output).toBe(130);
    expect(session.metrics.tokens.cacheRead).toBe(10);
    expect(session.metrics.tokens.cacheWrite).toBe(5);
    expect(session.metrics.tokens.total).toBe(445);
  });

  it('increments message count with each recordMessage call', () => {
    expect(session.metrics.messageCount).toBe(0);

    recordMessage(session);
    expect(session.metrics.messageCount).toBe(1);

    recordMessage(session);
    recordMessage(session);
    expect(session.metrics.messageCount).toBe(3);
  });

  it('session:info DTO exposes correct metrics snapshot', () => {
    recordTokenUsage(session, {
      input: 500,
      output: 100,
      cacheRead: 20,
      cacheWrite: 10,
      total: 630,
    });
    recordMessage(session);
    recordMessage(session);
    updateSessionModel(session, 'claude-haiku-3-5-20251001');

    const info = sessionToInfo(session);

    expect(info.metrics.tokens.input).toBe(500);
    expect(info.metrics.tokens.output).toBe(100);
    expect(info.metrics.tokens.total).toBe(630);
    expect(info.metrics.messageCount).toBe(2);
    expect(info.metrics.modelSwitches).toBe(1);
  });

  it('metrics are independent per session', () => {
    const sessionA = makeSession({ id: 'session-A' });
    const sessionB = makeSession({ id: 'session-B' });

    recordTokenUsage(sessionA, { input: 100, output: 50, cacheRead: 0, cacheWrite: 0, total: 150 });
    recordMessage(sessionA);

    // Session B should remain at zero
    expect(sessionB.metrics.tokens.input).toBe(0);
    expect(sessionB.metrics.messageCount).toBe(0);

    // Session A should have updated values
    expect(sessionA.metrics.tokens.input).toBe(100);
    expect(sessionA.metrics.messageCount).toBe(1);
  });

  it('lastActivityAt is updated after recording tokens', () => {
    const before = session.metrics.lastActivityAt;
    // Move the mocked clock forward so the new ISO timestamp must differ.
    vi.setSystemTime(new Date(Date.now() + 100));
    recordTokenUsage(session, { input: 10, output: 5, cacheRead: 0, cacheWrite: 0, total: 15 });

    expect(session.metrics.lastActivityAt).not.toBe(before);
  });

  it('lastActivityAt is updated after recording a message', () => {
    const before = session.metrics.lastActivityAt;
    // Move the mocked clock forward so the new ISO timestamp must differ.
    vi.setSystemTime(new Date(Date.now() + 100));
    recordMessage(session);

    expect(session.metrics.lastActivityAt).not.toBe(before);
  });
});
|