Implement a verification engine that determines whether an AI agent's work is truly complete by analyzing its outputs and detecting deferred-work patterns.

Strategies:
- FileChangeStrategy: Detect TODO/FIXME, placeholders, stubs
- TestOutputStrategy: Validate pass rates, coverage (85%), skipped tests
- BuildOutputStrategy: Detect TS errors, ESLint errors, build failures

Deferred work detection patterns:
- "follow-up", "to be added later"
- "incremental improvement", "future enhancement"
- "TODO: complete", "placeholder implementation"
- "stub", "work in progress", "partially implemented"

Features:
- Confidence scoring (0-100%)
- Verdict system: complete/incomplete/needs-review
- Actionable suggestions for improvements
- Strategy-based extensibility

Integration:
- Complements Quality Orchestrator (#134)
- Uses Quality Gate Config (#135)

Tests: 46 passing with 95.27% coverage

Fixes #136

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
307 lines
9.3 KiB
TypeScript
307 lines
9.3 KiB
TypeScript
import { describe, it, expect, beforeEach } from "vitest";
|
|
import { CompletionVerificationService } from "./completion-verification.service";
|
|
import { VerificationContext } from "./interfaces";
|
|
|
|
describe("CompletionVerificationService", () => {
|
|
let service: CompletionVerificationService;
|
|
let baseContext: VerificationContext;
|
|
|
|
// Before every test, replace the shared service with a new instance and the
// shared baseline context with a newly built object.
beforeEach(() => {
  const freshContext: VerificationContext = {
    taskId: "task-1",
    workspaceId: "workspace-1",
    agentId: "agent-1",
    claimMessage: "Completed task",
    filesChanged: ["src/feature.ts"],
    outputLogs: "Implementation complete",
    previousAttempts: 0,
  };

  service = new CompletionVerificationService();
  baseContext = freshContext;
});
|
|
|
|
// Exercises service.verify() end to end with contexts derived from the shared baseline.
describe("verify", () => {
  // Layers per-test overrides on top of the baseline context; evaluated at call
  // time so it always sees the object assigned by the enclosing beforeEach.
  const contextWith = (overrides: Partial<VerificationContext>): VerificationContext => ({
    ...baseContext,
    ...overrides,
  });

  it("should verify using all registered strategies", async () => {
    const outcome = await service.verify(
      contextWith({
        filesChanged: ["src/feature.ts", "src/feature.spec.ts"],
        testResults: { total: 10, passed: 10, failed: 0, skipped: 0, coverage: 90 },
        buildOutput: "Build successful",
      })
    );

    expect(outcome.verdict).toBe("complete");
    expect(outcome.isComplete).toBe(true);
    expect(outcome.confidence).toBeGreaterThan(80);
    expect(outcome.issues).toHaveLength(0);
  });

  it("should aggregate issues from all strategies", async () => {
    const outcome = await service.verify(
      contextWith({
        filesChanged: [],
        testResults: { total: 10, passed: 7, failed: 3, skipped: 0, coverage: 70 },
        buildOutput: "error TS2304: Cannot find name",
      })
    );

    expect(outcome.verdict).toBe("incomplete");
    expect(outcome.isComplete).toBe(false);
    expect(outcome.issues.length).toBeGreaterThan(0);

    // One issue of each kind is expected: no files, failing tests, build error.
    const reportedTypes = outcome.issues.map((issue) => issue.type);
    expect(reportedTypes.includes("missing-files")).toBe(true);
    expect(reportedTypes.includes("test-failure")).toBe(true);
    expect(reportedTypes.includes("build-error")).toBe(true);
  });

  it("should detect deferred work in claim message", async () => {
    const outcome = await service.verify(
      contextWith({
        claimMessage: "Implemented basic feature, will add tests in follow-up",
        filesChanged: ["src/feature.ts"],
      })
    );

    expect(outcome.isComplete).toBe(false);

    const hasDeferredType = outcome.issues.some((issue) => issue.type === "deferred-work");
    const mentionsDeferredWork = outcome.issues.some((issue) =>
      issue.message.includes("deferred work")
    );
    expect(hasDeferredType).toBe(true);
    expect(mentionsDeferredWork).toBe(true);
  });

  it("should generate appropriate suggestions", async () => {
    const outcome = await service.verify(
      contextWith({
        testResults: { total: 10, passed: 10, failed: 0, skipped: 0, coverage: 70 },
      })
    );

    expect(outcome.suggestions.length).toBeGreaterThan(0);

    const mentionsCoverage = outcome.suggestions.some((suggestion) =>
      suggestion.includes("coverage")
    );
    expect(mentionsCoverage).toBe(true);
  });

  it("should return needs-review verdict for marginal cases", async () => {
    const outcome = await service.verify(
      contextWith({
        filesChanged: ["src/feature.ts"],
        testResults: {
          total: 10,
          passed: 9,
          failed: 0,
          skipped: 1,
          coverage: 85, // At threshold - no error
        },
        buildOutput:
          "Build successful\nwarning: unused variable x\nwarning: deprecated API\nwarning: complexity high",
        outputLogs: "Implementation complete",
      })
    );

    // Has warnings but no errors -> needs-review
    expect(outcome.verdict).toBe("needs-review");
    expect(outcome.isComplete).toBe(false);
  });

  it("should calculate confidence from strategy results", async () => {
    const outcome = await service.verify(
      contextWith({
        filesChanged: ["src/feature.ts"],
        testResults: { total: 10, passed: 10, failed: 0, skipped: 0, coverage: 95 },
        buildOutput: "Build successful",
      })
    );

    expect(outcome.confidence).toBeGreaterThan(85);
  });
});
|
|
|
|
// Checks that detectDeferredWork() flags each deferred-work phrasing and
// stays silent for a message that describes finished work.
describe("detectDeferredWork", () => {
  // Each entry registers one test: the exact test title and the claim message fed to the detector.
  const deferredCases = [
    {
      title: 'should detect "will implement in follow-up"',
      message: "Added basic feature, will implement advanced features in follow-up",
    },
    {
      title: 'should detect "to be added later"',
      message: "Core functionality done, tests to be added later",
    },
    {
      title: 'should detect "incremental improvement"',
      message: "This is an incremental improvement, more to come",
    },
    {
      title: 'should detect "future enhancement"',
      message: "Basic feature implemented, future enhancements planned",
    },
    {
      title: 'should detect "TODO: complete"',
      message: "Started implementation, TODO: complete validation logic",
    },
    {
      title: 'should detect "placeholder"',
      message: "Added placeholder implementation for now",
    },
    {
      title: 'should detect "stub"',
      message: "Created stub for the new service",
    },
  ];

  for (const { title, message } of deferredCases) {
    it(title, () => {
      const found = service.detectDeferredWork(message);

      expect(found.length).toBeGreaterThan(0);
      expect(found[0].type).toBe("deferred-work");
    });
  }

  it("should return empty array for complete messages", () => {
    const found = service.detectDeferredWork(
      "Implemented feature with all tests passing and 95% coverage"
    );

    expect(found).toHaveLength(0);
  });
});
|
|
|
|
// Checks that a strategy added through registerStrategy() takes part in verify().
describe("registerStrategy", () => {
  it("should allow registering custom strategies", async () => {
    // Counts calls to the custom strategy. Asserting only that verify() returns
    // something would still pass if the registered strategy were silently ignored.
    let invocations = 0;

    class CustomStrategy {
      name = "custom";

      async verify() {
        invocations += 1;
        return {
          strategyName: "custom",
          passed: true,
          confidence: 100,
          issues: [],
        };
      }
    }

    service.registerStrategy(new CustomStrategy());

    const result = await service.verify(baseContext);

    expect(result).toBeDefined();
    // NOTE(review): assumes verify() runs every registered strategy at least
    // once, as the "should verify using all registered strategies" test implies.
    expect(invocations).toBeGreaterThan(0);
  });
});
|
|
|
|
// Checks calculateConfidence() on a three-result input and on an empty input.
describe("calculateConfidence", () => {
  it("should return average confidence from strategies", () => {
    // Three passing results named s1..s3 with confidences 90, 80 and 70.
    const strategyResults = [90, 80, 70].map((confidence, index) => ({
      strategyName: `s${index + 1}`,
      passed: true,
      confidence,
      issues: [],
    }));

    const averaged = service.calculateConfidence(strategyResults);

    expect(averaged).toBe(80); // Average of 90, 80, 70
  });

  it("should return 0 for empty results", () => {
    expect(service.calculateConfidence([])).toBe(0);
  });
});
|
|
|
|
// Checks that generateSuggestions() produces a suggestion containing the
// expected phrase for each issue type.
describe("generateSuggestions", () => {
  // One entry per test: title, the single issue passed in, and the substring
  // that at least one returned suggestion must contain.
  const suggestionCases = [
    {
      title: "should suggest fixing tests for test failures",
      issue: { type: "test-failure", severity: "error", message: "3 tests failed" },
      expectedPhrase: "failing tests",
    },
    {
      title: "should suggest fixing build errors",
      issue: { type: "build-error", severity: "error", message: "TypeScript errors" },
      expectedPhrase: "build errors",
    },
    {
      title: "should suggest increasing coverage",
      issue: { type: "low-coverage", severity: "error", message: "Coverage below 85%" },
      expectedPhrase: "coverage",
    },
    {
      title: "should suggest completing deferred work",
      issue: { type: "deferred-work", severity: "warning", message: "Work deferred" },
      expectedPhrase: "deferred work",
    },
  ] as const;

  for (const { title, issue, expectedPhrase } of suggestionCases) {
    it(title, () => {
      const proposed = service.generateSuggestions([issue]);

      const mentionsPhrase = proposed.some((text) => text.includes(expectedPhrase));
      expect(mentionsPhrase).toBe(true);
    });
  }
});
|
|
});
|