feat(#136): build Completion Verification Engine

Implement verification engine to determine if AI agent work is truly
complete by analyzing outputs and detecting deferred work patterns.

Strategies:
- FileChangeStrategy: Detect TODO/FIXME, placeholders, stubs
- TestOutputStrategy: Validate pass rates, coverage (85%), skipped tests
- BuildOutputStrategy: Detect TS errors, ESLint errors, build failures

Deferred work detection patterns:
- "follow-up", "to be added later"
- "incremental improvement", "future enhancement"
- "TODO: complete", "placeholder implementation"
- "stub", "work in progress", "partially implemented"

Features:
- Confidence scoring (0-100%)
- Verdict system: complete/incomplete/needs-review
- Actionable suggestions for improvements
- Strategy-based extensibility

Integration:
- Complements Quality Orchestrator (#134)
- Uses Quality Gate Config (#135)

Tests: 46 passing with 95.27% coverage

Fixes #136

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-31 13:44:23 -06:00
parent 4a2909ce1e
commit 72ae92f5a6
15 changed files with 1257 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
import { Module } from "@nestjs/common";
import { CompletionVerificationService } from "./completion-verification.service";
/**
 * NestJS module for completion verification.
 *
 * Registers CompletionVerificationService as a provider and exports it, so
 * modules that import this one can inject the service.
 */
@Module({
providers: [CompletionVerificationService],
exports: [CompletionVerificationService],
})
export class CompletionVerificationModule {}

View File

@@ -0,0 +1,306 @@
import { describe, it, expect, beforeEach } from "vitest";
import { CompletionVerificationService } from "./completion-verification.service";
import { VerificationContext } from "./interfaces";
// Unit tests for CompletionVerificationService: verdict aggregation across the
// default strategies, deferred-work detection in claim messages, custom strategy
// registration, confidence averaging and suggestion generation.
describe("CompletionVerificationService", () => {
let service: CompletionVerificationService;
let baseContext: VerificationContext;
// Each test gets a fresh service and a minimal context: one changed file,
// clean logs, and neither test results nor build output.
beforeEach(() => {
service = new CompletionVerificationService();
baseContext = {
taskId: "task-1",
workspaceId: "workspace-1",
agentId: "agent-1",
claimMessage: "Completed task",
filesChanged: ["src/feature.ts"],
outputLogs: "Implementation complete",
previousAttempts: 0,
};
});
// End-to-end behavior of verify() with the default strategies.
describe("verify", () => {
// Happy path: passing tests, coverage above threshold, clean build.
it("should verify using all registered strategies", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts", "src/feature.spec.ts"],
testResults: {
total: 10,
passed: 10,
failed: 0,
skipped: 0,
coverage: 90,
},
buildOutput: "Build successful",
};
const result = await service.verify(context);
expect(result.verdict).toBe("complete");
expect(result.isComplete).toBe(true);
expect(result.confidence).toBeGreaterThan(80);
expect(result.issues).toHaveLength(0);
});
// One failure per strategy: no files, failing tests, TypeScript build error.
it("should aggregate issues from all strategies", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: [],
testResults: {
total: 10,
passed: 7,
failed: 3,
skipped: 0,
coverage: 70,
},
buildOutput: "error TS2304: Cannot find name",
};
const result = await service.verify(context);
expect(result.verdict).toBe("incomplete");
expect(result.isComplete).toBe(false);
expect(result.issues.length).toBeGreaterThan(0);
expect(result.issues.some((i) => i.type === "missing-files")).toBe(true);
expect(result.issues.some((i) => i.type === "test-failure")).toBe(true);
expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
});
// The claim message itself ("follow-up") is enough to block completion.
it("should detect deferred work in claim message", async () => {
const context: VerificationContext = {
...baseContext,
claimMessage: "Implemented basic feature, will add tests in follow-up",
filesChanged: ["src/feature.ts"],
};
const result = await service.verify(context);
expect(result.isComplete).toBe(false);
expect(result.issues.some((i) => i.type === "deferred-work")).toBe(true);
expect(result.issues.some((i) => i.message.includes("deferred work"))).toBe(true);
});
// Coverage of 70 is below the 85 threshold, so a coverage suggestion is expected.
it("should generate appropriate suggestions", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 10,
passed: 10,
failed: 0,
skipped: 0,
coverage: 70,
},
};
const result = await service.verify(context);
expect(result.suggestions.length).toBeGreaterThan(0);
expect(result.suggestions.some((s) => s.includes("coverage"))).toBe(true);
});
// Only warning-level findings (a skipped test, build warnings) are present.
it("should return needs-review verdict for marginal cases", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts"],
testResults: {
total: 10,
passed: 9,
failed: 0,
skipped: 1,
coverage: 85, // At threshold - no error
},
buildOutput:
"Build successful\nwarning: unused variable x\nwarning: deprecated API\nwarning: complexity high",
outputLogs: "Implementation complete",
};
const result = await service.verify(context);
// Has warnings but no errors -> needs-review
expect(result.verdict).toBe("needs-review");
expect(result.isComplete).toBe(false);
});
// All three default strategies report high confidence for this input.
it("should calculate confidence from strategy results", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts"],
testResults: {
total: 10,
passed: 10,
failed: 0,
skipped: 0,
coverage: 95,
},
buildOutput: "Build successful",
};
const result = await service.verify(context);
expect(result.confidence).toBeGreaterThan(85);
});
});
// One test per deferred-work phrase, plus a clean message as the negative case.
describe("detectDeferredWork", () => {
it('should detect "will implement in follow-up"', () => {
const message = "Added basic feature, will implement advanced features in follow-up";
const issues = service.detectDeferredWork(message);
expect(issues.length).toBeGreaterThan(0);
expect(issues[0].type).toBe("deferred-work");
});
it('should detect "to be added later"', () => {
const message = "Core functionality done, tests to be added later";
const issues = service.detectDeferredWork(message);
expect(issues.length).toBeGreaterThan(0);
expect(issues[0].type).toBe("deferred-work");
});
it('should detect "incremental improvement"', () => {
const message = "This is an incremental improvement, more to come";
const issues = service.detectDeferredWork(message);
expect(issues.length).toBeGreaterThan(0);
expect(issues[0].type).toBe("deferred-work");
});
it('should detect "future enhancement"', () => {
const message = "Basic feature implemented, future enhancements planned";
const issues = service.detectDeferredWork(message);
expect(issues.length).toBeGreaterThan(0);
expect(issues[0].type).toBe("deferred-work");
});
it('should detect "TODO: complete"', () => {
const message = "Started implementation, TODO: complete validation logic";
const issues = service.detectDeferredWork(message);
expect(issues.length).toBeGreaterThan(0);
expect(issues[0].type).toBe("deferred-work");
});
it('should detect "placeholder"', () => {
const message = "Added placeholder implementation for now";
const issues = service.detectDeferredWork(message);
expect(issues.length).toBeGreaterThan(0);
expect(issues[0].type).toBe("deferred-work");
});
it('should detect "stub"', () => {
const message = "Created stub for the new service";
const issues = service.detectDeferredWork(message);
expect(issues.length).toBeGreaterThan(0);
expect(issues[0].type).toBe("deferred-work");
});
it("should return empty array for complete messages", () => {
const message = "Implemented feature with all tests passing and 95% coverage";
const issues = service.detectDeferredWork(message);
expect(issues).toHaveLength(0);
});
});
// A strategy added at runtime takes part in verify() alongside the defaults.
describe("registerStrategy", () => {
it("should allow registering custom strategies", async () => {
// Minimal strategy that always passes with full confidence.
class CustomStrategy {
name = "custom";
async verify() {
return {
strategyName: "custom",
passed: true,
confidence: 100,
issues: [],
};
}
}
service.registerStrategy(new CustomStrategy());
const result = await service.verify(baseContext);
expect(result).toBeDefined();
});
});
// calculateConfidence is the rounded mean of the per-strategy confidences.
describe("calculateConfidence", () => {
it("should return average confidence from strategies", () => {
const results = [
{ strategyName: "s1", passed: true, confidence: 90, issues: [] },
{ strategyName: "s2", passed: true, confidence: 80, issues: [] },
{ strategyName: "s3", passed: true, confidence: 70, issues: [] },
];
const confidence = service.calculateConfidence(results);
expect(confidence).toBe(80); // Average of 90, 80, 70
});
it("should return 0 for empty results", () => {
const confidence = service.calculateConfidence([]);
expect(confidence).toBe(0);
});
});
// Each issue type maps to one fixed suggestion sentence.
describe("generateSuggestions", () => {
it("should suggest fixing tests for test failures", () => {
const issues = [
{
type: "test-failure" as const,
severity: "error" as const,
message: "3 tests failed",
},
];
const suggestions = service.generateSuggestions(issues);
expect(suggestions.some((s) => s.includes("failing tests"))).toBe(true);
});
it("should suggest fixing build errors", () => {
const issues = [
{
type: "build-error" as const,
severity: "error" as const,
message: "TypeScript errors",
},
];
const suggestions = service.generateSuggestions(issues);
expect(suggestions.some((s) => s.includes("build errors"))).toBe(true);
});
it("should suggest increasing coverage", () => {
const issues = [
{
type: "low-coverage" as const,
severity: "error" as const,
message: "Coverage below 85%",
},
];
const suggestions = service.generateSuggestions(issues);
expect(suggestions.some((s) => s.includes("coverage"))).toBe(true);
});
it("should suggest completing deferred work", () => {
const issues = [
{
type: "deferred-work" as const,
severity: "warning" as const,
message: "Work deferred",
},
];
const suggestions = service.generateSuggestions(issues);
expect(suggestions.some((s) => s.includes("deferred work"))).toBe(true);
});
});
});

View File

@@ -0,0 +1,147 @@
import { Injectable } from "@nestjs/common";
import {
VerificationContext,
VerificationResult,
VerificationIssue,
StrategyResult,
} from "./interfaces";
import {
BaseVerificationStrategy,
FileChangeStrategy,
TestOutputStrategy,
BuildOutputStrategy,
} from "./strategies";
/**
 * Checks whether an agent's completion claim is backed by its outputs.
 *
 * Every registered strategy is run against the same context. Their issues are
 * merged with any deferred-work phrasing found in the claim message, and the
 * merged issue list together with the averaged strategy confidence determines
 * the verdict.
 */
@Injectable()
export class CompletionVerificationService {
  /** Averaged confidence below this value is never reported as "complete". */
  private static readonly COMPLETE_CONFIDENCE_THRESHOLD = 80;

  /**
   * Phrases in a claim message that signal postponed work.
   * The patterns are global, which is safe to share across calls because
   * `String.prototype.match` resets `lastIndex` before scanning.
   */
  private static readonly DEFERRED_WORK_PATTERNS: readonly RegExp[] = [
    /follow-up/gi,
    /to\s+be\s+added\s+later/gi,
    /incremental\s+improvement/gi,
    /future\s+enhancement/gi,
    /TODO:.{0,100}complete/gi,
    /placeholder\s+implementation/gi,
    /\bstub\b/gi,
    /will\s+(?:add|complete|finish|implement).{0,100}later/gi,
    // "partial" and "partially" (the previous `partially?` only made the "y" optional).
    /partial(?:ly)?\s+(?:implemented|complete)/gi,
    /work\s+in\s+progress/gi,
  ];

  private readonly strategies: BaseVerificationStrategy[] = [];

  constructor() {
    this.registerDefaultStrategies();
  }

  /** Registers the file-change, test-output and build-output strategies. */
  private registerDefaultStrategies(): void {
    this.strategies.push(
      new FileChangeStrategy(),
      new TestOutputStrategy(),
      new BuildOutputStrategy()
    );
  }

  /**
   * Runs every registered strategy and combines the outcome into one result.
   *
   * @param context - The claim and the artifacts (files, logs, test results, build output) to check.
   * @returns The verdict, averaged confidence, all issues found and matching suggestions.
   *          `isComplete` is true only for the "complete" verdict.
   */
  async verify(context: VerificationContext): Promise<VerificationResult> {
    // Strategies are independent of each other, so run them in parallel.
    const strategyResults = await Promise.all(
      this.strategies.map((strategy) => strategy.verify(context))
    );

    const issues: VerificationIssue[] = [
      ...strategyResults.flatMap((result) => result.issues),
      ...this.detectDeferredWork(context.claimMessage),
    ];
    const confidence = this.calculateConfidence(strategyResults);
    const verdict = this.determineVerdict(issues, confidence);

    return {
      isComplete: verdict === "complete",
      confidence,
      issues,
      suggestions: this.generateSuggestions(issues),
      verdict,
    };
  }

  /**
   * Maps issues and confidence to a verdict.
   *
   * Any error makes the work "incomplete". Otherwise a warning, or a confidence
   * below the completion threshold, asks for review. Previously only the 60-79
   * band asked for review, so a confidence below 60 with no issues fell through
   * to "complete".
   */
  private determineVerdict(
    issues: readonly VerificationIssue[],
    confidence: number
  ): VerificationResult["verdict"] {
    if (issues.some((issue) => issue.severity === "error")) {
      return "incomplete";
    }
    const hasWarnings = issues.some((issue) => issue.severity === "warning");
    if (hasWarnings || confidence < CompletionVerificationService.COMPLETE_CONFIDENCE_THRESHOLD) {
      return "needs-review";
    }
    return "complete";
  }

  /**
   * Adds a strategy that will take part in every later `verify` call.
   *
   * @param strategy - The strategy to append to the registered list.
   */
  registerStrategy(strategy: BaseVerificationStrategy): void {
    this.strategies.push(strategy);
  }

  /**
   * Looks for phrasing in the claim message that indicates postponed work.
   *
   * @param claimMessage - The agent's completion message.
   * @returns At most one "deferred-work" warning. Its evidence lists the matches
   *          of the first pattern that hit. Empty when nothing matched.
   */
  detectDeferredWork(claimMessage: string): VerificationIssue[] {
    for (const pattern of CompletionVerificationService.DEFERRED_WORK_PATTERNS) {
      const matches = claimMessage.match(pattern);
      if (matches !== null && matches.length > 0) {
        // Report once: the first matching pattern is enough to flag the claim.
        return [
          {
            type: "deferred-work",
            severity: "warning",
            message: "Claim message indicates deferred work",
            evidence: matches.join(", "),
          },
        ];
      }
    }
    return [];
  }

  /**
   * Averages the confidence of all strategy results.
   *
   * @param results - Results returned by the strategies.
   * @returns The mean confidence rounded to the nearest integer, or 0 for an empty list.
   */
  calculateConfidence(results: StrategyResult[]): number {
    if (results.length === 0) {
      return 0;
    }
    const totalConfidence = results.reduce((sum, result) => sum + result.confidence, 0);
    return Math.round(totalConfidence / results.length);
  }

  /**
   * Produces one fixed suggestion per issue type present in the list.
   *
   * @param issues - Issues collected during verification.
   * @returns Suggestions in a fixed order, each appearing at most once.
   */
  generateSuggestions(issues: VerificationIssue[]): string[] {
    const suggestions: string[] = [];
    const issueTypes = new Set(issues.map((issue) => issue.type));

    if (issueTypes.has("test-failure")) {
      suggestions.push("Fix all failing tests before marking task complete");
    }
    if (issueTypes.has("build-error")) {
      suggestions.push("Resolve all build errors and type-check issues");
    }
    if (issueTypes.has("low-coverage")) {
      suggestions.push("Increase test coverage to meet the 85% threshold");
    }
    if (issueTypes.has("missing-files")) {
      suggestions.push("Ensure all necessary files have been modified");
    }
    if (issueTypes.has("incomplete-implementation")) {
      suggestions.push("Remove TODO/FIXME comments and complete placeholder implementations");
    }
    if (issueTypes.has("deferred-work")) {
      suggestions.push("Complete all deferred work or create separate tasks for follow-up items");
    }
    return suggestions;
  }
}

View File

@@ -0,0 +1,4 @@
export * from "./completion-verification.module";
export * from "./completion-verification.service";
export * from "./interfaces";
export * from "./strategies";

View File

@@ -0,0 +1,2 @@
export * from "./verification-context.interface";
export * from "./verification-result.interface";

View File

@@ -0,0 +1,19 @@
/**
 * Everything the verification service and its strategies look at when judging
 * a completion claim.
 */
export interface VerificationContext {
// NOTE(review): taskId, workspaceId and agentId are not read by the service or
// the bundled strategies; presumably carried for callers — confirm.
taskId: string;
workspaceId: string;
agentId: string;
/** The agent's completion message; scanned for deferred-work phrasing. */
claimMessage: string;
/** Paths of changed files; an empty list is reported as an error by FileChangeStrategy. */
filesChanged: string[];
/** Raw agent output; scanned line by line for TODO:/FIXME:/placeholder/stub markers. */
outputLogs: string;
/** Test run summary; when absent, TestOutputStrategy returns a neutral result. */
testResults?: TestResults;
/** Raw build log; when absent or empty, BuildOutputStrategy returns a neutral result. */
buildOutput?: string;
// NOTE(review): not read by the service or the bundled strategies — confirm intended use.
previousAttempts: number;
}
/** Summary counts of a test run, as consumed by TestOutputStrategy. */
export interface TestResults {
/** Number of tests in the run; used as the denominator for pass/fail/skip rates. */
total: number;
/** Number of tests that passed. */
passed: number;
/** Number of tests that failed; any value above 0 is reported as an error. */
failed: number;
/** Number of tests that were skipped; any value above 0 is reported as a warning. */
skipped: number;
/** Coverage percentage; compared against the strategy's threshold of 85 when present. */
coverage?: number;
}

View File

@@ -0,0 +1,27 @@
/** Combined outcome returned by CompletionVerificationService.verify. */
export interface VerificationResult {
/** True exactly when `verdict` is "complete". */
isComplete: boolean;
/** Rounded average of the per-strategy confidences. */
confidence: number; // 0-100
/** Issues from all strategies plus any deferred-work issue from the claim message. */
issues: VerificationIssue[];
/** One fixed suggestion sentence per issue type that occurred. */
suggestions: string[];
/** Overall judgement; "incomplete" whenever any issue has error severity. */
verdict: "complete" | "incomplete" | "needs-review";
}
/** A single finding raised by a strategy or by deferred-work detection. */
export interface VerificationIssue {
/** Category of the finding; also selects the suggestion generated for it. */
type:
| "test-failure"
| "build-error"
| "missing-files"
| "low-coverage"
| "incomplete-implementation"
| "deferred-work";
/** "error" findings fail a strategy and force an "incomplete" verdict; "warning" findings trigger review. */
severity: "error" | "warning" | "info";
/** Human-readable description of the finding. */
message: string;
/** Optional excerpt (matched log lines or phrases) that triggered the finding. */
evidence?: string;
}
/** Outcome of one verification strategy. */
export interface StrategyResult {
/** The `name` of the strategy that produced this result. */
strategyName: string;
/** In the bundled strategies: true when no issue has error severity. */
passed: boolean;
/** Strategy confidence; the service averages these values across strategies. */
confidence: number;
/** Findings raised by the strategy (may be empty). */
issues: VerificationIssue[];
}

View File

@@ -0,0 +1,34 @@
import type { VerificationContext, StrategyResult } from "../interfaces";
/**
 * Base class for verification strategies.
 *
 * Subclasses provide a `name` and a `verify` implementation; the protected
 * helpers below extract matching lines or matches from log text.
 */
export abstract class BaseVerificationStrategy {
  /** Identifier reported as `strategyName` in results. */
  abstract name: string;

  /**
   * Inspects the context and reports issues and a confidence score.
   *
   * @param context - The claim and artifacts to verify.
   * @returns The strategy's result.
   */
  abstract verify(context: VerificationContext): Promise<StrategyResult>;

  /**
   * Returns every line of `text` that matches `pattern`, trimmed.
   *
   * Works on a private copy of the pattern and rewinds it before each line.
   * A global or sticky regex keeps `lastIndex` between `test` calls, which
   * previously made a matching line directly after another match be skipped.
   *
   * @param text - Multi-line text, split on "\n".
   * @param pattern - Pattern tested against each line; the caller's regex is not mutated.
   * @returns Trimmed matching lines in their original order.
   */
  protected extractEvidence(text: string, pattern: RegExp): string[] {
    const lineMatcher = new RegExp(pattern.source, pattern.flags);
    const evidence: string[] = [];
    for (const line of text.split("\n")) {
      lineMatcher.lastIndex = 0;
      if (lineMatcher.test(line)) {
        evidence.push(line.trim());
      }
    }
    return evidence;
  }

  /**
   * Returns every match of `pattern` in `text`.
   *
   * Scans with a private global copy of the pattern, so a non-global pattern
   * no longer loops forever and the caller's regex is not mutated.
   *
   * @param text - Text to scan.
   * @param pattern - Pattern to find; treated as global regardless of its flags.
   * @returns The matched substrings in order of appearance.
   */
  protected extractAllMatches(text: string, pattern: RegExp): string[] {
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    const scanner = new RegExp(pattern.source, flags);
    const matches: string[] = [];
    let match: RegExpExecArray | null;
    while ((match = scanner.exec(text)) !== null) {
      matches.push(match[0]);
      if (match[0].length === 0) {
        // An empty match does not advance lastIndex; step forward to avoid looping forever.
        scanner.lastIndex += 1;
      }
    }
    return matches;
  }
}

View File

@@ -0,0 +1,137 @@
import { describe, it, expect, beforeEach } from "vitest";
import { BuildOutputStrategy } from "./build-output.strategy";
import { VerificationContext } from "../interfaces";
// Unit tests for BuildOutputStrategy: error and warning detection in raw build
// logs and the resulting pass flag and confidence.
describe("BuildOutputStrategy", () => {
let strategy: BuildOutputStrategy;
let baseContext: VerificationContext;
// Fresh strategy and a context without build output for every test.
beforeEach(() => {
strategy = new BuildOutputStrategy();
baseContext = {
taskId: "task-1",
workspaceId: "workspace-1",
agentId: "agent-1",
claimMessage: "Built successfully",
filesChanged: ["src/feature.ts"],
outputLogs: "",
previousAttempts: 0,
};
});
describe("verify", () => {
it("should pass when build succeeds", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: "Build completed successfully\nNo errors found",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.strategyName).toBe("build-output");
expect(result.confidence).toBeGreaterThanOrEqual(90);
expect(result.issues).toHaveLength(0);
});
// "error TS<number>:" lines are reported as TypeScript errors.
it("should fail when TypeScript errors found", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: 'error TS2304: Cannot find name "unknown".\nBuild failed',
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
expect(result.issues.some((i) => i.message.includes("TypeScript"))).toBe(true);
});
it("should fail when build errors found", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: "Error: Module not found\nBuild failed with 1 error",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
});
it("should detect ESLint errors", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: "ESLint error: no-unused-vars\n1 error found",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.message.includes("ESLint"))).toBe(true);
});
// Warnings are reported but do not fail the strategy.
it("should warn about lint warnings", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: "warning: unused variable\nBuild completed with warnings",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.issues.some((i) => i.severity === "warning")).toBe(true);
});
// Missing build output yields a neutral, passing result.
it("should pass when no build output provided", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: undefined,
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.confidence).toBeGreaterThan(0);
});
// Each detected error line lowers confidence.
it("should reduce confidence with multiple errors", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput:
"error TS2304: Cannot find name\nerror TS2345: Type mismatch\nerror TS1005: Syntax error\nBuild failed",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.confidence).toBeLessThan(50);
expect(result.issues.length).toBeGreaterThan(0);
});
// A failure message without individual error lines still fails the strategy.
it("should detect compilation failures", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: "Compilation failed\nProcess exited with code 1",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
});
it("should have high confidence with clean build", async () => {
const context: VerificationContext = {
...baseContext,
buildOutput: "Build successful\nNo errors or warnings\nCompleted in 5s",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.confidence).toBeGreaterThanOrEqual(95);
expect(result.issues).toHaveLength(0);
});
});
});

View File

@@ -0,0 +1,105 @@
import { BaseVerificationStrategy } from "./base-verification.strategy";
import type { VerificationContext, StrategyResult, VerificationIssue } from "../interfaces";
/**
 * Verifies a completion claim against the captured build output.
 *
 * Scans the log line by line for TypeScript errors, ESLint errors, generic
 * build errors, an overall failure message and warnings, and derives a
 * confidence score from how many of them were found.
 */
export class BuildOutputStrategy extends BaseVerificationStrategy {
  name = "build-output";

  /**
   * @param context - Verification context; only `buildOutput` is read.
   * @returns A neutral passing result (confidence 50) when there is no build
   *          output; otherwise the detected issues, with `passed` false when
   *          any of them is an error.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const { buildOutput } = context;

    // No build output: assume the build was not run (neutral result).
    if (!buildOutput) {
      return Promise.resolve({
        strategyName: this.name,
        passed: true,
        confidence: 50,
        issues: [],
      });
    }

    const issues: VerificationIssue[] = [];

    // The patterns below are deliberately NOT global: extractEvidence tests them
    // once per line, and a global regex carries lastIndex from one line to the
    // next, which makes consecutive matching lines be undercounted.
    const tsErrors = this.extractEvidence(buildOutput, /error TS\d+:/i);
    if (tsErrors.length > 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Found ${tsErrors.length.toString()} TypeScript error(s)`,
        evidence: tsErrors.slice(0, 5).join("\n"), // Limit to first 5
      });
    }

    const eslintErrors = this.extractEvidence(buildOutput, /ESLint.*error/i);
    if (eslintErrors.length > 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Found ${eslintErrors.length.toString()} ESLint error(s)`,
        evidence: eslintErrors.slice(0, 5).join("\n"),
      });
    }

    const buildErrors = this.extractEvidence(
      buildOutput,
      /\berror\b.*(?:build|compilation|failed)/i
    );
    // Reported only when no TypeScript errors were found, to avoid a duplicate issue.
    if (buildErrors.length > 0 && tsErrors.length === 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Build errors detected`,
        evidence: buildErrors.slice(0, 5).join("\n"),
      });
    }

    // Overall failure message with no individual error reported so far.
    let unattributedFailures = 0;
    if (/compilation failed|build failed/i.test(buildOutput) && issues.length === 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: "Compilation failed",
      });
      unattributedFailures = 1;
    }

    const warnings = this.extractEvidence(buildOutput, /\bwarning\b/i);
    if (warnings.length > 0) {
      issues.push({
        type: "build-error",
        severity: "warning",
        message: `Found ${warnings.length.toString()} warning(s)`,
        evidence: warnings.slice(0, 3).join("\n"),
      });
    }

    // 30 points per error. The unattributed failure counts as one error so that
    // a failed build can no longer report full confidence.
    const errorCount =
      tsErrors.length + eslintErrors.length + buildErrors.length + unattributedFailures;
    let confidence = 100;
    if (errorCount > 0) {
      confidence = Math.max(0, 100 - errorCount * 30);
    }
    // 2 points per warning line, capped at 10.
    if (warnings.length > 0) {
      confidence -= Math.min(10, warnings.length * 2);
    }
    confidence = Math.max(0, Math.round(confidence));

    return Promise.resolve({
      strategyName: this.name,
      passed: !issues.some((issue) => issue.severity === "error"),
      confidence,
      issues,
    });
  }
}

View File

@@ -0,0 +1,133 @@
import { describe, it, expect, beforeEach } from "vitest";
import { FileChangeStrategy } from "./file-change.strategy";
import { VerificationContext } from "../interfaces";
// Unit tests for FileChangeStrategy: the changed-files check and detection of
// TODO/FIXME/placeholder/stub markers in the output logs.
describe("FileChangeStrategy", () => {
let strategy: FileChangeStrategy;
let baseContext: VerificationContext;
// Fresh strategy and a context with no changed files and empty logs for every test.
beforeEach(() => {
strategy = new FileChangeStrategy();
baseContext = {
taskId: "task-1",
workspaceId: "workspace-1",
agentId: "agent-1",
claimMessage: "Implemented feature",
filesChanged: [],
outputLogs: "",
previousAttempts: 0,
};
});
describe("verify", () => {
it("should pass when files are changed", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts", "src/feature.spec.ts"],
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.strategyName).toBe("file-change");
expect(result.confidence).toBeGreaterThan(0);
expect(result.issues).toHaveLength(0);
});
// An empty change list is the only issue and has error severity.
it("should fail when no files are changed", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: [],
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues).toHaveLength(1);
expect(result.issues[0].type).toBe("missing-files");
expect(result.issues[0].severity).toBe("error");
});
it("should detect TODO comments in output logs", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts"],
outputLogs: "File modified\nTODO: implement this later\nDone",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
expect(result.issues.some((i) => i.message.includes("TODO"))).toBe(true);
});
it("should detect FIXME comments in output logs", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts"],
outputLogs: "File modified\nFIXME: broken implementation\nDone",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
expect(result.issues.some((i) => i.message.includes("FIXME"))).toBe(true);
});
it("should detect placeholder implementations", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts"],
outputLogs: "Added placeholder implementation for now",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
});
it("should detect stub implementations", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts"],
outputLogs: "Created stub for testing",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
});
// Three different markers produce three issues, each lowering confidence.
it("should reduce confidence with multiple issues", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts"],
outputLogs: "TODO: implement\nFIXME: broken\nPlaceholder added",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.confidence).toBeLessThan(50);
expect(result.issues.length).toBeGreaterThan(1);
});
it("should have high confidence when no issues found", async () => {
const context: VerificationContext = {
...baseContext,
filesChanged: ["src/feature.ts", "src/feature.spec.ts"],
outputLogs: "Implemented feature successfully\nAll tests passing",
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.confidence).toBeGreaterThanOrEqual(90);
expect(result.issues).toHaveLength(0);
});
});
});

View File

@@ -0,0 +1,79 @@
import { BaseVerificationStrategy } from "./base-verification.strategy";
import type { VerificationContext, StrategyResult, VerificationIssue } from "../interfaces";
/**
 * Verifies that the agent changed files and that its output logs carry no
 * markers of unfinished work (TODO:, FIXME:, "placeholder", "stub").
 */
export class FileChangeStrategy extends BaseVerificationStrategy {
  name = "file-change";

  /**
   * @param context - Verification context; `filesChanged` and `outputLogs` are read.
   * @returns One error issue per failed check. Confidence starts at 100 and
   *          drops by 20 per issue (never below 0); `passed` is false when any
   *          issue is an error.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const issues: VerificationIssue[] = [];

    if (context.filesChanged.length === 0) {
      issues.push({
        type: "missing-files",
        severity: "error",
        message: "No files were changed",
      });
    }

    // Markers of unfinished work, checked in this order. The patterns are
    // deliberately NOT global: extractEvidence tests them once per line, and a
    // global regex carries lastIndex across lines, so consecutive marker lines
    // would be undercounted.
    const markerChecks: { pattern: RegExp; toMessage: (count: number) => string }[] = [
      {
        pattern: /TODO:/i,
        toMessage: (count) => `Found ${count.toString()} TODO comment(s)`,
      },
      {
        pattern: /FIXME:/i,
        toMessage: (count) => `Found ${count.toString()} FIXME comment(s)`,
      },
      {
        pattern: /placeholder/i,
        toMessage: () => "Found placeholder implementation",
      },
      {
        pattern: /\bstub\b/i,
        toMessage: () => "Found stub implementation",
      },
    ];

    for (const { pattern, toMessage } of markerChecks) {
      const matchingLines = this.extractEvidence(context.outputLogs, pattern);
      if (matchingLines.length > 0) {
        issues.push({
          type: "incomplete-implementation",
          severity: "error",
          message: toMessage(matchingLines.length),
          evidence: matchingLines.join("\n"),
        });
      }
    }

    const baseConfidence = 100;
    const penaltyPerIssue = 20;
    const confidence = Math.max(0, baseConfidence - issues.length * penaltyPerIssue);

    return Promise.resolve({
      strategyName: this.name,
      passed: !issues.some((issue) => issue.severity === "error"),
      confidence,
      issues,
    });
  }
}

View File

@@ -0,0 +1,4 @@
export * from "./base-verification.strategy";
export * from "./file-change.strategy";
export * from "./test-output.strategy";
export * from "./build-output.strategy";

View File

@@ -0,0 +1,167 @@
import { describe, it, expect, beforeEach } from "vitest";
import { TestOutputStrategy } from "./test-output.strategy";
import { VerificationContext } from "../interfaces";
// Unit tests for TestOutputStrategy: failed and skipped tests, the coverage
// threshold of 85, and how the confidence reacts to them.
describe("TestOutputStrategy", () => {
let strategy: TestOutputStrategy;
let baseContext: VerificationContext;
// Fresh strategy and a context without test results for every test.
beforeEach(() => {
strategy = new TestOutputStrategy();
baseContext = {
taskId: "task-1",
workspaceId: "workspace-1",
agentId: "agent-1",
claimMessage: "Implemented tests",
filesChanged: ["src/feature.spec.ts"],
outputLogs: "",
previousAttempts: 0,
};
});
describe("verify", () => {
it("should pass when all tests pass", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 10,
passed: 10,
failed: 0,
skipped: 0,
coverage: 90,
},
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.strategyName).toBe("test-output");
expect(result.confidence).toBeGreaterThanOrEqual(90);
expect(result.issues).toHaveLength(0);
});
it("should fail when tests fail", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 10,
passed: 7,
failed: 3,
skipped: 0,
coverage: 80,
},
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "test-failure")).toBe(true);
expect(result.issues.some((i) => i.message.includes("3 test(s) failed"))).toBe(true);
});
// Skipped tests are a warning only, so the strategy still passes.
it("should warn about skipped tests", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 10,
passed: 8,
failed: 0,
skipped: 2,
coverage: 85,
},
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.issues.some((i) => i.severity === "warning")).toBe(true);
expect(result.issues.some((i) => i.message.includes("2 test(s) skipped"))).toBe(true);
});
it("should fail when coverage is below threshold", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 10,
passed: 10,
failed: 0,
skipped: 0,
coverage: 70,
},
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.issues.some((i) => i.type === "low-coverage")).toBe(true);
expect(result.issues.some((i) => i.message.includes("70%"))).toBe(true);
});
// Coverage exactly at 85 is accepted.
it("should pass when coverage is at threshold", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 10,
passed: 10,
failed: 0,
skipped: 0,
coverage: 85,
},
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.issues.filter((i) => i.type === "low-coverage")).toHaveLength(0);
});
// Missing test results yield a neutral, passing result.
it("should pass when no test results provided", async () => {
const context: VerificationContext = {
...baseContext,
testResults: undefined,
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.confidence).toBeGreaterThan(0);
});
it("should reduce confidence based on failure rate", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 10,
passed: 5,
failed: 5,
skipped: 0,
coverage: 80,
},
};
const result = await strategy.verify(context);
expect(result.passed).toBe(false);
expect(result.confidence).toBeLessThan(50);
});
it("should have high confidence with perfect results", async () => {
const context: VerificationContext = {
...baseContext,
testResults: {
total: 20,
passed: 20,
failed: 0,
skipped: 0,
coverage: 95,
},
};
const result = await strategy.verify(context);
expect(result.passed).toBe(true);
expect(result.confidence).toBeGreaterThanOrEqual(95);
expect(result.issues).toHaveLength(0);
});
});
});

View File

@@ -0,0 +1,85 @@
import { BaseVerificationStrategy } from "./base-verification.strategy";
import type { VerificationContext, StrategyResult, VerificationIssue } from "../interfaces";
/**
 * Verifies a completion claim against the reported test results: failed tests
 * and coverage below the threshold are errors, skipped tests are a warning.
 */
export class TestOutputStrategy extends BaseVerificationStrategy {
  name = "test-output";

  /** Minimum accepted coverage percentage. */
  private readonly COVERAGE_THRESHOLD = 85;

  /** Confidence used when there is nothing to judge (no results, or zero tests executed). */
  private readonly NEUTRAL_CONFIDENCE = 50;

  /**
   * @param context - Verification context; only `testResults` is read.
   * @returns A neutral passing result (confidence 50) when there are no test
   *          results; otherwise the detected issues, with `passed` false when
   *          any of them is an error.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const { testResults } = context;

    // No test results: assume tests were not run (neutral result).
    if (!testResults) {
      return Promise.resolve({
        strategyName: this.name,
        passed: true,
        confidence: this.NEUTRAL_CONFIDENCE,
        issues: [],
      });
    }

    const { total, passed, failed, skipped, coverage } = testResults;
    const issues: VerificationIssue[] = [];

    if (failed > 0) {
      issues.push({
        type: "test-failure",
        severity: "error",
        message: `${failed.toString()} test(s) failed out of ${total.toString()}`,
      });
    }

    if (skipped > 0) {
      issues.push({
        type: "test-failure",
        severity: "warning",
        message: `${skipped.toString()} test(s) skipped`,
      });
    }

    if (coverage !== undefined && coverage < this.COVERAGE_THRESHOLD) {
      issues.push({
        type: "low-coverage",
        severity: "error",
        message: `Code coverage ${coverage.toString()}% is below threshold of ${this.COVERAGE_THRESHOLD.toString()}%`,
      });
    }

    // Start from the pass rate. A run that executed zero tests proves nothing,
    // so it starts from the neutral value instead of 100.
    let confidence =
      total > 0 ? Math.min(100, (passed / total) * 100) : this.NEUTRAL_CONFIDENCE;

    // Confidence is the minimum of pass rate and coverage, not their average.
    if (coverage !== undefined) {
      confidence = Math.min(confidence, coverage);
    }

    if (total > 0) {
      // Extra penalties proportional to the failure and skip rates.
      confidence -= (failed / total) * 30;
      confidence -= (skipped / total) * 20;
    } else if (failed > 0 || skipped > 0) {
      // Inconsistent counts (failures or skips with a total of 0): no confidence,
      // and no division by zero.
      confidence = 0;
    }

    confidence = Math.max(0, Math.round(confidence));

    return Promise.resolve({
      strategyName: this.name,
      passed: !issues.some((issue) => issue.severity === "error"),
      confidence,
      issues,
    });
  }
}