feat(#136): build Completion Verification Engine
Implement a verification engine that determines whether an AI agent's work is truly complete by analyzing its outputs and detecting deferred-work patterns.

Strategies:
- FileChangeStrategy: Detect TODO/FIXME, placeholders, stubs
- TestOutputStrategy: Validate pass rates, coverage (85%), skipped tests
- BuildOutputStrategy: Detect TS errors, ESLint errors, build failures

Deferred work detection patterns:
- "follow-up", "to be added later"
- "incremental improvement", "future enhancement"
- "TODO: complete", "placeholder implementation"
- "stub", "work in progress", "partially implemented"

Features:
- Confidence scoring (0-100%)
- Verdict system: complete/incomplete/needs-review
- Actionable suggestions for improvements
- Strategy-based extensibility

Integration:
- Complements Quality Orchestrator (#134)
- Uses Quality Gate Config (#135)

Tests: 46 passing with 95.27% coverage

Fixes #136

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
import { Module } from "@nestjs/common";
|
||||
import { CompletionVerificationService } from "./completion-verification.service";
|
||||
|
||||
/**
 * NestJS module that packages the completion-verification feature.
 *
 * It registers `CompletionVerificationService` as a provider and also exports
 * it, so any module that imports this one can inject the service.
 */
@Module({
  providers: [CompletionVerificationService],
  exports: [CompletionVerificationService],
})
export class CompletionVerificationModule {}
|
||||
@@ -0,0 +1,306 @@
|
||||
import { describe, it, expect, beforeEach } from "vitest";
import { CompletionVerificationService } from "./completion-verification.service";
import { VerificationContext } from "./interfaces";
import { BaseVerificationStrategy } from "./strategies";
|
||||
|
||||
/**
 * Unit tests for CompletionVerificationService: the end-to-end verdict
 * produced by `verify`, plus the public helpers it is composed of
 * (`detectDeferredWork`, `registerStrategy`, `calculateConfidence`,
 * `generateSuggestions`).
 */
describe("CompletionVerificationService", () => {
  let service: CompletionVerificationService;
  let baseContext: VerificationContext;

  beforeEach(() => {
    service = new CompletionVerificationService();
    baseContext = {
      taskId: "task-1",
      workspaceId: "workspace-1",
      agentId: "agent-1",
      claimMessage: "Completed task",
      filesChanged: ["src/feature.ts"],
      outputLogs: "Implementation complete",
      previousAttempts: 0,
    };
  });

  describe("verify", () => {
    it("should verify using all registered strategies", async () => {
      const context: VerificationContext = {
        ...baseContext,
        filesChanged: ["src/feature.ts", "src/feature.spec.ts"],
        testResults: { total: 10, passed: 10, failed: 0, skipped: 0, coverage: 90 },
        buildOutput: "Build successful",
      };

      const result = await service.verify(context);

      expect(result.verdict).toBe("complete");
      expect(result.isComplete).toBe(true);
      expect(result.confidence).toBeGreaterThan(80);
      expect(result.issues).toHaveLength(0);
    });

    it("should aggregate issues from all strategies", async () => {
      const context: VerificationContext = {
        ...baseContext,
        filesChanged: [],
        testResults: { total: 10, passed: 7, failed: 3, skipped: 0, coverage: 70 },
        buildOutput: "error TS2304: Cannot find name",
      };

      const result = await service.verify(context);

      expect(result.verdict).toBe("incomplete");
      expect(result.isComplete).toBe(false);
      expect(result.issues.length).toBeGreaterThan(0);
      // One issue type per default strategy proves all three contributed.
      expect(result.issues.some((i) => i.type === "missing-files")).toBe(true);
      expect(result.issues.some((i) => i.type === "test-failure")).toBe(true);
      expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
    });

    it("should detect deferred work in claim message", async () => {
      const context: VerificationContext = {
        ...baseContext,
        claimMessage: "Implemented basic feature, will add tests in follow-up",
        filesChanged: ["src/feature.ts"],
      };

      const result = await service.verify(context);

      expect(result.isComplete).toBe(false);
      expect(result.issues.some((i) => i.type === "deferred-work")).toBe(true);
      expect(result.issues.some((i) => i.message.includes("deferred work"))).toBe(true);
    });

    it("should generate appropriate suggestions", async () => {
      const context: VerificationContext = {
        ...baseContext,
        testResults: { total: 10, passed: 10, failed: 0, skipped: 0, coverage: 70 },
      };

      const result = await service.verify(context);

      expect(result.suggestions.length).toBeGreaterThan(0);
      expect(result.suggestions.some((s) => s.includes("coverage"))).toBe(true);
    });

    it("should return needs-review verdict for marginal cases", async () => {
      const context: VerificationContext = {
        ...baseContext,
        filesChanged: ["src/feature.ts"],
        testResults: {
          total: 10,
          passed: 9,
          failed: 0,
          skipped: 1,
          coverage: 85, // At threshold - no error
        },
        buildOutput:
          "Build successful\nwarning: unused variable x\nwarning: deprecated API\nwarning: complexity high",
        outputLogs: "Implementation complete",
      };

      const result = await service.verify(context);

      // Has warnings but no errors -> needs-review
      expect(result.verdict).toBe("needs-review");
      expect(result.isComplete).toBe(false);
    });

    it("should calculate confidence from strategy results", async () => {
      const context: VerificationContext = {
        ...baseContext,
        filesChanged: ["src/feature.ts"],
        testResults: { total: 10, passed: 10, failed: 0, skipped: 0, coverage: 95 },
        buildOutput: "Build successful",
      };

      const result = await service.verify(context);

      expect(result.confidence).toBeGreaterThan(85);
    });
  });

  describe("detectDeferredWork", () => {
    // [phrase shown in the test name, claim message containing it]
    it.each([
      ["will implement in follow-up", "Added basic feature, will implement advanced features in follow-up"],
      ["to be added later", "Core functionality done, tests to be added later"],
      ["incremental improvement", "This is an incremental improvement, more to come"],
      ["future enhancement", "Basic feature implemented, future enhancements planned"],
      ["TODO: complete", "Started implementation, TODO: complete validation logic"],
      ["placeholder", "Added placeholder implementation for now"],
      ["stub", "Created stub for the new service"],
    ])('should detect "%s"', (_phrase, message) => {
      const issues = service.detectDeferredWork(message);

      expect(issues.length).toBeGreaterThan(0);
      expect(issues[0].type).toBe("deferred-work");
    });

    it("should return empty array for complete messages", () => {
      const message = "Implemented feature with all tests passing and 95% coverage";

      const issues = service.detectDeferredWork(message);

      expect(issues).toHaveLength(0);
    });
  });

  describe("registerStrategy", () => {
    it("should allow registering custom strategies", async () => {
      let customRuns = 0;

      // Must extend the abstract base: it declares protected members, so a
      // merely look-alike class is not assignable to registerStrategy's parameter.
      class CustomStrategy extends BaseVerificationStrategy {
        name = "custom";

        verify() {
          customRuns += 1;
          return Promise.resolve({
            strategyName: this.name,
            passed: true,
            confidence: 100,
            issues: [],
          });
        }
      }

      service.registerStrategy(new CustomStrategy());

      const result = await service.verify(baseContext);

      expect(result).toBeDefined();
      // The registered strategy must actually take part in verification.
      expect(customRuns).toBe(1);
    });
  });

  describe("calculateConfidence", () => {
    it("should return average confidence from strategies", () => {
      const results = [
        { strategyName: "s1", passed: true, confidence: 90, issues: [] },
        { strategyName: "s2", passed: true, confidence: 80, issues: [] },
        { strategyName: "s3", passed: true, confidence: 70, issues: [] },
      ];

      const confidence = service.calculateConfidence(results);

      expect(confidence).toBe(80); // Average of 90, 80, 70
    });

    it("should return 0 for empty results", () => {
      const confidence = service.calculateConfidence([]);

      expect(confidence).toBe(0);
    });
  });

  describe("generateSuggestions", () => {
    it.each([
      {
        name: "should suggest fixing tests for test failures",
        issue: { type: "test-failure" as const, severity: "error" as const, message: "3 tests failed" },
        expected: "failing tests",
      },
      {
        name: "should suggest fixing build errors",
        issue: { type: "build-error" as const, severity: "error" as const, message: "TypeScript errors" },
        expected: "build errors",
      },
      {
        name: "should suggest increasing coverage",
        issue: { type: "low-coverage" as const, severity: "error" as const, message: "Coverage below 85%" },
        expected: "coverage",
      },
      {
        name: "should suggest completing deferred work",
        issue: { type: "deferred-work" as const, severity: "warning" as const, message: "Work deferred" },
        expected: "deferred work",
      },
    ])("$name", ({ issue, expected }) => {
      const suggestions = service.generateSuggestions([issue]);

      expect(suggestions.some((s) => s.includes(expected))).toBe(true);
    });
  });
});
|
||||
@@ -0,0 +1,147 @@
|
||||
import { Injectable } from "@nestjs/common";
|
||||
import {
|
||||
VerificationContext,
|
||||
VerificationResult,
|
||||
VerificationIssue,
|
||||
StrategyResult,
|
||||
} from "./interfaces";
|
||||
import {
|
||||
BaseVerificationStrategy,
|
||||
FileChangeStrategy,
|
||||
TestOutputStrategy,
|
||||
BuildOutputStrategy,
|
||||
} from "./strategies";
|
||||
|
||||
/**
 * Decides whether an agent's "task complete" claim holds up.
 *
 * Every registered strategy inspects the same {@link VerificationContext};
 * their issues are merged with deferred-work signals found in the claim
 * message, and the combined picture is reduced to a verdict, an averaged
 * confidence score and a list of suggestions.
 */
@Injectable()
export class CompletionVerificationService {
  /**
   * Phrases in a claim message that signal postponed rather than finished work.
   *
   * The patterns are global and shared between calls; that is safe because
   * they are only used through `String.prototype.match`, which restarts a
   * global regex from index 0 on every call.
   */
  private static readonly DEFERRED_WORK_PATTERNS: readonly RegExp[] = [
    /follow-up/gi,
    /to\s+be\s+added\s+later/gi,
    /incremental\s+improvement/gi,
    /future\s+enhancement/gi,
    /TODO:.{0,100}complete/gi,
    /placeholder\s+implementation/gi,
    /\bstub\b/gi,
    /will\s+(?:add|complete|finish|implement).{0,100}later/gi,
    // "partial implementation", "partially implemented", "partially complete"
    /partial(?:ly)?\s+(?:implement(?:ed|ation)|complete)/gi,
    /work\s+in\s+progress/gi,
  ];

  /** Averaged confidence below this value can never yield a "complete" verdict. */
  private static readonly COMPLETE_CONFIDENCE_THRESHOLD = 80;

  /** Suggestion emitted for each issue type, in the order suggestions are listed. */
  private static readonly SUGGESTION_BY_ISSUE_TYPE: ReadonlyArray<
    readonly [VerificationIssue["type"], string]
  > = [
    ["test-failure", "Fix all failing tests before marking task complete"],
    ["build-error", "Resolve all build errors and type-check issues"],
    ["low-coverage", "Increase test coverage to meet the 85% threshold"],
    ["missing-files", "Ensure all necessary files have been modified"],
    [
      "incomplete-implementation",
      "Remove TODO/FIXME comments and complete placeholder implementations",
    ],
    ["deferred-work", "Complete all deferred work or create separate tasks for follow-up items"],
  ];

  private strategies: BaseVerificationStrategy[] = [];

  constructor() {
    this.registerDefaultStrategies();
  }

  /** Installs the built-in file-change, test-output and build-output strategies. */
  private registerDefaultStrategies(): void {
    this.strategies.push(
      new FileChangeStrategy(),
      new TestOutputStrategy(),
      new BuildOutputStrategy()
    );
  }

  /**
   * Runs every registered strategy against `context` and combines the results.
   *
   * @param context - Everything known about the agent's completion claim.
   * @returns The merged issues, averaged confidence, suggestions and verdict.
   *   `isComplete` is true only for a "complete" verdict.
   * @throws Rejects if any strategy's `verify` rejects (strategies run under
   *   `Promise.all`).
   */
  async verify(context: VerificationContext): Promise<VerificationResult> {
    const strategyResults = await Promise.all(
      this.strategies.map((strategy) => strategy.verify(context))
    );

    const issues: VerificationIssue[] = [
      ...strategyResults.flatMap((result) => result.issues),
      ...this.detectDeferredWork(context.claimMessage),
    ];
    const confidence = this.calculateConfidence(strategyResults);
    const verdict = this.determineVerdict(issues, confidence);

    return {
      isComplete: verdict === "complete",
      confidence,
      issues,
      suggestions: this.generateSuggestions(issues),
      verdict,
    };
  }

  /**
   * Reduces issues and confidence to a verdict.
   *
   * Any error means "incomplete". Otherwise a warning, or confidence below the
   * completion threshold, means "needs-review": without this, a low-confidence
   * run with no issues (for example when no strategy had data to check) would
   * be reported as "complete".
   */
  private determineVerdict(
    issues: readonly VerificationIssue[],
    confidence: number
  ): VerificationResult["verdict"] {
    if (issues.some((issue) => issue.severity === "error")) {
      return "incomplete";
    }

    const hasWarnings = issues.some((issue) => issue.severity === "warning");
    const lowConfidence =
      confidence < CompletionVerificationService.COMPLETE_CONFIDENCE_THRESHOLD;

    return hasWarnings || lowConfidence ? "needs-review" : "complete";
  }

  /**
   * Adds a strategy that will take part in all subsequent `verify` calls.
   *
   * @param strategy - Strategy instance to append after the existing ones.
   */
  registerStrategy(strategy: BaseVerificationStrategy): void {
    this.strategies.push(strategy);
  }

  /**
   * Scans a claim message for phrases that indicate postponed work.
   *
   * @param claimMessage - The agent's free-text completion claim.
   * @returns At most one "deferred-work" warning. Its `evidence` lists the
   *   matches of the first pattern that hit; later patterns are not reported.
   */
  detectDeferredWork(claimMessage: string): VerificationIssue[] {
    if (!claimMessage) {
      return [];
    }

    for (const pattern of CompletionVerificationService.DEFERRED_WORK_PATTERNS) {
      const matches = claimMessage.match(pattern);
      if (matches && matches.length > 0) {
        // Only report once, however many patterns would match.
        return [
          {
            type: "deferred-work",
            severity: "warning",
            message: "Claim message indicates deferred work",
            evidence: matches.join(", "),
          },
        ];
      }
    }

    return [];
  }

  /**
   * Averages the confidence of the given strategy results.
   *
   * @param results - Results to average.
   * @returns The mean confidence rounded to the nearest integer, or 0 when
   *   `results` is empty.
   */
  calculateConfidence(results: StrategyResult[]): number {
    if (results.length === 0) {
      return 0;
    }

    const totalConfidence = results.reduce((sum, result) => sum + result.confidence, 0);
    return Math.round(totalConfidence / results.length);
  }

  /**
   * Maps the issue types present in `issues` to remediation suggestions.
   *
   * @param issues - Issues to derive suggestions from.
   * @returns One suggestion per distinct issue type present, in a fixed order.
   */
  generateSuggestions(issues: VerificationIssue[]): string[] {
    const presentTypes = new Set(issues.map((issue) => issue.type));

    return CompletionVerificationService.SUGGESTION_BY_ISSUE_TYPE.filter(([type]) =>
      presentTypes.has(type)
    ).map(([, suggestion]) => suggestion);
  }
}
|
||||
4
apps/api/src/completion-verification/index.ts
Normal file
4
apps/api/src/completion-verification/index.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
export * from "./completion-verification.module";
|
||||
export * from "./completion-verification.service";
|
||||
export * from "./interfaces";
|
||||
export * from "./strategies";
|
||||
2
apps/api/src/completion-verification/interfaces/index.ts
Normal file
2
apps/api/src/completion-verification/interfaces/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export * from "./verification-context.interface";
|
||||
export * from "./verification-result.interface";
|
||||
@@ -0,0 +1,19 @@
|
||||
/**
 * Everything a verification run knows about an agent's completion claim.
 * The same context object is handed to every verification strategy.
 */
export interface VerificationContext {
  /** Identifier of the task being claimed as complete. */
  taskId: string;
  /** Identifier of the workspace the task belongs to. */
  workspaceId: string;
  /** Identifier of the agent making the claim. */
  agentId: string;
  /** The agent's free-text completion claim; scanned for deferred-work phrases. */
  claimMessage: string;
  /** Paths of files the agent changed; an empty list is reported as an error. */
  filesChanged: string[];
  /** Captured agent output; scanned line by line for markers such as TODO/FIXME. */
  outputLogs: string;
  /** Test run summary, when tests were run. */
  testResults?: TestResults;
  /** Raw build output, when a build was run; absence is treated as a neutral result. */
  buildOutput?: string;
  /** Number of earlier attempts — presumably for this same task; confirm with callers. */
  previousAttempts: number;
}
|
||||
|
||||
/** Summary counts from a test run. */
export interface TestResults {
  /** Total number of tests. */
  total: number;
  /** Number of tests that passed. */
  passed: number;
  /** Number of tests that failed. */
  failed: number;
  /** Number of tests that were skipped. */
  skipped: number;
  /** Code coverage, when measured — presumably a 0-100 percentage; confirm with producers. */
  coverage?: number;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
/** Overall outcome of verifying one completion claim. */
export interface VerificationResult {
  /** Whether the work is considered complete. */
  isComplete: boolean;
  /** Overall confidence score, 0-100. */
  confidence: number;
  /** Every issue found during verification. */
  issues: VerificationIssue[];
  /** Human-readable remediation hints derived from the issues. */
  suggestions: string[];
  /** Final classification of the claim. */
  verdict: "complete" | "incomplete" | "needs-review";
}
|
||||
|
||||
/** A single problem found while verifying a completion claim. */
export interface VerificationIssue {
  /** Category of the problem. */
  type:
    | "test-failure"
    | "build-error"
    | "missing-files"
    | "low-coverage"
    | "incomplete-implementation"
    | "deferred-work";
  /** How serious the problem is. */
  severity: "error" | "warning" | "info";
  /** Human-readable description of the problem. */
  message: string;
  /** Optional supporting text, such as the matched log lines or phrases. */
  evidence?: string;
}
|
||||
|
||||
/** Outcome reported by one verification strategy. */
export interface StrategyResult {
  /** Name of the strategy that produced this result. */
  strategyName: string;
  /** Whether the strategy considers its checks passed. */
  passed: boolean;
  /** The strategy's confidence score; results are averaged into the overall score. */
  confidence: number;
  /** Issues this strategy found. */
  issues: VerificationIssue[];
}
|
||||
@@ -0,0 +1,34 @@
|
||||
import type { VerificationContext, StrategyResult } from "../interfaces";
|
||||
|
||||
/**
 * Base class for verification strategies.
 *
 * A concrete strategy supplies a `name` and a `verify` implementation; the
 * protected helpers here cover the common job of pulling evidence out of
 * captured text with a regular expression.
 */
export abstract class BaseVerificationStrategy {
  /** Identifier of the strategy. */
  abstract name: string;

  /**
   * Inspects the context and reports what this strategy found.
   *
   * @param context - The completion claim under verification.
   * @returns The strategy's pass/fail outcome, confidence and issues.
   */
  abstract verify(context: VerificationContext): Promise<StrategyResult>;

  /**
   * Returns every line of `text` that matches `pattern`, trimmed.
   *
   * @param text - Text to scan; it is split on "\n".
   * @param pattern - Pattern each line is tested against. It is never mutated.
   * @returns The matching lines, in order, with surrounding whitespace removed.
   */
  protected extractEvidence(text: string, pattern: RegExp): string[] {
    // A global or sticky regex remembers lastIndex between test() calls, so
    // reusing it across lines would start each search where the previous line's
    // match ended and silently skip matching lines. Work on a private copy and
    // rewind it before every line.
    const lineMatcher = new RegExp(pattern.source, pattern.flags);

    return text
      .split("\n")
      .filter((line) => {
        lineMatcher.lastIndex = 0;
        return lineMatcher.test(line);
      })
      .map((line) => line.trim());
  }

  /**
   * Returns the text of every match of `pattern` in `text`.
   *
   * @param text - Text to scan.
   * @param pattern - Pattern to search for, with or without the global flag.
   *   It is never mutated.
   * @returns The matched substrings (capture groups are ignored), in order.
   */
  protected extractAllMatches(text: string, pattern: RegExp): string[] {
    // Scan with a global copy: an exec() loop on a non-global regex returns the
    // first match forever, and matchAll also advances past zero-length matches
    // that would otherwise stall the scan.
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    const scanner = new RegExp(pattern.source, flags);

    return Array.from(text.matchAll(scanner), (match) => match[0]);
  }
}
|
||||
@@ -0,0 +1,137 @@
|
||||
import { describe, it, expect, beforeEach } from "vitest";
|
||||
import { BuildOutputStrategy } from "./build-output.strategy";
|
||||
import { VerificationContext } from "../interfaces";
|
||||
|
||||
/**
 * Unit tests for BuildOutputStrategy: how TypeScript/ESLint/generic build
 * errors, bare failure markers and warnings in the build output affect the
 * pass flag, the reported issues and the confidence score.
 */
describe("BuildOutputStrategy", () => {
  let strategy: BuildOutputStrategy;
  let baseContext: VerificationContext;

  /** Verifies the base context with only `buildOutput` replaced. */
  const verifyBuild = (buildOutput: string | undefined) =>
    strategy.verify({ ...baseContext, buildOutput });

  beforeEach(() => {
    strategy = new BuildOutputStrategy();
    baseContext = {
      taskId: "task-1",
      workspaceId: "workspace-1",
      agentId: "agent-1",
      claimMessage: "Built successfully",
      filesChanged: ["src/feature.ts"],
      outputLogs: "",
      previousAttempts: 0,
    };
  });

  describe("verify", () => {
    it("should pass when build succeeds", async () => {
      const result = await verifyBuild("Build completed successfully\nNo errors found");

      expect(result.passed).toBe(true);
      expect(result.strategyName).toBe("build-output");
      expect(result.confidence).toBeGreaterThanOrEqual(90);
      expect(result.issues).toHaveLength(0);
    });

    it("should fail when TypeScript errors found", async () => {
      const result = await verifyBuild('error TS2304: Cannot find name "unknown".\nBuild failed');

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
      expect(result.issues.some((i) => i.message.includes("TypeScript"))).toBe(true);
    });

    it("should fail when build errors found", async () => {
      const result = await verifyBuild("Error: Module not found\nBuild failed with 1 error");

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
    });

    it("should detect ESLint errors", async () => {
      const result = await verifyBuild("ESLint error: no-unused-vars\n1 error found");

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.message.includes("ESLint"))).toBe(true);
    });

    it("should warn about lint warnings", async () => {
      const result = await verifyBuild("warning: unused variable\nBuild completed with warnings");

      // Warnings are reported but do not fail the strategy.
      expect(result.passed).toBe(true);
      expect(result.issues.some((i) => i.severity === "warning")).toBe(true);
    });

    it("should pass when no build output provided", async () => {
      const result = await verifyBuild(undefined);

      expect(result.passed).toBe(true);
      expect(result.confidence).toBeGreaterThan(0);
    });

    it("should reduce confidence with multiple errors", async () => {
      const result = await verifyBuild(
        "error TS2304: Cannot find name\nerror TS2345: Type mismatch\nerror TS1005: Syntax error\nBuild failed"
      );

      expect(result.passed).toBe(false);
      expect(result.confidence).toBeLessThan(50);
      expect(result.issues.length).toBeGreaterThan(0);
    });

    it("should detect compilation failures", async () => {
      const result = await verifyBuild("Compilation failed\nProcess exited with code 1");

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.type === "build-error")).toBe(true);
    });

    it("should have high confidence with clean build", async () => {
      const result = await verifyBuild("Build successful\nNo errors or warnings\nCompleted in 5s");

      expect(result.passed).toBe(true);
      expect(result.confidence).toBeGreaterThanOrEqual(95);
      expect(result.issues).toHaveLength(0);
    });
  });
});
|
||||
@@ -0,0 +1,105 @@
|
||||
import { BaseVerificationStrategy } from "./base-verification.strategy";
|
||||
import type { VerificationContext, StrategyResult, VerificationIssue } from "../interfaces";
|
||||
|
||||
/**
 * Verification strategy that inspects captured build output for TypeScript
 * errors, ESLint errors, generic build errors, bare failure markers and
 * warnings.
 */
export class BuildOutputStrategy extends BaseVerificationStrategy {
  name = "build-output";

  /**
   * Analyses `context.buildOutput`.
   *
   * @param context - The completion claim under verification; only
   *   `buildOutput` is read.
   * @returns A result that passes when no error-severity issue was found.
   *   Confidence starts at 100, drops 30 points per counted error and up to
   *   10 points for warnings, and never goes below 0. When there is no build
   *   output at all the result passes with a neutral confidence of 50.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const { buildOutput } = context;

    // No (or empty) build output: assume the build wasn't run (neutral result).
    if (!buildOutput) {
      return Promise.resolve({
        strategyName: this.name,
        passed: true,
        confidence: 50,
        issues: [],
      });
    }

    const issues: VerificationIssue[] = [];

    // The line patterns below are deliberately non-global: they are tested
    // once per line, and a global regex would carry its lastIndex from one
    // line to the next and miss consecutive matching lines.
    const tsErrors = this.extractEvidence(buildOutput, /error TS\d+:/i);
    if (tsErrors.length > 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Found ${tsErrors.length.toString()} TypeScript error(s)`,
        evidence: tsErrors.slice(0, 5).join("\n"), // Limit to first 5
      });
    }

    const eslintErrors = this.extractEvidence(buildOutput, /ESLint.*error/i);
    if (eslintErrors.length > 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Found ${eslintErrors.length.toString()} ESLint error(s)`,
        evidence: eslintErrors.slice(0, 5).join("\n"),
      });
    }

    // Generic build errors are only considered when no TypeScript errors were
    // found, so the same failure is neither reported nor penalised twice.
    const genericErrors =
      tsErrors.length === 0
        ? this.extractEvidence(buildOutput, /\berror\b.*(?:build|compilation|failed)/i)
        : [];
    if (genericErrors.length > 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Build errors detected`,
        evidence: genericErrors.slice(0, 5).join("\n"),
      });
    }

    // A bare failure marker with no specific error above still fails the build.
    const failedWithoutDetails =
      issues.length === 0 && /compilation failed|build failed/i.test(buildOutput);
    if (failedWithoutDetails) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: "Compilation failed",
      });
    }

    const warnings = this.extractEvidence(buildOutput, /\bwarning\b/i);
    if (warnings.length > 0) {
      issues.push({
        type: "build-error",
        severity: "warning",
        message: `Found ${warnings.length.toString()} warning(s)`,
        evidence: warnings.slice(0, 3).join("\n"),
      });
    }

    // Count exactly what was reported as an error. The bare failure marker
    // counts as one error so a failed build can never score 100.
    const errorCount =
      tsErrors.length +
      eslintErrors.length +
      genericErrors.length +
      (failedWithoutDetails ? 1 : 0);

    // Aggressive penalty: 30 points per error (3 errors = 10% confidence),
    // plus 2 points per warning capped at 10.
    const errorPenalty = errorCount * 30;
    const warningPenalty = Math.min(10, warnings.length * 2);
    const confidence = Math.max(
      0,
      Math.round(Math.max(0, 100 - errorPenalty) - warningPenalty)
    );

    return Promise.resolve({
      strategyName: this.name,
      passed: !issues.some((issue) => issue.severity === "error"),
      confidence,
      issues,
    });
  }
}
|
||||
@@ -0,0 +1,133 @@
|
||||
import { describe, it, expect, beforeEach } from "vitest";
|
||||
import { FileChangeStrategy } from "./file-change.strategy";
|
||||
import { VerificationContext } from "../interfaces";
|
||||
|
||||
/**
 * Unit tests for FileChangeStrategy: the missing-files check and detection
 * of TODO/FIXME/placeholder/stub markers in the output logs, along with the
 * effect on the confidence score.
 */
describe("FileChangeStrategy", () => {
  let strategy: FileChangeStrategy;
  let baseContext: VerificationContext;

  /** Verifies the base context with the given fields replaced. */
  const verifyWith = (overrides: Partial<VerificationContext>) =>
    strategy.verify({ ...baseContext, ...overrides });

  beforeEach(() => {
    strategy = new FileChangeStrategy();
    baseContext = {
      taskId: "task-1",
      workspaceId: "workspace-1",
      agentId: "agent-1",
      claimMessage: "Implemented feature",
      filesChanged: [],
      outputLogs: "",
      previousAttempts: 0,
    };
  });

  describe("verify", () => {
    it("should pass when files are changed", async () => {
      const result = await verifyWith({
        filesChanged: ["src/feature.ts", "src/feature.spec.ts"],
      });

      expect(result.passed).toBe(true);
      expect(result.strategyName).toBe("file-change");
      expect(result.confidence).toBeGreaterThan(0);
      expect(result.issues).toHaveLength(0);
    });

    it("should fail when no files are changed", async () => {
      const result = await verifyWith({ filesChanged: [] });

      expect(result.passed).toBe(false);
      expect(result.issues).toHaveLength(1);
      expect(result.issues[0].type).toBe("missing-files");
      expect(result.issues[0].severity).toBe("error");
    });

    it("should detect TODO comments in output logs", async () => {
      const result = await verifyWith({
        filesChanged: ["src/feature.ts"],
        outputLogs: "File modified\nTODO: implement this later\nDone",
      });

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
      expect(result.issues.some((i) => i.message.includes("TODO"))).toBe(true);
    });

    it("should detect FIXME comments in output logs", async () => {
      const result = await verifyWith({
        filesChanged: ["src/feature.ts"],
        outputLogs: "File modified\nFIXME: broken implementation\nDone",
      });

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
      expect(result.issues.some((i) => i.message.includes("FIXME"))).toBe(true);
    });

    it("should detect placeholder implementations", async () => {
      const result = await verifyWith({
        filesChanged: ["src/feature.ts"],
        outputLogs: "Added placeholder implementation for now",
      });

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
    });

    it("should detect stub implementations", async () => {
      const result = await verifyWith({
        filesChanged: ["src/feature.ts"],
        outputLogs: "Created stub for testing",
      });

      expect(result.passed).toBe(false);
      expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true);
    });

    it("should reduce confidence with multiple issues", async () => {
      const result = await verifyWith({
        filesChanged: ["src/feature.ts"],
        outputLogs: "TODO: implement\nFIXME: broken\nPlaceholder added",
      });

      expect(result.passed).toBe(false);
      expect(result.confidence).toBeLessThan(50);
      expect(result.issues.length).toBeGreaterThan(1);
    });

    it("should have high confidence when no issues found", async () => {
      const result = await verifyWith({
        filesChanged: ["src/feature.ts", "src/feature.spec.ts"],
        outputLogs: "Implemented feature successfully\nAll tests passing",
      });

      expect(result.passed).toBe(true);
      expect(result.confidence).toBeGreaterThanOrEqual(90);
      expect(result.issues).toHaveLength(0);
    });
  });
});
|
||||
@@ -0,0 +1,79 @@
|
||||
import { BaseVerificationStrategy } from "./base-verification.strategy";
|
||||
import type { VerificationContext, StrategyResult, VerificationIssue } from "../interfaces";
|
||||
|
||||
/**
 * Verification strategy that looks at the list of changed files and at the
 * output logs for textual markers of unfinished work.
 *
 * Every finding is reported with severity "error", so any finding makes the
 * result fail. Confidence starts at 100 and drops by 20 per finding, never
 * going below 0.
 */
export class FileChangeStrategy extends BaseVerificationStrategy {
  name = "file-change";

  /**
   * Runs the file-change checks against a verification context.
   *
   * @param context - Reads `filesChanged` and `outputLogs`.
   * @returns A resolved promise carrying the strategy name, pass/fail flag,
   *   confidence score and the collected issues.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const { filesChanged, outputLogs } = context;
    const found: VerificationIssue[] = [];

    // An empty change list is reported as an error.
    if (filesChanged.length === 0) {
      found.push({
        type: "missing-files",
        severity: "error",
        message: "No files were changed",
      });
    }

    // Log markers checked in this order: TODO, FIXME, placeholder, stub.
    // The regexes are built fresh on each call, so their global-flag state is
    // never shared between invocations.
    const markers: { pattern: RegExp; describe: (count: number) => string }[] = [
      {
        pattern: /TODO:/gi,
        describe: (count) => `Found ${count.toString()} TODO comment(s)`,
      },
      {
        pattern: /FIXME:/gi,
        describe: (count) => `Found ${count.toString()} FIXME comment(s)`,
      },
      {
        pattern: /placeholder/gi,
        describe: () => "Found placeholder implementation",
      },
      {
        pattern: /\bstub\b/gi,
        describe: () => "Found stub implementation",
      },
    ];

    for (const { pattern, describe } of markers) {
      // extractEvidence is inherited from the base strategy; presumably it
      // returns the matching log excerpts (confirm against the base class).
      const hits = this.extractEvidence(outputLogs, pattern);
      if (hits.length === 0) {
        continue;
      }
      found.push({
        type: "incomplete-implementation",
        severity: "error",
        message: describe(hits.length),
        evidence: hits.join("\n"),
      });
    }

    // Flat penalty of 20 points per finding, floored at 0.
    const startingConfidence = 100;
    const costPerFinding = 20;
    const confidence = Math.max(0, startingConfidence - found.length * costPerFinding);

    const hasError = found.some((finding) => finding.severity === "error");

    return Promise.resolve({
      strategyName: this.name,
      passed: !hasError,
      confidence,
      issues: found,
    });
  }
}
|
||||
4
apps/api/src/completion-verification/strategies/index.ts
Normal file
4
apps/api/src/completion-verification/strategies/index.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
export * from "./base-verification.strategy";
|
||||
export * from "./file-change.strategy";
|
||||
export * from "./test-output.strategy";
|
||||
export * from "./build-output.strategy";
|
||||
@@ -0,0 +1,167 @@
|
||||
import { describe, it, expect, beforeEach } from "vitest";
|
||||
import { TestOutputStrategy } from "./test-output.strategy";
|
||||
import { VerificationContext } from "../interfaces";
|
||||
|
||||
// Unit tests for TestOutputStrategy.verify: pass/fail decisions, warnings for
// skipped tests, the coverage threshold, and confidence scoring.
describe("TestOutputStrategy", () => {
  let strategy: TestOutputStrategy;
  let baseContext: VerificationContext;

  // Builds a context from the shared base plus the given test results.
  // Reads baseContext at call time, i.e. after beforeEach has populated it.
  const withResults = (testResults: VerificationContext["testResults"]): VerificationContext => ({
    ...baseContext,
    testResults,
  });

  beforeEach(() => {
    strategy = new TestOutputStrategy();
    baseContext = {
      taskId: "task-1",
      workspaceId: "workspace-1",
      agentId: "agent-1",
      claimMessage: "Implemented tests",
      filesChanged: ["src/feature.spec.ts"],
      outputLogs: "",
      previousAttempts: 0,
    };
  });

  describe("verify", () => {
    it("should pass when all tests pass", async () => {
      const outcome = await strategy.verify(
        withResults({ total: 10, passed: 10, failed: 0, skipped: 0, coverage: 90 })
      );

      expect(outcome.passed).toBe(true);
      expect(outcome.strategyName).toBe("test-output");
      expect(outcome.confidence).toBeGreaterThanOrEqual(90);
      expect(outcome.issues).toHaveLength(0);
    });

    it("should fail when tests fail", async () => {
      const { passed, issues } = await strategy.verify(
        withResults({ total: 10, passed: 7, failed: 3, skipped: 0, coverage: 80 })
      );

      expect(passed).toBe(false);
      expect(issues.some(({ type }) => type === "test-failure")).toBe(true);
      expect(issues.some(({ message }) => message.includes("3 test(s) failed"))).toBe(true);
    });

    it("should warn about skipped tests", async () => {
      const { passed, issues } = await strategy.verify(
        withResults({ total: 10, passed: 8, failed: 0, skipped: 2, coverage: 85 })
      );

      expect(passed).toBe(true);
      expect(issues.some(({ severity }) => severity === "warning")).toBe(true);
      expect(issues.some(({ message }) => message.includes("2 test(s) skipped"))).toBe(true);
    });

    it("should fail when coverage is below threshold", async () => {
      const { passed, issues } = await strategy.verify(
        withResults({ total: 10, passed: 10, failed: 0, skipped: 0, coverage: 70 })
      );

      expect(passed).toBe(false);
      expect(issues.some(({ type }) => type === "low-coverage")).toBe(true);
      expect(issues.some(({ message }) => message.includes("70%"))).toBe(true);
    });

    it("should pass when coverage is at threshold", async () => {
      const { passed, issues } = await strategy.verify(
        withResults({ total: 10, passed: 10, failed: 0, skipped: 0, coverage: 85 })
      );

      expect(passed).toBe(true);
      expect(issues.some(({ type }) => type === "low-coverage")).toBe(false);
    });

    it("should pass when no test results provided", async () => {
      const outcome = await strategy.verify(withResults(undefined));

      expect(outcome.passed).toBe(true);
      expect(outcome.confidence).toBeGreaterThan(0);
    });

    it("should reduce confidence based on failure rate", async () => {
      const outcome = await strategy.verify(
        withResults({ total: 10, passed: 5, failed: 5, skipped: 0, coverage: 80 })
      );

      expect(outcome.passed).toBe(false);
      expect(outcome.confidence).toBeLessThan(50);
    });

    it("should have high confidence with perfect results", async () => {
      const outcome = await strategy.verify(
        withResults({ total: 20, passed: 20, failed: 0, skipped: 0, coverage: 95 })
      );

      expect(outcome.passed).toBe(true);
      expect(outcome.confidence).toBeGreaterThanOrEqual(95);
      expect(outcome.issues).toHaveLength(0);
    });
  });
});
|
||||
@@ -0,0 +1,85 @@
|
||||
import { BaseVerificationStrategy } from "./base-verification.strategy";
|
||||
import type { VerificationContext, StrategyResult, VerificationIssue } from "../interfaces";
|
||||
|
||||
/**
 * Verification strategy that judges a completion claim from structured test
 * results (`context.testResults`).
 *
 * Errors (which fail the check): any failed test, or coverage below
 * COVERAGE_THRESHOLD. Warnings (which do not fail the check): skipped tests,
 * or a result set that reports zero executed tests.
 *
 * Confidence is the minimum of pass rate and coverage, minus proportional
 * penalties for failed (up to 30) and skipped (up to 20) tests, rounded and
 * floored at 0.
 */
export class TestOutputStrategy extends BaseVerificationStrategy {
  name = "test-output";
  private readonly COVERAGE_THRESHOLD = 85;
  // Confidence used when there is no evidence either way (no results, or an empty run).
  private readonly NEUTRAL_CONFIDENCE = 50;

  /**
   * Evaluates the test results attached to the context.
   *
   * @param context - Reads `testResults` (`total`, `passed`, `failed`,
   *   `skipped` and optional `coverage`).
   * @returns A resolved promise with the strategy name, pass/fail flag,
   *   confidence (0-100) and the issues found. When no results are attached
   *   the outcome is neutral: passed, confidence 50, no issues.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const issues: VerificationIssue[] = [];
    const { testResults } = context;

    // If no test results, assume tests weren't run (neutral result)
    if (!testResults) {
      return Promise.resolve({
        strategyName: this.name,
        passed: true,
        confidence: this.NEUTRAL_CONFIDENCE,
        issues: [],
      });
    }

    const ranTests = testResults.total > 0;

    // An empty run proves nothing; without this, a report of 0 tests (and no
    // coverage figure) would be scored as a perfect 100% confidence pass.
    if (!ranTests) {
      issues.push({
        type: "test-failure",
        severity: "warning",
        message: "Test results report 0 executed tests",
      });
    }

    // Check for failed tests
    if (testResults.failed > 0) {
      issues.push({
        type: "test-failure",
        severity: "error",
        message: `${testResults.failed.toString()} test(s) failed out of ${testResults.total.toString()}`,
      });
    }

    // Check for skipped tests
    if (testResults.skipped > 0) {
      issues.push({
        type: "test-failure",
        severity: "warning",
        message: `${testResults.skipped.toString()} test(s) skipped`,
      });
    }

    // Check coverage threshold
    if (testResults.coverage !== undefined && testResults.coverage < this.COVERAGE_THRESHOLD) {
      issues.push({
        type: "low-coverage",
        severity: "error",
        message: `Code coverage ${testResults.coverage.toString()}% is below threshold of ${this.COVERAGE_THRESHOLD.toString()}%`,
      });
    }

    // Share of the run a count represents. With an empty run there is no
    // meaningful denominator, so any non-zero count is charged the full
    // penalty instead of dividing by zero.
    const shareOfRun = (count: number): number => (ranTests ? count / testResults.total : 1);

    // Start from full confidence only when tests actually ran.
    let confidence = ranTests ? 100 : this.NEUTRAL_CONFIDENCE;

    // Reduce confidence based on pass rate (use minimum, not average)
    if (ranTests) {
      const passRate = (testResults.passed / testResults.total) * 100;
      confidence = Math.min(confidence, passRate);
    }

    // Further reduce for coverage (use minimum of pass rate and coverage)
    if (testResults.coverage !== undefined) {
      confidence = Math.min(confidence, testResults.coverage);
    }

    // Additional penalty for failures, proportional to the failure share
    if (testResults.failed > 0) {
      confidence -= shareOfRun(testResults.failed) * 30;
    }

    // Penalty for skipped tests, proportional to the skipped share
    if (testResults.skipped > 0) {
      confidence -= shareOfRun(testResults.skipped) * 20;
    }

    confidence = Math.max(0, Math.round(confidence));

    return Promise.resolve({
      strategyName: this.name,
      passed: !issues.some((issue) => issue.severity === "error"),
      confidence,
      issues,
    });
  }
}
|
||||
Reference in New Issue
Block a user