Implement a verification engine that determines whether an AI agent's work is truly complete by analyzing its outputs and detecting deferred-work patterns.

Strategies:
- FileChangeStrategy: Detect TODO/FIXME, placeholders, stubs
- TestOutputStrategy: Validate pass rates, coverage (85%), skipped tests
- BuildOutputStrategy: Detect TS errors, ESLint errors, build failures

Deferred work detection patterns:
- "follow-up", "to be added later"
- "incremental improvement", "future enhancement"
- "TODO: complete", "placeholder implementation"
- "stub", "work in progress", "partially implemented"

Features:
- Confidence scoring (0-100%)
- Verdict system: complete/incomplete/needs-review
- Actionable suggestions for improvements
- Strategy-based extensibility

Integration:
- Complements Quality Orchestrator (#134)
- Uses Quality Gate Config (#135)

Tests: 46 passing with 95.27% coverage

Fixes #136

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
148 lines
4.2 KiB
TypeScript
148 lines
4.2 KiB
TypeScript
import { Injectable } from "@nestjs/common";
|
|
import {
|
|
VerificationContext,
|
|
VerificationResult,
|
|
VerificationIssue,
|
|
StrategyResult,
|
|
} from "./interfaces";
|
|
import {
|
|
BaseVerificationStrategy,
|
|
FileChangeStrategy,
|
|
TestOutputStrategy,
|
|
BuildOutputStrategy,
|
|
} from "./strategies";
|
|
|
|
/**
 * Decides whether a "work is complete" claim holds up.
 *
 * Every registered strategy inspects the verification context and reports
 * issues plus a confidence score. The service merges those reports with a
 * scan of the claim message for deferred-work wording, averages the
 * confidence, and derives a verdict:
 *
 * - `incomplete`   – any issue with severity "error", or confidence below 60
 * - `needs-review` – any issue with severity "warning", or confidence below 80
 * - `complete`     – everything else
 */
@Injectable()
export class CompletionVerificationService {
  /** Lowest aggregate confidence (inclusive) that still allows "complete". */
  private static readonly COMPLETE_CONFIDENCE = 80;

  /** Lowest aggregate confidence (inclusive) that still allows "needs-review". */
  private static readonly REVIEW_CONFIDENCE = 60;

  /**
   * Phrases that signal work was postponed rather than finished.
   *
   * The patterns are shared across calls; that is safe with the `g` flag
   * because `String.prototype.match` restarts a global regex from index 0.
   */
  private static readonly DEFERRED_WORK_PATTERNS: readonly RegExp[] = [
    // Accepts "follow-up", "follow up" and "followup".
    /follow[-\s]?up/gi,
    /to\s+be\s+added\s+later/gi,
    /incremental\s+improvement/gi,
    /future\s+enhancement/gi,
    /TODO:.{0,100}complete/gi,
    /placeholder\s+implementation/gi,
    /\bstub\b/gi,
    /will\s+(?:add|complete|finish|implement).{0,100}later/gi,
    // "partial" or "partially"; the previous `partially?` only made the "y"
    // optional and therefore never matched "partial ...".
    /partial(?:ly)?\s+(?:implemented|implementation|complete)/gi,
    /work\s+in\s+progress/gi,
  ];

  /** Issue type -> suggestion text, in the order suggestions are emitted. */
  private static readonly SUGGESTIONS_BY_ISSUE_TYPE: ReadonlyArray<
    readonly [VerificationIssue["type"], string]
  > = [
    ["test-failure", "Fix all failing tests before marking task complete"],
    ["build-error", "Resolve all build errors and type-check issues"],
    ["low-coverage", "Increase test coverage to meet the 85% threshold"],
    ["missing-files", "Ensure all necessary files have been modified"],
    [
      "incomplete-implementation",
      "Remove TODO/FIXME comments and complete placeholder implementations",
    ],
    [
      "deferred-work",
      "Complete all deferred work or create separate tasks for follow-up items",
    ],
  ];

  private readonly strategies: BaseVerificationStrategy[] = [];

  constructor() {
    this.registerDefaultStrategies();
  }

  /** Installs the built-in file-change, test-output and build-output strategies. */
  private registerDefaultStrategies(): void {
    this.strategies.push(
      new FileChangeStrategy(),
      new TestOutputStrategy(),
      new BuildOutputStrategy(),
    );
  }

  /**
   * Runs every registered strategy against `context` and combines the outcome.
   *
   * @param context - Data handed unchanged to each strategy; its
   *   `claimMessage` is additionally scanned for deferred-work wording.
   * @returns All collected issues, the averaged confidence, the verdict
   *   (`isComplete` is true only for "complete") and matching suggestions.
   * @throws Whatever a strategy rejects with; strategies run via
   *   `Promise.all`, so the first rejection fails the whole verification.
   */
  async verify(context: VerificationContext): Promise<VerificationResult> {
    const strategyResults = await Promise.all(
      this.strategies.map((strategy) => strategy.verify(context)),
    );

    const issues: VerificationIssue[] = [
      ...strategyResults.flatMap((result) => result.issues),
      ...this.detectDeferredWork(context.claimMessage),
    ];

    const confidence = this.calculateConfidence(strategyResults);
    const verdict = this.determineVerdict(issues, confidence);

    return {
      isComplete: verdict === "complete",
      confidence,
      issues,
      suggestions: this.generateSuggestions(issues),
      verdict,
    };
  }

  /**
   * Maps issues and aggregate confidence onto a verdict.
   *
   * Confidence below the review threshold is reported as "incomplete" so
   * that a low score can never fall through to "complete".
   */
  private determineVerdict(
    issues: readonly VerificationIssue[],
    confidence: number,
  ): "complete" | "incomplete" | "needs-review" {
    const hasErrors = issues.some((issue) => issue.severity === "error");
    if (hasErrors || confidence < CompletionVerificationService.REVIEW_CONFIDENCE) {
      return "incomplete";
    }

    const hasWarnings = issues.some((issue) => issue.severity === "warning");
    if (hasWarnings || confidence < CompletionVerificationService.COMPLETE_CONFIDENCE) {
      return "needs-review";
    }

    return "complete";
  }

  /** Adds a strategy that will take part in every later `verify` call. */
  registerStrategy(strategy: BaseVerificationStrategy): void {
    this.strategies.push(strategy);
  }

  /**
   * Scans a claim message for wording that admits postponed work.
   *
   * @param claimMessage - Free-text completion claim; an empty or missing
   *   message yields no issues.
   * @returns At most one "deferred-work" warning. Its evidence lists the
   *   matches of the first pattern that hits; later patterns are not
   *   consulted so the same claim is never reported twice.
   */
  detectDeferredWork(claimMessage: string): VerificationIssue[] {
    if (!claimMessage) {
      return [];
    }

    for (const pattern of CompletionVerificationService.DEFERRED_WORK_PATTERNS) {
      const matches = claimMessage.match(pattern);
      if (matches !== null && matches.length > 0) {
        return [
          {
            type: "deferred-work",
            severity: "warning",
            message: "Claim message indicates deferred work",
            evidence: matches.join(", "),
          },
        ];
      }
    }

    return [];
  }

  /**
   * Averages the strategies' confidence scores.
   *
   * @param results - Strategy results; an empty list yields 0.
   * @returns The rounded mean, clamped to 0-100. A non-finite score counts
   *   as 0 so a single bad value cannot turn the aggregate into NaN.
   */
  calculateConfidence(results: StrategyResult[]): number {
    if (results.length === 0) {
      return 0;
    }

    const total = results.reduce(
      (sum, result) => sum + (Number.isFinite(result.confidence) ? result.confidence : 0),
      0,
    );
    const mean = Math.round(total / results.length);

    return Math.min(100, Math.max(0, mean));
  }

  /**
   * Produces one suggestion per distinct issue type that has a known remedy.
   *
   * @param issues - Issues collected during verification.
   * @returns Suggestions in a fixed order (tests, build, coverage, files,
   *   implementation, deferred work); unknown issue types add nothing.
   */
  generateSuggestions(issues: VerificationIssue[]): string[] {
    const presentTypes = new Set(issues.map((issue) => issue.type));

    return CompletionVerificationService.SUGGESTIONS_BY_ISSUE_TYPE
      .filter(([issueType]) => presentTypes.has(issueType))
      .map(([, suggestion]) => suggestion);
  }
}
|