feat(#136): build Completion Verification Engine
Implement verification engine to determine if AI agent work is truly complete by analyzing outputs and detecting deferred work patterns.

Strategies:
- FileChangeStrategy: Detect TODO/FIXME, placeholders, stubs
- TestOutputStrategy: Validate pass rates, coverage (85%), skipped tests
- BuildOutputStrategy: Detect TS errors, ESLint errors, build failures

Deferred work detection patterns:
- "follow-up", "to be added later"
- "incremental improvement", "future enhancement"
- "TODO: complete", "placeholder implementation"
- "stub", "work in progress", "partially implemented"

Features:
- Confidence scoring (0-100%)
- Verdict system: complete/incomplete/needs-review
- Actionable suggestions for improvements
- Strategy-based extensibility

Integration:
- Complements Quality Orchestrator (#134)
- Uses Quality Gate Config (#135)

Tests: 46 passing with 95.27% coverage

Fixes #136

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,147 @@
|
||||
import { Injectable } from "@nestjs/common";
|
||||
import {
|
||||
VerificationContext,
|
||||
VerificationResult,
|
||||
VerificationIssue,
|
||||
StrategyResult,
|
||||
} from "./interfaces";
|
||||
import {
|
||||
BaseVerificationStrategy,
|
||||
FileChangeStrategy,
|
||||
TestOutputStrategy,
|
||||
BuildOutputStrategy,
|
||||
} from "./strategies";
|
||||
|
||||
/**
 * Decides whether a completion claim is backed by the evidence in a
 * {@link VerificationContext}.
 *
 * Every registered strategy is run against the context, the claim message is
 * scanned for wording that signals deferred work, and the combined issues and
 * averaged confidence are reduced to a single verdict plus suggestions.
 */
@Injectable()
export class CompletionVerificationService {
  /** Minimum averaged confidence required for a "complete" verdict. */
  private static readonly COMPLETE_CONFIDENCE_THRESHOLD = 80;

  /**
   * Phrases in a claim message that indicate work was postponed.
   *
   * The patterns carry the `g` flag so `String.prototype.match` returns every
   * occurrence; `match` resets `lastIndex` itself, so sharing the instances
   * across calls is safe.
   */
  private static readonly DEFERRED_WORK_PATTERNS: readonly RegExp[] = [
    /follow[-\s]?up/gi, // "follow-up", "follow up", "followup"
    /to\s+be\s+added\s+later/gi,
    /incremental\s+improvement/gi,
    /future\s+enhancement/gi,
    /TODO:.{0,100}complete/gi,
    /placeholder\s+implementation/gi,
    /\bstub\b/gi,
    /will\s+(?:add|complete|finish|implement).{0,100}later/gi,
    /partial(?:ly)?\s+(?:implemented|complete)/gi, // "partial" and "partially"
    /work\s+in\s+progress/gi,
  ];

  /** Issue type -> suggestion, in the order suggestions are emitted. */
  private static readonly SUGGESTIONS_BY_ISSUE_TYPE: ReadonlyArray<readonly [string, string]> = [
    ["test-failure", "Fix all failing tests before marking task complete"],
    ["build-error", "Resolve all build errors and type-check issues"],
    ["low-coverage", "Increase test coverage to meet the 85% threshold"],
    ["missing-files", "Ensure all necessary files have been modified"],
    [
      "incomplete-implementation",
      "Remove TODO/FIXME comments and complete placeholder implementations",
    ],
    ["deferred-work", "Complete all deferred work or create separate tasks for follow-up items"],
  ];

  private readonly strategies: BaseVerificationStrategy[] = [];

  constructor() {
    this.registerDefaultStrategies();
  }

  /** Registers the file-change, test-output and build-output strategies. */
  private registerDefaultStrategies(): void {
    this.strategies.push(
      new FileChangeStrategy(),
      new TestOutputStrategy(),
      new BuildOutputStrategy()
    );
  }

  /**
   * Verifies a completion claim.
   *
   * All registered strategies run concurrently through `Promise.all`, so a
   * rejection from any single strategy rejects this call.
   *
   * @param context - Evidence handed to every strategy; its `claimMessage` is
   *   additionally scanned for deferred-work wording.
   * @returns The verdict, averaged confidence, all collected issues and the
   *   suggestions derived from them.
   */
  async verify(context: VerificationContext): Promise<VerificationResult> {
    const strategyResults = await Promise.all(
      this.strategies.map((strategy) => strategy.verify(context))
    );

    const issues = [
      ...strategyResults.flatMap((result) => result.issues),
      ...this.detectDeferredWork(context.claimMessage),
    ];

    const confidence = this.calculateConfidence(strategyResults);
    const verdict = this.determineVerdict(issues, confidence);

    return {
      isComplete: verdict === "complete",
      confidence,
      issues,
      suggestions: this.generateSuggestions(issues),
      verdict,
    };
  }

  /**
   * Adds a strategy that will take part in every subsequent {@link verify} call.
   *
   * @param strategy - Strategy instance to append to the registered list.
   */
  registerStrategy(strategy: BaseVerificationStrategy): void {
    this.strategies.push(strategy);
  }

  /**
   * Reduces collected issues and the averaged confidence to a verdict.
   *
   * - any issue with severity "error" yields "incomplete";
   * - otherwise a "warning" issue, or a confidence that is not at least the
   *   completion threshold (80), yields "needs-review";
   * - otherwise the verdict is "complete".
   *
   * The confidence check is written as "not >= threshold" on purpose: low
   * scores (for example 0 when no strategy ran) and NaN must never be
   * reported as complete.
   *
   * @param issues - All issues gathered for the claim.
   * @param confidence - Averaged confidence as produced by {@link calculateConfidence}.
   * @returns The verdict for the claim.
   */
  determineVerdict(
    issues: VerificationIssue[],
    confidence: number
  ): "complete" | "incomplete" | "needs-review" {
    if (issues.some((issue) => issue.severity === "error")) {
      return "incomplete";
    }

    const hasWarnings = issues.some((issue) => issue.severity === "warning");
    const isConfident =
      confidence >= CompletionVerificationService.COMPLETE_CONFIDENCE_THRESHOLD;

    return hasWarnings || !isConfident ? "needs-review" : "complete";
  }

  /**
   * Scans a claim message for wording that signals postponed work.
   *
   * @param claimMessage - Free-text completion claim; an empty or missing
   *   message produces no issues.
   * @returns At most one "deferred-work" warning whose `evidence` lists every
   *   matched phrase, comma-separated, grouped in pattern order.
   */
  detectDeferredWork(claimMessage: string): VerificationIssue[] {
    if (!claimMessage) {
      return [];
    }

    const matchedPhrases: string[] = [];
    for (const pattern of CompletionVerificationService.DEFERRED_WORK_PATTERNS) {
      const found = claimMessage.match(pattern);
      if (found) {
        matchedPhrases.push(...found);
      }
    }

    if (matchedPhrases.length === 0) {
      return [];
    }

    // Reported once, however many phrases matched.
    return [
      {
        type: "deferred-work",
        severity: "warning",
        message: "Claim message indicates deferred work",
        evidence: matchedPhrases.join(", "),
      },
    ];
  }

  /**
   * Averages the confidence reported by each strategy.
   *
   * @param results - Strategy results to average.
   * @returns The arithmetic mean rounded to the nearest integer, or 0 when
   *   there are no results.
   */
  calculateConfidence(results: StrategyResult[]): number {
    if (results.length === 0) {
      return 0;
    }

    const total = results.reduce((sum, result) => sum + result.confidence, 0);
    return Math.round(total / results.length);
  }

  /**
   * Maps the issue types present to remediation suggestions.
   *
   * @param issues - Issues collected for the claim.
   * @returns One suggestion per recognised issue type that occurs at least
   *   once, in a fixed order; unrecognised types contribute nothing.
   */
  generateSuggestions(issues: VerificationIssue[]): string[] {
    const presentTypes = new Set<string>(issues.map((issue) => issue.type));

    return CompletionVerificationService.SUGGESTIONS_BY_ISSUE_TYPE.filter(([issueType]) =>
      presentTypes.has(issueType)
    ).map(([, suggestion]) => suggestion);
  }
}
|
||||
Reference in New Issue
Block a user