/**
 * NestJS module for the completion-verification feature.
 *
 * Registers {@link CompletionVerificationService} as a provider and exports it,
 * so any module that imports this one can inject the service.
 */
@Module({
  exports: [CompletionVerificationService],
  providers: [CompletionVerificationService],
})
export class CompletionVerificationModule {}
expect(result.verdict).toBe("complete"); + expect(result.isComplete).toBe(true); + expect(result.confidence).toBeGreaterThan(80); + expect(result.issues).toHaveLength(0); + }); + + it("should aggregate issues from all strategies", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: [], + testResults: { + total: 10, + passed: 7, + failed: 3, + skipped: 0, + coverage: 70, + }, + buildOutput: "error TS2304: Cannot find name", + }; + + const result = await service.verify(context); + + expect(result.verdict).toBe("incomplete"); + expect(result.isComplete).toBe(false); + expect(result.issues.length).toBeGreaterThan(0); + expect(result.issues.some((i) => i.type === "missing-files")).toBe(true); + expect(result.issues.some((i) => i.type === "test-failure")).toBe(true); + expect(result.issues.some((i) => i.type === "build-error")).toBe(true); + }); + + it("should detect deferred work in claim message", async () => { + const context: VerificationContext = { + ...baseContext, + claimMessage: "Implemented basic feature, will add tests in follow-up", + filesChanged: ["src/feature.ts"], + }; + + const result = await service.verify(context); + + expect(result.isComplete).toBe(false); + expect(result.issues.some((i) => i.type === "deferred-work")).toBe(true); + expect(result.issues.some((i) => i.message.includes("deferred work"))).toBe(true); + }); + + it("should generate appropriate suggestions", async () => { + const context: VerificationContext = { + ...baseContext, + testResults: { + total: 10, + passed: 10, + failed: 0, + skipped: 0, + coverage: 70, + }, + }; + + const result = await service.verify(context); + + expect(result.suggestions.length).toBeGreaterThan(0); + expect(result.suggestions.some((s) => s.includes("coverage"))).toBe(true); + }); + + it("should return needs-review verdict for marginal cases", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts"], + testResults: { + 
total: 10, + passed: 9, + failed: 0, + skipped: 1, + coverage: 85, // At threshold - no error + }, + buildOutput: + "Build successful\nwarning: unused variable x\nwarning: deprecated API\nwarning: complexity high", + outputLogs: "Implementation complete", + }; + + const result = await service.verify(context); + + // Has warnings but no errors -> needs-review + expect(result.verdict).toBe("needs-review"); + expect(result.isComplete).toBe(false); + }); + + it("should calculate confidence from strategy results", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts"], + testResults: { + total: 10, + passed: 10, + failed: 0, + skipped: 0, + coverage: 95, + }, + buildOutput: "Build successful", + }; + + const result = await service.verify(context); + + expect(result.confidence).toBeGreaterThan(85); + }); + }); + + describe("detectDeferredWork", () => { + it('should detect "will implement in follow-up"', () => { + const message = "Added basic feature, will implement advanced features in follow-up"; + const issues = service.detectDeferredWork(message); + + expect(issues.length).toBeGreaterThan(0); + expect(issues[0].type).toBe("deferred-work"); + }); + + it('should detect "to be added later"', () => { + const message = "Core functionality done, tests to be added later"; + const issues = service.detectDeferredWork(message); + + expect(issues.length).toBeGreaterThan(0); + expect(issues[0].type).toBe("deferred-work"); + }); + + it('should detect "incremental improvement"', () => { + const message = "This is an incremental improvement, more to come"; + const issues = service.detectDeferredWork(message); + + expect(issues.length).toBeGreaterThan(0); + expect(issues[0].type).toBe("deferred-work"); + }); + + it('should detect "future enhancement"', () => { + const message = "Basic feature implemented, future enhancements planned"; + const issues = service.detectDeferredWork(message); + + 
expect(issues.length).toBeGreaterThan(0); + expect(issues[0].type).toBe("deferred-work"); + }); + + it('should detect "TODO: complete"', () => { + const message = "Started implementation, TODO: complete validation logic"; + const issues = service.detectDeferredWork(message); + + expect(issues.length).toBeGreaterThan(0); + expect(issues[0].type).toBe("deferred-work"); + }); + + it('should detect "placeholder"', () => { + const message = "Added placeholder implementation for now"; + const issues = service.detectDeferredWork(message); + + expect(issues.length).toBeGreaterThan(0); + expect(issues[0].type).toBe("deferred-work"); + }); + + it('should detect "stub"', () => { + const message = "Created stub for the new service"; + const issues = service.detectDeferredWork(message); + + expect(issues.length).toBeGreaterThan(0); + expect(issues[0].type).toBe("deferred-work"); + }); + + it("should return empty array for complete messages", () => { + const message = "Implemented feature with all tests passing and 95% coverage"; + const issues = service.detectDeferredWork(message); + + expect(issues).toHaveLength(0); + }); + }); + + describe("registerStrategy", () => { + it("should allow registering custom strategies", async () => { + class CustomStrategy { + name = "custom"; + async verify() { + return { + strategyName: "custom", + passed: true, + confidence: 100, + issues: [], + }; + } + } + + service.registerStrategy(new CustomStrategy()); + + const result = await service.verify(baseContext); + expect(result).toBeDefined(); + }); + }); + + describe("calculateConfidence", () => { + it("should return average confidence from strategies", () => { + const results = [ + { strategyName: "s1", passed: true, confidence: 90, issues: [] }, + { strategyName: "s2", passed: true, confidence: 80, issues: [] }, + { strategyName: "s3", passed: true, confidence: 70, issues: [] }, + ]; + + const confidence = service.calculateConfidence(results); + + expect(confidence).toBe(80); // Average of 
90, 80, 70 + }); + + it("should return 0 for empty results", () => { + const confidence = service.calculateConfidence([]); + expect(confidence).toBe(0); + }); + }); + + describe("generateSuggestions", () => { + it("should suggest fixing tests for test failures", () => { + const issues = [ + { + type: "test-failure" as const, + severity: "error" as const, + message: "3 tests failed", + }, + ]; + + const suggestions = service.generateSuggestions(issues); + + expect(suggestions.some((s) => s.includes("failing tests"))).toBe(true); + }); + + it("should suggest fixing build errors", () => { + const issues = [ + { + type: "build-error" as const, + severity: "error" as const, + message: "TypeScript errors", + }, + ]; + + const suggestions = service.generateSuggestions(issues); + + expect(suggestions.some((s) => s.includes("build errors"))).toBe(true); + }); + + it("should suggest increasing coverage", () => { + const issues = [ + { + type: "low-coverage" as const, + severity: "error" as const, + message: "Coverage below 85%", + }, + ]; + + const suggestions = service.generateSuggestions(issues); + + expect(suggestions.some((s) => s.includes("coverage"))).toBe(true); + }); + + it("should suggest completing deferred work", () => { + const issues = [ + { + type: "deferred-work" as const, + severity: "warning" as const, + message: "Work deferred", + }, + ]; + + const suggestions = service.generateSuggestions(issues); + + expect(suggestions.some((s) => s.includes("deferred work"))).toBe(true); + }); + }); +}); diff --git a/apps/api/src/completion-verification/completion-verification.service.ts b/apps/api/src/completion-verification/completion-verification.service.ts new file mode 100644 index 0000000..56186f5 --- /dev/null +++ b/apps/api/src/completion-verification/completion-verification.service.ts @@ -0,0 +1,147 @@ +import { Injectable } from "@nestjs/common"; +import { + VerificationContext, + VerificationResult, + VerificationIssue, + StrategyResult, +} from 
/**
 * Decides whether an agent's "task complete" claim holds up.
 *
 * Runs every registered verification strategy against a
 * {@link VerificationContext}, scans the claim message for wording that hints
 * at deferred work, and folds everything into one {@link VerificationResult}
 * (verdict, averaged confidence, issues and remediation suggestions).
 */
@Injectable()
export class CompletionVerificationService {
  /** Averaged confidence (0-100) a run must reach to be called "complete". */
  private static readonly COMPLETE_CONFIDENCE_THRESHOLD = 80;

  /**
   * Phrases in a claim message that suggest part of the work was postponed.
   * The `g` flag is kept so `String.prototype.match` returns every occurrence
   * for the evidence string; `match` resets `lastIndex` itself, so sharing
   * these instances across calls is safe.
   */
  private static readonly DEFERRED_WORK_PATTERNS: readonly RegExp[] = [
    /follow-up/gi,
    /to\s+be\s+added\s+later/gi,
    /incremental\s+improvement/gi,
    /future\s+enhancement/gi,
    /TODO:.{0,100}complete/gi,
    /placeholder\s+implementation/gi,
    /\bstub\b/gi,
    /will\s+(?:add|complete|finish|implement).{0,100}later/gi,
    // "partial" or "partially" (the former `partially?` never matched "partial").
    /partial(?:ly)?\s+(?:implemented|complete)/gi,
    /work\s+in\s+progress/gi,
  ];

  private readonly strategies: BaseVerificationStrategy[] = [];

  constructor() {
    this.registerDefaultStrategies();
  }

  /** Installs the built-in file-change, test-output and build-output strategies. */
  private registerDefaultStrategies(): void {
    this.strategies.push(
      new FileChangeStrategy(),
      new TestOutputStrategy(),
      new BuildOutputStrategy()
    );
  }

  /**
   * Verifies a completion claim.
   *
   * @param context - Everything known about the claimed completion.
   * @returns The aggregated result; `isComplete` is true only for the
   *   "complete" verdict.
   * @throws Rejects if any strategy's `verify` rejects (strategies are awaited
   *   together with `Promise.all`).
   */
  async verify(context: VerificationContext): Promise<VerificationResult> {
    // Strategies are independent of each other, so run them concurrently.
    const strategyResults = await Promise.all(
      this.strategies.map((strategy) => strategy.verify(context))
    );

    const issues: VerificationIssue[] = [
      ...strategyResults.flatMap((result) => result.issues),
      ...this.detectDeferredWork(context.claimMessage),
    ];

    const confidence = this.calculateConfidence(strategyResults);
    const verdict = this.determineVerdict(issues, confidence);

    return {
      isComplete: verdict === "complete",
      confidence,
      issues,
      suggestions: this.generateSuggestions(issues),
      verdict,
    };
  }

  /**
   * Adds a strategy that will take part in every subsequent `verify` call.
   *
   * @param strategy - Strategy instance to append to the registry.
   */
  registerStrategy(strategy: BaseVerificationStrategy): void {
    this.strategies.push(strategy);
  }

  /**
   * Looks for wording in the claim message that indicates postponed work.
   *
   * @param claimMessage - Free-text completion claim written by the agent.
   * @returns At most one "deferred-work" warning. Its evidence lists the
   *   matches of the first pattern that hit; later patterns are not consulted.
   */
  detectDeferredWork(claimMessage: string): VerificationIssue[] {
    for (const pattern of CompletionVerificationService.DEFERRED_WORK_PATTERNS) {
      const matches = claimMessage.match(pattern);
      if (matches !== null && matches.length > 0) {
        // Report once only, however many patterns would match.
        return [
          {
            type: "deferred-work",
            severity: "warning",
            message: "Claim message indicates deferred work",
            evidence: matches.join(", "),
          },
        ];
      }
    }

    return [];
  }

  /**
   * Averages the confidence reported by each strategy.
   *
   * @param results - Per-strategy results.
   * @returns The rounded arithmetic mean, or 0 when `results` is empty.
   */
  calculateConfidence(results: StrategyResult[]): number {
    if (results.length === 0) {
      return 0;
    }

    const totalConfidence = results.reduce((sum, result) => sum + result.confidence, 0);
    return Math.round(totalConfidence / results.length);
  }

  /**
   * Maps the issue types present to human-readable remediation hints.
   *
   * @param issues - Issues collected during verification.
   * @returns One suggestion per distinct issue type, in a fixed order.
   */
  generateSuggestions(issues: VerificationIssue[]): string[] {
    const suggestions: string[] = [];
    const issueTypes = new Set(issues.map((i) => i.type));

    if (issueTypes.has("test-failure")) {
      suggestions.push("Fix all failing tests before marking task complete");
    }

    if (issueTypes.has("build-error")) {
      suggestions.push("Resolve all build errors and type-check issues");
    }

    if (issueTypes.has("low-coverage")) {
      suggestions.push("Increase test coverage to meet the 85% threshold");
    }

    if (issueTypes.has("missing-files")) {
      suggestions.push("Ensure all necessary files have been modified");
    }

    if (issueTypes.has("incomplete-implementation")) {
      suggestions.push("Remove TODO/FIXME comments and complete placeholder implementations");
    }

    if (issueTypes.has("deferred-work")) {
      suggestions.push("Complete all deferred work or create separate tasks for follow-up items");
    }

    return suggestions;
  }

  /**
   * Turns the aggregated issues and confidence into a verdict.
   *
   * Any error means "incomplete". Otherwise a warning, or a confidence below
   * the completion threshold, means "needs-review". Previously only the
   * 60-79 band was sent to review, so an error-free run with confidence
   * below 60 (including 0) was reported as "complete".
   */
  private determineVerdict(
    issues: VerificationIssue[],
    confidence: number
  ): VerificationResult["verdict"] {
    if (issues.some((issue) => issue.severity === "error")) {
      return "incomplete";
    }

    const hasWarnings = issues.some((issue) => issue.severity === "warning");
    if (hasWarnings || confidence < CompletionVerificationService.COMPLETE_CONFIDENCE_THRESHOLD) {
      return "needs-review";
    }

    return "complete";
  }
}
/**
 * Aggregate counters for a test run.
 */
export interface TestResults {
  /** Number of tests in the run. */
  total: number;
  /** Number of tests that passed. */
  passed: number;
  /** Number of tests that failed. */
  failed: number;
  /** Number of tests that were skipped. */
  skipped: number;
  /** Coverage figure, when available (presumably a 0-100 percentage; confirm with the producer). */
  coverage?: number;
}

/**
 * Input handed to the completion-verification strategies: what the agent
 * claims, and the artifacts that can be checked against that claim.
 */
export interface VerificationContext {
  /** Identifier of the task being claimed as complete. */
  taskId: string;
  /** Identifier of the workspace the task belongs to. */
  workspaceId: string;
  /** Identifier of the agent making the claim. */
  agentId: string;
  /** Free-text completion claim; scanned for deferred-work wording. */
  claimMessage: string;
  /** Paths of the files the agent reports as changed. */
  filesChanged: string[];
  /** Raw output logs; scanned for TODO/FIXME/placeholder/stub markers. */
  outputLogs: string;
  /** Test-run summary, if tests were executed. */
  testResults?: TestResults;
  /** Raw build output, if a build was executed. */
  buildOutput?: string;
  /** Count of earlier attempts (not read by the code visible here; semantics to be confirmed). */
  previousAttempts: number;
}
/**
 * Base class for completion-verification strategies.
 *
 * A concrete strategy supplies a `name` and a `verify` implementation; the
 * protected helpers here cover the regex scanning the strategies share.
 */
export abstract class BaseVerificationStrategy {
  /** Identifier of the strategy. */
  abstract name: string;

  /**
   * Inspects the context and reports this strategy's findings.
   *
   * @param context - Data describing the claimed completion.
   * @returns The strategy's pass/fail flag, confidence and issues.
   */
  abstract verify(context: VerificationContext): Promise<StrategyResult>;

  /**
   * Collects every line of `text` that matches `pattern`.
   *
   * @param text - Multi-line text, split on "\n".
   * @param pattern - Pattern tested against each line. Any flags are accepted;
   *   the caller's regex object is not mutated.
   * @returns The matching lines, trimmed, in their original order.
   */
  protected extractEvidence(text: string, pattern: RegExp): string[] {
    // Work on a private copy: with the `g` or `y` flag, `test()` resumes from
    // `lastIndex`, so a hit on one line would make the next line's search
    // start mid-string and miss matches near its beginning.
    const matcher = new RegExp(pattern.source, pattern.flags);
    const matches: string[] = [];

    for (const line of text.split("\n")) {
      matcher.lastIndex = 0;
      if (matcher.test(line)) {
        matches.push(line.trim());
      }
    }

    return matches;
  }

  /**
   * Collects every occurrence of `pattern` in `text`.
   *
   * @param text - Text to scan.
   * @param pattern - Pattern to look for. A non-global pattern is treated as
   *   global; the caller's regex object is not mutated.
   * @returns The matched substrings (capture group 0), in order of appearance.
   */
  protected extractAllMatches(text: string, pattern: RegExp): string[] {
    // An `exec` loop only advances with the `g` (or `y`) flag; without it the
    // same first match would be returned forever.
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    const matcher = new RegExp(pattern.source, flags);
    const matches: string[] = [];
    let match: RegExpExecArray | null;

    while ((match = matcher.exec(text)) !== null) {
      matches.push(match[0]);
      // A zero-length match leaves lastIndex unchanged; step past it so the
      // loop terminates.
      if (match[0] === "") {
        matcher.lastIndex += 1;
      }
    }

    return matches;
  }
}
BuildOutputStrategy; + let baseContext: VerificationContext; + + beforeEach(() => { + strategy = new BuildOutputStrategy(); + baseContext = { + taskId: "task-1", + workspaceId: "workspace-1", + agentId: "agent-1", + claimMessage: "Built successfully", + filesChanged: ["src/feature.ts"], + outputLogs: "", + previousAttempts: 0, + }; + }); + + describe("verify", () => { + it("should pass when build succeeds", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: "Build completed successfully\nNo errors found", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(true); + expect(result.strategyName).toBe("build-output"); + expect(result.confidence).toBeGreaterThanOrEqual(90); + expect(result.issues).toHaveLength(0); + }); + + it("should fail when TypeScript errors found", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: 'error TS2304: Cannot find name "unknown".\nBuild failed', + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.type === "build-error")).toBe(true); + expect(result.issues.some((i) => i.message.includes("TypeScript"))).toBe(true); + }); + + it("should fail when build errors found", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: "Error: Module not found\nBuild failed with 1 error", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.type === "build-error")).toBe(true); + }); + + it("should detect ESLint errors", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: "ESLint error: no-unused-vars\n1 error found", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.message.includes("ESLint"))).toBe(true); + }); + + it("should warn about lint 
warnings", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: "warning: unused variable\nBuild completed with warnings", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(true); + expect(result.issues.some((i) => i.severity === "warning")).toBe(true); + }); + + it("should pass when no build output provided", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: undefined, + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(true); + expect(result.confidence).toBeGreaterThan(0); + }); + + it("should reduce confidence with multiple errors", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: + "error TS2304: Cannot find name\nerror TS2345: Type mismatch\nerror TS1005: Syntax error\nBuild failed", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.confidence).toBeLessThan(50); + expect(result.issues.length).toBeGreaterThan(0); + }); + + it("should detect compilation failures", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: "Compilation failed\nProcess exited with code 1", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.type === "build-error")).toBe(true); + }); + + it("should have high confidence with clean build", async () => { + const context: VerificationContext = { + ...baseContext, + buildOutput: "Build successful\nNo errors or warnings\nCompleted in 5s", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(true); + expect(result.confidence).toBeGreaterThanOrEqual(95); + expect(result.issues).toHaveLength(0); + }); + }); +}); diff --git a/apps/api/src/completion-verification/strategies/build-output.strategy.ts 
/**
 * Verification strategy that scans raw build output for TypeScript errors,
 * ESLint errors, generic build errors, compilation failures and warnings.
 */
export class BuildOutputStrategy extends BaseVerificationStrategy {
  name = "build-output";

  /**
   * Evaluates `context.buildOutput`.
   *
   * @param context - Verification input; only `buildOutput` is read.
   * @returns `passed` is false when any error-severity issue is found.
   *   Confidence starts at 100, loses 30 per counted error and up to 10 for
   *   warnings, and is a neutral 50 when no build output was supplied.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const { buildOutput } = context;

    // No (or empty) build output: assume the build wasn't run (neutral result).
    if (!buildOutput) {
      return Promise.resolve({
        strategyName: this.name,
        passed: true,
        confidence: 50,
        issues: [],
      });
    }

    const issues: VerificationIssue[] = [];

    // The patterns are deliberately non-global: the inherited line scanner
    // calls `test()` once per line, and a `g` regex would carry `lastIndex`
    // from one line to the next and skip consecutive matching lines.

    // TypeScript compiler errors, e.g. "error TS2304: ...".
    const tsErrors = this.extractEvidence(buildOutput, /error TS\d+:/i);
    if (tsErrors.length > 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Found ${tsErrors.length.toString()} TypeScript error(s)`,
        evidence: tsErrors.slice(0, 5).join("\n"), // Limit to first 5
      });
    }

    // ESLint errors.
    const eslintErrors = this.extractEvidence(buildOutput, /ESLint.*error/i);
    if (eslintErrors.length > 0) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Found ${eslintErrors.length.toString()} ESLint error(s)`,
        evidence: eslintErrors.slice(0, 5).join("\n"),
      });
    }

    // Generic "error ... build/compilation/failed" lines.
    const buildErrors = this.extractEvidence(
      buildOutput,
      /\berror\b.*(?:build|compilation|failed)/i
    );
    if (buildErrors.length > 0 && tsErrors.length === 0) {
      // Only reported when not already covered by the TypeScript issue.
      issues.push({
        type: "build-error",
        severity: "error",
        message: `Build errors detected`,
        evidence: buildErrors.slice(0, 5).join("\n"),
      });
    }

    // A bare "compilation failed" / "build failed" with no detailed errors above.
    const failedWithoutDetails =
      issues.length === 0 && /compilation failed|build failed/i.test(buildOutput);
    if (failedWithoutDetails) {
      issues.push({
        type: "build-error",
        severity: "error",
        message: "Compilation failed",
      });
    }

    // Warnings do not fail the strategy but are surfaced for review.
    const warnings = this.extractEvidence(buildOutput, /\bwarning\b/i);
    if (warnings.length > 0) {
      issues.push({
        type: "build-error",
        severity: "warning",
        message: `Found ${warnings.length.toString()} warning(s)`,
        evidence: warnings.slice(0, 3).join("\n"),
      });
    }

    // An undetailed failure counts as one error, so a failed build can no
    // longer be reported with full confidence.
    const errorCount =
      tsErrors.length + eslintErrors.length + buildErrors.length + (failedWithoutDetails ? 1 : 0);

    let confidence = 100;
    if (errorCount > 0) {
      // Aggressive penalty: 30 points per error (3 errors = 10% confidence).
      confidence = Math.max(0, 100 - errorCount * 30);
    }

    // Small penalty for warnings, capped at 10 points.
    if (warnings.length > 0) {
      confidence -= Math.min(10, warnings.length * 2);
    }

    confidence = Math.max(0, Math.round(confidence));

    return Promise.resolve({
      strategyName: this.name,
      passed: !issues.some((issue) => issue.severity === "error"),
      confidence,
      issues,
    });
  }
}
FileChangeStrategy(); + baseContext = { + taskId: "task-1", + workspaceId: "workspace-1", + agentId: "agent-1", + claimMessage: "Implemented feature", + filesChanged: [], + outputLogs: "", + previousAttempts: 0, + }; + }); + + describe("verify", () => { + it("should pass when files are changed", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts", "src/feature.spec.ts"], + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(true); + expect(result.strategyName).toBe("file-change"); + expect(result.confidence).toBeGreaterThan(0); + expect(result.issues).toHaveLength(0); + }); + + it("should fail when no files are changed", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: [], + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues).toHaveLength(1); + expect(result.issues[0].type).toBe("missing-files"); + expect(result.issues[0].severity).toBe("error"); + }); + + it("should detect TODO comments in output logs", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts"], + outputLogs: "File modified\nTODO: implement this later\nDone", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true); + expect(result.issues.some((i) => i.message.includes("TODO"))).toBe(true); + }); + + it("should detect FIXME comments in output logs", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts"], + outputLogs: "File modified\nFIXME: broken implementation\nDone", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true); + expect(result.issues.some((i) => 
i.message.includes("FIXME"))).toBe(true); + }); + + it("should detect placeholder implementations", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts"], + outputLogs: "Added placeholder implementation for now", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true); + }); + + it("should detect stub implementations", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts"], + outputLogs: "Created stub for testing", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.issues.some((i) => i.type === "incomplete-implementation")).toBe(true); + }); + + it("should reduce confidence with multiple issues", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts"], + outputLogs: "TODO: implement\nFIXME: broken\nPlaceholder added", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(false); + expect(result.confidence).toBeLessThan(50); + expect(result.issues.length).toBeGreaterThan(1); + }); + + it("should have high confidence when no issues found", async () => { + const context: VerificationContext = { + ...baseContext, + filesChanged: ["src/feature.ts", "src/feature.spec.ts"], + outputLogs: "Implemented feature successfully\nAll tests passing", + }; + + const result = await strategy.verify(context); + + expect(result.passed).toBe(true); + expect(result.confidence).toBeGreaterThanOrEqual(90); + expect(result.issues).toHaveLength(0); + }); + }); +}); diff --git a/apps/api/src/completion-verification/strategies/file-change.strategy.ts b/apps/api/src/completion-verification/strategies/file-change.strategy.ts new file mode 100644 index 0000000..e004c2e --- /dev/null +++ 
/**
 * Verification strategy that checks the agent actually changed files and that
 * its output logs carry no markers of unfinished work (TODO, FIXME,
 * placeholder, stub).
 */
export class FileChangeStrategy extends BaseVerificationStrategy {
  name = "file-change";

  /** Confidence points deducted for each issue found. */
  private static readonly PENALTY_PER_ISSUE = 20;

  /**
   * Markers of unfinished work, checked in this order against each log line.
   *
   * The patterns are deliberately non-global: the inherited line scanner calls
   * `test()` once per line, and a `g` regex would carry `lastIndex` from one
   * line to the next and skip consecutive matching lines.
   */
  private static readonly INCOMPLETE_MARKERS: readonly {
    pattern: RegExp;
    describe: (count: number) => string;
  }[] = [
    // TODO comments: incomplete work.
    { pattern: /TODO:/i, describe: (count) => `Found ${count.toString()} TODO comment(s)` },
    // FIXME comments: known-broken code.
    { pattern: /FIXME:/i, describe: (count) => `Found ${count.toString()} FIXME comment(s)` },
    // Placeholder / stub: not a real implementation.
    { pattern: /placeholder/i, describe: () => "Found placeholder implementation" },
    { pattern: /\bstub\b/i, describe: () => "Found stub implementation" },
  ];

  /**
   * Evaluates `context.filesChanged` and `context.outputLogs`.
   *
   * @param context - Verification input.
   * @returns `passed` is false when any issue is found (every issue raised
   *   here has error severity). Confidence is 100 minus 20 per issue, floored
   *   at 0.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const issues: VerificationIssue[] = [];

    // A completion claim with no changed files is treated as an error.
    if (context.filesChanged.length === 0) {
      issues.push({
        type: "missing-files",
        severity: "error",
        message: "No files were changed",
      });
    }

    for (const marker of FileChangeStrategy.INCOMPLETE_MARKERS) {
      const lines = this.extractEvidence(context.outputLogs, marker.pattern);
      if (lines.length > 0) {
        issues.push({
          type: "incomplete-implementation",
          severity: "error",
          message: marker.describe(lines.length),
          evidence: lines.join("\n"),
        });
      }
    }

    const confidence = Math.max(0, 100 - issues.length * FileChangeStrategy.PENALTY_PER_ISSUE);

    return Promise.resolve({
      strategyName: this.name,
      passed: !issues.some((issue) => issue.severity === "error"),
      confidence,
      issues,
    });
  }
}
// Unit tests for TestOutputStrategy: each case feeds a set of test results
// through `verify` and checks the pass/fail verdict, issues and confidence.
describe("TestOutputStrategy", () => {
  let strategy: TestOutputStrategy;
  let baseContext: VerificationContext;

  // Verifies the base context with the given test results merged in.
  const verifyWith = (testResults: VerificationContext["testResults"]) => {
    const context: VerificationContext = { ...baseContext, testResults };
    return strategy.verify(context);
  };

  beforeEach(() => {
    strategy = new TestOutputStrategy();
    baseContext = {
      taskId: "task-1",
      workspaceId: "workspace-1",
      agentId: "agent-1",
      claimMessage: "Implemented tests",
      filesChanged: ["src/feature.spec.ts"],
      outputLogs: "",
      previousAttempts: 0,
    };
  });

  describe("verify", () => {
    it("should pass when all tests pass", async () => {
      const { passed, strategyName, confidence, issues } = await verifyWith({
        total: 10,
        passed: 10,
        failed: 0,
        skipped: 0,
        coverage: 90,
      });

      expect(passed).toBe(true);
      expect(strategyName).toBe("test-output");
      expect(confidence).toBeGreaterThanOrEqual(90);
      expect(issues).toHaveLength(0);
    });

    it("should fail when tests fail", async () => {
      const { passed, issues } = await verifyWith({
        total: 10,
        passed: 7,
        failed: 3,
        skipped: 0,
        coverage: 80,
      });

      expect(passed).toBe(false);
      expect(issues.some((issue) => issue.type === "test-failure")).toBe(true);
      expect(issues.some((issue) => issue.message.includes("3 test(s) failed"))).toBe(true);
    });

    it("should warn about skipped tests", async () => {
      const { passed, issues } = await verifyWith({
        total: 10,
        passed: 8,
        failed: 0,
        skipped: 2,
        coverage: 85,
      });

      expect(passed).toBe(true);
      expect(issues.some((issue) => issue.severity === "warning")).toBe(true);
      expect(issues.some((issue) => issue.message.includes("2 test(s) skipped"))).toBe(true);
    });

    it("should fail when coverage is below threshold", async () => {
      const { passed, issues } = await verifyWith({
        total: 10,
        passed: 10,
        failed: 0,
        skipped: 0,
        coverage: 70,
      });

      expect(passed).toBe(false);
      expect(issues.some((issue) => issue.type === "low-coverage")).toBe(true);
      expect(issues.some((issue) => issue.message.includes("70%"))).toBe(true);
    });

    it("should pass when coverage is at threshold", async () => {
      const { passed, issues } = await verifyWith({
        total: 10,
        passed: 10,
        failed: 0,
        skipped: 0,
        coverage: 85,
      });

      expect(passed).toBe(true);
      expect(issues.filter((issue) => issue.type === "low-coverage")).toHaveLength(0);
    });

    it("should pass when no test results provided", async () => {
      const { passed, confidence } = await verifyWith(undefined);

      expect(passed).toBe(true);
      expect(confidence).toBeGreaterThan(0);
    });

    it("should reduce confidence based on failure rate", async () => {
      const { passed, confidence } = await verifyWith({
        total: 10,
        passed: 5,
        failed: 5,
        skipped: 0,
        coverage: 80,
      });

      expect(passed).toBe(false);
      expect(confidence).toBeLessThan(50);
    });

    it("should have high confidence with perfect results", async () => {
      const { passed, confidence, issues } = await verifyWith({
        total: 20,
        passed: 20,
        failed: 0,
        skipped: 0,
        coverage: 95,
      });

      expect(passed).toBe(true);
      expect(confidence).toBeGreaterThanOrEqual(95);
      expect(issues).toHaveLength(0);
    });
  });
});
/**
 * Verification strategy that judges a completion claim by its reported test
 * results: failed tests, skipped tests and code coverage.
 *
 * Failed tests and coverage below the threshold are errors and make the
 * strategy fail. Skipped tests are only a warning. When no test results are
 * supplied the strategy passes with a neutral confidence of 50.
 */
export class TestOutputStrategy extends BaseVerificationStrategy {
  name = "test-output";
  // Minimum acceptable coverage percentage; a value exactly at the threshold passes.
  private readonly COVERAGE_THRESHOLD = 85;

  /**
   * Evaluates the test results attached to a completion claim.
   *
   * @param context - The claim under verification; only `testResults` is read here.
   * @returns A promise resolving to this strategy's result. `passed` is true
   *   only when no issue of severity `"error"` was recorded.
   */
  verify(context: VerificationContext): Promise<StrategyResult> {
    const { testResults } = context;

    // If no test results, assume tests weren't run (neutral result)
    if (!testResults) {
      return Promise.resolve({
        strategyName: this.name,
        passed: true,
        confidence: 50,
        issues: [],
      });
    }

    const issues = this.collectIssues(testResults);

    return Promise.resolve({
      strategyName: this.name,
      passed: issues.every((issue) => issue.severity !== "error"),
      confidence: this.scoreConfidence(testResults),
      issues,
    });
  }

  /** Builds the issue list for failed tests, skipped tests and low coverage, in that order. */
  private collectIssues(
    testResults: NonNullable<VerificationContext["testResults"]>
  ): VerificationIssue[] {
    const issues: VerificationIssue[] = [];

    if (testResults.failed > 0) {
      issues.push({
        type: "test-failure",
        severity: "error",
        message: `${testResults.failed.toString()} test(s) failed out of ${testResults.total.toString()}`,
      });
    }

    // Skipped tests are reported under the same issue type but only as a warning,
    // so they never fail the strategy on their own.
    if (testResults.skipped > 0) {
      issues.push({
        type: "test-failure",
        severity: "warning",
        message: `${testResults.skipped.toString()} test(s) skipped`,
      });
    }

    if (testResults.coverage !== undefined && testResults.coverage < this.COVERAGE_THRESHOLD) {
      issues.push({
        type: "low-coverage",
        severity: "error",
        message: `Code coverage ${testResults.coverage.toString()}% is below threshold of ${this.COVERAGE_THRESHOLD.toString()}%`,
      });
    }

    return issues;
  }

  /**
   * Computes a 0-100 confidence score: the lower of pass rate and coverage,
   * minus proportional penalties for failed (up to 30) and skipped (up to 20) tests.
   */
  private scoreConfidence(testResults: NonNullable<VerificationContext["testResults"]>): number {
    let confidence = 100;

    // Cap by the pass rate (minimum, not average). Skipped when total is 0 to
    // avoid a 0/0 pass rate.
    if (testResults.total > 0) {
      const passRate = (testResults.passed / testResults.total) * 100;
      confidence = Math.min(confidence, passRate);
    }

    // Cap further by coverage when it was reported.
    if (testResults.coverage !== undefined) {
      confidence = Math.min(confidence, testResults.coverage);
    }

    // Additional penalties proportional to the failed and skipped share.
    // If total is 0 while failed/skipped is positive, the division yields
    // Infinity and the clamp below turns the score into 0.
    if (testResults.failed > 0) {
      confidence -= (testResults.failed / testResults.total) * 30;
    }
    if (testResults.skipped > 0) {
      confidence -= (testResults.skipped / testResults.total) * 20;
    }

    return Math.max(0, Math.round(confidence));
  }
}