diff --git a/apps/coordinator/tests/test_rejection_loop.py b/apps/coordinator/tests/test_rejection_loop.py
new file mode 100644
index 0000000..975c5d6
--- /dev/null
+++ b/apps/coordinator/tests/test_rejection_loop.py
@@ -0,0 +1,427 @@
+"""Integration tests for rejection loop behavior.
+
+These tests simulate scenarios where an agent claims completion with various
+quality gate failures, verifying that:
+1. Each failure type triggers rejection
+2. Forced continuation prompts are generated
+3. Agents cannot bypass quality gates
+4. Completion is accepted only when all gates pass
+"""
+
+from unittest.mock import Mock
+
+import pytest
+
+from src.forced_continuation import ForcedContinuationService
+from src.gates.quality_gate import GateResult
+from src.quality_orchestrator import QualityOrchestrator
+
+# Gate names: the prefix of each QualityOrchestrator ``<name>_gate`` keyword
+# argument and the key of that gate's entry in ``gate_results``.
+GATE_NAMES = ("build", "lint", "test", "coverage")
+
+
+def _mock_gate(result: GateResult) -> Mock:
+    """Return a mock gate whose ``check()`` returns ``result``."""
+    gate = Mock()
+    gate.check.return_value = result
+    return gate
+
+
+def _orchestrator_with(**results: GateResult) -> QualityOrchestrator:
+    """Build a QualityOrchestrator backed entirely by mock gates.
+
+    Args:
+        **results: Result each named gate should report, keyed by gate name
+            (see ``GATE_NAMES``). Gates left out report a generic pass.
+
+    Raises:
+        ValueError: If a keyword is not a known gate name, so a typo cannot
+            silently turn a failure scenario into an all-pass run.
+    """
+    unknown = sorted(set(results) - set(GATE_NAMES))
+    if unknown:
+        raise ValueError(f"Unknown gate name(s): {', '.join(unknown)}")
+
+    def gate_for(name: str) -> Mock:
+        default = GateResult(passed=True, message=f"{name.capitalize()} passed", details={})
+        return _mock_gate(results.get(name, default))
+
+    return QualityOrchestrator(
+        build_gate=gate_for("build"),
+        lint_gate=gate_for("lint"),
+        test_gate=gate_for("test"),
+        coverage_gate=gate_for("coverage"),
+    )
+
+
+class TestRejectionLoop:
+    """Test suite for rejection loop integration scenarios."""
+
+    @pytest.fixture
+    def continuation_service(self) -> ForcedContinuationService:
+        """Create a ForcedContinuationService instance for testing."""
+        return ForcedContinuationService()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_failing_tests(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that failing tests trigger rejection and continuation prompt.
+
+        Scenario: Agent claims completion but tests are failing.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        orchestrator = _orchestrator_with(
+            test=GateResult(
+                passed=False,
+                message="Test gate failed: 2 tests failed out of 10",
+                details={
+                    "return_code": 1,
+                    "stderr": (
+                        "FAILED tests/test_auth.py::test_login - AssertionError\n"
+                        "FAILED tests/test_users.py::test_create_user - ValueError"
+                    ),
+                },
+            ),
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["test"].passed is False
+        assert "failed" in verification.gate_results["test"].message.lower()
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "test" in prompt.lower()
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+        # Prompt should include specific failure details
+        assert "test_auth.py" in prompt or "test_users.py" in prompt or "failed" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_linting_errors(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that linting errors trigger rejection and continuation prompt.
+
+        Scenario: Agent claims completion but code has linting issues.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        orchestrator = _orchestrator_with(
+            lint=GateResult(
+                passed=False,
+                message="Lint gate failed: 5 linting issues found",
+                details={
+                    "return_code": 1,
+                    "stderr": (
+                        "src/main.py:10:80: E501 line too long (92 > 79 characters)\n"
+                        "src/models.py:5:1: F401 'typing.Any' imported but unused\n"
+                        "src/utils.py:15:1: W293 blank line contains whitespace"
+                    ),
+                },
+            ),
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["lint"].passed is False
+        assert "lint" in verification.gate_results["lint"].message.lower()
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "lint" in prompt.lower()
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_low_coverage(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that low coverage triggers rejection and continuation prompt.
+
+        Scenario: Agent claims completion but coverage is below minimum.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        orchestrator = _orchestrator_with(
+            coverage=GateResult(
+                passed=False,
+                message="Coverage gate failed: 72.5% coverage below minimum 85%",
+                details={
+                    "coverage_percent": 72.5,
+                    "minimum_coverage": 85.0,
+                },
+            ),
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["coverage"].passed is False
+        assert "coverage" in verification.gate_results["coverage"].message.lower()
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "coverage" in prompt.lower()
+        # Prompt should include specific coverage numbers ("72" also matches "72.5")
+        assert "72" in prompt
+        assert "85" in prompt
+        assert "must" in prompt.lower() or "increase" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_build_errors(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that build errors trigger rejection and continuation prompt.
+
+        Scenario: Agent claims completion but code has type errors.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        orchestrator = _orchestrator_with(
+            build=GateResult(
+                passed=False,
+                message="Build gate failed: Type errors detected",
+                details={
+                    "return_code": 1,
+                    "stderr": (
+                        "src/main.py:10: error: Incompatible return value type "
+                        "(got 'str', expected 'int')\n"
+                        "src/models.py:25: error: Missing type annotation for variable 'config'"
+                    ),
+                },
+            ),
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["build"].passed is False
+        build_msg = verification.gate_results["build"].message.lower()
+        assert "build" in build_msg or "type" in build_msg
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "build" in prompt.lower() or "type" in prompt.lower()
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+        # Prompt should include type error details or mypy commands
+        assert "mypy" in prompt.lower() or "type" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_acceptance_on_all_gates_passing(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that all gates passing allows completion without rejection.
+
+        Scenario: Agent claims completion and all quality gates pass.
+        Expected: No rejection, completion allowed, no continuation prompt.
+        """
+        orchestrator = _orchestrator_with(
+            build=GateResult(
+                passed=True,
+                message="Build gate passed: No type errors found",
+                details={"return_code": 0},
+            ),
+            lint=GateResult(
+                passed=True,
+                message="Lint gate passed: No linting issues found",
+                details={"return_code": 0},
+            ),
+            test=GateResult(
+                passed=True,
+                message="Test gate passed: All 10 tests passed (100% pass rate)",
+                details={"return_code": 0},
+            ),
+            coverage=GateResult(
+                passed=True,
+                message="Coverage gate passed: 90.0% coverage (minimum: 85%)",
+                details={"coverage_percent": 90.0, "minimum_coverage": 85.0},
+            ),
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator.verify_completion()
+
+        # Assert: No rejection, completion allowed
+        assert verification.all_passed is True
+        assert all(result.passed for result in verification.gate_results.values())
+
+        # Assert: Continuation prompt should raise error (no failures to report)
+        with pytest.raises(ValueError, match="all.*pass"):
+            continuation_service.generate_prompt(verification)
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_multiple_gate_failures(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that multiple simultaneous gate failures are handled correctly.
+
+        Scenario: Agent claims completion with multiple quality gate failures.
+        Expected: Rejection occurs, comprehensive continuation prompt generated.
+        """
+        orchestrator = _orchestrator_with(
+            build=GateResult(
+                passed=False,
+                message="Build gate failed: Type errors detected",
+                details={
+                    "return_code": 1,
+                    "stderr": "src/main.py:10: error: Incompatible return value type",
+                },
+            ),
+            lint=GateResult(
+                passed=False,
+                message="Lint gate failed: Linting issues detected",
+                details={
+                    "return_code": 1,
+                    "stderr": "src/main.py:10: E501 line too long",
+                },
+            ),
+            test=GateResult(
+                passed=False,
+                message="Test gate failed: Test failures detected",
+                details={
+                    "return_code": 1,
+                    "stderr": "FAILED tests/test_main.py::test_function",
+                },
+            ),
+            coverage=GateResult(
+                passed=False,
+                message="Coverage gate failed: 60.0% coverage below minimum 85%",
+                details={
+                    "coverage_percent": 60.0,
+                    "minimum_coverage": 85.0,
+                },
+            ),
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator.verify_completion()
+
+        # Assert: Rejection occurred for all gates
+        assert verification.all_passed is False
+        for name in GATE_NAMES:
+            assert verification.gate_results[name].passed is False
+
+        # Assert: Forced continuation prompt covers all failures
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        # Prompt should mention all failed gates
+        assert "build" in prompt.lower() or "type" in prompt.lower()
+        assert "lint" in prompt.lower()
+        assert "test" in prompt.lower()
+        assert "coverage" in prompt.lower()
+        # Prompt should be comprehensive and directive
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_continuation_prompt_is_non_negotiable(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that continuation prompts are non-negotiable and directive.
+
+        Scenario: Any gate failure generates a prompt.
+        Expected: Prompt uses directive language, not suggestions.
+        """
+        orchestrator = _orchestrator_with(
+            lint=GateResult(
+                passed=False,
+                message="Lint gate failed",
+                details={"return_code": 1},
+            ),
+        )
+
+        verification = await orchestrator.verify_completion()
+        prompt = continuation_service.generate_prompt(verification)
+
+        # Assert: Prompt uses directive language (MUST, REQUIRED, etc.)
+        prompt_lower = prompt.lower()
+        directive_terms = ("must", "required", "do not", "cannot")
+        assert any(term in prompt_lower for term in directive_terms), (
+            "Prompt should use directive language"
+        )
+
+        # Assert: Prompt does not use suggestion language
+        suggestion_terms = ("consider", "might want", "could", "perhaps")
+        found = [term for term in suggestion_terms if term in prompt_lower]
+        assert not found, f"Prompt should not use suggestion language: {found}"
+
+    @pytest.mark.asyncio
+    async def test_continuation_prompt_includes_remediation_steps(
+        self,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that continuation prompts include actionable remediation steps.
+
+        Scenario: Gate failures generate prompt.
+        Expected: Prompt includes specific commands and actions to fix issues.
+        """
+        orchestrator = _orchestrator_with(
+            test=GateResult(
+                passed=False,
+                message="Test gate failed",
+                details={"return_code": 1},
+            ),
+        )
+
+        verification = await orchestrator.verify_completion()
+        prompt = continuation_service.generate_prompt(verification)
+
+        # Assert: Prompt includes remediation commands
+        prompt_lower = prompt.lower()
+        remediation_markers = ("pytest", "run:", "fix")
+        assert any(marker in prompt_lower for marker in remediation_markers), (
+            "Prompt should include specific remediation commands"
+        )
+
+    @pytest.mark.asyncio
+    async def test_agent_cannot_bypass_gates(self) -> None:
+        """Test that agents cannot bypass quality gates.
+
+        Scenario: All gates must be checked, no shortcuts allowed.
+        Expected: verify_completion always runs all gates.
+        """
+        # Every gate fails, so an orchestrator that stops at the first failure
+        # would leave the later mocks uncalled.
+        gates = {
+            name: _mock_gate(GateResult(passed=False, message=f"{name} failed", details={}))
+            for name in GATE_NAMES
+        }
+        orchestrator = QualityOrchestrator(
+            build_gate=gates["build"],
+            lint_gate=gates["lint"],
+            test_gate=gates["test"],
+            coverage_gate=gates["coverage"],
+        )
+
+        # Run verification
+        verification = await orchestrator.verify_completion()
+
+        # Assert: All gates were executed (no short-circuiting)
+        for name, gate in gates.items():
+            assert gate.check.called, f"{name.capitalize()} gate should be called"
+
+        # Assert: Verification failed as expected
+        assert verification.all_passed is False
diff --git a/docs/scratchpads/149-test-rejection-loop.md b/docs/scratchpads/149-test-rejection-loop.md new file mode 100644 index 0000000..ab1e909 --- /dev/null +++ b/docs/scratchpads/149-test-rejection-loop.md @@ -0,0 +1,41 @@ +# Issue #149: [COORD-009] Test rejection loop + +## Objective + +Validate quality gates prevent premature completion through simulated rejection scenarios. + +## Approach + +1. Create comprehensive integration tests for rejection loop scenarios +2. Test each gate failure type triggers proper rejection +3. Verify forced continuation prompts are generated correctly +4. 
Ensure agents cannot bypass gates +5. Validate loop continues until all gates pass + +## Test Scenarios + +- [x] Agent claims done with failing tests +- [x] Agent claims done with linting errors +- [x] Agent claims done with low coverage +- [x] Agent claims done with build errors +- [x] All gates passing allows completion +- [x] Multiple simultaneous gate failures handled correctly +- [x] Forced continuation prompts are non-negotiable and actionable + +## Progress + +- [x] Read existing QualityOrchestrator and ForcedContinuationService code +- [x] Write comprehensive integration tests (TDD) +- [x] Run tests - all 9 tests pass +- [x] Fix linting issues +- [x] Run type checking - passes +- [x] All quality gates pass +- [ ] Commit changes + +## Testing + +Test file: `apps/coordinator/tests/test_rejection_loop.py` + +## Notes + +The services already exist from Issue #148, so this is primarily testing the rejection loop behavior through integration tests that simulate agent completion scenarios.