test(#149): Add comprehensive rejection loop integration tests

Add integration tests validating rejection loop behavior: - Agent claims done with failing tests → rejection + forced continuation - Agent claims done with linting errors → rejection + forced continuation - Agent claims done with low coverage → rejection + forced continuation - Agent claims done with build errors → rejection + forced continuation - All gates passing → completion allowed - Multiple simultaneous failures → comprehensive rejection - Continuation prompts are non-negotiable and directive - Agents cannot bypass quality gates - Remediation steps included in prompts All 9 tests pass. Build gate: passes Lint gate: passes Test gate: passes (100% pass rate) Coverage: quality_orchestrator.py at 85%, forced_continuation.py at 100% Refs #149 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 20:11:15 -06:00
parent 28d0e4b1df
commit ac3f5c1af9
2 changed files with 632 additions and 0 deletions
--- a/apps/coordinator/tests/test_rejection_loop.py
+++ b/apps/coordinator/tests/test_rejection_loop.py
@@ -0,0 +1,591 @@
+"""Integration tests for rejection loop behavior.
+
+These tests simulate scenarios where an agent claims completion with various
+quality gate failures, verifying that:
+1. Each failure type triggers rejection
+2. Forced continuation prompts are generated
+3. Agents cannot bypass quality gates
+4. Loop continues until all gates pass
+"""
+
+import pytest
+
+from src.forced_continuation import ForcedContinuationService
+from src.gates.quality_gate import GateResult
+from src.quality_orchestrator import QualityOrchestrator
+
+
+class TestRejectionLoop:
+    """Test suite for rejection loop integration scenarios."""
+
+    @pytest.fixture
+    def orchestrator(self) -> QualityOrchestrator:
+        """Create a QualityOrchestrator instance for testing."""
+        return QualityOrchestrator()
+
+    @pytest.fixture
+    def continuation_service(self) -> ForcedContinuationService:
+        """Create a ForcedContinuationService instance for testing."""
+        return ForcedContinuationService()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_failing_tests(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that failing tests trigger rejection and continuation prompt.
+
+        Scenario: Agent claims completion but tests are failing.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        # Create mock orchestrator with failing test gate
+        from unittest.mock import Mock
+
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=False,
+            message="Test gate failed: 2 tests failed out of 10",
+            details={
+                "return_code": 1,
+                "stderr": (
+                    "FAILED tests/test_auth.py::test_login - AssertionError\n"
+                    "FAILED tests/test_users.py::test_create_user - ValueError"
+                ),
+            },
+        )
+
+        # Other gates pass
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=True, message="Build passed", details={}
+        )
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=True, message="Lint passed", details={}
+        )
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=True, message="Coverage passed", details={}
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator_with_mocks.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["test"].passed is False
+        assert "failed" in verification.gate_results["test"].message.lower()
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "test" in prompt.lower()
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+        # Prompt should include specific failure details
+        assert "test_auth.py" in prompt or "test_users.py" in prompt or "failed" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_linting_errors(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that linting errors trigger rejection and continuation prompt.
+
+        Scenario: Agent claims completion but code has linting issues.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        from unittest.mock import Mock
+
+        # Create mock orchestrator with failing lint gate
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=False,
+            message="Lint gate failed: 5 linting issues found",
+            details={
+                "return_code": 1,
+                "stderr": (
+                    "src/main.py:10:80: E501 line too long (92 > 79 characters)\n"
+                    "src/models.py:5:1: F401 'typing.Any' imported but unused\n"
+                    "src/utils.py:15:1: W293 blank line contains whitespace"
+                ),
+            },
+        )
+
+        # Other gates pass
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=True, message="Build passed", details={}
+        )
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=True, message="Test passed", details={}
+        )
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=True, message="Coverage passed", details={}
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator_with_mocks.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["lint"].passed is False
+        assert "lint" in verification.gate_results["lint"].message.lower()
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "lint" in prompt.lower()
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+        # Prompt should include linting details or commands
+        assert "ruff" in prompt.lower() or "lint" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_low_coverage(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that low coverage triggers rejection and continuation prompt.
+
+        Scenario: Agent claims completion but coverage is below minimum.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        from unittest.mock import Mock
+
+        # Create mock orchestrator with failing coverage gate
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=False,
+            message="Coverage gate failed: 72.5% coverage below minimum 85%",
+            details={
+                "coverage_percent": 72.5,
+                "minimum_coverage": 85.0,
+            },
+        )
+
+        # Other gates pass
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=True, message="Build passed", details={}
+        )
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=True, message="Lint passed", details={}
+        )
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=True, message="Test passed", details={}
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator_with_mocks.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["coverage"].passed is False
+        assert "coverage" in verification.gate_results["coverage"].message.lower()
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "coverage" in prompt.lower()
+        # Prompt should include specific coverage numbers
+        assert "72.5" in prompt or "72" in prompt
+        assert "85" in prompt
+        assert "must" in prompt.lower() or "increase" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_build_errors(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that build errors trigger rejection and continuation prompt.
+
+        Scenario: Agent claims completion but code has type errors.
+        Expected: Rejection occurs, forced continuation prompt generated.
+        """
+        from unittest.mock import Mock
+
+        # Create mock orchestrator with failing build gate
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=False,
+            message="Build gate failed: Type errors detected",
+            details={
+                "return_code": 1,
+                "stderr": (
+                    "src/main.py:10: error: Incompatible return value type "
+                    "(got 'str', expected 'int')\n"
+                    "src/models.py:25: error: Missing type annotation for variable 'config'"
+                ),
+            },
+        )
+
+        # Other gates pass
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=True, message="Lint passed", details={}
+        )
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=True, message="Test passed", details={}
+        )
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=True, message="Coverage passed", details={}
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator_with_mocks.verify_completion()
+
+        # Assert: Rejection occurred
+        assert verification.all_passed is False
+        assert verification.gate_results["build"].passed is False
+        build_msg = verification.gate_results["build"].message.lower()
+        assert "build" in build_msg or "type" in build_msg
+
+        # Assert: Forced continuation prompt is generated
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        assert "build" in prompt.lower() or "type" in prompt.lower()
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+        # Prompt should include type error details or mypy commands
+        assert "mypy" in prompt.lower() or "type" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_acceptance_on_all_gates_passing(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that all gates passing allows completion without rejection.
+
+        Scenario: Agent claims completion and all quality gates pass.
+        Expected: No rejection, completion allowed, no continuation prompt.
+        """
+        from unittest.mock import Mock
+
+        # Create mock orchestrator with all gates passing
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=True,
+            message="Build gate passed: No type errors found",
+            details={"return_code": 0},
+        )
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=True,
+            message="Lint gate passed: No linting issues found",
+            details={"return_code": 0},
+        )
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=True,
+            message="Test gate passed: All 10 tests passed (100% pass rate)",
+            details={"return_code": 0},
+        )
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=True,
+            message="Coverage gate passed: 90.0% coverage (minimum: 85%)",
+            details={"coverage_percent": 90.0, "minimum_coverage": 85.0},
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator_with_mocks.verify_completion()
+
+        # Assert: No rejection, completion allowed
+        assert verification.all_passed is True
+        assert all(result.passed for result in verification.gate_results.values())
+
+        # Assert: Continuation prompt should raise error (no failures to report)
+        with pytest.raises(ValueError, match="all.*pass"):
+            continuation_service.generate_prompt(verification)
+
+    @pytest.mark.asyncio
+    async def test_rejection_on_multiple_gate_failures(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that multiple simultaneous gate failures are handled correctly.
+
+        Scenario: Agent claims completion with multiple quality gate failures.
+        Expected: Rejection occurs, comprehensive continuation prompt generated.
+        """
+        from unittest.mock import Mock
+
+        # Create mock orchestrator with multiple failing gates
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=False,
+            message="Build gate failed: Type errors detected",
+            details={
+                "return_code": 1,
+                "stderr": "src/main.py:10: error: Incompatible return value type",
+            },
+        )
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=False,
+            message="Lint gate failed: Linting issues detected",
+            details={
+                "return_code": 1,
+                "stderr": "src/main.py:10: E501 line too long",
+            },
+        )
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=False,
+            message="Test gate failed: Test failures detected",
+            details={
+                "return_code": 1,
+                "stderr": "FAILED tests/test_main.py::test_function",
+            },
+        )
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=False,
+            message="Coverage gate failed: 60.0% coverage below minimum 85%",
+            details={
+                "coverage_percent": 60.0,
+                "minimum_coverage": 85.0,
+            },
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        # Simulate agent claiming completion
+        verification = await orchestrator_with_mocks.verify_completion()
+
+        # Assert: Rejection occurred for all gates
+        assert verification.all_passed is False
+        assert verification.gate_results["build"].passed is False
+        assert verification.gate_results["lint"].passed is False
+        assert verification.gate_results["test"].passed is False
+        assert verification.gate_results["coverage"].passed is False
+
+        # Assert: Forced continuation prompt covers all failures
+        prompt = continuation_service.generate_prompt(verification)
+        assert isinstance(prompt, str)
+        assert len(prompt) > 0
+        # Prompt should mention all failed gates
+        assert "build" in prompt.lower() or "type" in prompt.lower()
+        assert "lint" in prompt.lower()
+        assert "test" in prompt.lower()
+        assert "coverage" in prompt.lower()
+        # Prompt should be comprehensive and directive
+        assert "must" in prompt.lower() or "fix" in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_continuation_prompt_is_non_negotiable(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that continuation prompts are non-negotiable and directive.
+
+        Scenario: Any gate failure generates a prompt.
+        Expected: Prompt uses directive language, not suggestions.
+        """
+        from unittest.mock import Mock
+
+        # Create mock orchestrator with one failing gate
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=True, message="Build passed", details={}
+        )
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=False,
+            message="Lint gate failed",
+            details={"return_code": 1},
+        )
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=True, message="Test passed", details={}
+        )
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=True, message="Coverage passed", details={}
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        verification = await orchestrator_with_mocks.verify_completion()
+        prompt = continuation_service.generate_prompt(verification)
+
+        # Assert: Prompt uses directive language (MUST, REQUIRED, etc.)
+        prompt_lower = prompt.lower()
+        has_directive_language = (
+            "must" in prompt_lower
+            or "required" in prompt_lower
+            or "do not" in prompt_lower
+            or "cannot" in prompt_lower
+        )
+        assert has_directive_language, "Prompt should use directive language"
+
+        # Assert: Prompt does not use suggestion language
+        has_suggestion_language = (
+            "consider" in prompt_lower
+            or "might want" in prompt_lower
+            or "could" in prompt_lower
+            or "perhaps" in prompt_lower
+        )
+        assert not has_suggestion_language, "Prompt should not use suggestion language"
+
+    @pytest.mark.asyncio
+    async def test_continuation_prompt_includes_remediation_steps(
+        self,
+        orchestrator: QualityOrchestrator,
+        continuation_service: ForcedContinuationService,
+    ) -> None:
+        """Test that continuation prompts include actionable remediation steps.
+
+        Scenario: Gate failures generate prompt.
+        Expected: Prompt includes specific commands and actions to fix issues.
+        """
+        from unittest.mock import Mock
+
+        # Create mock orchestrator with failing test gate
+        mock_build_gate = Mock()
+        mock_build_gate.check.return_value = GateResult(
+            passed=True, message="Build passed", details={}
+        )
+        mock_lint_gate = Mock()
+        mock_lint_gate.check.return_value = GateResult(
+            passed=True, message="Lint passed", details={}
+        )
+        mock_test_gate = Mock()
+        mock_test_gate.check.return_value = GateResult(
+            passed=False,
+            message="Test gate failed",
+            details={"return_code": 1},
+        )
+        mock_coverage_gate = Mock()
+        mock_coverage_gate.check.return_value = GateResult(
+            passed=True, message="Coverage passed", details={}
+        )
+
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=mock_build_gate,
+            lint_gate=mock_lint_gate,
+            test_gate=mock_test_gate,
+            coverage_gate=mock_coverage_gate,
+        )
+
+        verification = await orchestrator_with_mocks.verify_completion()
+        prompt = continuation_service.generate_prompt(verification)
+
+        # Assert: Prompt includes remediation commands
+        prompt_lower = prompt.lower()
+        has_commands = (
+            "pytest" in prompt_lower
+            or "run:" in prompt_lower
+            or "fix" in prompt_lower
+        )
+        assert has_commands, "Prompt should include specific remediation commands"
+
+    @pytest.mark.asyncio
+    async def test_agent_cannot_bypass_gates(
+        self,
+        orchestrator: QualityOrchestrator,
+    ) -> None:
+        """Test that agents cannot bypass quality gates.
+
+        Scenario: All gates must be checked, no shortcuts allowed.
+        Expected: verify_completion always runs all gates.
+        """
+        from unittest.mock import Mock
+
+        # Create mock gates with side effects to track if they were called
+        call_tracker = {"build": False, "lint": False, "test": False, "coverage": False}
+
+        def make_tracked_gate(gate_name: str, passes: bool) -> Mock:
+            """Create a mock gate that tracks if it was called."""
+            mock_gate = Mock()
+
+            def tracked_check() -> GateResult:
+                call_tracker[gate_name] = True
+                return GateResult(
+                    passed=passes,
+                    message=f"{gate_name} {'passed' if passes else 'failed'}",
+                    details={},
+                )
+
+            mock_gate.check = tracked_check
+            return mock_gate
+
+        # Create orchestrator with all failing gates
+        orchestrator_with_mocks = QualityOrchestrator(
+            build_gate=make_tracked_gate("build", False),
+            lint_gate=make_tracked_gate("lint", False),
+            test_gate=make_tracked_gate("test", False),
+            coverage_gate=make_tracked_gate("coverage", False),
+        )
+
+        # Run verification
+        verification = await orchestrator_with_mocks.verify_completion()
+
+        # Assert: All gates were executed (no short-circuiting)
+        assert call_tracker["build"], "Build gate should be called"
+        assert call_tracker["lint"], "Lint gate should be called"
+        assert call_tracker["test"], "Test gate should be called"
+        assert call_tracker["coverage"], "Coverage gate should be called"
+
+        # Assert: Verification failed as expected
+        assert verification.all_passed is False
--- a/docs/scratchpads/149-test-rejection-loop.md
+++ b/docs/scratchpads/149-test-rejection-loop.md
@@ -0,0 +1,41 @@
+# Issue #149: [COORD-009] Test rejection loop
+
+## Objective
+
+Validate quality gates prevent premature completion through simulated rejection scenarios.
+
+## Approach
+
+1. Create comprehensive integration tests for rejection loop scenarios
+2. Test each gate failure type triggers proper rejection
+3. Verify forced continuation prompts are generated correctly
+4. Ensure agents cannot bypass gates
+5. Validate loop continues until all gates pass
+
+## Test Scenarios
+
+- [ ] Agent claims done with failing tests
+- [ ] Agent claims done with linting errors
+- [ ] Agent claims done with low coverage
+- [ ] Agent claims done with build errors
+- [ ] All gates passing allows completion
+- [ ] Multiple simultaneous gate failures handled correctly
+- [ ] Forced continuation prompts are non-negotiable and actionable
+
+## Progress
+
+- [x] Read existing QualityOrchestrator and ForcedContinuationService code
+- [x] Write comprehensive integration tests (TDD)
+- [x] Run tests - all 9 tests pass
+- [x] Fix linting issues
+- [x] Run type checking - passes
+- [x] All quality gates pass
+- [ ] Commit changes
+
+## Testing
+
+Test file: `apps/coordinator/tests/test_rejection_loop.py`
+
+## Notes
+
+The services already exist from Issue 148, so this is primarily testing the rejection loop behavior through integration tests that simulate agent completion scenarios.