test(#149): Add comprehensive rejection loop integration tests
Add integration tests validating rejection loop behavior:

- Agent claims done with failing tests → rejection + forced continuation
- Agent claims done with linting errors → rejection + forced continuation
- Agent claims done with low coverage → rejection + forced continuation
- Agent claims done with build errors → rejection + forced continuation
- All gates passing → completion allowed
- Multiple simultaneous failures → comprehensive rejection
- Continuation prompts are non-negotiable and directive
- Agents cannot bypass quality gates
- Remediation steps included in prompts

All 9 tests pass.
Build gate: passes
Lint gate: passes
Test gate: passes (100% pass rate)
Coverage: quality_orchestrator.py at 85%, forced_continuation.py at 100%

Refs #149

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
591
apps/coordinator/tests/test_rejection_loop.py
Normal file
591
apps/coordinator/tests/test_rejection_loop.py
Normal file
@@ -0,0 +1,591 @@
|
|||||||
|
"""Integration tests for rejection loop behavior.
|
||||||
|
|
||||||
|
These tests simulate scenarios where an agent claims completion with various
|
||||||
|
quality gate failures, verifying that:
|
||||||
|
1. Each failure type triggers rejection
|
||||||
|
2. Forced continuation prompts are generated
|
||||||
|
3. Agents cannot bypass quality gates
|
||||||
|
4. Loop continues until all gates pass
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest.mock import Mock

import pytest

from src.forced_continuation import ForcedContinuationService
from src.gates.quality_gate import GateResult
from src.quality_orchestrator import QualityOrchestrator
|
||||||
|
|
||||||
|
|
||||||
|
class TestRejectionLoop:
    """Test suite for rejection loop integration scenarios.

    Each test wires a QualityOrchestrator with mocked gates, simulates an
    agent claiming completion via ``verify_completion()``, and asserts the
    rejection / forced-continuation contract.
    """

    # ------------------------------------------------------------------
    # Fixtures and helpers
    # ------------------------------------------------------------------

    @pytest.fixture
    def orchestrator(self) -> QualityOrchestrator:
        """Create a QualityOrchestrator instance for testing."""
        return QualityOrchestrator()

    @pytest.fixture
    def continuation_service(self) -> ForcedContinuationService:
        """Create a ForcedContinuationService instance for testing."""
        return ForcedContinuationService()

    @staticmethod
    def _mock_gate(
        *, passed: bool, message: str, details: dict | None = None
    ) -> Mock:
        """Build a mock gate whose check() returns a fixed GateResult."""
        gate = Mock()
        gate.check.return_value = GateResult(
            passed=passed,
            message=message,
            details=details if details is not None else {},
        )
        return gate

    def _orchestrator_with(self, **overrides: Mock) -> QualityOrchestrator:
        """Build an orchestrator where every gate passes unless overridden.

        Keyword names are gate names ("build", "lint", "test", "coverage");
        values replace the default passing mock for that gate.
        """
        gates = {
            name: self._mock_gate(
                passed=True, message=f"{name.capitalize()} passed"
            )
            for name in ("build", "lint", "test", "coverage")
        }
        gates.update(overrides)
        return QualityOrchestrator(
            build_gate=gates["build"],
            lint_gate=gates["lint"],
            test_gate=gates["test"],
            coverage_gate=gates["coverage"],
        )

    # ------------------------------------------------------------------
    # Single-gate rejection scenarios
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_rejection_on_failing_tests(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that failing tests trigger rejection and continuation prompt.

        Scenario: Agent claims completion but tests are failing.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_tests = self._mock_gate(
            passed=False,
            message="Test gate failed: 2 tests failed out of 10",
            details={
                "return_code": 1,
                "stderr": (
                    "FAILED tests/test_auth.py::test_login - AssertionError\n"
                    "FAILED tests/test_users.py::test_create_user - ValueError"
                ),
            },
        )
        orchestrator_with_mocks = self._orchestrator_with(test=failing_tests)

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["test"].passed is False
        assert "failed" in verification.gate_results["test"].message.lower()

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "test" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include specific failure details
        assert (
            "test_auth.py" in prompt
            or "test_users.py" in prompt
            or "failed" in prompt.lower()
        )

    @pytest.mark.asyncio
    async def test_rejection_on_linting_errors(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that linting errors trigger rejection and continuation prompt.

        Scenario: Agent claims completion but code has linting issues.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_lint = self._mock_gate(
            passed=False,
            message="Lint gate failed: 5 linting issues found",
            details={
                "return_code": 1,
                "stderr": (
                    "src/main.py:10:80: E501 line too long (92 > 79 characters)\n"
                    "src/models.py:5:1: F401 'typing.Any' imported but unused\n"
                    "src/utils.py:15:1: W293 blank line contains whitespace"
                ),
            },
        )
        orchestrator_with_mocks = self._orchestrator_with(lint=failing_lint)

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["lint"].passed is False
        assert "lint" in verification.gate_results["lint"].message.lower()

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "lint" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include linting details or commands
        assert "ruff" in prompt.lower() or "lint" in prompt.lower()

    @pytest.mark.asyncio
    async def test_rejection_on_low_coverage(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that low coverage triggers rejection and continuation prompt.

        Scenario: Agent claims completion but coverage is below minimum.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_coverage = self._mock_gate(
            passed=False,
            message="Coverage gate failed: 72.5% coverage below minimum 85%",
            details={"coverage_percent": 72.5, "minimum_coverage": 85.0},
        )
        orchestrator_with_mocks = self._orchestrator_with(
            coverage=failing_coverage
        )

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["coverage"].passed is False
        assert "coverage" in verification.gate_results["coverage"].message.lower()

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "coverage" in prompt.lower()
        # Prompt should include specific coverage numbers
        assert "72.5" in prompt or "72" in prompt
        assert "85" in prompt
        assert "must" in prompt.lower() or "increase" in prompt.lower()

    @pytest.mark.asyncio
    async def test_rejection_on_build_errors(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that build errors trigger rejection and continuation prompt.

        Scenario: Agent claims completion but code has type errors.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_build = self._mock_gate(
            passed=False,
            message="Build gate failed: Type errors detected",
            details={
                "return_code": 1,
                "stderr": (
                    "src/main.py:10: error: Incompatible return value type "
                    "(got 'str', expected 'int')\n"
                    "src/models.py:25: error: Missing type annotation for variable 'config'"
                ),
            },
        )
        orchestrator_with_mocks = self._orchestrator_with(build=failing_build)

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["build"].passed is False
        build_msg = verification.gate_results["build"].message.lower()
        assert "build" in build_msg or "type" in build_msg

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "build" in prompt.lower() or "type" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include type error details or mypy commands
        assert "mypy" in prompt.lower() or "type" in prompt.lower()

    # ------------------------------------------------------------------
    # Acceptance and multi-failure scenarios
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_acceptance_on_all_gates_passing(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that all gates passing allows completion without rejection.

        Scenario: Agent claims completion and all quality gates pass.
        Expected: No rejection, completion allowed, no continuation prompt.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            build=self._mock_gate(
                passed=True,
                message="Build gate passed: No type errors found",
                details={"return_code": 0},
            ),
            lint=self._mock_gate(
                passed=True,
                message="Lint gate passed: No linting issues found",
                details={"return_code": 0},
            ),
            test=self._mock_gate(
                passed=True,
                message="Test gate passed: All 10 tests passed (100% pass rate)",
                details={"return_code": 0},
            ),
            coverage=self._mock_gate(
                passed=True,
                message="Coverage gate passed: 90.0% coverage (minimum: 85%)",
                details={"coverage_percent": 90.0, "minimum_coverage": 85.0},
            ),
        )

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: No rejection, completion allowed
        assert verification.all_passed is True
        assert all(result.passed for result in verification.gate_results.values())

        # Assert: Continuation prompt should raise error (no failures to report)
        with pytest.raises(ValueError, match="all.*pass"):
            continuation_service.generate_prompt(verification)

    @pytest.mark.asyncio
    async def test_rejection_on_multiple_gate_failures(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that multiple simultaneous gate failures are handled correctly.

        Scenario: Agent claims completion with multiple quality gate failures.
        Expected: Rejection occurs, comprehensive continuation prompt generated.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            build=self._mock_gate(
                passed=False,
                message="Build gate failed: Type errors detected",
                details={
                    "return_code": 1,
                    "stderr": "src/main.py:10: error: Incompatible return value type",
                },
            ),
            lint=self._mock_gate(
                passed=False,
                message="Lint gate failed: Linting issues detected",
                details={
                    "return_code": 1,
                    "stderr": "src/main.py:10: E501 line too long",
                },
            ),
            test=self._mock_gate(
                passed=False,
                message="Test gate failed: Test failures detected",
                details={
                    "return_code": 1,
                    "stderr": "FAILED tests/test_main.py::test_function",
                },
            ),
            coverage=self._mock_gate(
                passed=False,
                message="Coverage gate failed: 60.0% coverage below minimum 85%",
                details={"coverage_percent": 60.0, "minimum_coverage": 85.0},
            ),
        )

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred for all gates
        assert verification.all_passed is False
        assert verification.gate_results["build"].passed is False
        assert verification.gate_results["lint"].passed is False
        assert verification.gate_results["test"].passed is False
        assert verification.gate_results["coverage"].passed is False

        # Assert: Forced continuation prompt covers all failures
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        # Prompt should mention all failed gates
        assert "build" in prompt.lower() or "type" in prompt.lower()
        assert "lint" in prompt.lower()
        assert "test" in prompt.lower()
        assert "coverage" in prompt.lower()
        # Prompt should be comprehensive and directive
        assert "must" in prompt.lower() or "fix" in prompt.lower()

    # ------------------------------------------------------------------
    # Prompt-contract scenarios
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_continuation_prompt_is_non_negotiable(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that continuation prompts are non-negotiable and directive.

        Scenario: Any gate failure generates a prompt.
        Expected: Prompt uses directive language, not suggestions.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            lint=self._mock_gate(
                passed=False,
                message="Lint gate failed",
                details={"return_code": 1},
            )
        )

        verification = await orchestrator_with_mocks.verify_completion()
        prompt = continuation_service.generate_prompt(verification)

        # Assert: Prompt uses directive language (MUST, REQUIRED, etc.)
        prompt_lower = prompt.lower()
        directive_markers = ("must", "required", "do not", "cannot")
        assert any(
            marker in prompt_lower for marker in directive_markers
        ), "Prompt should use directive language"

        # Assert: Prompt does not use suggestion language
        suggestion_markers = ("consider", "might want", "could", "perhaps")
        assert not any(
            marker in prompt_lower for marker in suggestion_markers
        ), "Prompt should not use suggestion language"

    @pytest.mark.asyncio
    async def test_continuation_prompt_includes_remediation_steps(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that continuation prompts include actionable remediation steps.

        Scenario: Gate failures generate prompt.
        Expected: Prompt includes specific commands and actions to fix issues.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            test=self._mock_gate(
                passed=False,
                message="Test gate failed",
                details={"return_code": 1},
            )
        )

        verification = await orchestrator_with_mocks.verify_completion()
        prompt = continuation_service.generate_prompt(verification)

        # Assert: Prompt includes remediation commands
        prompt_lower = prompt.lower()
        assert (
            "pytest" in prompt_lower
            or "run:" in prompt_lower
            or "fix" in prompt_lower
        ), "Prompt should include specific remediation commands"

    @pytest.mark.asyncio
    async def test_agent_cannot_bypass_gates(
        self,
        orchestrator: QualityOrchestrator,
    ) -> None:
        """Test that agents cannot bypass quality gates.

        Scenario: All gates must be checked, no shortcuts allowed.
        Expected: verify_completion always runs all gates.
        """
        # Track which gates actually get invoked during verification
        call_tracker = {
            "build": False,
            "lint": False,
            "test": False,
            "coverage": False,
        }

        def make_tracked_gate(gate_name: str, passes: bool) -> Mock:
            """Create a mock gate that records that it was called."""
            mock_gate = Mock()

            def tracked_check() -> GateResult:
                call_tracker[gate_name] = True
                return GateResult(
                    passed=passes,
                    message=f"{gate_name} {'passed' if passes else 'failed'}",
                    details={},
                )

            mock_gate.check = tracked_check
            return mock_gate

        # Create orchestrator with all failing gates
        orchestrator_with_mocks = QualityOrchestrator(
            build_gate=make_tracked_gate("build", False),
            lint_gate=make_tracked_gate("lint", False),
            test_gate=make_tracked_gate("test", False),
            coverage_gate=make_tracked_gate("coverage", False),
        )

        # Run verification
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: All gates were executed (no short-circuiting)
        assert call_tracker["build"], "Build gate should be called"
        assert call_tracker["lint"], "Lint gate should be called"
        assert call_tracker["test"], "Test gate should be called"
        assert call_tracker["coverage"], "Coverage gate should be called"

        # Assert: Verification failed as expected
        assert verification.all_passed is False
|
||||||
41
docs/scratchpads/149-test-rejection-loop.md
Normal file
41
docs/scratchpads/149-test-rejection-loop.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# Issue #149: [COORD-009] Test rejection loop

## Objective

Validate quality gates prevent premature completion through simulated rejection scenarios.

## Approach

1. Create comprehensive integration tests for rejection loop scenarios
2. Test each gate failure type triggers proper rejection
3. Verify forced continuation prompts are generated correctly
4. Ensure agents cannot bypass gates
5. Validate loop continues until all gates pass

## Test Scenarios

- [ ] Agent claims done with failing tests
- [ ] Agent claims done with linting errors
- [ ] Agent claims done with low coverage
- [ ] Agent claims done with build errors
- [ ] All gates passing allows completion
- [ ] Multiple simultaneous gate failures handled correctly
- [ ] Forced continuation prompts are non-negotiable and actionable

## Progress

- [x] Read existing QualityOrchestrator and ForcedContinuationService code
- [x] Write comprehensive integration tests (TDD)
- [x] Run tests - all 9 tests pass
- [x] Fix linting issues
- [x] Run type checking - passes
- [x] All quality gates pass
- [ ] Commit changes

## Testing

Test file: `apps/coordinator/tests/test_rejection_loop.py`

## Notes

The services already exist from Issue 148, so this is primarily testing the rejection loop behavior through integration tests that simulate agent completion scenarios.
|
||||||
Reference in New Issue
Block a user