test(#149): Add comprehensive rejection loop integration tests

Add integration tests validating rejection loop behavior:
- Agent claims done with failing tests → rejection + forced continuation
- Agent claims done with linting errors → rejection + forced continuation
- Agent claims done with low coverage → rejection + forced continuation
- Agent claims done with build errors → rejection + forced continuation
- All gates passing → completion allowed
- Multiple simultaneous failures → comprehensive rejection
- Continuation prompts are non-negotiable and directive
- Agents cannot bypass quality gates
- Remediation steps included in prompts

All 9 tests pass.
Build gate: passes
Lint gate: passes
Test gate: passes (100% pass rate)
Coverage: quality_orchestrator.py at 85%, forced_continuation.py at 100%

Refs #149

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-01 20:11:15 -06:00
parent 28d0e4b1df
commit ac3f5c1af9
2 changed files with 632 additions and 0 deletions

View File

@@ -0,0 +1,591 @@
"""Integration tests for rejection loop behavior.
These tests simulate scenarios where an agent claims completion with various
quality gate failures, verifying that:
1. Each failure type triggers rejection
2. Forced continuation prompts are generated
3. Agents cannot bypass quality gates
4. Loop continues until all gates pass
"""
import pytest
from src.forced_continuation import ForcedContinuationService
from src.gates.quality_gate import GateResult
from src.quality_orchestrator import QualityOrchestrator
class TestRejectionLoop:
"""Test suite for rejection loop integration scenarios."""
@pytest.fixture
def orchestrator(self) -> QualityOrchestrator:
"""Create a QualityOrchestrator instance for testing."""
return QualityOrchestrator()
@pytest.fixture
def continuation_service(self) -> ForcedContinuationService:
"""Create a ForcedContinuationService instance for testing."""
return ForcedContinuationService()
@pytest.mark.asyncio
async def test_rejection_on_failing_tests(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that failing tests trigger rejection and continuation prompt.
Scenario: Agent claims completion but tests are failing.
Expected: Rejection occurs, forced continuation prompt generated.
"""
# Create mock orchestrator with failing test gate
from unittest.mock import Mock
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=False,
message="Test gate failed: 2 tests failed out of 10",
details={
"return_code": 1,
"stderr": (
"FAILED tests/test_auth.py::test_login - AssertionError\n"
"FAILED tests/test_users.py::test_create_user - ValueError"
),
},
)
# Other gates pass
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=True, message="Build passed", details={}
)
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=True, message="Lint passed", details={}
)
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=True, message="Coverage passed", details={}
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
# Simulate agent claiming completion
verification = await orchestrator_with_mocks.verify_completion()
# Assert: Rejection occurred
assert verification.all_passed is False
assert verification.gate_results["test"].passed is False
assert "failed" in verification.gate_results["test"].message.lower()
# Assert: Forced continuation prompt is generated
prompt = continuation_service.generate_prompt(verification)
assert isinstance(prompt, str)
assert len(prompt) > 0
assert "test" in prompt.lower()
assert "must" in prompt.lower() or "fix" in prompt.lower()
# Prompt should include specific failure details
assert "test_auth.py" in prompt or "test_users.py" in prompt or "failed" in prompt.lower()
@pytest.mark.asyncio
async def test_rejection_on_linting_errors(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that linting errors trigger rejection and continuation prompt.
Scenario: Agent claims completion but code has linting issues.
Expected: Rejection occurs, forced continuation prompt generated.
"""
from unittest.mock import Mock
# Create mock orchestrator with failing lint gate
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=False,
message="Lint gate failed: 5 linting issues found",
details={
"return_code": 1,
"stderr": (
"src/main.py:10:80: E501 line too long (92 > 79 characters)\n"
"src/models.py:5:1: F401 'typing.Any' imported but unused\n"
"src/utils.py:15:1: W293 blank line contains whitespace"
),
},
)
# Other gates pass
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=True, message="Build passed", details={}
)
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=True, message="Test passed", details={}
)
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=True, message="Coverage passed", details={}
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
# Simulate agent claiming completion
verification = await orchestrator_with_mocks.verify_completion()
# Assert: Rejection occurred
assert verification.all_passed is False
assert verification.gate_results["lint"].passed is False
assert "lint" in verification.gate_results["lint"].message.lower()
# Assert: Forced continuation prompt is generated
prompt = continuation_service.generate_prompt(verification)
assert isinstance(prompt, str)
assert len(prompt) > 0
assert "lint" in prompt.lower()
assert "must" in prompt.lower() or "fix" in prompt.lower()
# Prompt should include linting details or commands
assert "ruff" in prompt.lower() or "lint" in prompt.lower()
@pytest.mark.asyncio
async def test_rejection_on_low_coverage(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that low coverage triggers rejection and continuation prompt.
Scenario: Agent claims completion but coverage is below minimum.
Expected: Rejection occurs, forced continuation prompt generated.
"""
from unittest.mock import Mock
# Create mock orchestrator with failing coverage gate
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=False,
message="Coverage gate failed: 72.5% coverage below minimum 85%",
details={
"coverage_percent": 72.5,
"minimum_coverage": 85.0,
},
)
# Other gates pass
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=True, message="Build passed", details={}
)
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=True, message="Lint passed", details={}
)
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=True, message="Test passed", details={}
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
# Simulate agent claiming completion
verification = await orchestrator_with_mocks.verify_completion()
# Assert: Rejection occurred
assert verification.all_passed is False
assert verification.gate_results["coverage"].passed is False
assert "coverage" in verification.gate_results["coverage"].message.lower()
# Assert: Forced continuation prompt is generated
prompt = continuation_service.generate_prompt(verification)
assert isinstance(prompt, str)
assert len(prompt) > 0
assert "coverage" in prompt.lower()
# Prompt should include specific coverage numbers
assert "72.5" in prompt or "72" in prompt
assert "85" in prompt
assert "must" in prompt.lower() or "increase" in prompt.lower()
@pytest.mark.asyncio
async def test_rejection_on_build_errors(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that build errors trigger rejection and continuation prompt.
Scenario: Agent claims completion but code has type errors.
Expected: Rejection occurs, forced continuation prompt generated.
"""
from unittest.mock import Mock
# Create mock orchestrator with failing build gate
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=False,
message="Build gate failed: Type errors detected",
details={
"return_code": 1,
"stderr": (
"src/main.py:10: error: Incompatible return value type "
"(got 'str', expected 'int')\n"
"src/models.py:25: error: Missing type annotation for variable 'config'"
),
},
)
# Other gates pass
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=True, message="Lint passed", details={}
)
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=True, message="Test passed", details={}
)
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=True, message="Coverage passed", details={}
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
# Simulate agent claiming completion
verification = await orchestrator_with_mocks.verify_completion()
# Assert: Rejection occurred
assert verification.all_passed is False
assert verification.gate_results["build"].passed is False
build_msg = verification.gate_results["build"].message.lower()
assert "build" in build_msg or "type" in build_msg
# Assert: Forced continuation prompt is generated
prompt = continuation_service.generate_prompt(verification)
assert isinstance(prompt, str)
assert len(prompt) > 0
assert "build" in prompt.lower() or "type" in prompt.lower()
assert "must" in prompt.lower() or "fix" in prompt.lower()
# Prompt should include type error details or mypy commands
assert "mypy" in prompt.lower() or "type" in prompt.lower()
@pytest.mark.asyncio
async def test_acceptance_on_all_gates_passing(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that all gates passing allows completion without rejection.
Scenario: Agent claims completion and all quality gates pass.
Expected: No rejection, completion allowed, no continuation prompt.
"""
from unittest.mock import Mock
# Create mock orchestrator with all gates passing
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=True,
message="Build gate passed: No type errors found",
details={"return_code": 0},
)
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=True,
message="Lint gate passed: No linting issues found",
details={"return_code": 0},
)
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=True,
message="Test gate passed: All 10 tests passed (100% pass rate)",
details={"return_code": 0},
)
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=True,
message="Coverage gate passed: 90.0% coverage (minimum: 85%)",
details={"coverage_percent": 90.0, "minimum_coverage": 85.0},
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
# Simulate agent claiming completion
verification = await orchestrator_with_mocks.verify_completion()
# Assert: No rejection, completion allowed
assert verification.all_passed is True
assert all(result.passed for result in verification.gate_results.values())
# Assert: Continuation prompt should raise error (no failures to report)
with pytest.raises(ValueError, match="all.*pass"):
continuation_service.generate_prompt(verification)
@pytest.mark.asyncio
async def test_rejection_on_multiple_gate_failures(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that multiple simultaneous gate failures are handled correctly.
Scenario: Agent claims completion with multiple quality gate failures.
Expected: Rejection occurs, comprehensive continuation prompt generated.
"""
from unittest.mock import Mock
# Create mock orchestrator with multiple failing gates
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=False,
message="Build gate failed: Type errors detected",
details={
"return_code": 1,
"stderr": "src/main.py:10: error: Incompatible return value type",
},
)
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=False,
message="Lint gate failed: Linting issues detected",
details={
"return_code": 1,
"stderr": "src/main.py:10: E501 line too long",
},
)
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=False,
message="Test gate failed: Test failures detected",
details={
"return_code": 1,
"stderr": "FAILED tests/test_main.py::test_function",
},
)
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=False,
message="Coverage gate failed: 60.0% coverage below minimum 85%",
details={
"coverage_percent": 60.0,
"minimum_coverage": 85.0,
},
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
# Simulate agent claiming completion
verification = await orchestrator_with_mocks.verify_completion()
# Assert: Rejection occurred for all gates
assert verification.all_passed is False
assert verification.gate_results["build"].passed is False
assert verification.gate_results["lint"].passed is False
assert verification.gate_results["test"].passed is False
assert verification.gate_results["coverage"].passed is False
# Assert: Forced continuation prompt covers all failures
prompt = continuation_service.generate_prompt(verification)
assert isinstance(prompt, str)
assert len(prompt) > 0
# Prompt should mention all failed gates
assert "build" in prompt.lower() or "type" in prompt.lower()
assert "lint" in prompt.lower()
assert "test" in prompt.lower()
assert "coverage" in prompt.lower()
# Prompt should be comprehensive and directive
assert "must" in prompt.lower() or "fix" in prompt.lower()
@pytest.mark.asyncio
async def test_continuation_prompt_is_non_negotiable(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that continuation prompts are non-negotiable and directive.
Scenario: Any gate failure generates a prompt.
Expected: Prompt uses directive language, not suggestions.
"""
from unittest.mock import Mock
# Create mock orchestrator with one failing gate
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=True, message="Build passed", details={}
)
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=False,
message="Lint gate failed",
details={"return_code": 1},
)
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=True, message="Test passed", details={}
)
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=True, message="Coverage passed", details={}
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
verification = await orchestrator_with_mocks.verify_completion()
prompt = continuation_service.generate_prompt(verification)
# Assert: Prompt uses directive language (MUST, REQUIRED, etc.)
prompt_lower = prompt.lower()
has_directive_language = (
"must" in prompt_lower
or "required" in prompt_lower
or "do not" in prompt_lower
or "cannot" in prompt_lower
)
assert has_directive_language, "Prompt should use directive language"
# Assert: Prompt does not use suggestion language
has_suggestion_language = (
"consider" in prompt_lower
or "might want" in prompt_lower
or "could" in prompt_lower
or "perhaps" in prompt_lower
)
assert not has_suggestion_language, "Prompt should not use suggestion language"
@pytest.mark.asyncio
async def test_continuation_prompt_includes_remediation_steps(
self,
orchestrator: QualityOrchestrator,
continuation_service: ForcedContinuationService,
) -> None:
"""Test that continuation prompts include actionable remediation steps.
Scenario: Gate failures generate prompt.
Expected: Prompt includes specific commands and actions to fix issues.
"""
from unittest.mock import Mock
# Create mock orchestrator with failing test gate
mock_build_gate = Mock()
mock_build_gate.check.return_value = GateResult(
passed=True, message="Build passed", details={}
)
mock_lint_gate = Mock()
mock_lint_gate.check.return_value = GateResult(
passed=True, message="Lint passed", details={}
)
mock_test_gate = Mock()
mock_test_gate.check.return_value = GateResult(
passed=False,
message="Test gate failed",
details={"return_code": 1},
)
mock_coverage_gate = Mock()
mock_coverage_gate.check.return_value = GateResult(
passed=True, message="Coverage passed", details={}
)
orchestrator_with_mocks = QualityOrchestrator(
build_gate=mock_build_gate,
lint_gate=mock_lint_gate,
test_gate=mock_test_gate,
coverage_gate=mock_coverage_gate,
)
verification = await orchestrator_with_mocks.verify_completion()
prompt = continuation_service.generate_prompt(verification)
# Assert: Prompt includes remediation commands
prompt_lower = prompt.lower()
has_commands = (
"pytest" in prompt_lower
or "run:" in prompt_lower
or "fix" in prompt_lower
)
assert has_commands, "Prompt should include specific remediation commands"
@pytest.mark.asyncio
async def test_agent_cannot_bypass_gates(
self,
orchestrator: QualityOrchestrator,
) -> None:
"""Test that agents cannot bypass quality gates.
Scenario: All gates must be checked, no shortcuts allowed.
Expected: verify_completion always runs all gates.
"""
from unittest.mock import Mock
# Create mock gates with side effects to track if they were called
call_tracker = {"build": False, "lint": False, "test": False, "coverage": False}
def make_tracked_gate(gate_name: str, passes: bool) -> Mock:
"""Create a mock gate that tracks if it was called."""
mock_gate = Mock()
def tracked_check() -> GateResult:
call_tracker[gate_name] = True
return GateResult(
passed=passes,
message=f"{gate_name} {'passed' if passes else 'failed'}",
details={},
)
mock_gate.check = tracked_check
return mock_gate
# Create orchestrator with all failing gates
orchestrator_with_mocks = QualityOrchestrator(
build_gate=make_tracked_gate("build", False),
lint_gate=make_tracked_gate("lint", False),
test_gate=make_tracked_gate("test", False),
coverage_gate=make_tracked_gate("coverage", False),
)
# Run verification
verification = await orchestrator_with_mocks.verify_completion()
# Assert: All gates were executed (no short-circuiting)
assert call_tracker["build"], "Build gate should be called"
assert call_tracker["lint"], "Lint gate should be called"
assert call_tracker["test"], "Test gate should be called"
assert call_tracker["coverage"], "Coverage gate should be called"
# Assert: Verification failed as expected
assert verification.all_passed is False

View File

@@ -0,0 +1,41 @@
# Issue #149: [COORD-009] Test rejection loop
## Objective
Validate quality gates prevent premature completion through simulated rejection scenarios.
## Approach
1. Create comprehensive integration tests for rejection loop scenarios
2. Test each gate failure type triggers proper rejection
3. Verify forced continuation prompts are generated correctly
4. Ensure agents cannot bypass gates
5. Validate loop continues until all gates pass
## Test Scenarios
- [ ] Agent claims done with failing tests
- [ ] Agent claims done with linting errors
- [ ] Agent claims done with low coverage
- [ ] Agent claims done with build errors
- [ ] All gates passing allows completion
- [ ] Multiple simultaneous gate failures handled correctly
- [ ] Forced continuation prompts are non-negotiable and actionable
## Progress
- [x] Read existing QualityOrchestrator and ForcedContinuationService code
- [x] Write comprehensive integration tests (TDD)
- [x] Run tests - all 9 tests pass
- [x] Fix linting issues
- [x] Run type checking - passes
- [x] All quality gates pass
- [ ] Commit changes
## Testing
Test file: `apps/coordinator/tests/test_rejection_loop.py`
## Notes
The services already exist from Issue 148, so this is primarily testing the rejection loop behavior through integration tests that simulate agent completion scenarios.