test(#149): Add comprehensive rejection loop integration tests

Add integration tests validating rejection loop behavior:
- Agent claims done with failing tests → rejection + forced continuation
- Agent claims done with linting errors → rejection + forced continuation
- Agent claims done with low coverage → rejection + forced continuation
- Agent claims done with build errors → rejection + forced continuation
- All gates passing → completion allowed
- Multiple simultaneous failures → comprehensive rejection
- Continuation prompts are non-negotiable and directive
- Agents cannot bypass quality gates
- Remediation steps included in prompts

All 9 tests pass.
Build gate: passes
Lint gate: passes
Test gate: passes (100% pass rate)
Coverage: quality_orchestrator.py at 85%, forced_continuation.py at 100%

Refs #149

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-01 20:11:15 -06:00
parent 28d0e4b1df
commit ac3f5c1af9
2 changed files with 632 additions and 0 deletions

View File

@@ -0,0 +1,591 @@
"""Integration tests for rejection loop behavior.
These tests simulate scenarios where an agent claims completion with various
quality gate failures, verifying that:
1. Each failure type triggers rejection
2. Forced continuation prompts are generated
3. Agents cannot bypass quality gates
4. Loop continues until all gates pass
"""
from unittest.mock import Mock

import pytest

from src.forced_continuation import ForcedContinuationService
from src.gates.quality_gate import GateResult
from src.quality_orchestrator import QualityOrchestrator
class TestRejectionLoop:
    """Test suite for rejection loop integration scenarios.

    Each test wires a QualityOrchestrator with mock gates (built by the
    private helpers below), simulates an agent claiming completion via
    ``verify_completion()``, and asserts on both the verification result
    and the forced-continuation prompt.
    """

    @pytest.fixture
    def orchestrator(self) -> QualityOrchestrator:
        """Create a QualityOrchestrator instance for testing."""
        return QualityOrchestrator()

    @pytest.fixture
    def continuation_service(self) -> ForcedContinuationService:
        """Create a ForcedContinuationService instance for testing."""
        return ForcedContinuationService()

    # ------------------------------------------------------------------
    # Helpers — hoisted out of the per-test bodies to remove duplication.
    # ------------------------------------------------------------------

    @staticmethod
    def _gate(passed: bool, message: str, details: dict | None = None) -> Mock:
        """Return a mock gate whose check() yields a fixed GateResult.

        Args:
            passed: Whether the gate reports success.
            message: Human-readable gate message.
            details: Optional details payload; defaults to an empty dict.
        """
        gate = Mock()
        gate.check.return_value = GateResult(
            passed=passed,
            message=message,
            details=details if details is not None else {},
        )
        return gate

    @classmethod
    def _orchestrator_with(
        cls,
        *,
        build: Mock | None = None,
        lint: Mock | None = None,
        test: Mock | None = None,
        coverage: Mock | None = None,
    ) -> QualityOrchestrator:
        """Build an orchestrator; any gate not supplied defaults to passing.

        This lets each test specify only the gate(s) it varies, instead of
        repeating the four-gate boilerplate.
        """
        return QualityOrchestrator(
            build_gate=build if build is not None else cls._gate(True, "Build passed"),
            lint_gate=lint if lint is not None else cls._gate(True, "Lint passed"),
            test_gate=test if test is not None else cls._gate(True, "Test passed"),
            coverage_gate=coverage
            if coverage is not None
            else cls._gate(True, "Coverage passed"),
        )

    @pytest.mark.asyncio
    async def test_rejection_on_failing_tests(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that failing tests trigger rejection and continuation prompt.

        Scenario: Agent claims completion but tests are failing.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_tests = self._gate(
            False,
            "Test gate failed: 2 tests failed out of 10",
            {
                "return_code": 1,
                "stderr": (
                    "FAILED tests/test_auth.py::test_login - AssertionError\n"
                    "FAILED tests/test_users.py::test_create_user - ValueError"
                ),
            },
        )
        # Simulate agent claiming completion.
        verification = await self._orchestrator_with(
            test=failing_tests
        ).verify_completion()

        # Rejection occurred on the test gate specifically.
        assert verification.all_passed is False
        assert verification.gate_results["test"].passed is False
        assert "failed" in verification.gate_results["test"].message.lower()

        # A forced continuation prompt is generated and is directive.
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "test" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include specific failure details.
        assert (
            "test_auth.py" in prompt
            or "test_users.py" in prompt
            or "failed" in prompt.lower()
        )

    @pytest.mark.asyncio
    async def test_rejection_on_linting_errors(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that linting errors trigger rejection and continuation prompt.

        Scenario: Agent claims completion but code has linting issues.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_lint = self._gate(
            False,
            "Lint gate failed: 5 linting issues found",
            {
                "return_code": 1,
                "stderr": (
                    "src/main.py:10:80: E501 line too long (92 > 79 characters)\n"
                    "src/models.py:5:1: F401 'typing.Any' imported but unused\n"
                    "src/utils.py:15:1: W293 blank line contains whitespace"
                ),
            },
        )
        # Simulate agent claiming completion.
        verification = await self._orchestrator_with(
            lint=failing_lint
        ).verify_completion()

        # Rejection occurred on the lint gate specifically.
        assert verification.all_passed is False
        assert verification.gate_results["lint"].passed is False
        assert "lint" in verification.gate_results["lint"].message.lower()

        # A forced continuation prompt is generated and references linting.
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "lint" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include linting details or commands.
        assert "ruff" in prompt.lower() or "lint" in prompt.lower()

    @pytest.mark.asyncio
    async def test_rejection_on_low_coverage(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that low coverage triggers rejection and continuation prompt.

        Scenario: Agent claims completion but coverage is below minimum.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_coverage = self._gate(
            False,
            "Coverage gate failed: 72.5% coverage below minimum 85%",
            {"coverage_percent": 72.5, "minimum_coverage": 85.0},
        )
        # Simulate agent claiming completion.
        verification = await self._orchestrator_with(
            coverage=failing_coverage
        ).verify_completion()

        # Rejection occurred on the coverage gate specifically.
        assert verification.all_passed is False
        assert verification.gate_results["coverage"].passed is False
        assert "coverage" in verification.gate_results["coverage"].message.lower()

        # A forced continuation prompt is generated with the numbers.
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "coverage" in prompt.lower()
        # Prompt should include the actual and required coverage figures.
        assert "72.5" in prompt or "72" in prompt
        assert "85" in prompt
        assert "must" in prompt.lower() or "increase" in prompt.lower()

    @pytest.mark.asyncio
    async def test_rejection_on_build_errors(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that build errors trigger rejection and continuation prompt.

        Scenario: Agent claims completion but code has type errors.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_build = self._gate(
            False,
            "Build gate failed: Type errors detected",
            {
                "return_code": 1,
                "stderr": (
                    "src/main.py:10: error: Incompatible return value type "
                    "(got 'str', expected 'int')\n"
                    "src/models.py:25: error: Missing type annotation for variable 'config'"
                ),
            },
        )
        # Simulate agent claiming completion.
        verification = await self._orchestrator_with(
            build=failing_build
        ).verify_completion()

        # Rejection occurred on the build gate specifically.
        assert verification.all_passed is False
        assert verification.gate_results["build"].passed is False
        build_msg = verification.gate_results["build"].message.lower()
        assert "build" in build_msg or "type" in build_msg

        # A forced continuation prompt is generated and references the build.
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "build" in prompt.lower() or "type" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include type error details or mypy commands.
        assert "mypy" in prompt.lower() or "type" in prompt.lower()

    @pytest.mark.asyncio
    async def test_acceptance_on_all_gates_passing(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that all gates passing allows completion without rejection.

        Scenario: Agent claims completion and all quality gates pass.
        Expected: No rejection, completion allowed, no continuation prompt.
        """
        verification = await self._orchestrator_with(
            build=self._gate(
                True,
                "Build gate passed: No type errors found",
                {"return_code": 0},
            ),
            lint=self._gate(
                True,
                "Lint gate passed: No linting issues found",
                {"return_code": 0},
            ),
            test=self._gate(
                True,
                "Test gate passed: All 10 tests passed (100% pass rate)",
                {"return_code": 0},
            ),
            coverage=self._gate(
                True,
                "Coverage gate passed: 90.0% coverage (minimum: 85%)",
                {"coverage_percent": 90.0, "minimum_coverage": 85.0},
            ),
        ).verify_completion()

        # No rejection: completion is allowed.
        assert verification.all_passed is True
        assert all(result.passed for result in verification.gate_results.values())
        # With nothing failing, prompt generation must refuse.
        with pytest.raises(ValueError, match="all.*pass"):
            continuation_service.generate_prompt(verification)

    @pytest.mark.asyncio
    async def test_rejection_on_multiple_gate_failures(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that multiple simultaneous gate failures are handled correctly.

        Scenario: Agent claims completion with multiple quality gate failures.
        Expected: Rejection occurs, comprehensive continuation prompt generated.
        """
        verification = await self._orchestrator_with(
            build=self._gate(
                False,
                "Build gate failed: Type errors detected",
                {
                    "return_code": 1,
                    "stderr": "src/main.py:10: error: Incompatible return value type",
                },
            ),
            lint=self._gate(
                False,
                "Lint gate failed: Linting issues detected",
                {
                    "return_code": 1,
                    "stderr": "src/main.py:10: E501 line too long",
                },
            ),
            test=self._gate(
                False,
                "Test gate failed: Test failures detected",
                {
                    "return_code": 1,
                    "stderr": "FAILED tests/test_main.py::test_function",
                },
            ),
            coverage=self._gate(
                False,
                "Coverage gate failed: 60.0% coverage below minimum 85%",
                {"coverage_percent": 60.0, "minimum_coverage": 85.0},
            ),
        ).verify_completion()

        # Rejection occurred for every gate.
        assert verification.all_passed is False
        for gate_name in ("build", "lint", "test", "coverage"):
            assert verification.gate_results[gate_name].passed is False

        # The forced continuation prompt covers all failures.
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        # Prompt should mention all failed gates.
        assert "build" in prompt.lower() or "type" in prompt.lower()
        assert "lint" in prompt.lower()
        assert "test" in prompt.lower()
        assert "coverage" in prompt.lower()
        # Prompt should be comprehensive and directive.
        assert "must" in prompt.lower() or "fix" in prompt.lower()

    @pytest.mark.asyncio
    async def test_continuation_prompt_is_non_negotiable(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that continuation prompts are non-negotiable and directive.

        Scenario: Any gate failure generates a prompt.
        Expected: Prompt uses directive language, not suggestions.
        """
        failing_lint = self._gate(False, "Lint gate failed", {"return_code": 1})
        verification = await self._orchestrator_with(
            lint=failing_lint
        ).verify_completion()
        prompt = continuation_service.generate_prompt(verification)

        prompt_lower = prompt.lower()
        # Prompt uses directive language (MUST, REQUIRED, etc.).
        directive_words = ("must", "required", "do not", "cannot")
        assert any(
            word in prompt_lower for word in directive_words
        ), "Prompt should use directive language"
        # Prompt does not use suggestion language.
        suggestion_words = ("consider", "might want", "could", "perhaps")
        assert not any(
            word in prompt_lower for word in suggestion_words
        ), "Prompt should not use suggestion language"

    @pytest.mark.asyncio
    async def test_continuation_prompt_includes_remediation_steps(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that continuation prompts include actionable remediation steps.

        Scenario: Gate failures generate prompt.
        Expected: Prompt includes specific commands and actions to fix issues.
        """
        failing_test = self._gate(False, "Test gate failed", {"return_code": 1})
        verification = await self._orchestrator_with(
            test=failing_test
        ).verify_completion()
        prompt = continuation_service.generate_prompt(verification)

        # Prompt includes remediation commands or actions.
        prompt_lower = prompt.lower()
        assert (
            "pytest" in prompt_lower
            or "run:" in prompt_lower
            or "fix" in prompt_lower
        ), "Prompt should include specific remediation commands"

    @pytest.mark.asyncio
    async def test_agent_cannot_bypass_gates(
        self,
        orchestrator: QualityOrchestrator,
    ) -> None:
        """Test that agents cannot bypass quality gates.

        Scenario: All gates must be checked, no shortcuts allowed.
        Expected: verify_completion always runs all gates.
        """
        # Track whether each gate's check() was actually invoked.
        call_tracker = {"build": False, "lint": False, "test": False, "coverage": False}

        def make_tracked_gate(gate_name: str, passes: bool) -> Mock:
            """Create a mock gate that records that its check() ran."""
            mock_gate = Mock()

            def tracked_check() -> GateResult:
                call_tracker[gate_name] = True
                return GateResult(
                    passed=passes,
                    message=f"{gate_name} {'passed' if passes else 'failed'}",
                    details={},
                )

            mock_gate.check = tracked_check
            return mock_gate

        # All gates fail, to tempt any short-circuiting implementation.
        verification = await QualityOrchestrator(
            build_gate=make_tracked_gate("build", False),
            lint_gate=make_tracked_gate("lint", False),
            test_gate=make_tracked_gate("test", False),
            coverage_gate=make_tracked_gate("coverage", False),
        ).verify_completion()

        # Every gate was executed — no short-circuit on the first failure.
        for gate_name, was_called in call_tracker.items():
            assert was_called, f"{gate_name} gate should be called"
        # Verification failed as expected.
        assert verification.all_passed is False