test(#149): Add comprehensive rejection loop integration tests
Add integration tests validating rejection loop behavior:

- Agent claims done with failing tests → rejection + forced continuation
- Agent claims done with linting errors → rejection + forced continuation
- Agent claims done with low coverage → rejection + forced continuation
- Agent claims done with build errors → rejection + forced continuation
- All gates passing → completion allowed
- Multiple simultaneous failures → comprehensive rejection
- Continuation prompts are non-negotiable and directive
- Agents cannot bypass quality gates
- Remediation steps included in prompts

All 9 tests pass.
Build gate: passes
Lint gate: passes
Test gate: passes (100% pass rate)
Coverage: quality_orchestrator.py at 85%, forced_continuation.py at 100%

Refs #149

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
591
apps/coordinator/tests/test_rejection_loop.py
Normal file
591
apps/coordinator/tests/test_rejection_loop.py
Normal file
@@ -0,0 +1,591 @@
|
|||||||
|
"""Integration tests for rejection loop behavior.
|
||||||
|
|
||||||
|
These tests simulate scenarios where an agent claims completion with various
|
||||||
|
quality gate failures, verifying that:
|
||||||
|
1. Each failure type triggers rejection
|
||||||
|
2. Forced continuation prompts are generated
|
||||||
|
3. Agents cannot bypass quality gates
|
||||||
|
4. Loop continues until all gates pass
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest.mock import Mock

import pytest

from src.forced_continuation import ForcedContinuationService
from src.gates.quality_gate import GateResult
from src.quality_orchestrator import QualityOrchestrator
|
||||||
|
|
||||||
|
|
||||||
|
class TestRejectionLoop:
    """Test suite for rejection loop integration scenarios.

    Each test wires a QualityOrchestrator with mocked gates, simulates an
    agent claiming completion via ``verify_completion()``, and asserts the
    rejection / forced-continuation contract.
    """

    # ------------------------------------------------------------------
    # Fixtures and helpers
    # ------------------------------------------------------------------

    @pytest.fixture
    def orchestrator(self) -> QualityOrchestrator:
        """Create a QualityOrchestrator instance for testing."""
        return QualityOrchestrator()

    @pytest.fixture
    def continuation_service(self) -> ForcedContinuationService:
        """Create a ForcedContinuationService instance for testing."""
        return ForcedContinuationService()

    @staticmethod
    def _mock_gate(
        *, passed: bool, message: str, details: dict | None = None
    ) -> Mock:
        """Build a mock gate whose check() returns a fixed GateResult."""
        gate = Mock()
        gate.check.return_value = GateResult(
            passed=passed,
            message=message,
            details=details if details is not None else {},
        )
        return gate

    def _orchestrator_with(self, **overrides: Mock) -> QualityOrchestrator:
        """Build an orchestrator where every gate passes unless overridden.

        Keyword names are gate names ("build", "lint", "test", "coverage");
        values replace the default passing mock for that gate.
        """
        gates = {
            name: self._mock_gate(
                passed=True, message=f"{name.capitalize()} passed"
            )
            for name in ("build", "lint", "test", "coverage")
        }
        gates.update(overrides)
        return QualityOrchestrator(
            build_gate=gates["build"],
            lint_gate=gates["lint"],
            test_gate=gates["test"],
            coverage_gate=gates["coverage"],
        )

    # ------------------------------------------------------------------
    # Single-gate rejection scenarios
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_rejection_on_failing_tests(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that failing tests trigger rejection and continuation prompt.

        Scenario: Agent claims completion but tests are failing.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_tests = self._mock_gate(
            passed=False,
            message="Test gate failed: 2 tests failed out of 10",
            details={
                "return_code": 1,
                "stderr": (
                    "FAILED tests/test_auth.py::test_login - AssertionError\n"
                    "FAILED tests/test_users.py::test_create_user - ValueError"
                ),
            },
        )
        orchestrator_with_mocks = self._orchestrator_with(test=failing_tests)

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["test"].passed is False
        assert "failed" in verification.gate_results["test"].message.lower()

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "test" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include specific failure details
        assert (
            "test_auth.py" in prompt
            or "test_users.py" in prompt
            or "failed" in prompt.lower()
        )

    @pytest.mark.asyncio
    async def test_rejection_on_linting_errors(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that linting errors trigger rejection and continuation prompt.

        Scenario: Agent claims completion but code has linting issues.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_lint = self._mock_gate(
            passed=False,
            message="Lint gate failed: 5 linting issues found",
            details={
                "return_code": 1,
                "stderr": (
                    "src/main.py:10:80: E501 line too long (92 > 79 characters)\n"
                    "src/models.py:5:1: F401 'typing.Any' imported but unused\n"
                    "src/utils.py:15:1: W293 blank line contains whitespace"
                ),
            },
        )
        orchestrator_with_mocks = self._orchestrator_with(lint=failing_lint)

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["lint"].passed is False
        assert "lint" in verification.gate_results["lint"].message.lower()

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "lint" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include linting details or commands
        assert "ruff" in prompt.lower() or "lint" in prompt.lower()

    @pytest.mark.asyncio
    async def test_rejection_on_low_coverage(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that low coverage triggers rejection and continuation prompt.

        Scenario: Agent claims completion but coverage is below minimum.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_coverage = self._mock_gate(
            passed=False,
            message="Coverage gate failed: 72.5% coverage below minimum 85%",
            details={"coverage_percent": 72.5, "minimum_coverage": 85.0},
        )
        orchestrator_with_mocks = self._orchestrator_with(
            coverage=failing_coverage
        )

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["coverage"].passed is False
        assert "coverage" in verification.gate_results["coverage"].message.lower()

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "coverage" in prompt.lower()
        # Prompt should include specific coverage numbers
        assert "72.5" in prompt or "72" in prompt
        assert "85" in prompt
        assert "must" in prompt.lower() or "increase" in prompt.lower()

    @pytest.mark.asyncio
    async def test_rejection_on_build_errors(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that build errors trigger rejection and continuation prompt.

        Scenario: Agent claims completion but code has type errors.
        Expected: Rejection occurs, forced continuation prompt generated.
        """
        failing_build = self._mock_gate(
            passed=False,
            message="Build gate failed: Type errors detected",
            details={
                "return_code": 1,
                "stderr": (
                    "src/main.py:10: error: Incompatible return value type "
                    "(got 'str', expected 'int')\n"
                    "src/models.py:25: error: Missing type annotation for variable 'config'"
                ),
            },
        )
        orchestrator_with_mocks = self._orchestrator_with(build=failing_build)

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred
        assert verification.all_passed is False
        assert verification.gate_results["build"].passed is False
        build_msg = verification.gate_results["build"].message.lower()
        assert "build" in build_msg or "type" in build_msg

        # Assert: Forced continuation prompt is generated
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        assert "build" in prompt.lower() or "type" in prompt.lower()
        assert "must" in prompt.lower() or "fix" in prompt.lower()
        # Prompt should include type error details or mypy commands
        assert "mypy" in prompt.lower() or "type" in prompt.lower()

    # ------------------------------------------------------------------
    # Acceptance and multi-failure scenarios
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_acceptance_on_all_gates_passing(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that all gates passing allows completion without rejection.

        Scenario: Agent claims completion and all quality gates pass.
        Expected: No rejection, completion allowed, no continuation prompt.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            build=self._mock_gate(
                passed=True,
                message="Build gate passed: No type errors found",
                details={"return_code": 0},
            ),
            lint=self._mock_gate(
                passed=True,
                message="Lint gate passed: No linting issues found",
                details={"return_code": 0},
            ),
            test=self._mock_gate(
                passed=True,
                message="Test gate passed: All 10 tests passed (100% pass rate)",
                details={"return_code": 0},
            ),
            coverage=self._mock_gate(
                passed=True,
                message="Coverage gate passed: 90.0% coverage (minimum: 85%)",
                details={"coverage_percent": 90.0, "minimum_coverage": 85.0},
            ),
        )

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: No rejection, completion allowed
        assert verification.all_passed is True
        assert all(result.passed for result in verification.gate_results.values())

        # Assert: Continuation prompt should raise error (no failures to report)
        with pytest.raises(ValueError, match="all.*pass"):
            continuation_service.generate_prompt(verification)

    @pytest.mark.asyncio
    async def test_rejection_on_multiple_gate_failures(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that multiple simultaneous gate failures are handled correctly.

        Scenario: Agent claims completion with multiple quality gate failures.
        Expected: Rejection occurs, comprehensive continuation prompt generated.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            build=self._mock_gate(
                passed=False,
                message="Build gate failed: Type errors detected",
                details={
                    "return_code": 1,
                    "stderr": "src/main.py:10: error: Incompatible return value type",
                },
            ),
            lint=self._mock_gate(
                passed=False,
                message="Lint gate failed: Linting issues detected",
                details={
                    "return_code": 1,
                    "stderr": "src/main.py:10: E501 line too long",
                },
            ),
            test=self._mock_gate(
                passed=False,
                message="Test gate failed: Test failures detected",
                details={
                    "return_code": 1,
                    "stderr": "FAILED tests/test_main.py::test_function",
                },
            ),
            coverage=self._mock_gate(
                passed=False,
                message="Coverage gate failed: 60.0% coverage below minimum 85%",
                details={"coverage_percent": 60.0, "minimum_coverage": 85.0},
            ),
        )

        # Simulate agent claiming completion
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: Rejection occurred for all gates
        assert verification.all_passed is False
        assert verification.gate_results["build"].passed is False
        assert verification.gate_results["lint"].passed is False
        assert verification.gate_results["test"].passed is False
        assert verification.gate_results["coverage"].passed is False

        # Assert: Forced continuation prompt covers all failures
        prompt = continuation_service.generate_prompt(verification)
        assert isinstance(prompt, str)
        assert len(prompt) > 0
        # Prompt should mention all failed gates
        assert "build" in prompt.lower() or "type" in prompt.lower()
        assert "lint" in prompt.lower()
        assert "test" in prompt.lower()
        assert "coverage" in prompt.lower()
        # Prompt should be comprehensive and directive
        assert "must" in prompt.lower() or "fix" in prompt.lower()

    # ------------------------------------------------------------------
    # Prompt-contract scenarios
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_continuation_prompt_is_non_negotiable(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that continuation prompts are non-negotiable and directive.

        Scenario: Any gate failure generates a prompt.
        Expected: Prompt uses directive language, not suggestions.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            lint=self._mock_gate(
                passed=False,
                message="Lint gate failed",
                details={"return_code": 1},
            )
        )

        verification = await orchestrator_with_mocks.verify_completion()
        prompt = continuation_service.generate_prompt(verification)

        # Assert: Prompt uses directive language (MUST, REQUIRED, etc.)
        prompt_lower = prompt.lower()
        directive_markers = ("must", "required", "do not", "cannot")
        assert any(
            marker in prompt_lower for marker in directive_markers
        ), "Prompt should use directive language"

        # Assert: Prompt does not use suggestion language
        suggestion_markers = ("consider", "might want", "could", "perhaps")
        assert not any(
            marker in prompt_lower for marker in suggestion_markers
        ), "Prompt should not use suggestion language"

    @pytest.mark.asyncio
    async def test_continuation_prompt_includes_remediation_steps(
        self,
        orchestrator: QualityOrchestrator,
        continuation_service: ForcedContinuationService,
    ) -> None:
        """Test that continuation prompts include actionable remediation steps.

        Scenario: Gate failures generate prompt.
        Expected: Prompt includes specific commands and actions to fix issues.
        """
        orchestrator_with_mocks = self._orchestrator_with(
            test=self._mock_gate(
                passed=False,
                message="Test gate failed",
                details={"return_code": 1},
            )
        )

        verification = await orchestrator_with_mocks.verify_completion()
        prompt = continuation_service.generate_prompt(verification)

        # Assert: Prompt includes remediation commands
        prompt_lower = prompt.lower()
        assert (
            "pytest" in prompt_lower
            or "run:" in prompt_lower
            or "fix" in prompt_lower
        ), "Prompt should include specific remediation commands"

    @pytest.mark.asyncio
    async def test_agent_cannot_bypass_gates(
        self,
        orchestrator: QualityOrchestrator,
    ) -> None:
        """Test that agents cannot bypass quality gates.

        Scenario: All gates must be checked, no shortcuts allowed.
        Expected: verify_completion always runs all gates.
        """
        # Track which gates actually get invoked during verification
        call_tracker = {
            "build": False,
            "lint": False,
            "test": False,
            "coverage": False,
        }

        def make_tracked_gate(gate_name: str, passes: bool) -> Mock:
            """Create a mock gate that records that it was called."""
            mock_gate = Mock()

            def tracked_check() -> GateResult:
                call_tracker[gate_name] = True
                return GateResult(
                    passed=passes,
                    message=f"{gate_name} {'passed' if passes else 'failed'}",
                    details={},
                )

            mock_gate.check = tracked_check
            return mock_gate

        # Create orchestrator with all failing gates
        orchestrator_with_mocks = QualityOrchestrator(
            build_gate=make_tracked_gate("build", False),
            lint_gate=make_tracked_gate("lint", False),
            test_gate=make_tracked_gate("test", False),
            coverage_gate=make_tracked_gate("coverage", False),
        )

        # Run verification
        verification = await orchestrator_with_mocks.verify_completion()

        # Assert: All gates were executed (no short-circuiting)
        assert call_tracker["build"], "Build gate should be called"
        assert call_tracker["lint"], "Lint gate should be called"
        assert call_tracker["test"], "Test gate should be called"
        assert call_tracker["coverage"], "Coverage gate should be called"

        # Assert: Verification failed as expected
        assert verification.all_passed is False
|
||||||
41
docs/scratchpads/149-test-rejection-loop.md
Normal file
41
docs/scratchpads/149-test-rejection-loop.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# Issue #149: [COORD-009] Test rejection loop

## Objective

Validate quality gates prevent premature completion through simulated rejection scenarios.

## Approach

1. Create comprehensive integration tests for rejection loop scenarios
2. Test each gate failure type triggers proper rejection
3. Verify forced continuation prompts are generated correctly
4. Ensure agents cannot bypass gates
5. Validate loop continues until all gates pass

## Test Scenarios

- [ ] Agent claims done with failing tests
- [ ] Agent claims done with linting errors
- [ ] Agent claims done with low coverage
- [ ] Agent claims done with build errors
- [ ] All gates passing allows completion
- [ ] Multiple simultaneous gate failures handled correctly
- [ ] Forced continuation prompts are non-negotiable and actionable

## Progress

- [x] Read existing QualityOrchestrator and ForcedContinuationService code
- [x] Write comprehensive integration tests (TDD)
- [x] Run tests - all 9 tests pass
- [x] Fix linting issues
- [x] Run type checking - passes
- [x] All quality gates pass
- [ ] Commit changes

## Testing

Test file: `apps/coordinator/tests/test_rejection_loop.py`

## Notes

The services already exist from Issue 148, so this is primarily testing the rejection loop behavior through integration tests that simulate agent completion scenarios.
|
||||||
Reference in New Issue
Block a user