feat(#154): Implement context estimator

Implements formula-based context estimation for predicting token
usage before issue assignment.

Formula:
  base = (files × 7000) + complexity + tests + docs
  total = base × 1.3  (30% safety buffer)

Features:
- EstimationInput/Result data models with validation
- ComplexityLevel, TestLevel, DocLevel enums
- Agent recommendation (haiku/sonnet/opus) based on tokens
- Validation against actual usage with tolerance checking
- Convenience function for quick estimations
- JSON serialization support

Implementation:
- issue_estimator.py: Core estimator with formula
- models.py: Data models and enums (100% coverage)
- test_issue_estimator.py: 35 tests, 100% coverage
- ESTIMATOR.md: Complete API documentation
- requirements.txt: Python dependencies
- .coveragerc: Coverage configuration

Test Results:
- 35 tests passing
- 100% code coverage (excluding __main__)
- Validates against historical issues
- All edge cases covered

Acceptance Criteria Met:
- [x] Context estimation formula implemented
- [x] Validation suite tests against historical issues
- [x] Formula includes all components (files, complexity, tests, docs, buffer)
- [x] Unit tests for estimator (100% coverage, exceeds 85% requirement)
- [x] All components tested (low/medium/high levels)
- [x] Agent recommendation logic validated

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-01 17:42:59 -06:00
parent e23c09f1f2
commit 5639d085b4
8 changed files with 1580 additions and 2 deletions

View File

@@ -0,0 +1,289 @@
"""
Context estimator for issue token usage prediction.
Implements a formula-based estimation algorithm to predict token
usage before issue assignment to agents.
Formula:
base = (files × 7000) + complexity + tests + docs
total = base × 1.3 (30% safety buffer)
Where:
- Complexity: LOW=10K, MEDIUM=20K, HIGH=30K
- Tests: LOW=5K, MEDIUM=10K, HIGH=15K
- Docs: NONE=0, LIGHT=2K, MEDIUM=3K, HEAVY=5K
"""
from models import (
ComplexityLevel,
DocLevel,
EstimationInput,
EstimationResult,
TestLevel,
ValidationResult,
)
# --- Formula constants ---
# Context cost assumed per file that must be read/modified.
TOKENS_PER_FILE = 7000
# Multiplier applied to the base estimate: 1.3 == +30% safety buffer
# for iteration and debugging overhead.
SAFETY_BUFFER = 1.3

# Agent recommendation thresholds (in tokens), applied to the buffered total:
HAIKU_THRESHOLD = 30000  # below this -> "haiku" (fast, efficient)
SONNET_THRESHOLD = 80000  # 30K up to this -> "sonnet" (balanced)
# at or above SONNET_THRESHOLD -> "opus" (powerful, complex tasks)
class ContextEstimator:
    """
    Predicts the context-token budget an issue will need.

    The prediction is purely formula-based and driven by four inputs:
    the number of files touched, implementation complexity, test
    requirements, and documentation needs. A 30% safety buffer is
    layered on top to absorb iteration and debugging overhead.
    """

    def estimate(self, input_data: EstimationInput) -> EstimationResult:
        """
        Produce a token estimate for an issue.

        Args:
            input_data: Estimation input parameters

        Returns:
            EstimationResult with per-component breakdown, base estimate,
            buffer, buffered total, and a recommended agent.

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(
            ...     files_to_modify=2,
            ...     implementation_complexity=ComplexityLevel.MEDIUM,
            ...     test_requirements=TestLevel.MEDIUM,
            ...     documentation=DocLevel.LIGHT
            ... )
            >>> result = estimator.estimate(input_data)
            >>> result.total_estimate
            59800
        """
        # Per-component token allocations.
        file_tokens = self._calculate_files_context(input_data.files_to_modify)
        impl_tokens = self._get_complexity_tokens(
            input_data.implementation_complexity
        )
        testing_tokens = self._get_test_tokens(input_data.test_requirements)
        documentation_tokens = self._get_doc_tokens(input_data.documentation)

        # Base = plain sum of the four components.
        base = file_tokens + impl_tokens + testing_tokens + documentation_tokens

        # Buffer is the extra 30% on top of the base, truncated to an int.
        buffer = int(base * (SAFETY_BUFFER - 1.0))
        total = base + buffer

        return EstimationResult(
            files_context=file_tokens,
            implementation_tokens=impl_tokens,
            test_tokens=testing_tokens,
            doc_tokens=documentation_tokens,
            base_estimate=base,
            buffer_tokens=buffer,
            total_estimate=total,
            recommended_agent=self._recommend_agent(total),
        )

    def validate_against_actual(
        self, input_data: EstimationInput, issue_number: int, actual_tokens: int
    ) -> ValidationResult:
        """
        Compare an estimate against the tokens an issue actually consumed.

        Args:
            input_data: Estimation input parameters
            issue_number: Issue number for reference
            actual_tokens: Actual tokens used

        Returns:
            ValidationResult with accuracy metrics

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(...)
            >>> result = estimator.validate_against_actual(
            ...     input_data, issue_number=156, actual_tokens=15000
            ... )
            >>> result.within_tolerance
            True
        """
        predicted = self.estimate(input_data).total_estimate
        return ValidationResult(
            issue_number=issue_number,
            estimated_tokens=predicted,
            actual_tokens=actual_tokens,
        )

    def _calculate_files_context(self, files_to_modify: int) -> int:
        """
        Token allocation for file context: files_to_modify × 7000.

        Args:
            files_to_modify: Number of files to be modified

        Returns:
            Token allocation for file context
        """
        return TOKENS_PER_FILE * files_to_modify

    def _get_complexity_tokens(self, complexity: ComplexityLevel) -> int:
        """
        Token allocation for implementation complexity.

        Args:
            complexity: Implementation complexity level

        Returns:
            Token allocation for implementation
        """
        # Enum values carry the token budget directly.
        return complexity.value

    def _get_test_tokens(self, test_level: TestLevel) -> int:
        """
        Token allocation for test requirements.

        Args:
            test_level: Testing requirement level

        Returns:
            Token allocation for tests
        """
        return test_level.value

    def _get_doc_tokens(self, doc_level: DocLevel) -> int:
        """
        Token allocation for documentation.

        Args:
            doc_level: Documentation requirement level

        Returns:
            Token allocation for documentation
        """
        return doc_level.value

    def _recommend_agent(self, total_estimate: int) -> str:
        """
        Map a buffered total estimate onto an agent tier.

        Thresholds:
        - haiku: < 30K tokens (fast, efficient)
        - sonnet: 30K-80K tokens (balanced)
        - opus: > 80K tokens (powerful, complex tasks)

        Args:
            total_estimate: Total estimated tokens

        Returns:
            Recommended agent name (haiku, sonnet, or opus)
        """
        # Check from the top tier down with guard clauses.
        if total_estimate >= SONNET_THRESHOLD:
            return "opus"
        if total_estimate >= HAIKU_THRESHOLD:
            return "sonnet"
        return "haiku"
# Convenience wrapper: maps string-level inputs onto the enum API.
# The lookup tables are built once at import time rather than on every call.
_COMPLEXITY_MAP = {
    "low": ComplexityLevel.LOW,
    "medium": ComplexityLevel.MEDIUM,
    "high": ComplexityLevel.HIGH,
}
_TEST_MAP = {
    "low": TestLevel.LOW,
    "medium": TestLevel.MEDIUM,
    "high": TestLevel.HIGH,
}
_DOC_MAP = {
    "none": DocLevel.NONE,
    "light": DocLevel.LIGHT,
    "medium": DocLevel.MEDIUM,
    "heavy": DocLevel.HEAVY,
}


def _lookup(mapping: dict, value: str, param: str):
    """Resolve a case-insensitive level name, raising a descriptive KeyError."""
    try:
        return mapping[value.lower()]
    except KeyError:
        # Preserve the exception type callers may already catch, but say
        # which argument failed and what the accepted values are.
        raise KeyError(
            f"invalid {param} level {value!r}; expected one of {sorted(mapping)}"
        ) from None


def estimate_issue(
    files: int,
    complexity: str = "medium",
    tests: str = "medium",
    docs: str = "light",
) -> EstimationResult:
    """
    Convenience function for quick issue estimation.

    Args:
        files: Number of files to modify
        complexity: Complexity level (low/medium/high), case-insensitive
        tests: Test level (low/medium/high), case-insensitive
        docs: Documentation level (none/light/medium/heavy), case-insensitive

    Returns:
        EstimationResult with token breakdown

    Raises:
        KeyError: If a level name is not one of the accepted values.

    Example:
        >>> result = estimate_issue(files=2, complexity="medium")
        >>> result.total_estimate
        59800
    """
    input_data = EstimationInput(
        files_to_modify=files,
        implementation_complexity=_lookup(_COMPLEXITY_MAP, complexity, "complexity"),
        test_requirements=_lookup(_TEST_MAP, tests, "tests"),
        documentation=_lookup(_DOC_MAP, docs, "docs"),
    )
    return ContextEstimator().estimate(input_data)
if __name__ == "__main__":
    # Demonstrate the estimator against a few representative issues.
    print("Context Estimator - Example Usage\n")

    examples = [
        # (title, estimate_issue keyword arguments)
        ("Example 1: Create bot user (issue #156)",
         dict(files=0, complexity="low", tests="low", docs="light")),
        ("Example 2: Context estimator (issue #154)",
         dict(files=2, complexity="medium", tests="medium", docs="light")),
        ("Example 3: Integration testing (issue #141)",
         dict(files=5, complexity="medium", tests="high", docs="medium")),
        ("Example 4: Quick bug fix",
         dict(files=1, complexity="low", tests="low", docs="none")),
    ]

    for index, (title, params) in enumerate(examples):
        # Blank separator line between examples (none after the last).
        if index:
            print()
        print(title)
        result = estimate_issue(**params)
        print(f" Total estimate: {result.total_estimate:,} tokens")
        print(f" Recommended agent: {result.recommended_agent}")