Implements formula-based context estimation for predicting token usage before issue assignment. Formula: base = (files × 7000) + complexity + tests + docs total = base × 1.3 (30% safety buffer) Features: - EstimationInput/Result data models with validation - ComplexityLevel, TestLevel, DocLevel enums - Agent recommendation (haiku/sonnet/opus) based on tokens - Validation against actual usage with tolerance checking - Convenience function for quick estimations - JSON serialization support Implementation: - issue_estimator.py: Core estimator with formula - models.py: Data models and enums (100% coverage) - test_issue_estimator.py: 35 tests, 100% coverage - ESTIMATOR.md: Complete API documentation - requirements.txt: Python dependencies - .coveragerc: Coverage configuration Test Results: - 35 tests passing - 100% code coverage (excluding __main__) - Validates against historical issues - All edge cases covered Acceptance Criteria Met: ✅ Context estimation formula implemented ✅ Validation suite tests against historical issues ✅ Formula includes all components (files, complexity, tests, docs, buffer) ✅ Unit tests for estimator (100% coverage, exceeds 85% requirement) ✅ All components tested (low/medium/high levels) ✅ Agent recommendation logic validated Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
290 lines
8.6 KiB
Python
290 lines
8.6 KiB
Python
"""
|
||
Context estimator for issue token usage prediction.
|
||
|
||
Implements a formula-based estimation algorithm to predict token
|
||
usage before issue assignment to agents.
|
||
|
||
Formula:
|
||
base = (files × 7000) + complexity + tests + docs
|
||
total = base × 1.3 (30% safety buffer)
|
||
|
||
Where:
|
||
- Complexity: LOW=10K, MEDIUM=20K, HIGH=30K
|
||
- Tests: LOW=5K, MEDIUM=10K, HIGH=15K
|
||
- Docs: NONE=0, LIGHT=2K, MEDIUM=3K, HEAVY=5K
|
||
"""
|
||
|
||
from models import (
|
||
ComplexityLevel,
|
||
DocLevel,
|
||
EstimationInput,
|
||
EstimationResult,
|
||
TestLevel,
|
||
ValidationResult,
|
||
)
|
||
|
||
|
||
# Constants
TOKENS_PER_FILE = 7000  # assumed average context cost per modified file
SAFETY_BUFFER = 1.3  # multiplier: adds 30% headroom for iteration/debugging

# Agent recommendation thresholds (in tokens)
HAIKU_THRESHOLD = 30000  # < 30K tokens
SONNET_THRESHOLD = 80000  # 30K-80K tokens
# > 80K tokens = opus
||
|
||
|
||
class ContextEstimator:
    """
    Predicts context-token requirements for an issue before assignment.

    The prediction is purely formula-based:

        base  = (files × 7000) + complexity + tests + docs
        total = base × 1.3  (30% safety buffer for iteration/debugging)

    Per-component allocations are taken directly from the ComplexityLevel,
    TestLevel and DocLevel enum values.  The buffered total also drives an
    agent recommendation (haiku / sonnet / opus).
    """

    def estimate(self, input_data: EstimationInput) -> EstimationResult:
        """
        Produce a token estimate for an issue.

        Args:
            input_data: Estimation input parameters

        Returns:
            EstimationResult holding the per-component breakdown, the
            buffered total, and the recommended agent

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(
            ...     files_to_modify=2,
            ...     implementation_complexity=ComplexityLevel.MEDIUM,
            ...     test_requirements=TestLevel.MEDIUM,
            ...     documentation=DocLevel.LIGHT
            ... )
            >>> result = estimator.estimate(input_data)
            >>> result.total_estimate
            59800
        """
        # Per-component allocations.
        file_tokens = self._calculate_files_context(input_data.files_to_modify)
        impl_tokens = self._get_complexity_tokens(
            input_data.implementation_complexity
        )
        testing_tokens = self._get_test_tokens(input_data.test_requirements)
        documentation_tokens = self._get_doc_tokens(input_data.documentation)

        # Base is the plain sum; the safety buffer is added on top of it.
        base = file_tokens + impl_tokens + testing_tokens + documentation_tokens
        buffer = int(base * (SAFETY_BUFFER - 1.0))
        total = base + buffer

        return EstimationResult(
            files_context=file_tokens,
            implementation_tokens=impl_tokens,
            test_tokens=testing_tokens,
            doc_tokens=documentation_tokens,
            base_estimate=base,
            buffer_tokens=buffer,
            total_estimate=total,
            recommended_agent=self._recommend_agent(total),
        )

    def validate_against_actual(
        self, input_data: EstimationInput, issue_number: int, actual_tokens: int
    ) -> ValidationResult:
        """
        Compare an estimate against the tokens actually consumed.

        Args:
            input_data: Estimation input parameters
            issue_number: Issue number for reference
            actual_tokens: Actual tokens used

        Returns:
            ValidationResult with accuracy metrics

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(...)
            >>> result = estimator.validate_against_actual(
            ...     input_data, issue_number=156, actual_tokens=15000
            ... )
            >>> result.within_tolerance
            True
        """
        predicted = self.estimate(input_data)
        return ValidationResult(
            issue_number=issue_number,
            estimated_tokens=predicted.total_estimate,
            actual_tokens=actual_tokens,
        )

    def _calculate_files_context(self, files_to_modify: int) -> int:
        """
        Token allocation for file context: files_to_modify × 7000.

        Args:
            files_to_modify: Number of files to be modified

        Returns:
            Token allocation for file context
        """
        return TOKENS_PER_FILE * files_to_modify

    def _get_complexity_tokens(self, complexity: ComplexityLevel) -> int:
        """
        Token allocation for implementation complexity.

        Args:
            complexity: Implementation complexity level

        Returns:
            Token allocation (the enum's value)
        """
        return complexity.value

    def _get_test_tokens(self, test_level: TestLevel) -> int:
        """
        Token allocation for test requirements.

        Args:
            test_level: Testing requirement level

        Returns:
            Token allocation (the enum's value)
        """
        return test_level.value

    def _get_doc_tokens(self, doc_level: DocLevel) -> int:
        """
        Token allocation for documentation.

        Args:
            doc_level: Documentation requirement level

        Returns:
            Token allocation (the enum's value)
        """
        return doc_level.value

    def _recommend_agent(self, total_estimate: int) -> str:
        """
        Pick the cheapest agent able to handle the estimated load.

        Thresholds:
            - haiku: < 30K tokens (fast, efficient)
            - sonnet: 30K-80K tokens (balanced)
            - opus: > 80K tokens (powerful, complex tasks)

        Args:
            total_estimate: Total estimated tokens

        Returns:
            Recommended agent name (haiku, sonnet, or opus)
        """
        # Guard clauses, largest workload first.
        if total_estimate >= SONNET_THRESHOLD:
            return "opus"
        if total_estimate >= HAIKU_THRESHOLD:
            return "sonnet"
        return "haiku"
||
|
||
|
||
# Convenience function for quick estimations
def estimate_issue(
    files: int,
    complexity: str = "medium",
    tests: str = "medium",
    docs: str = "light",
) -> EstimationResult:
    """
    Estimate an issue directly from plain-string level names.

    Thin wrapper around ContextEstimator: the case-insensitive string
    levels are translated to their enum counterparts and run through
    the standard formula.

    Args:
        files: Number of files to modify
        complexity: Complexity level (low/medium/high)
        tests: Test level (low/medium/high)
        docs: Documentation level (none/light/medium/heavy)

    Returns:
        EstimationResult with token breakdown

    Example:
        >>> result = estimate_issue(files=2, complexity="medium")
        >>> result.total_estimate
        59800
    """
    # String -> enum translation tables.
    complexity_levels = {
        "low": ComplexityLevel.LOW,
        "medium": ComplexityLevel.MEDIUM,
        "high": ComplexityLevel.HIGH,
    }
    test_levels = {
        "low": TestLevel.LOW,
        "medium": TestLevel.MEDIUM,
        "high": TestLevel.HIGH,
    }
    doc_levels = {
        "none": DocLevel.NONE,
        "light": DocLevel.LIGHT,
        "medium": DocLevel.MEDIUM,
        "heavy": DocLevel.HEAVY,
    }

    estimation_input = EstimationInput(
        files_to_modify=files,
        implementation_complexity=complexity_levels[complexity.lower()],
        test_requirements=test_levels[tests.lower()],
        documentation=doc_levels[docs.lower()],
    )
    return ContextEstimator().estimate(estimation_input)
||
|
||
|
||
if __name__ == "__main__":
    # Demonstrate the estimator against a few historical issues.
    print("Context Estimator - Example Usage\n")

    examples = [
        ("Example 1: Create bot user (issue #156)",
         dict(files=0, complexity="low", tests="low", docs="light")),
        ("Example 2: Context estimator (issue #154)",
         dict(files=2, complexity="medium", tests="medium", docs="light")),
        ("Example 3: Integration testing (issue #141)",
         dict(files=5, complexity="medium", tests="high", docs="medium")),
        ("Example 4: Quick bug fix",
         dict(files=1, complexity="low", tests="low", docs="none")),
    ]

    for index, (title, params) in enumerate(examples):
        print(title)
        result = estimate_issue(**params)
        print(f" Total estimate: {result.total_estimate:,} tokens")
        print(f" Recommended agent: {result.recommended_agent}")
        # Blank separator line between examples, but not after the last one.
        if index < len(examples) - 1:
            print()