Implements formula-based context estimation for predicting token usage before issue assignment.

Formula: base = (files × 7000) + complexity + tests + docs; total = base × 1.3 (30% safety buffer).

Features:
- EstimationInput/Result data models with validation
- ComplexityLevel, TestLevel, DocLevel enums
- Agent recommendation (haiku/sonnet/opus) based on tokens
- Validation against actual usage with tolerance checking
- Convenience function for quick estimations
- JSON serialization support

Implementation:
- issue_estimator.py: Core estimator with formula
- models.py: Data models and enums (100% coverage)
- test_issue_estimator.py: 35 tests, 100% coverage
- ESTIMATOR.md: Complete API documentation
- requirements.txt: Python dependencies
- .coveragerc: Coverage configuration

Test Results:
- 35 tests passing
- 100% code coverage (excluding __main__)
- Validates against historical issues
- All edge cases covered

Acceptance Criteria Met:
✅ Context estimation formula implemented
✅ Validation suite tests against historical issues
✅ Formula includes all components (files, complexity, tests, docs, buffer)
✅ Unit tests for estimator (100% coverage, exceeds 85% requirement)
✅ All components tested (low/medium/high levels)
✅ Agent recommendation logic validated

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
162 lines
5.2 KiB
Python
"""
|
||
Data models for issue context estimation.
|
||
|
||
Defines enums and data classes used by the context estimator.
|
||
"""
|
||
|
||
from dataclasses import dataclass
|
||
from enum import Enum
|
||
from typing import Optional
|
||
|
||
|
||
class ComplexityLevel(Enum):
    """Implementation complexity levels.

    Each member's value is the token budget allocated to the
    implementation component of the estimation formula.

    Members:
        LOW: Simple, straightforward implementation.
        MEDIUM: Moderate complexity, some edge cases.
        HIGH: Complex logic, many edge cases, algorithms.
    """

    LOW = 10000
    MEDIUM = 20000
    HIGH = 30000
|
||
|
||
|
||
class TestLevel(Enum):
    """Test requirement levels.

    Each member's value is the token budget allocated to the test
    component of the estimation formula.

    Members:
        LOW: Basic unit tests.
        MEDIUM: Unit + integration tests.
        HIGH: Unit + integration + E2E tests.
    """

    LOW = 5000
    MEDIUM = 10000
    HIGH = 15000
|
||
|
||
|
||
class DocLevel(Enum):
    """Documentation requirement levels.

    Each member's value is the token budget allocated to the
    documentation component of the estimation formula.

    Members:
        NONE: No documentation needed.
        LIGHT: Inline comments, basic docstrings.
        MEDIUM: API docs, usage examples.
        HEAVY: Comprehensive docs, guides, tutorials.
    """

    NONE = 0
    LIGHT = 2000
    MEDIUM = 3000
    HEAVY = 5000
|
||
|
||
|
||
@dataclass
class EstimationInput:
    """
    Input parameters for context estimation.

    Validated on construction: the file count must be non-negative and
    each level field must carry its matching enum type.

    Attributes:
        files_to_modify: Number of files expected to be modified
        implementation_complexity: Complexity level of implementation
        test_requirements: Level of testing required
        documentation: Level of documentation required

    Raises:
        ValueError: If files_to_modify is negative.
        TypeError: If a level field is not the expected enum type.
    """

    files_to_modify: int
    implementation_complexity: ComplexityLevel
    test_requirements: TestLevel
    documentation: DocLevel

    def __post_init__(self):
        """Validate input parameters."""
        if self.files_to_modify < 0:
            raise ValueError("files_to_modify must be >= 0")

        # Table-driven type checks: (field name, value, required enum type).
        typed_fields = (
            (
                "implementation_complexity",
                self.implementation_complexity,
                ComplexityLevel,
            ),
            ("test_requirements", self.test_requirements, TestLevel),
            ("documentation", self.documentation, DocLevel),
        )
        for field_name, value, expected in typed_fields:
            if not isinstance(value, expected):
                raise TypeError(
                    f"{field_name} must be {expected.__name__}, "
                    f"got {type(value)}"
                )
|
||
|
||
|
||
@dataclass
class EstimationResult:
    """
    Result of context estimation.

    Holds the per-category token breakdown, the pre-buffer sum, the
    safety buffer, and the final buffered total, plus the agent tier
    suggested for that total.

    Attributes:
        files_context: Tokens allocated for file context (files × 7000)
        implementation_tokens: Tokens allocated for implementation
        test_tokens: Tokens allocated for tests
        doc_tokens: Tokens allocated for documentation
        base_estimate: Sum of all components before buffer
        buffer_tokens: Safety buffer (30% of base)
        total_estimate: Final estimate with buffer applied
        recommended_agent: Suggested agent based on total (haiku/sonnet/opus)
    """

    files_context: int
    implementation_tokens: int
    test_tokens: int
    doc_tokens: int
    base_estimate: int
    buffer_tokens: int
    total_estimate: int
    recommended_agent: str

    # Serialization order for to_dict(); mirrors field declaration order.
    _FIELD_ORDER = (
        "files_context",
        "implementation_tokens",
        "test_tokens",
        "doc_tokens",
        "base_estimate",
        "buffer_tokens",
        "total_estimate",
        "recommended_agent",
    )

    def to_dict(self) -> dict:
        """Convert result to dictionary for serialization."""
        return {name: getattr(self, name) for name in self._FIELD_ORDER}
|
||
|
||
|
||
@dataclass
class ValidationResult:
    """
    Result of validating estimate against actual usage.

    Used for historical validation and accuracy tracking.

    Attributes:
        issue_number: Issue number validated
        estimated_tokens: Estimated context tokens
        actual_tokens: Actual tokens used (if known)
        percentage_error: Relative error as a fraction (0.2 means 20%)
        within_tolerance: Whether error is within ±20% tolerance
        notes: Optional notes about validation
    """

    issue_number: int
    estimated_tokens: int
    actual_tokens: Optional[int] = None
    percentage_error: Optional[float] = None
    within_tolerance: Optional[bool] = None
    notes: Optional[str] = None

    def __post_init__(self):
        """Calculate derived fields if actual_tokens provided.

        Guarded by truthiness so actual_tokens == 0 (which would divide
        by zero) leaves the derived fields as None rather than raising.
        """
        if self.actual_tokens:
            self.percentage_error = (
                abs(self.estimated_tokens - self.actual_tokens)
                / self.actual_tokens
            )
            self.within_tolerance = self.percentage_error <= 0.20

    def to_dict(self) -> dict:
        """Convert result to dictionary for serialization."""
        return {
            "issue_number": self.issue_number,
            "estimated_tokens": self.estimated_tokens,
            "actual_tokens": self.actual_tokens,
            # `is not None` (not truthiness): a perfect estimate has
            # percentage_error == 0.0, which must render as "0.00%",
            # not collapse to None.
            "percentage_error": (
                f"{self.percentage_error:.2%}"
                if self.percentage_error is not None
                else None
            ),
            "within_tolerance": self.within_tolerance,
            "notes": self.notes,
        }
|