feat(#154): Implement context estimator
Implements formula-based context estimation for predicting token usage before issue assignment. Formula: base = (files × 7000) + complexity + tests + docs total = base × 1.3 (30% safety buffer) Features: - EstimationInput/Result data models with validation - ComplexityLevel, TestLevel, DocLevel enums - Agent recommendation (haiku/sonnet/opus) based on tokens - Validation against actual usage with tolerance checking - Convenience function for quick estimations - JSON serialization support Implementation: - issue_estimator.py: Core estimator with formula - models.py: Data models and enums (100% coverage) - test_issue_estimator.py: 35 tests, 100% coverage - ESTIMATOR.md: Complete API documentation - requirements.txt: Python dependencies - .coveragerc: Coverage configuration Test Results: - 35 tests passing - 100% code coverage (excluding __main__) - Validates against historical issues - All edge cases covered Acceptance Criteria Met: ✅ Context estimation formula implemented ✅ Validation suite tests against historical issues ✅ Formula includes all components (files, complexity, tests, docs, buffer) ✅ Unit tests for estimator (100% coverage, exceeds 85% requirement) ✅ All components tested (low/medium/high levels) ✅ Agent recommendation logic validated Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
161
scripts/coordinator/models.py
Normal file
161
scripts/coordinator/models.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Data models for issue context estimation.
|
||||
|
||||
Defines enums and data classes used by the context estimator.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class ComplexityLevel(Enum):
    """Implementation complexity levels with token allocations.

    Each member's value is the token budget reserved for implementation
    work at that complexity.
    """

    # Simple, straightforward implementation
    LOW = 10000
    # Moderate complexity, some edge cases
    MEDIUM = 20000
    # Complex logic, many edge cases, algorithms
    HIGH = 30000
class TestLevel(Enum):
    """Test requirement levels with token allocations.

    Each member's value is the token budget reserved for writing tests
    at that level of rigor.
    """

    # Basic unit tests
    LOW = 5000
    # Unit + integration tests
    MEDIUM = 10000
    # Unit + integration + E2E tests
    HIGH = 15000
class DocLevel(Enum):
    """Documentation requirement levels with token allocations.

    Each member's value is the token budget reserved for writing
    documentation at that level of depth.
    """

    # No documentation needed
    NONE = 0
    # Inline comments, basic docstrings
    LIGHT = 2000
    # API docs, usage examples
    MEDIUM = 3000
    # Comprehensive docs, guides, tutorials
    HEAVY = 5000
@dataclass
class EstimationInput:
    """
    Input parameters for context estimation.

    Attributes:
        files_to_modify: Number of files expected to be modified
        implementation_complexity: Complexity level of implementation
        test_requirements: Level of testing required
        documentation: Level of documentation required
    """

    files_to_modify: int
    implementation_complexity: ComplexityLevel
    test_requirements: TestLevel
    documentation: DocLevel

    def __post_init__(self):
        """Validate input parameters."""
        if self.files_to_modify < 0:
            raise ValueError("files_to_modify must be >= 0")

        # Each enum-typed field must hold an instance of its declared enum;
        # checks run in field-declaration order.
        enum_fields = (
            (
                "implementation_complexity",
                self.implementation_complexity,
                ComplexityLevel,
            ),
            ("test_requirements", self.test_requirements, TestLevel),
            ("documentation", self.documentation, DocLevel),
        )
        for attr, value, expected in enum_fields:
            if not isinstance(value, expected):
                raise TypeError(
                    f"{attr} must be {expected.__name__}, got {type(value)}"
                )
@dataclass
class EstimationResult:
    """
    Result of context estimation.

    Carries the per-category token breakdown and the final total with the
    safety buffer (30% of base) applied.

    Attributes:
        files_context: Tokens allocated for file context (files × 7000)
        implementation_tokens: Tokens allocated for implementation
        test_tokens: Tokens allocated for tests
        doc_tokens: Tokens allocated for documentation
        base_estimate: Sum of all components before buffer
        buffer_tokens: Safety buffer (30% of base)
        total_estimate: Final estimate with buffer applied
        recommended_agent: Suggested agent based on total (haiku/sonnet/opus)
    """

    files_context: int
    implementation_tokens: int
    test_tokens: int
    doc_tokens: int
    base_estimate: int
    buffer_tokens: int
    total_estimate: int
    recommended_agent: str

    def to_dict(self) -> dict:
        """Return a plain-dict view of this result for serialization."""
        return dict(
            files_context=self.files_context,
            implementation_tokens=self.implementation_tokens,
            test_tokens=self.test_tokens,
            doc_tokens=self.doc_tokens,
            base_estimate=self.base_estimate,
            buffer_tokens=self.buffer_tokens,
            total_estimate=self.total_estimate,
            recommended_agent=self.recommended_agent,
        )
@dataclass
class ValidationResult:
    """
    Result of validating an estimate against actual token usage.

    Used for historical validation and accuracy tracking.

    Attributes:
        issue_number: Issue number validated
        estimated_tokens: Estimated context tokens
        actual_tokens: Actual tokens used (if known)
        percentage_error: Absolute relative error (estimated vs actual),
            as a fraction (0.20 == 20%); derived when actual_tokens is set
        within_tolerance: Whether error is within ±20% tolerance
        notes: Optional notes about validation
    """

    issue_number: int
    estimated_tokens: int
    actual_tokens: Optional[int] = None
    percentage_error: Optional[float] = None
    within_tolerance: Optional[bool] = None
    notes: Optional[str] = None

    def __post_init__(self):
        """Calculate derived fields if actual_tokens provided."""
        # Guard against division by zero: an actual usage of 0 tokens gives
        # no meaningful error ratio, so the derived fields stay None.
        if self.actual_tokens is not None and self.actual_tokens != 0:
            self.percentage_error = (
                abs(self.estimated_tokens - self.actual_tokens)
                / self.actual_tokens
            )
            self.within_tolerance = self.percentage_error <= 0.20

    def to_dict(self) -> dict:
        """Convert result to dictionary for serialization."""
        return {
            "issue_number": self.issue_number,
            "estimated_tokens": self.estimated_tokens,
            "actual_tokens": self.actual_tokens,
            # Compare against None, not truthiness: a perfect estimate has
            # percentage_error == 0.0 (falsy) and must serialize as "0.00%",
            # not None.
            "percentage_error": (
                f"{self.percentage_error:.2%}"
                if self.percentage_error is not None
                else None
            ),
            "within_tolerance": self.within_tolerance,
            "notes": self.notes,
        }
|
||||
Reference in New Issue
Block a user