Files
stack/scripts/coordinator/models.py
Jason Woltje 5639d085b4 feat(#154): Implement context estimator
Implements formula-based context estimation for predicting token
usage before issue assignment.

Formula:
  base = (files × 7000) + complexity + tests + docs
  total = base × 1.3  (30% safety buffer)

Features:
- EstimationInput/Result data models with validation
- ComplexityLevel, TestLevel, DocLevel enums
- Agent recommendation (haiku/sonnet/opus) based on tokens
- Validation against actual usage with tolerance checking
- Convenience function for quick estimations
- JSON serialization support

Implementation:
- issue_estimator.py: Core estimator with formula
- models.py: Data models and enums (100% coverage)
- test_issue_estimator.py: 35 tests, 100% coverage
- ESTIMATOR.md: Complete API documentation
- requirements.txt: Python dependencies
- .coveragerc: Coverage configuration

Test Results:
- 35 tests passing
- 100% code coverage (excluding __main__)
- Validates against historical issues
- All edge cases covered

Acceptance Criteria Met:
✓ Context estimation formula implemented
✓ Validation suite tests against historical issues
✓ Formula includes all components (files, complexity, tests, docs, buffer)
✓ Unit tests for estimator (100% coverage, exceeds 85% requirement)
✓ All components tested (low/medium/high levels)
✓ Agent recommendation logic validated

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-01 17:42:59 -06:00

162 lines
5.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Data models for issue context estimation.
Defines enums and data classes used by the context estimator.
"""
from dataclasses import dataclass
from enum import Enum
from typing import Optional
class ComplexityLevel(Enum):
    """Implementation complexity levels.

    Each member's value is the token budget allocated for the
    implementation component of the estimate.
    """

    LOW = 10000     # Simple, straightforward implementation
    MEDIUM = 20000  # Moderate complexity, some edge cases
    HIGH = 30000    # Complex logic, many edge cases, algorithms
class TestLevel(Enum):
    """Test requirement levels.

    Each member's value is the token budget allocated for the
    testing component of the estimate.
    """

    LOW = 5000      # Basic unit tests
    MEDIUM = 10000  # Unit + integration tests
    HIGH = 15000    # Unit + integration + E2E tests
class DocLevel(Enum):
    """Documentation requirement levels.

    Each member's value is the token budget allocated for the
    documentation component of the estimate.
    """

    NONE = 0       # No documentation needed
    LIGHT = 2000   # Inline comments, basic docstrings
    MEDIUM = 3000  # API docs, usage examples
    HEAVY = 5000   # Comprehensive docs, guides, tutorials
@dataclass
class EstimationInput:
    """
    Input parameters for context estimation.

    Raises ValueError for a negative file count and TypeError when a
    level field is not an instance of its expected enum.

    Attributes:
        files_to_modify: Number of files expected to be modified
        implementation_complexity: Complexity level of implementation
        test_requirements: Level of testing required
        documentation: Level of documentation required
    """

    files_to_modify: int
    implementation_complexity: ComplexityLevel
    test_requirements: TestLevel
    documentation: DocLevel

    def __post_init__(self):
        """Validate input parameters."""
        if self.files_to_modify < 0:
            raise ValueError("files_to_modify must be >= 0")
        # Each level field must be a member of its dedicated enum; a raw
        # int or string would silently break the downstream formula.
        typed_fields = (
            ("implementation_complexity", self.implementation_complexity, ComplexityLevel),
            ("test_requirements", self.test_requirements, TestLevel),
            ("documentation", self.documentation, DocLevel),
        )
        for field_name, value, expected_enum in typed_fields:
            if not isinstance(value, expected_enum):
                raise TypeError(
                    f"{field_name} must be {expected_enum.__name__}, "
                    f"got {type(value)}"
                )
@dataclass
class EstimationResult:
    """
    Result of context estimation.

    Carries the per-category token breakdown plus the buffered total
    produced by the estimator.

    Attributes:
        files_context: Tokens allocated for file context (files × 7000)
        implementation_tokens: Tokens allocated for implementation
        test_tokens: Tokens allocated for tests
        doc_tokens: Tokens allocated for documentation
        base_estimate: Sum of all components before buffer
        buffer_tokens: Safety buffer (30% of base)
        total_estimate: Final estimate with buffer applied
        recommended_agent: Suggested agent based on total (haiku/sonnet/opus)
    """

    files_context: int
    implementation_tokens: int
    test_tokens: int
    doc_tokens: int
    base_estimate: int
    buffer_tokens: int
    total_estimate: int
    recommended_agent: str

    def to_dict(self) -> dict:
        """Convert result to dictionary for serialization."""
        # All fields serialize verbatim, so a simple attribute sweep
        # keeps the key list in one place.
        return {
            name: getattr(self, name)
            for name in (
                "files_context",
                "implementation_tokens",
                "test_tokens",
                "doc_tokens",
                "base_estimate",
                "buffer_tokens",
                "total_estimate",
                "recommended_agent",
            )
        }
@dataclass
class ValidationResult:
    """
    Result of validating an estimate against actual token usage.

    Used for historical validation and accuracy tracking. When
    ``actual_tokens`` is supplied (and non-zero), ``percentage_error``
    and ``within_tolerance`` are derived automatically.

    Attributes:
        issue_number: Issue number validated
        estimated_tokens: Estimated context tokens
        actual_tokens: Actual tokens used (if known)
        percentage_error: Absolute relative error as a fraction
            (0.20 == 20%); None when actual usage is unknown
        within_tolerance: Whether error is within ±20% tolerance
        notes: Optional notes about validation
    """

    issue_number: int
    estimated_tokens: int
    actual_tokens: Optional[int] = None
    percentage_error: Optional[float] = None
    within_tolerance: Optional[bool] = None
    notes: Optional[str] = None

    def __post_init__(self):
        """Calculate derived fields if actual_tokens provided."""
        # Guard the zero case: relative error against an actual usage of
        # 0 is undefined (previously raised ZeroDivisionError), so the
        # derived fields are left as None.
        if self.actual_tokens is not None and self.actual_tokens != 0:
            self.percentage_error = (
                abs(self.estimated_tokens - self.actual_tokens) / self.actual_tokens
            )
            self.within_tolerance = self.percentage_error <= 0.20

    def to_dict(self) -> dict:
        """Convert result to dictionary for serialization."""
        return {
            "issue_number": self.issue_number,
            "estimated_tokens": self.estimated_tokens,
            "actual_tokens": self.actual_tokens,
            # Compare against None explicitly: a perfect estimate has
            # percentage_error == 0.0, which is falsy but must still
            # serialize as "0.00%" rather than None.
            "percentage_error": (
                f"{self.percentage_error:.2%}"
                if self.percentage_error is not None
                else None
            ),
            "within_tolerance": self.within_tolerance,
            "notes": self.notes,
        }