""" Context estimator for issue token usage prediction. Implements a formula-based estimation algorithm to predict token usage before issue assignment to agents. Formula: base = (files × 7000) + complexity + tests + docs total = base × 1.3 (30% safety buffer) Where: - Complexity: LOW=10K, MEDIUM=20K, HIGH=30K - Tests: LOW=5K, MEDIUM=10K, HIGH=15K - Docs: NONE=0, LIGHT=2K, MEDIUM=3K, HEAVY=5K """ from models import ( ComplexityLevel, DocLevel, EstimationInput, EstimationResult, TestLevel, ValidationResult, ) # Constants TOKENS_PER_FILE = 7000 SAFETY_BUFFER = 1.3 # Agent recommendation thresholds (in tokens) HAIKU_THRESHOLD = 30000 # < 30K tokens SONNET_THRESHOLD = 80000 # 30K-80K tokens # > 80K tokens = opus class ContextEstimator: """ Estimates context token requirements for issues. Uses a formula-based approach to predict token usage based on: - Number of files to modify - Implementation complexity - Test requirements - Documentation needs Applies a 30% safety buffer for iteration and debugging. """ def estimate(self, input_data: EstimationInput) -> EstimationResult: """ Estimate context tokens for an issue. Args: input_data: Estimation input parameters Returns: EstimationResult with token breakdown and total estimate Example: >>> estimator = ContextEstimator() >>> input_data = EstimationInput( ... files_to_modify=2, ... implementation_complexity=ComplexityLevel.MEDIUM, ... test_requirements=TestLevel.MEDIUM, ... documentation=DocLevel.LIGHT ... ) >>> result = estimator.estimate(input_data) >>> result.total_estimate 59800 """ # Calculate individual components files_context = self._calculate_files_context(input_data.files_to_modify) implementation_tokens = self._get_complexity_tokens( input_data.implementation_complexity ) test_tokens = self._get_test_tokens(input_data.test_requirements) doc_tokens = self._get_doc_tokens(input_data.documentation) # Calculate base estimate (sum of all components) base_estimate = ( files_context + implementation_tokens + test_tokens + doc_tokens ) # Apply safety buffer buffer_tokens = int(base_estimate * (SAFETY_BUFFER - 1.0)) total_estimate = base_estimate + buffer_tokens # Recommend agent based on total estimate recommended_agent = self._recommend_agent(total_estimate) return EstimationResult( files_context=files_context, implementation_tokens=implementation_tokens, test_tokens=test_tokens, doc_tokens=doc_tokens, base_estimate=base_estimate, buffer_tokens=buffer_tokens, total_estimate=total_estimate, recommended_agent=recommended_agent, ) def validate_against_actual( self, input_data: EstimationInput, issue_number: int, actual_tokens: int ) -> ValidationResult: """ Validate estimation against actual token usage. Args: input_data: Estimation input parameters issue_number: Issue number for reference actual_tokens: Actual tokens used Returns: ValidationResult with accuracy metrics Example: >>> estimator = ContextEstimator() >>> input_data = EstimationInput(...) >>> result = estimator.validate_against_actual( ... input_data, issue_number=156, actual_tokens=15000 ... ) >>> result.within_tolerance True """ estimation = self.estimate(input_data) return ValidationResult( issue_number=issue_number, estimated_tokens=estimation.total_estimate, actual_tokens=actual_tokens, ) def _calculate_files_context(self, files_to_modify: int) -> int: """ Calculate context tokens for files. Formula: files_to_modify × 7000 tokens per file Args: files_to_modify: Number of files to be modified Returns: Token allocation for file context """ return files_to_modify * TOKENS_PER_FILE def _get_complexity_tokens(self, complexity: ComplexityLevel) -> int: """ Get token allocation for implementation complexity. Args: complexity: Implementation complexity level Returns: Token allocation for implementation """ return complexity.value def _get_test_tokens(self, test_level: TestLevel) -> int: """ Get token allocation for test requirements. Args: test_level: Testing requirement level Returns: Token allocation for tests """ return test_level.value def _get_doc_tokens(self, doc_level: DocLevel) -> int: """ Get token allocation for documentation. Args: doc_level: Documentation requirement level Returns: Token allocation for documentation """ return doc_level.value def _recommend_agent(self, total_estimate: int) -> str: """ Recommend agent based on total token estimate. Thresholds: - haiku: < 30K tokens (fast, efficient) - sonnet: 30K-80K tokens (balanced) - opus: > 80K tokens (powerful, complex tasks) Args: total_estimate: Total estimated tokens Returns: Recommended agent name (haiku, sonnet, or opus) """ if total_estimate < HAIKU_THRESHOLD: return "haiku" elif total_estimate < SONNET_THRESHOLD: return "sonnet" else: return "opus" # Convenience function for quick estimations def estimate_issue( files: int, complexity: str = "medium", tests: str = "medium", docs: str = "light", ) -> EstimationResult: """ Convenience function for quick issue estimation. Args: files: Number of files to modify complexity: Complexity level (low/medium/high) tests: Test level (low/medium/high) docs: Documentation level (none/light/medium/heavy) Returns: EstimationResult with token breakdown Example: >>> result = estimate_issue(files=2, complexity="medium") >>> result.total_estimate 59800 """ # Map string inputs to enums complexity_map = { "low": ComplexityLevel.LOW, "medium": ComplexityLevel.MEDIUM, "high": ComplexityLevel.HIGH, } test_map = { "low": TestLevel.LOW, "medium": TestLevel.MEDIUM, "high": TestLevel.HIGH, } doc_map = { "none": DocLevel.NONE, "light": DocLevel.LIGHT, "medium": DocLevel.MEDIUM, "heavy": DocLevel.HEAVY, } input_data = EstimationInput( files_to_modify=files, implementation_complexity=complexity_map[complexity.lower()], test_requirements=test_map[tests.lower()], documentation=doc_map[docs.lower()], ) estimator = ContextEstimator() return estimator.estimate(input_data) if __name__ == "__main__": # Example usage print("Context Estimator - Example Usage\n") # Example 1: Simple task (issue #156 - bot user) print("Example 1: Create bot user (issue #156)") result = estimate_issue(files=0, complexity="low", tests="low", docs="light") print(f" Total estimate: {result.total_estimate:,} tokens") print(f" Recommended agent: {result.recommended_agent}") print() # Example 2: This task (issue #154 - context estimator) print("Example 2: Context estimator (issue #154)") result = estimate_issue(files=2, complexity="medium", tests="medium", docs="light") print(f" Total estimate: {result.total_estimate:,} tokens") print(f" Recommended agent: {result.recommended_agent}") print() # Example 3: Large integration test (issue #141) print("Example 3: Integration testing (issue #141)") result = estimate_issue(files=5, complexity="medium", tests="high", docs="medium") print(f" Total estimate: {result.total_estimate:,} tokens") print(f" Recommended agent: {result.recommended_agent}") print() # Example 4: Quick fix print("Example 4: Quick bug fix") result = estimate_issue(files=1, complexity="low", tests="low", docs="none") print(f" Total estimate: {result.total_estimate:,} tokens") print(f" Recommended agent: {result.recommended_agent}")