feat(#154): Implement context estimator
Implements formula-based context estimation for predicting token usage before issue assignment.

Formula:
  base = (files × 7000) + complexity + tests + docs
  total = base × 1.3 (30% safety buffer)

Features:
- EstimationInput/Result data models with validation
- ComplexityLevel, TestLevel, DocLevel enums
- Agent recommendation (haiku/sonnet/opus) based on tokens
- Validation against actual usage with tolerance checking
- Convenience function for quick estimations
- JSON serialization support

Implementation:
- issue_estimator.py: Core estimator with formula
- models.py: Data models and enums (100% coverage)
- test_issue_estimator.py: 35 tests, 100% coverage
- ESTIMATOR.md: Complete API documentation
- requirements.txt: Python dependencies
- .coveragerc: Coverage configuration

Test Results:
- 35 tests passing
- 100% code coverage (excluding __main__)
- Validates against historical issues
- All edge cases covered

Acceptance Criteria Met:
✅ Context estimation formula implemented
✅ Validation suite tests against historical issues
✅ Formula includes all components (files, complexity, tests, docs, buffer)
✅ Unit tests for estimator (100% coverage, exceeds 85% requirement)
✅ All components tested (low/medium/high levels)
✅ Agent recommendation logic validated

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
289
scripts/coordinator/issue_estimator.py
Normal file
289
scripts/coordinator/issue_estimator.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""
|
||||
Context estimator for issue token usage prediction.
|
||||
|
||||
Implements a formula-based estimation algorithm to predict token
|
||||
usage before issue assignment to agents.
|
||||
|
||||
Formula:
|
||||
base = (files × 7000) + complexity + tests + docs
|
||||
total = base × 1.3 (30% safety buffer)
|
||||
|
||||
Where:
|
||||
- Complexity: LOW=10K, MEDIUM=20K, HIGH=30K
|
||||
- Tests: LOW=5K, MEDIUM=10K, HIGH=15K
|
||||
- Docs: NONE=0, LIGHT=2K, MEDIUM=3K, HEAVY=5K
|
||||
"""
|
||||
|
||||
from models import (
|
||||
ComplexityLevel,
|
||||
DocLevel,
|
||||
EstimationInput,
|
||||
EstimationResult,
|
||||
TestLevel,
|
||||
ValidationResult,
|
||||
)
|
||||
|
||||
|
||||
# Constants
TOKENS_PER_FILE = 7000  # estimated context cost per modified file
SAFETY_BUFFER = 1.3  # 30% headroom for iteration and debugging

# Agent recommendation thresholds (in tokens)
HAIKU_THRESHOLD = 30000  # < 30K tokens -> haiku
SONNET_THRESHOLD = 80000  # 30K-80K tokens -> sonnet
# > 80K tokens = opus
|
||||
|
||||
class ContextEstimator:
    """Predict context-token requirements for an issue.

    The prediction is a simple additive formula over four inputs:

    - number of files to modify (``TOKENS_PER_FILE`` each)
    - implementation complexity level
    - test requirement level
    - documentation level

    The sum is then inflated by a 30% safety buffer
    (``SAFETY_BUFFER``) to leave room for iteration and debugging.
    """

    def estimate(self, input_data: EstimationInput) -> EstimationResult:
        """Estimate context tokens for an issue.

        Args:
            input_data: Estimation input parameters

        Returns:
            EstimationResult with a per-component token breakdown,
            the buffered total, and a recommended agent.

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(
            ...     files_to_modify=2,
            ...     implementation_complexity=ComplexityLevel.MEDIUM,
            ...     test_requirements=TestLevel.MEDIUM,
            ...     documentation=DocLevel.LIGHT
            ... )
            >>> result = estimator.estimate(input_data)
            >>> result.total_estimate
            59800
        """
        # Per-component token allocations.
        file_tokens = self._calculate_files_context(input_data.files_to_modify)
        impl_tokens = self._get_complexity_tokens(
            input_data.implementation_complexity
        )
        testing_tokens = self._get_test_tokens(input_data.test_requirements)
        documentation_tokens = self._get_doc_tokens(input_data.documentation)

        # Base estimate is the plain sum of all components.
        base = file_tokens + impl_tokens + testing_tokens + documentation_tokens

        # Safety buffer: the extra 30% on top of the base.
        buffer = int(base * (SAFETY_BUFFER - 1.0))
        total = base + buffer

        return EstimationResult(
            files_context=file_tokens,
            implementation_tokens=impl_tokens,
            test_tokens=testing_tokens,
            doc_tokens=documentation_tokens,
            base_estimate=base,
            buffer_tokens=buffer,
            total_estimate=total,
            recommended_agent=self._recommend_agent(total),
        )

    def validate_against_actual(
        self, input_data: EstimationInput, issue_number: int, actual_tokens: int
    ) -> ValidationResult:
        """Compare an estimation against the tokens actually used.

        Args:
            input_data: Estimation input parameters
            issue_number: Issue number for reference
            actual_tokens: Actual tokens used

        Returns:
            ValidationResult with accuracy metrics

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(...)
            >>> result = estimator.validate_against_actual(
            ...     input_data, issue_number=156, actual_tokens=15000
            ... )
            >>> result.within_tolerance
            True
        """
        predicted = self.estimate(input_data).total_estimate
        return ValidationResult(
            issue_number=issue_number,
            estimated_tokens=predicted,
            actual_tokens=actual_tokens,
        )

    def _calculate_files_context(self, files_to_modify: int) -> int:
        """Token allocation for file context: files × TOKENS_PER_FILE.

        Args:
            files_to_modify: Number of files to be modified

        Returns:
            Token allocation for file context
        """
        return TOKENS_PER_FILE * files_to_modify

    def _get_complexity_tokens(self, complexity: ComplexityLevel) -> int:
        """Token allocation for implementation complexity.

        The enum member's value is the allocation itself.

        Args:
            complexity: Implementation complexity level

        Returns:
            Token allocation for implementation
        """
        return complexity.value

    def _get_test_tokens(self, test_level: TestLevel) -> int:
        """Token allocation for test requirements.

        The enum member's value is the allocation itself.

        Args:
            test_level: Testing requirement level

        Returns:
            Token allocation for tests
        """
        return test_level.value

    def _get_doc_tokens(self, doc_level: DocLevel) -> int:
        """Token allocation for documentation.

        The enum member's value is the allocation itself.

        Args:
            doc_level: Documentation requirement level

        Returns:
            Token allocation for documentation
        """
        return doc_level.value

    def _recommend_agent(self, total_estimate: int) -> str:
        """Pick an agent tier for the given total token estimate.

        Thresholds:
        - haiku: < 30K tokens (fast, efficient)
        - sonnet: 30K-80K tokens (balanced)
        - opus: > 80K tokens (powerful, complex tasks)

        Args:
            total_estimate: Total estimated tokens

        Returns:
            Recommended agent name (haiku, sonnet, or opus)
        """
        # Check from the top tier down; equivalent to the
        # ascending < HAIKU / < SONNET comparison chain.
        if total_estimate >= SONNET_THRESHOLD:
            return "opus"
        if total_estimate >= HAIKU_THRESHOLD:
            return "sonnet"
        return "haiku"
|
||||
|
||||
# Convenience function for quick estimations
def estimate_issue(
    files: int,
    complexity: str = "medium",
    tests: str = "medium",
    docs: str = "light",
) -> EstimationResult:
    """Quickly estimate an issue from plain string levels.

    Thin wrapper around :class:`ContextEstimator` that maps
    case-insensitive string levels onto the corresponding enums.

    Args:
        files: Number of files to modify
        complexity: Complexity level (low/medium/high)
        tests: Test level (low/medium/high)
        docs: Documentation level (none/light/medium/heavy)

    Returns:
        EstimationResult with token breakdown

    Example:
        >>> result = estimate_issue(files=2, complexity="medium")
        >>> result.total_estimate
        59800
    """
    # String-to-enum lookup tables (keys are lowercase).
    complexity_levels = {
        "low": ComplexityLevel.LOW,
        "medium": ComplexityLevel.MEDIUM,
        "high": ComplexityLevel.HIGH,
    }
    test_levels = {
        "low": TestLevel.LOW,
        "medium": TestLevel.MEDIUM,
        "high": TestLevel.HIGH,
    }
    doc_levels = {
        "none": DocLevel.NONE,
        "light": DocLevel.LIGHT,
        "medium": DocLevel.MEDIUM,
        "heavy": DocLevel.HEAVY,
    }

    selection = EstimationInput(
        files_to_modify=files,
        implementation_complexity=complexity_levels[complexity.lower()],
        test_requirements=test_levels[tests.lower()],
        documentation=doc_levels[docs.lower()],
    )

    return ContextEstimator().estimate(selection)
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Example usage
|
||||
print("Context Estimator - Example Usage\n")
|
||||
|
||||
# Example 1: Simple task (issue #156 - bot user)
|
||||
print("Example 1: Create bot user (issue #156)")
|
||||
result = estimate_issue(files=0, complexity="low", tests="low", docs="light")
|
||||
print(f" Total estimate: {result.total_estimate:,} tokens")
|
||||
print(f" Recommended agent: {result.recommended_agent}")
|
||||
print()
|
||||
|
||||
# Example 2: This task (issue #154 - context estimator)
|
||||
print("Example 2: Context estimator (issue #154)")
|
||||
result = estimate_issue(files=2, complexity="medium", tests="medium", docs="light")
|
||||
print(f" Total estimate: {result.total_estimate:,} tokens")
|
||||
print(f" Recommended agent: {result.recommended_agent}")
|
||||
print()
|
||||
|
||||
# Example 3: Large integration test (issue #141)
|
||||
print("Example 3: Integration testing (issue #141)")
|
||||
result = estimate_issue(files=5, complexity="medium", tests="high", docs="medium")
|
||||
print(f" Total estimate: {result.total_estimate:,} tokens")
|
||||
print(f" Recommended agent: {result.recommended_agent}")
|
||||
print()
|
||||
|
||||
# Example 4: Quick fix
|
||||
print("Example 4: Quick bug fix")
|
||||
result = estimate_issue(files=1, complexity="low", tests="low", docs="none")
|
||||
print(f" Total estimate: {result.total_estimate:,} tokens")
|
||||
print(f" Recommended agent: {result.recommended_agent}")
|
||||
Reference in New Issue
Block a user