Files
stack/scripts/coordinator/issue_estimator.py
Jason Woltje 5639d085b4 feat(#154): Implement context estimator
Implements formula-based context estimation for predicting token
usage before issue assignment.

Formula:
  base = (files × 7000) + complexity + tests + docs
  total = base × 1.3  (30% safety buffer)

Features:
- EstimationInput/Result data models with validation
- ComplexityLevel, TestLevel, DocLevel enums
- Agent recommendation (haiku/sonnet/opus) based on tokens
- Validation against actual usage with tolerance checking
- Convenience function for quick estimations
- JSON serialization support

Implementation:
- issue_estimator.py: Core estimator with formula
- models.py: Data models and enums (100% coverage)
- test_issue_estimator.py: 35 tests, 100% coverage
- ESTIMATOR.md: Complete API documentation
- requirements.txt: Python dependencies
- .coveragerc: Coverage configuration

Test Results:
- 35 tests passing
- 100% code coverage (excluding __main__)
- Validates against historical issues
- All edge cases covered

Acceptance Criteria Met:
✓ Context estimation formula implemented
✓ Validation suite tests against historical issues
✓ Formula includes all components (files, complexity, tests, docs, buffer)
✓ Unit tests for estimator (100% coverage, exceeds 85% requirement)
✓ All components tested (low/medium/high levels)
✓ Agent recommendation logic validated

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-01 17:42:59 -06:00

290 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Context estimator for issue token usage prediction.
Implements a formula-based estimation algorithm to predict token
usage before issue assignment to agents.
Formula:
base = (files × 7000) + complexity + tests + docs
total = base × 1.3 (30% safety buffer)
Where:
- Complexity: LOW=10K, MEDIUM=20K, HIGH=30K
- Tests: LOW=5K, MEDIUM=10K, HIGH=15K
- Docs: NONE=0, LIGHT=2K, MEDIUM=3K, HEAVY=5K
"""
from models import (
ComplexityLevel,
DocLevel,
EstimationInput,
EstimationResult,
TestLevel,
ValidationResult,
)
# Constants
# Estimated context cost per file touched; see the module-docstring formula.
TOKENS_PER_FILE = 7000
# Multiplier applied to the base estimate: 30% headroom for iteration/debugging.
SAFETY_BUFFER = 1.3
# Agent recommendation thresholds (in tokens)
HAIKU_THRESHOLD = 30000 # totals strictly below this -> "haiku"
SONNET_THRESHOLD = 80000 # totals in [HAIKU_THRESHOLD, this) -> "sonnet"
# > 80K tokens = opus
class ContextEstimator:
    """Predict context-token requirements for an issue.

    The prediction is purely formula-based:

        base  = (files × TOKENS_PER_FILE) + complexity + tests + docs
        total = base × SAFETY_BUFFER   (30% headroom for iteration/debugging)

    The total also drives an agent recommendation (haiku/sonnet/opus)
    via the module-level thresholds.
    """

    def estimate(self, input_data: EstimationInput) -> EstimationResult:
        """Produce a token estimate for an issue.

        Args:
            input_data: Estimation input parameters.

        Returns:
            EstimationResult carrying the per-component breakdown,
            base/buffer/total figures, and the recommended agent.

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(
            ...     files_to_modify=2,
            ...     implementation_complexity=ComplexityLevel.MEDIUM,
            ...     test_requirements=TestLevel.MEDIUM,
            ...     documentation=DocLevel.LIGHT
            ... )
            >>> result = estimator.estimate(input_data)
            >>> result.total_estimate
            59800
        """
        # Per-component token allocations.
        file_tokens = self._calculate_files_context(input_data.files_to_modify)
        impl_tokens = self._get_complexity_tokens(
            input_data.implementation_complexity
        )
        test_tokens = self._get_test_tokens(input_data.test_requirements)
        doc_tokens = self._get_doc_tokens(input_data.documentation)

        # Base = plain sum; buffer = the extra 30% on top of it.
        base = file_tokens + impl_tokens + test_tokens + doc_tokens
        buffer_amount = int(base * (SAFETY_BUFFER - 1.0))
        total = base + buffer_amount

        return EstimationResult(
            files_context=file_tokens,
            implementation_tokens=impl_tokens,
            test_tokens=test_tokens,
            doc_tokens=doc_tokens,
            base_estimate=base,
            buffer_tokens=buffer_amount,
            total_estimate=total,
            recommended_agent=self._recommend_agent(total),
        )

    def validate_against_actual(
        self, input_data: EstimationInput, issue_number: int, actual_tokens: int
    ) -> ValidationResult:
        """Compare an estimate against the tokens an issue actually used.

        Args:
            input_data: Estimation input parameters.
            issue_number: Issue number for reference.
            actual_tokens: Actual tokens used.

        Returns:
            ValidationResult with accuracy metrics.

        Example:
            >>> estimator = ContextEstimator()
            >>> input_data = EstimationInput(...)
            >>> result = estimator.validate_against_actual(
            ...     input_data, issue_number=156, actual_tokens=15000
            ... )
            >>> result.within_tolerance
            True
        """
        predicted = self.estimate(input_data)
        return ValidationResult(
            issue_number=issue_number,
            estimated_tokens=predicted.total_estimate,
            actual_tokens=actual_tokens,
        )

    def _calculate_files_context(self, files_to_modify: int) -> int:
        """Token allocation for file context: files × TOKENS_PER_FILE."""
        return files_to_modify * TOKENS_PER_FILE

    def _get_complexity_tokens(self, complexity: ComplexityLevel) -> int:
        """Token allocation encoded in the complexity enum's value."""
        return complexity.value

    def _get_test_tokens(self, test_level: TestLevel) -> int:
        """Token allocation encoded in the test-level enum's value."""
        return test_level.value

    def _get_doc_tokens(self, doc_level: DocLevel) -> int:
        """Token allocation encoded in the doc-level enum's value."""
        return doc_level.value

    def _recommend_agent(self, total_estimate: int) -> str:
        """Map a total estimate onto an agent name.

        Thresholds:
            haiku  — below HAIKU_THRESHOLD (fast, efficient)
            sonnet — below SONNET_THRESHOLD (balanced)
            opus   — everything larger (powerful, complex tasks)

        Args:
            total_estimate: Total estimated tokens.

        Returns:
            "haiku", "sonnet", or "opus".
        """
        # Walk the ascending threshold ladder; fall through to the
        # most capable agent when nothing matches.
        ladder = (
            (HAIKU_THRESHOLD, "haiku"),
            (SONNET_THRESHOLD, "sonnet"),
        )
        for ceiling, agent in ladder:
            if total_estimate < ceiling:
                return agent
        return "opus"
# Convenience function for quick estimations
# Convenience function for quick estimations
def estimate_issue(
    files: int,
    complexity: str = "medium",
    tests: str = "medium",
    docs: str = "light",
) -> EstimationResult:
    """Quickly estimate an issue from plain string level names.

    Args:
        files: Number of files to modify.
        complexity: Complexity level (low/medium/high).
        tests: Test level (low/medium/high).
        docs: Documentation level (none/light/medium/heavy).

    Returns:
        EstimationResult with token breakdown.

    Example:
        >>> result = estimate_issue(files=2, complexity="medium")
        >>> result.total_estimate
        59800
    """
    # Translate case-insensitive string levels into their enum members.
    complexity_level = {
        "low": ComplexityLevel.LOW,
        "medium": ComplexityLevel.MEDIUM,
        "high": ComplexityLevel.HIGH,
    }[complexity.lower()]
    test_level = {
        "low": TestLevel.LOW,
        "medium": TestLevel.MEDIUM,
        "high": TestLevel.HIGH,
    }[tests.lower()]
    doc_level = {
        "none": DocLevel.NONE,
        "light": DocLevel.LIGHT,
        "medium": DocLevel.MEDIUM,
        "heavy": DocLevel.HEAVY,
    }[docs.lower()]

    return ContextEstimator().estimate(
        EstimationInput(
            files_to_modify=files,
            implementation_complexity=complexity_level,
            test_requirements=test_level,
            documentation=doc_level,
        )
    )
if __name__ == "__main__":
    # Demonstrate the estimator on a few representative issues.
    print("Context Estimator - Example Usage\n")

    # (title, estimate_issue keyword arguments) for each demo case.
    demos = [
        ("Example 1: Create bot user (issue #156)",
         {"files": 0, "complexity": "low", "tests": "low", "docs": "light"}),
        ("Example 2: Context estimator (issue #154)",
         {"files": 2, "complexity": "medium", "tests": "medium", "docs": "light"}),
        ("Example 3: Integration testing (issue #141)",
         {"files": 5, "complexity": "medium", "tests": "high", "docs": "medium"}),
        ("Example 4: Quick bug fix",
         {"files": 1, "complexity": "low", "tests": "low", "docs": "none"}),
    ]

    for position, (title, spec) in enumerate(demos, start=1):
        print(title)
        result = estimate_issue(**spec)
        print(f" Total estimate: {result.total_estimate:,} tokens")
        print(f" Recommended agent: {result.recommended_agent}")
        # The original output has no blank line after the final example.
        if position < len(demos):
            print()