Files
agent-skills/skills/code-review-excellence/scripts/pr-analyzer.py
Jason Woltje d9bcdc4a8d feat: Initial agent-skills repo — 4 adapted skills for Mosaic Stack
Skills included:
- pr-reviewer: Adapted for Gitea/GitHub via platform-aware scripts
  (dropped fetch_pr_data.py and add_inline_comment.py, kept generate_review_files.py)
- code-review-excellence: Methodology and checklists (React, TS, Python, etc.)
- vercel-react-best-practices: 57 rules for React/Next.js performance
- tailwind-design-system: Tailwind CSS v4 patterns, CVA, design tokens

New shell scripts added to ~/.claude/scripts/git/:
- pr-diff.sh: Get PR diff (GitHub gh / Gitea API)
- pr-metadata.sh: Get PR metadata as normalized JSON

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 16:03:39 -06:00

367 lines
11 KiB
Python

#!/usr/bin/env python3
"""
PR Analyzer - Analyze PR complexity and suggest review approach.
Usage:
python pr-analyzer.py [--diff-file FILE] [--stats]
Or pipe diff directly:
git diff main...HEAD | python pr-analyzer.py
"""
import sys
import re
import argparse
from collections import defaultdict
from dataclasses import dataclass
from typing import List, Dict, Optional
@dataclass
class FileStats:
    """Per-file statistics extracted from a unified git diff."""

    filename: str              # path as it appears on the "b/" side of the diff
    additions: int = 0         # count of added ("+") lines
    deletions: int = 0         # count of removed ("-") lines
    is_test: bool = False      # path matches a test-file pattern
    is_config: bool = False    # path matches a configuration-file pattern
    language: str = "unknown"  # language guessed from the file extension
@dataclass
class PRAnalysis:
    """Aggregated results of analyzing a single PR diff."""

    total_files: int                # number of files touched
    total_additions: int            # sum of added lines across all files
    total_deletions: int            # sum of removed lines across all files
    files: List[FileStats]          # per-file breakdown
    complexity_score: float         # 0.0-1.0 blended complexity estimate
    size_category: str              # T-shirt size label, e.g. "M (Medium)"
    estimated_review_time: int      # minutes, clamped to [5, 120]
    risk_factors: List[str]         # human-readable risk flags
    suggestions: List[str]          # review-process recommendations
def detect_language(filename: str) -> str:
    """Guess the programming language of *filename* from its extension.

    Returns 'unknown' when no known extension matches.
    """
    suffix_map = {
        '.py': 'Python',
        '.js': 'JavaScript',
        '.ts': 'TypeScript',
        '.tsx': 'TypeScript/React',
        '.jsx': 'JavaScript/React',
        '.rs': 'Rust',
        '.go': 'Go',
        '.c': 'C',
        '.h': 'C/C++',
        '.cpp': 'C++',
        '.hpp': 'C++',
        '.cc': 'C++',
        '.cxx': 'C++',
        '.hh': 'C++',
        '.hxx': 'C++',
        '.java': 'Java',
        '.rb': 'Ruby',
        '.sql': 'SQL',
        '.md': 'Markdown',
        '.json': 'JSON',
        '.yaml': 'YAML',
        '.yml': 'YAML',
        '.toml': 'TOML',
        '.css': 'CSS',
        '.scss': 'SCSS',
        '.html': 'HTML',
    }
    # First matching suffix wins (extensions include the dot, so no ambiguity).
    return next(
        (lang for suffix, lang in suffix_map.items() if filename.endswith(suffix)),
        'unknown',
    )
def is_test_file(filename: str) -> bool:
"""Check if file is a test file."""
test_patterns = [
r'test_.*\.py$',
r'.*_test\.py$',
r'.*\.test\.(js|ts|tsx)$',
r'.*\.spec\.(js|ts|tsx)$',
r'tests?/',
r'__tests__/',
]
return any(re.search(p, filename) for p in test_patterns)
def is_config_file(filename: str) -> bool:
    """Return True when *filename* looks like a configuration file.

    Matches dotenv files, anything named config.*, and common
    structured-config extensions (JSON/YAML/TOML).
    """
    patterns = (
        r'\.env',
        r'config\.',
        r'\.json$',
        r'\.yaml$',
        r'\.yml$',
        r'\.toml$',
        r'Cargo\.toml$',
        r'package\.json$',
        r'tsconfig\.json$',
    )
    for pattern in patterns:
        if re.search(pattern, filename):
            return True
    return False
def parse_diff(diff_content: str) -> List[FileStats]:
    """Parse unified git diff output into per-file statistics.

    Counts added/removed lines per file and tags each file with its
    language and test/config classification.

    Args:
        diff_content: Raw text as produced by ``git diff``.

    Returns:
        One FileStats per ``diff --git`` section, in order of appearance.
    """
    files: List[FileStats] = []
    current_file: Optional[FileStats] = None
    for line in diff_content.split('\n'):
        if line.startswith('diff --git'):
            # Flush the previous file before starting a new section.
            if current_file:
                files.append(current_file)
            # Reset so a malformed header can't misattribute +/- lines
            # to the previous file.
            current_file = None
            # Header form: "diff --git a/<path> b/<path>".  Anchor on " b/"
            # (with the leading space): a bare r'b/(.+)$' matches the
            # leftmost "b/", so paths such as "a/lib/x.py" would yield the
            # garbage filename "x.py b/lib/x.py".
            match = re.search(r' b/(.+)$', line)
            if match:
                filename = match.group(1)
                current_file = FileStats(
                    filename=filename,
                    language=detect_language(filename),
                    is_test=is_test_file(filename),
                    is_config=is_config_file(filename),
                )
        elif current_file:
            # "+++" / "---" are file headers, not content changes.
            if line.startswith('+') and not line.startswith('+++'):
                current_file.additions += 1
            elif line.startswith('-') and not line.startswith('---'):
                current_file.deletions += 1
    if current_file:
        files.append(current_file)
    return files
def calculate_complexity(files: List[FileStats]) -> float:
    """Score PR complexity on a 0-1 scale.

    Blends total churn, file count, the share of non-test changes and
    language diversity; an empty diff scores 0.0.
    """
    if not files:
        return 0.0

    churn = sum(f.additions + f.deletions for f in files)
    test_churn = sum(f.additions + f.deletions for f in files if f.is_test)
    distinct_langs = {f.language for f in files if f.language != 'unknown'}

    size_factor = min(churn / 1000, 1.0)            # saturates at 1000 lines
    file_factor = min(len(files) / 20, 1.0)         # saturates at 20 files
    non_test_ratio = 1 - test_churn / max(churn, 1)  # 1.0 when no test changes
    lang_factor = min(len(distinct_langs) / 5, 1.0)  # saturates at 5 languages

    # Weighted blend: raw size dominates (40%), the rest share 20% each.
    score = (
        size_factor * 0.4 +
        file_factor * 0.2 +
        non_test_ratio * 0.2 +
        lang_factor * 0.2
    )
    return round(score, 2)
def categorize_size(total_changes: int) -> str:
    """Map a total changed-line count onto a T-shirt size label."""
    # Upper bounds are exclusive; anything >= 800 is XL.
    thresholds = (
        (50, "XS (Extra Small)"),
        (200, "S (Small)"),
        (400, "M (Medium)"),
        (800, "L (Large)"),
    )
    for limit, label in thresholds:
        if total_changes < limit:
            return label
    return "XL (Extra Large) - Consider splitting"
def estimate_review_time(files: List[FileStats], complexity: float) -> int:
    """Estimate review time in minutes, clamped to [5, 120].

    Rule of thumb: ~20 reviewed lines per minute, inflated
    proportionally by the complexity score.
    """
    churn = sum(f.additions + f.deletions for f in files)
    minutes = int((churn / 20) * (1 + complexity))
    return max(5, min(120, minutes))
def identify_risk_factors(files: List[FileStats]) -> List[str]:
    """Collect human-readable risk flags for the diff.

    Flags oversized PRs, missing or thin test coverage, and the presence
    of security-sensitive, database-related or configuration files.
    """
    risks: List[str] = []
    churn = sum(f.additions + f.deletions for f in files)
    test_churn = sum(f.additions + f.deletions for f in files if f.is_test)

    if churn > 400:
        risks.append("Large PR (>400 lines) - harder to review thoroughly")
    if test_churn == 0 and churn > 50:
        risks.append("No test changes - verify test coverage")
    if churn > 100 and test_churn / max(churn, 1) < 0.2:
        risks.append("Low test ratio (<20%) - consider adding more tests")

    # Only the first security-sensitive path is reported.
    sensitive_terms = ('.env', 'auth', 'security', 'password', 'token', 'secret')
    flagged = next(
        (f for f in files
         if any(term in f.filename.lower() for term in sensitive_terms)),
        None,
    )
    if flagged is not None:
        risks.append(f"Security-sensitive file: {flagged.filename}")

    if any('migration' in f.filename.lower() or f.language == 'SQL' for f in files):
        risks.append("Database changes detected - review carefully")

    config_count = sum(1 for f in files if f.is_config)
    if config_count:
        risks.append(f"Configuration changes in {config_count} file(s)")
    return risks
def generate_suggestions(files: List[FileStats], complexity: float, risks: List[str]) -> List[str]:
    """Produce review-process suggestions from size, complexity and risks."""
    suggestions: List[str] = []
    churn = sum(f.additions + f.deletions for f in files)

    if churn > 800:
        suggestions.append("Consider splitting this PR into smaller, focused changes")
    if complexity > 0.7:
        suggestions.append("High complexity - allocate extra review time")
        suggestions.append("Consider pair reviewing for critical sections")
    # Substring check against the risk messages emitted upstream.
    if "No test changes" in str(risks):
        suggestions.append("Request test additions before approval")

    # Language-specific review reminders.
    present = {f.language for f in files}
    if present & {'TypeScript', 'TypeScript/React'}:
        suggestions.append("Check for proper type usage (avoid 'any')")
    if 'Rust' in present:
        suggestions.append("Check for unwrap() usage and error handling")
    if present & {'C', 'C++', 'C/C++'}:
        suggestions.append("Check for memory safety, bounds checks, and UB risks")
    if 'SQL' in present:
        suggestions.append("Review for SQL injection and query performance")

    return suggestions or ["Standard review process should suffice"]
def analyze_pr(diff_content: str) -> PRAnalysis:
    """Run the full analysis pipeline over a raw diff string.

    Parses the diff, then derives size, complexity, risks and
    suggestions into one PRAnalysis record.
    """
    parsed = parse_diff(diff_content)
    added = sum(f.additions for f in parsed)
    removed = sum(f.deletions for f in parsed)
    score = calculate_complexity(parsed)
    risk_list = identify_risk_factors(parsed)
    return PRAnalysis(
        total_files=len(parsed),
        total_additions=added,
        total_deletions=removed,
        files=parsed,
        complexity_score=score,
        size_category=categorize_size(added + removed),
        estimated_review_time=estimate_review_time(parsed, score),
        risk_factors=risk_list,
        suggestions=generate_suggestions(parsed, score, risk_list),
    )
def print_analysis(analysis: PRAnalysis, show_files: bool = False):
    """Pretty-print an analysis report to stdout.

    When *show_files* is True, also lists each file grouped by language
    with per-file add/delete counts.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("PR ANALYSIS REPORT")
    print(divider)
    print(f"\n📊 SUMMARY")
    print(f" Files changed: {analysis.total_files}")
    print(f" Additions: +{analysis.total_additions}")
    print(f" Deletions: -{analysis.total_deletions}")
    print(f" Total changes: {analysis.total_additions + analysis.total_deletions}")
    print(f"\n📏 SIZE: {analysis.size_category}")
    print(f" Complexity score: {analysis.complexity_score}/1.0")
    print(f" Estimated review time: ~{analysis.estimated_review_time} minutes")
    if analysis.risk_factors:
        print(f"\n⚠️ RISK FACTORS:")
        for risk_line in analysis.risk_factors:
            print(f"{risk_line}")
    print(f"\n💡 SUGGESTIONS:")
    for tip in analysis.suggestions:
        print(f"{tip}")
    if show_files:
        print(f"\n📁 FILES:")
        # Group the files by detected language for a tidy listing.
        grouped: Dict[str, List[FileStats]] = defaultdict(list)
        for entry in analysis.files:
            grouped[entry.language].append(entry)
        for language in sorted(grouped):
            print(f"\n [{language}]")
            for entry in grouped[language]:
                if entry.is_test:
                    icon = "🧪"
                elif entry.is_config:
                    icon = "⚙️"
                else:
                    icon = "📄"
                print(f" {icon} {entry.filename} (+{entry.additions}/-{entry.deletions})")
    print("\n" + divider)
def main():
    """CLI entry point: read a diff (file or piped stdin), analyze, report."""
    parser = argparse.ArgumentParser(description='Analyze PR complexity')
    parser.add_argument('--diff-file', '-f', help='Path to diff file')
    parser.add_argument('--stats', '-s', action='store_true', help='Show file details')
    args = parser.parse_args()

    # Prefer an explicit file argument; otherwise accept piped stdin.
    if args.diff_file:
        with open(args.diff_file, 'r') as fh:
            diff_content = fh.read()
    elif not sys.stdin.isatty():
        diff_content = sys.stdin.read()
    else:
        # Interactive terminal with no input: show usage and bail out.
        print("Usage: git diff main...HEAD | python pr-analyzer.py")
        print(" python pr-analyzer.py -f diff.txt")
        sys.exit(1)

    if not diff_content.strip():
        print("No diff content provided")
        sys.exit(1)

    print_analysis(analyze_pr(diff_content), show_files=args.stats)


if __name__ == '__main__':
    main()