# claude-code-meta-skill/source/meta-skill-generator/scripts/analyze_workflow.py

#!/usr/bin/env python3
"""
Workflow Analysis Script for Meta Skill Generator

Analyzes user requirements and suggests which operations should be:
- Go scripts (deterministic, performance-critical)
- Python scripts (library-heavy, data science)
- Agent workflows (requires reasoning, context-dependent)

Usage:
    analyze_workflow.py [--examples file1.txt file2.txt ...] [--interactive] [--output report.txt]
"""

import argparse
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Verbs that indicate a deterministic operation. Each one found in a sentence
# adds 1 to that sentence's ``det_score``; the set is also consulted when
# suggesting script names. Sentences are lower-cased before comparison, so
# every entry here must be lower case.
DETERMINISTIC_KEYWORDS = {
    'convert', 'transform', 'parse', 'extract', 'validate',
    'format', 'encode', 'decode', 'compress', 'decompress',
    'resize', 'crop', 'rotate', 'merge', 'split',
    'sort', 'filter', 'calculate', 'compute'
}

# Verbs that indicate a dynamic/reasoning operation. Each one found in a
# sentence adds 1 to that sentence's ``dyn_score``. Entries must be lower case.
DYNAMIC_KEYWORDS = {
    'analyze', 'understand', 'interpret', 'decide', 'choose',
    'suggest', 'recommend', 'summarize', 'explain', 'describe',
    'evaluate', 'assess', 'determine', 'identify', 'classify'
}

# Words and phrases that suggest Go (performance-critical work). Each one
# found adds 1 to ``go_score``. Multi-word entries are compared as a single
# phrase. Entries must be lower case.
GO_INDICATORS = {
    'large file', 'batch', 'thousands', 'millions', 'concurrent',
    'parallel', 'performance', 'fast', 'binary', 'low-level',
    'file system', 'network', 'stream'
}

# Words and phrases that suggest Python (library-heavy work). Each one found
# adds 1 to ``py_score``. Multi-word entries are compared as a single phrase.
# Entries must be lower case.
PYTHON_INDICATORS = {
    'pandas', 'numpy', 'scikit', 'machine learning', 'data science',
    'plot', 'graph', 'visualization', 'api client', 'requests',
    'beautiful soup', 'selenium', 'opencv'
}

class OperationAnalyzer:
    """Classify free-text requirement sentences into implementation strategies.

    Each sentence is scored against the module-level keyword sets
    (``DETERMINISTIC_KEYWORDS``, ``DYNAMIC_KEYWORDS``, ``GO_INDICATORS``,
    ``PYTHON_INDICATORS``) and labelled ``go_script``, ``python_script`` or
    ``agent_workflow``.
    """

    # Sentence punctuation only ends a sentence when it is followed by
    # whitespace or the end of the text, so "data.csv" and "v1.2" stay in one
    # piece. Line breaks always separate operations, because input is
    # gathered as one operation per line.
    _SENTENCE_BOUNDARY = re.compile(r'[.!?]+(?=\s|$)|[\r\n]+')

    def __init__(self):
        # Kept for backward compatibility; the analysis methods return their
        # results instead of storing them here.
        self.operations = []

    def analyze_text(self, text: str) -> List[Dict]:
        """Split *text* into sentences and classify each one.

        Args:
            text: Free-form requirements. Sentences may be separated by
                ``.``/``!``/``?`` followed by whitespace, or by line breaks.

        Returns:
            One operation dict (see ``_analyze_sentence``) per sentence that
            contains at least one deterministic or dynamic keyword, in input
            order. Sentences without any such keyword are dropped.
        """
        operations = []
        for fragment in self._SENTENCE_BOUNDARY.split(text):
            sentence = fragment.strip()
            if not sentence:
                continue

            op = self._analyze_sentence(sentence)
            if op is not None:
                operations.append(op)

        return operations

    @staticmethod
    def _count_keywords(text_lower: str, keywords) -> int:
        """Count how many of *keywords* start a word in *text_lower*.

        Matching is anchored at the start of a word, so inflected forms such
        as "sorting" or "converts" still count, while accidental substrings
        ("format" in "information", "sort" in "resort") do not.
        """
        return sum(
            1 for kw in keywords
            if re.search(r'(?<!\w)' + re.escape(kw), text_lower)
        )

    def _analyze_sentence(self, sentence: str) -> Optional[Dict]:
        """Score a single sentence and decide how it should be implemented.

        Args:
            sentence: One sentence, already stripped of surrounding whitespace.

        Returns:
            ``None`` when the sentence contains neither a deterministic nor a
            dynamic keyword. Otherwise a dict with the keys ``description``
            (the sentence as given), ``type`` (``'go_script'``,
            ``'python_script'`` or ``'agent_workflow'``), ``reason`` and the
            four raw scores ``det_score``, ``dyn_score``, ``go_score`` and
            ``py_score``.
        """
        sentence_lower = sentence.lower()

        # Deterministic vs dynamic signal
        det_score = self._count_keywords(sentence_lower, DETERMINISTIC_KEYWORDS)
        dyn_score = self._count_keywords(sentence_lower, DYNAMIC_KEYWORDS)

        # Go vs Python signal
        go_score = self._count_keywords(sentence_lower, GO_INDICATORS)
        py_score = self._count_keywords(sentence_lower, PYTHON_INDICATORS)

        if det_score == 0 and dyn_score == 0:
            return None

        # Ties between deterministic and dynamic go to the agent: when in
        # doubt, keep the step as a reasoning workflow.
        if det_score > dyn_score:
            if go_score > py_score:
                op_type = 'go_script'
                reason = 'Deterministic operation with performance/binary characteristics'
            elif py_score > 0:
                op_type = 'python_script'
                reason = 'Deterministic operation requiring specialized libraries'
            else:
                # No language hint at all: default to Go.
                op_type = 'go_script'
                reason = 'Deterministic operation suitable for compiled binary'
        else:
            op_type = 'agent_workflow'
            reason = 'Requires reasoning, context analysis, or decision-making'

        return {
            'description': sentence,
            'type': op_type,
            'reason': reason,
            'det_score': det_score,
            'dyn_score': dyn_score,
            'go_score': go_score,
            'py_score': py_score
        }

    def _format_section(self, title: str, ops: List[Dict], suggest_name: bool) -> List[str]:
        """Build the report lines for one group of operations.

        When *suggest_name* is true each entry ends with a suggested script
        name; otherwise it ends with the fixed SKILL.md implementation note.
        """
        lines = ["-" * 70, title, "-" * 70]
        for i, op in enumerate(ops, 1):
            lines.append(f"\n{i}. {op['description']}")
            lines.append(f"   Reason: {op['reason']}")
            if suggest_name:
                lines.append(f"   Suggested name: {self._suggest_script_name(op['description'])}")
            else:
                lines.append("   Implementation: Keep as natural language workflow in SKILL.md")
        lines.append("")
        return lines

    def generate_recommendations(self, operations: List[Dict]) -> str:
        """Render a plain-text report for *operations*.

        Args:
            operations: Operation dicts as produced by ``analyze_text``.

        Returns:
            The report as a single newline-joined string: a summary with
            counts, one section per non-empty operation type, and follow-up
            guidance for each type that occurred.
        """
        go_ops = [op for op in operations if op['type'] == 'go_script']
        py_ops = [op for op in operations if op['type'] == 'python_script']
        agent_ops = [op for op in operations if op['type'] == 'agent_workflow']

        heavy_rule = "=" * 70
        report = [
            heavy_rule,
            "WORKFLOW ANALYSIS REPORT",
            heavy_rule,
            "",
            f"Total operations identified: {len(operations)}",
            f"  - Go scripts recommended: {len(go_ops)}",
            f"  - Python scripts recommended: {len(py_ops)}",
            f"  - Agent workflows recommended: {len(agent_ops)}",
            "",
        ]

        if go_ops:
            report.extend(self._format_section(
                "GO SCRIPTS (Deterministic, Performance-Critical)", go_ops, True))

        if py_ops:
            report.extend(self._format_section(
                "PYTHON SCRIPTS (Library-Heavy Operations)", py_ops, True))

        if agent_ops:
            report.extend(self._format_section(
                "AGENT WORKFLOWS (Reasoning Required)", agent_ops, False))

        report.extend([heavy_rule, "RECOMMENDATIONS", heavy_rule, ""])

        if go_ops:
            report.extend([
                "For Go scripts:",
                "  1. Use generate_go_script.py to create each script",
                "  2. Focus on performance and error handling",
                "  3. Support parallel processing where applicable",
                "",
            ])

        if py_ops:
            report.extend([
                "For Python scripts:",
                "  1. Create scripts in scripts/ directory",
                "  2. Add requirements.txt for dependencies",
                "  3. Consider virtual environments",
                "",
            ])

        if agent_ops:
            report.extend([
                "For agent workflows:",
                "  1. Document in SKILL.md with clear decision points",
                "  2. Provide examples for different scenarios",
                "  3. Use references/ for detailed guidance",
                "",
            ])

        return '\n'.join(report)

    def _suggest_script_name(self, description: str) -> str:
        """Derive a hyphenated script name from the first words of *description*.

        Takes up to three words that are either a deterministic keyword or
        longer than three characters (after dropping non-alphanumeric
        characters) and joins them with ``-``. Returns ``'operation'`` when no
        word qualifies.
        """
        important_words = []

        for word in description.lower().split():
            cleaned = ''.join(c for c in word if c.isalnum())
            if cleaned in DETERMINISTIC_KEYWORDS or len(cleaned) > 3:
                important_words.append(cleaned)
            if len(important_words) >= 3:
                break

        return '-'.join(important_words) if important_words else 'operation'

def interactive_mode():
    """Prompt on stdin for operations, one per line, until end-of-file.

    Prints a short banner with instructions, then reads lines with a ``> ``
    prompt. Lines that are empty or whitespace-only are skipped.

    Returns:
        The entered lines joined with newlines (an empty string when nothing
        was entered).
    """
    banner_rule = "=" * 70
    print(banner_rule)
    print("WORKFLOW ANALYSIS - INTERACTIVE MODE")
    print(banner_rule)
    print("\nDescribe the operations your skill should perform.")
    print("Enter each operation on a separate line.")
    print("Press Ctrl+D (Unix) or Ctrl+Z (Windows) when done.\n")

    collected = []
    while True:
        try:
            entry = input("> ")
        except EOFError:
            # End of input is the normal way to finish the session.
            break
        if not entry.strip():
            continue
        collected.append(entry)

    return '\n'.join(collected)

def main():
    """Command-line entry point: gather input, analyze it, emit the report.

    Input comes from interactive mode when ``--interactive`` is given,
    otherwise from the files listed after ``--examples``. The report goes to
    the ``--output`` file when given, otherwise to stdout.

    Exits with status 1 when no input source is selected, when the gathered
    input is empty, or when no operations are detected.
    """
    parser = argparse.ArgumentParser(
        description='Analyze workflows to identify deterministic vs dynamic operations',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument('--examples', nargs='+', help='Files containing example requests')
    parser.add_argument('--interactive', action='store_true', help='Run in interactive mode')
    parser.add_argument('--output', help='Output file for report (default: stdout)')

    args = parser.parse_args()

    analyzer = OperationAnalyzer()

    # Gather input
    text = ""
    if args.interactive:
        text = interactive_mode()
    elif args.examples:
        for example_file in args.examples:
            path = Path(example_file)
            # is_file() rather than exists(): a directory exists but cannot
            # be read as text.
            if not path.is_file():
                print(f"Warning: File not found: {example_file}", file=sys.stderr)
                continue
            try:
                # Explicit encoding so results do not depend on the locale.
                text += path.read_text(encoding='utf-8') + '\n'
            except (OSError, UnicodeDecodeError) as exc:
                # One unreadable file should not abort the whole run.
                print(f"Warning: Could not read {example_file}: {exc}", file=sys.stderr)
    else:
        print("Error: Provide --examples or use --interactive mode", file=sys.stderr)
        sys.exit(1)

    if not text.strip():
        print("Error: No input provided", file=sys.stderr)
        sys.exit(1)

    # Analyze
    operations = analyzer.analyze_text(text)

    if not operations:
        print("No operations detected in input", file=sys.stderr)
        sys.exit(1)

    # Generate report
    report = analyzer.generate_recommendations(operations)

    # Output
    if args.output:
        # The report echoes user text, which may be non-ASCII.
        Path(args.output).write_text(report, encoding='utf-8')
        print(f"✅ Report written to: {args.output}")
    else:
        print(report)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()