diff --git a/.pi/extensions/plan-executor/README.md b/.pi/extensions/plan-executor/README.md deleted file mode 100644 index 0b66c82..0000000 --- a/.pi/extensions/plan-executor/README.md +++ /dev/null @@ -1,104 +0,0 @@ -# Plan Executor - -Execute multi-phase plan files deterministically, one phase at a time, in isolated contexts. - -## Concept - -Instead of dumping an entire plan into one conversation (where context bloats and the agent loses focus), this extension: - -1. **Parses** your plan file into numbered phases -2. **Executes** each phase in a **clean, isolated** `pi` subprocess (fresh context window) -3. **Runs the quality gate** (default: `make precommit`) after each phase -4. **Auto-fixes** any gate failures in another clean context (up to 3 attempts) -5. **Only proceeds** to the next phase when the gate passes - -This gives you deterministic, phase-by-phase execution with automatic quality gates. - -## Usage - -```bash -# Execute entire plan (default gate: make precommit) -/execute-plan plans/fix-readme-issues.md - -# Start from a specific phase -/execute-plan plans/fix-readme-issues.md --phase 2 - -# Use a custom gate command -/execute-plan plans/fix-readme-issues.md --gate "mix test && mix credo --strict" - -# Dry run — show detected phases without executing -/execute-plan plans/fix-readme-issues.md --dry-run -``` - -## Plan File Format - -Phases are detected by numbered headings: - -```markdown -## 1. Fix test failure - -Remove the fragile describe block... - -Verify: `mix test` - ---- - -## 2. Fix credo issues - -Fix all 41 credo issues... - -Verify: `mix credo --strict` -``` - -Each phase's content includes everything between its heading and the next phase heading (or a non-phase section like "Execution order" or "Risk assessment"). - -## Execution Flow - -``` -┌─────────────────────┐ -│ Parse plan file │ -│ extract phases │ -└────────┬────────────┘ - ▼ -┌─────────────────────┐ -│ For each phase: │ -│ │ -│ 1. 
Spawn clean pi │──► Agent executes phase -│ subprocess │ in isolated context -│ │ -│ 2. Agent says DONE │──► Phase changes applied -│ │ -│ 3. Run quality │──► Quality gate -│ gate command │ -│ │ -│ 4a. Gate passes? │──✅ Yes → Next phase -│ │ -│ 4b. Gate fails? │──❌ No → Spawn fix agent -│ (up to 3x) │ in another clean context -│ │ -│ Fix agent runs │──► Re-check gate -│ & re-verify │ -└─────────────────────┘ -``` - -## Widget - -While running, a widget in the TUI shows: -- Phase status (⏳ pending, 🔄 running, ✅ done, ❌ failed) -- Turn count and token usage per phase -- Gate attempt count -- Final summary with total cost - -## Configuration - -The extension uses your current pi model and tool configuration for spawned agents. Each phase runs with: -- Full tool access (bash, read, write, edit) -- Your default model -- A custom system prompt scoped to the phase - -## Safety - -- Each phase runs in a separate `pi` process (no shared state) -- `Ctrl+C` aborts the current agent and cleans up -- Quality gate prevents broken state from propagating between phases -- Max 3 fix attempts per phase before failing diff --git a/.pi/extensions/plan-executor/index.ts b/.pi/extensions/plan-executor/index.ts deleted file mode 100644 index f366e74..0000000 --- a/.pi/extensions/plan-executor/index.ts +++ /dev/null @@ -1,624 +0,0 @@ -/** - * Plan Executor - Execute multi-phase plans deterministically - * - * Reads a plan file with numbered phases and executes each phase - * in a clean, isolated pi process. After each phase, runs the - * quality gate and auto-fixes any regressions before proceeding. 
- * - * Usage: - * /execute-plan plans/fix-readme-issues.md - * /execute-plan plans/fix-readme-issues.md --phase 2 - * /execute-plan plans/fix-readme-issues.md --gate "mix test" - * /execute-plan plans/fix-readme-issues.md --dry-run - */ - -import { spawn } from "node:child_process"; -import * as fs from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; -import type { Message } from "@mariozechner/pi-ai"; -import { - type ExtensionAPI, - getMarkdownTheme, - withFileMutationQueue, -} from "@mariozechner/pi-coding-agent"; -import { Container, Markdown, Spacer, Text } from "@mariozechner/pi-tui"; - -// ─── Phase Executor System Prompt ──────────────────────────────────────────── - -const PHASE_SYSTEM_PROMPT = `You are executing a single phase of a larger plan. Your job is to complete ONLY this phase. - -## Instructions -1. Read the phase description carefully -2. Make all the changes described in the phase -3. Run the verification command(s) listed in the phase -4. When ALL changes are done and verification passes, respond with: - -DONE: - -Do NOT proceed to any other phases. Do NOT make changes outside this phase's scope. -If verification fails, diagnose and fix the issue within this phase's scope, then re-verify. - -## Important -- If the phase says "remove lines X-Y", do exactly that -- If the phase says "run mix format", do that -- Always verify before declaring DONE -- If you cannot complete the phase, explain what is blocking you instead of saying DONE`; - -// ─── Gate Fix System Prompt ────────────────────────────────────────────────── - -const GATE_FIX_SYSTEM_PROMPT = `You are fixing issues found by the quality gate. The following command failed: - - {GATE_COMMAND} - -Here is the output showing what failed. Fix ALL reported issues, then verify by running the check command(s) mentioned in the output. 
// ─── Types ───────────────────────────────────────────────────────────────────

/** A single numbered phase extracted from a plan file. */
interface Phase {
  /** Phase number exactly as written in the plan (not necessarily contiguous). */
  number: number;
  /** Text following the number on the phase's header line, trimmed. */
  title: string;
  /** Raw body of the phase; every source line is stored followed by "\n". */
  content: string;
  /** Declared for consumers; parsePhases itself does not populate it. */
  verifyCommand?: string;
}

/** Mutable bookkeeping for one phase while a plan is being executed. */
interface PhaseResult {
  phase: number;
  title: string;
  status: "pending" | "running" | "done" | "failed" | "fixing";
  agentOutput?: string;
  agentMessages?: Message[];
  /** Number of times the quality gate command has been run for this phase. */
  gateAttempts: number;
  gateOutput?: string;
  fixOutput?: string;
  turns?: number;
  tokensIn?: number;
  tokensOut?: number;
  cost?: number;
  model?: string;
  error?: string;
}

// ─── Plan Parsing ────────────────────────────────────────────────────────────

/**
 * Split a markdown plan into its numbered phases.
 *
 * Phase headers are `## N. Title` lines. Only when the plan contains no such
 * heading at all are bare `N. Title` lines accepted as phase headers, so
 * ordinary numbered lists inside a heading-style phase stay part of that
 * phase's content instead of being split off as bogus phases.
 *
 * A phase ends at the next phase header or at a level-1/level-2 heading that
 * does not start with a digit (e.g. "## Execution order"); everything after
 * such a section is ignored until the next phase header. Deeper headings
 * (`###` and below) are treated as part of the phase. Lines inside fenced code
 * blocks are always content, so a shell comment like `# run tests` cannot
 * terminate a phase. Leading `---` separators are skipped.
 *
 * @param planContent Full text of the plan file (LF or CRLF line endings).
 * @returns Phases in document order; a phase with a title but no body is kept.
 */
function parsePhases(planContent: string): Phase[] {
  // Accept CRLF too: a trailing "\r" would otherwise stop `(.+)$` from matching.
  const lines = planContent.split(/\r?\n/);

  const fenceRegex = /^\s*(?:```|~~~)/;
  const headingPhaseRegex = /^##\s*(\d+)\.\s+(.+)$/;
  const barePhaseRegex = /^(\d+)\.\s+(.+)$/;
  const sectionHeaderRegex = /^#{1,2}\s+[^#\d]/;
  const separatorRegex = /^---+$/;

  // Pass 1: decide which header style the plan uses (ignoring fenced code).
  let usesHeadings = false;
  let inFence = false;
  for (const line of lines) {
    if (fenceRegex.test(line)) {
      inFence = !inFence;
    } else if (!inFence && headingPhaseRegex.test(line)) {
      usesHeadings = true;
      break;
    }
  }
  const phaseRegex = usesHeadings ? headingPhaseRegex : barePhaseRegex;

  // Pass 2: collect phases.
  const phases: Phase[] = [];
  let current: Phase | null = null;
  inFence = false;

  for (const line of lines) {
    const isFenceLine = fenceRegex.test(line);
    if (isFenceLine) inFence = !inFence;
    // Fence delimiters and everything between them are literal content.
    const isLiteral = isFenceLine || inFence;

    if (!isLiteral) {
      const match = phaseRegex.exec(line);
      if (match) {
        if (current) phases.push(current);
        current = {
          number: parseInt(match[1], 10),
          title: match[2].trim(),
          content: "",
        };
        continue;
      }

      if (current && sectionHeaderRegex.test(line)) {
        // Always close the phase here, even when it has no body yet, so the
        // unrelated section's text can never leak into the phase.
        phases.push(current);
        current = null;
        continue;
      }
    }

    if (!current) continue;

    // Drop separators that appear before any real content of the phase.
    const isSeparator = separatorRegex.test(line.trim());
    if (!isSeparator || current.content.length > 0) {
      current.content += line + "\n";
    }
  }

  if (current) phases.push(current);
  return phases;
}

// ─── Process Helpers ─────────────────────────────────────────────────────────

/**
 * Build the command line used to launch a child `pi` process.
 *
 * When `process.argv[1]` names an existing file, the current runtime
 * (`process.execPath`) is re-invoked with that script followed by `args`.
 * Otherwise — including when the script path starts with "/$bunfs/root/",
 * presumably a bun-compiled virtual path (TODO confirm) — the `pi` executable
 * is resolved through PATH.
 */
function getPiInvocation(args: string[]): { command: string; args: string[] } {
  const currentScript = process.argv[1];
  const isBunVirtualScript = currentScript?.startsWith("/$bunfs/root/");
  if (currentScript && !isBunVirtualScript && fs.existsSync(currentScript)) {
    return { command: process.execPath, args: [currentScript, ...args] };
  }
  return { command: "pi", args };
}
/**
 * Write `prompt` to a fresh private temp directory and return both paths.
 *
 * The file is created with mode 0o600 inside a directory made by `mkdtemp`.
 * The write goes through `withFileMutationQueue` (imported helper; presumably
 * serialises mutations per path — confirm against its documentation).
 * If the write fails the temp directory is removed before the error is
 * rethrown, so a failed call leaves nothing behind.
 *
 * @param name   Label embedded in the file name; characters other than word
 *               characters, "." and "-" are replaced by "_".
 * @param prompt Text to store.
 */
async function writePromptToTempFile(
  name: string,
  prompt: string,
): Promise<{ dir: string; filePath: string }> {
  const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-plan-exec-"));
  try {
    const safeName = name.replace(/[^\w.-]+/g, "_");
    const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
    await withFileMutationQueue(filePath, () =>
      fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 }),
    );
    return { dir: tmpDir, filePath };
  } catch (err) {
    await cleanupTemp(tmpDir);
    throw err;
  }
}

/** Best-effort recursive removal of a temp directory; errors are ignored on purpose. */
async function cleanupTemp(dir: string | null): Promise<void> {
  if (!dir) return;
  try {
    await fs.promises.rm(dir, { recursive: true, force: true });
  } catch {
    /* ignore: cleanup must never mask the real outcome */
  }
}

/** Aggregated outcome of one child agent run. */
interface AgentRunResult {
  /** Child exit code; 1 when the child failed to start or was killed by a signal. */
  exitCode: number;
  /** Messages collected from `message_end` and `tool_result_end` events. */
  messages: Message[];
  stderr: string;
  /** Number of assistant messages seen. */
  turns: number;
  tokensIn: number;
  tokensOut: number;
  cost: number;
  model?: string;
  stopReason?: string;
  errorMessage?: string;
}

/**
 * Return the first text part of the most recent assistant message that has
 * one, or "" when no assistant message contains text.
 */
function getFinalOutput(messages: Message[]): string {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (msg.role !== "assistant") continue;
    for (const part of msg.content) {
      if (part.type === "text") return part.text;
    }
  }
  return "";
}

/** True when any line of `output` starts with "DONE:" (case-insensitive). */
function isDone(output: string): boolean {
  return /^DONE:\s*/im.test(output);
}

/**
 * Run one isolated `pi` child process in JSON mode and collect its results.
 *
 * The system prompt is handed over through a temp file that is removed when
 * the run finishes. stdout is read as newline-delimited JSON; lines that are
 * not JSON objects are skipped. The returned promise never rejects because of
 * the child: spawn failures are reported as `exitCode: 1` with `errorMessage`.
 *
 * @param cwd          Working directory of the child.
 * @param systemPrompt Text passed via `--append-system-prompt`.
 * @param task         User task passed as the final CLI argument.
 * @param signal       When aborted, the child receives SIGTERM and, if it has
 *                     still not exited 5 s later, SIGKILL.
 * @param onUpdate     Called after every assistant message with the latest
 *                     text output and the running turn count.
 */
async function runAgent(
  cwd: string,
  systemPrompt: string,
  task: string,
  signal?: AbortSignal,
  onUpdate?: (partial: { output: string; turns: number }) => void,
): Promise<AgentRunResult> {
  const args: string[] = ["--mode", "json", "-p", "--no-session"];

  const { dir: promptDir, filePath: promptPath } = await writePromptToTempFile(
    "phase-executor",
    systemPrompt,
  );
  args.push("--append-system-prompt", promptPath);
  args.push(task);

  const result: AgentRunResult = {
    exitCode: 0,
    messages: [],
    stderr: "",
    turns: 0,
    tokensIn: 0,
    tokensOut: 0,
    cost: 0,
  };

  try {
    return await new Promise<AgentRunResult>((resolve) => {
      const invocation = getPiInvocation(args);
      const proc = spawn(invocation.command, invocation.args, {
        cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
      });

      // Decode through the stream so multi-byte characters split across
      // chunks are reassembled instead of being corrupted.
      proc.stdout.setEncoding("utf8");
      proc.stderr.setEncoding("utf8");

      let buffer = "";
      let exited = false;
      let killTimer: ReturnType<typeof setTimeout> | undefined;

      const processLine = (line: string) => {
        if (!line.trim()) return;
        let event: { type?: string; message?: Message } | null;
        try {
          event = JSON.parse(line);
        } catch {
          return;
        }
        // Valid JSON that is not an object (null, numbers, strings) is not an event.
        if (typeof event !== "object" || event === null) return;

        if (event.type === "message_end" && event.message) {
          const msg = event.message;
          result.messages.push(msg);

          if (msg.role === "assistant") {
            result.turns++;
            const usage = msg.usage;
            if (usage) {
              result.tokensIn += usage.input || 0;
              result.tokensOut += usage.output || 0;
              result.cost += usage.cost?.total || 0;
            }
            if (!result.model && msg.model) result.model = msg.model;
            if (msg.stopReason) result.stopReason = msg.stopReason;
            if (msg.errorMessage) result.errorMessage = msg.errorMessage;

            // Stream progress
            onUpdate?.({ output: getFinalOutput(result.messages), turns: result.turns });
          }
        }

        if (event.type === "tool_result_end" && event.message) {
          result.messages.push(event.message);
        }
      };

      const killProc = () => {
        if (exited) return;
        proc.kill("SIGTERM");
        // `proc.killed` only says a signal was sent, not that the child is
        // gone, so escalation is keyed on our own exit tracking instead.
        killTimer = setTimeout(() => {
          if (!exited) proc.kill("SIGKILL");
        }, 5000);
      };

      const finish = (exitCode: number) => {
        exited = true;
        if (killTimer) clearTimeout(killTimer);
        signal?.removeEventListener("abort", killProc);
        result.exitCode = exitCode;
        resolve(result);
      };

      proc.stdout.on("data", (chunk: string) => {
        buffer += chunk;
        const lines = buffer.split("\n");
        buffer = lines.pop() || "";
        for (const line of lines) processLine(line);
      });

      proc.stderr.on("data", (chunk: string) => {
        result.stderr += chunk;
      });

      proc.on("close", (code) => {
        if (buffer.trim()) processLine(buffer);
        // `code` is null when the child was terminated by a signal; that is
        // not a success, so report it as a failure like runCommand does.
        finish(code ?? 1);
      });

      proc.on("error", (err) => {
        if (!result.errorMessage) result.errorMessage = err.message;
        finish(1);
      });

      if (signal) {
        if (signal.aborted) killProc();
        else signal.addEventListener("abort", killProc, { once: true });
      }
    });
  } finally {
    await cleanupTemp(promptDir);
  }
}

/**
 * Run `command` through the shell and capture its output.
 *
 * Never rejects: a command that cannot be started resolves with
 * `exitCode: 1` and the reason in `stderr`. A command terminated by a signal
 * also resolves with `exitCode: 1`. Aborting `signal` sends SIGTERM.
 */
async function runCommand(
  cwd: string,
  command: string,
  signal?: AbortSignal,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  return await new Promise((resolve) => {
    const proc = spawn(command, {
      cwd,
      shell: true,
      stdio: ["ignore", "pipe", "pipe"],
    });

    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";

    const killProc = () => {
      proc.kill("SIGTERM");
    };
    const finish = (outcome: { exitCode: number; stdout: string; stderr: string }) => {
      // Release the listener so a long-lived signal does not accumulate one per run.
      signal?.removeEventListener("abort", killProc);
      resolve(outcome);
    };

    proc.stdout.on("data", (chunk: string) => (stdout += chunk));
    proc.stderr.on("data", (chunk: string) => (stderr += chunk));

    proc.on("close", (code) => {
      finish({ exitCode: code ?? 1, stdout, stderr });
    });

    proc.on("error", (err) => {
      finish({ exitCode: 1, stdout, stderr: `Command failed to start: ${err.message}` });
    });

    if (signal) {
      if (signal.aborted) killProc();
      else signal.addEventListener("abort", killProc, { once: true });
    }
  });
}

// ─── Format Helpers ──────────────────────────────────────────────────────────

/** Compact token count: "999", "1.5k" below ten thousand, "12k" above. */
function formatTokens(count: number): string {
  if (count < 1000) return count.toString();
  if (count < 10000) return `${(count / 1000).toFixed(1)}k`;
  return `${Math.round(count / 1000)}k`;
}

/**
 * One-line usage summary for a phase: turns, tokens in/out, cost and model,
 * space-separated; fields that are missing or zero are omitted.
 */
function formatUsage(result: PhaseResult): string {
  const parts: string[] = [];
  if (result.turns) parts.push(`${result.turns} turns`);
  if (result.tokensIn) parts.push(`↑${formatTokens(result.tokensIn)}`);
  if (result.tokensOut) parts.push(`↓${formatTokens(result.tokensOut)}`);
  if (result.cost) parts.push(`$${result.cost.toFixed(4)}`);
  if (result.model) parts.push(result.model);
  return parts.join(" ");
}
/** Usage line shown whenever the command line cannot be understood. */
const EXECUTE_PLAN_USAGE =
  "Usage: /execute-plan <plan-file> [--phase N] [--gate CMD] [--dry-run]";

/** Gate command used when `--gate` is not given. */
const DEFAULT_GATE_COMMAND = "make precommit";

/** Maximum number of fix-agent runs per phase after a failing gate. */
const MAX_FIX_ATTEMPTS = 3;

/**
 * Upper bound on gate output embedded in a fix task. The task travels as a
 * single process argument, and operating systems cap argument length, so
 * unbounded output could make the fix agent fail to start.
 */
const MAX_GATE_OUTPUT_CHARS = 20000;

/** Parsed `/execute-plan` command line, or the message to show the user. */
type ExecutePlanArgs =
  | { ok: true; planFile: string; dryRun: boolean; startPhase: number; gateCommand: string }
  | { ok: false; error: string };

/** Split a command line on whitespace, keeping "double" or 'single' quoted runs as one token. */
function tokenizeArgs(input: string): string[] {
  const tokens: string[] = [];
  for (const match of input.matchAll(/"([^"]*)"|'([^']*)'|(\S+)/g)) {
    tokens.push(match[1] ?? match[2] ?? match[3]);
  }
  return tokens;
}

/**
 * Parse the argument string of `/execute-plan`.
 *
 * Flags may appear in any position. `--gate` takes exactly one token, so a
 * multi-word gate command must be quoted; stray extra tokens are reported
 * instead of being silently dropped.
 */
function parseExecutePlanArgs(raw: string): ExecutePlanArgs {
  const tokens = tokenizeArgs(raw);
  let planFile: string | undefined;
  let dryRun = false;
  let startPhase = 1;
  let gateCommand = DEFAULT_GATE_COMMAND;

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    switch (token) {
      case "--dry-run":
        dryRun = true;
        break;
      case "--phase": {
        const value = tokens[++i];
        if (value === undefined || !/^\d+$/.test(value)) {
          return { ok: false, error: `--phase requires a phase number\n${EXECUTE_PLAN_USAGE}` };
        }
        startPhase = parseInt(value, 10);
        break;
      }
      case "--gate": {
        const value = tokens[++i];
        if (value === undefined || !value.trim()) {
          return { ok: false, error: `--gate requires a command\n${EXECUTE_PLAN_USAGE}` };
        }
        gateCommand = value;
        break;
      }
      default:
        if (token.startsWith("--")) {
          return { ok: false, error: `Unknown option: ${token}\n${EXECUTE_PLAN_USAGE}` };
        }
        if (planFile !== undefined) {
          return {
            ok: false,
            error: `Unexpected argument: ${token} (quote multi-word values)\n${EXECUTE_PLAN_USAGE}`,
          };
        }
        planFile = token;
    }
  }

  if (!planFile) return { ok: false, error: EXECUTE_PLAN_USAGE };
  return { ok: true, planFile, dryRun, startPhase, gateCommand };
}

/** Icon shown next to a phase for each status. */
function statusIcon(status: PhaseResult["status"]): string {
  switch (status) {
    case "done":
      return "✅";
    case "failed":
      return "❌";
    case "running":
    case "fixing":
      return "🔄";
    default:
      return "⏳";
  }
}

/** Human-readable message for an unknown thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Keep only the last `maxChars` characters of `text`, noting how much was cut. */
function tailOf(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  return `[... ${text.length - maxChars} earlier characters truncated ...]\n${text.slice(-maxChars)}`;
}

/**
 * Registers the `/execute-plan` command.
 *
 * The handler parses the plan file into phases and, for every phase numbered
 * at or above `--phase`, runs an isolated agent, then the gate command, then
 * up to MAX_FIX_ATTEMPTS fix agents while the gate keeps failing. Execution
 * stops at the first phase that fails, does not report DONE, or is aborted,
 * so later phases never run on top of an incomplete one. Progress and the
 * final summary are published through the "plan-executor" widget and status.
 */
export default function (pi: ExtensionAPI) {
  pi.registerCommand("execute-plan", {
    description: "Execute a multi-phase plan file deterministically",
    handler: async (args, ctx) => {
      if (!ctx.hasUI) {
        ctx.ui.notify("execute-plan requires interactive mode", "error");
        return;
      }

      const parsed = parseExecutePlanArgs(args);
      if (!parsed.ok) {
        ctx.ui.notify(parsed.error, "error");
        return;
      }
      const { planFile, dryRun, startPhase, gateCommand } = parsed;

      // Resolve path
      const resolvedPath = path.isAbsolute(planFile) ? planFile : path.join(ctx.cwd, planFile);

      if (!fs.existsSync(resolvedPath)) {
        ctx.ui.notify(`Plan file not found: ${resolvedPath}`, "error");
        return;
      }

      let planContent: string;
      try {
        planContent = fs.readFileSync(resolvedPath, "utf-8");
      } catch (err: unknown) {
        // e.g. the path is a directory or is not readable
        ctx.ui.notify(`Cannot read plan file ${resolvedPath}: ${describeError(err)}`, "error");
        return;
      }

      const phases = parsePhases(planContent);
      if (phases.length === 0) {
        ctx.ui.notify("No phases found in plan file", "error");
        return;
      }

      // Dry run: show plan and exit
      if (dryRun) {
        ctx.ui.setWidget("plan-executor", [
          `## Plan: ${path.basename(resolvedPath)}`,
          ``,
          `Gate command: ${gateCommand}`,
          `${phases.length} phase(s) detected:`,
          ``,
          ...phases.map((p) => `### Phase ${p.number}: ${p.title}`),
          ``,
          ...phases.map((p) => p.content.trim()),
        ]);
        return;
      }

      if (!phases.some((p) => p.number >= startPhase)) {
        ctx.ui.notify(`No phase numbered ${startPhase} or higher in plan`, "error");
        return;
      }

      // results[i] always belongs to phases[i]; phase numbers are never used
      // as array indices because plans may skip or repeat numbers.
      const results: PhaseResult[] = phases.map((p) => ({
        phase: p.number,
        title: p.title,
        status: "pending",
        gateAttempts: 0,
      }));

      const updateUI = () => {
        ctx.ui.setWidget("plan-executor", [
          `## Plan Executor: ${path.basename(resolvedPath)}`,
          ``,
          ...results.map((r) => {
            const usage = r.turns ? ` (${formatUsage(r)})` : "";
            const gateInfo = r.gateAttempts > 0 ? ` [gate: ${r.gateAttempts}]` : "";
            return `${statusIcon(r.status)} Phase ${r.phase}: ${r.title}${usage}${gateInfo}`;
          }),
          ``,
        ]);
      };

      const aborted = () => ctx.signal?.aborted === true;

      updateUI();

      // Execute phases sequentially
      for (let i = 0; i < phases.length; i++) {
        const phase = phases[i];
        const result = results[i];
        if (phase.number < startPhase) continue;
        if (aborted()) break;

        result.status = "running";
        updateUI();
        ctx.ui.setStatus("plan-executor", `Executing Phase ${phase.number}: ${phase.title}`);

        // Build task prompt from phase content
        const task = `## Phase ${phase.number}: ${phase.title}\n\n${phase.content.trim()}`;

        try {
          // Run phase in isolated context
          const agentResult = await runAgent(
            ctx.cwd,
            PHASE_SYSTEM_PROMPT,
            task,
            ctx.signal,
            ({ output, turns }) => {
              result.turns = turns;
              const status = isDone(output) ? "✅" : "🔄";
              ctx.ui.setStatus(
                "plan-executor",
                `${status} Phase ${phase.number} (${turns} turns): ${phase.title}`,
              );
            },
          );

          result.agentOutput = getFinalOutput(agentResult.messages);
          result.agentMessages = agentResult.messages;
          result.turns = agentResult.turns;
          result.tokensIn = agentResult.tokensIn;
          result.tokensOut = agentResult.tokensOut;
          result.cost = agentResult.cost;
          result.model = agentResult.model;

          if (aborted()) {
            result.status = "failed";
            result.error = "Aborted";
            updateUI();
            ctx.ui.notify(`Phase ${phase.number} aborted`, "warning");
            break;
          }

          // Check if agent said DONE. Stop here: later phases must not build
          // on a phase that was never completed.
          if (!isDone(result.agentOutput ?? "")) {
            result.status = "failed";
            result.error =
              agentResult.errorMessage ?? "Agent did not signal completion (no DONE: message)";
            updateUI();
            ctx.ui.notify(
              `Phase ${phase.number} incomplete: agent did not signal DONE`,
              "warning",
            );
            break;
          }

          // Run gate
          ctx.ui.setStatus(
            "plan-executor",
            `Phase ${phase.number} done, running gate: ${gateCommand}...`,
          );
          let gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
          result.gateAttempts++;
          result.gateOutput = gateResult.stdout + gateResult.stderr;

          // If gate fails, fix in a loop
          let fixAttempt = 0;
          while (gateResult.exitCode !== 0 && fixAttempt < MAX_FIX_ATTEMPTS && !aborted()) {
            fixAttempt++;
            result.status = "fixing";
            updateUI();
            ctx.ui.setStatus(
              "plan-executor",
              `Fixing gate issues (attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS})...`,
            );

            const gateOutput = tailOf(
              `${gateResult.stdout}\n${gateResult.stderr}`,
              MAX_GATE_OUTPUT_CHARS,
            );
            const fixTask = `## Gate Failed\n\nThe following command failed after completing Phase ${phase.number}:\n\n    ${gateCommand}\n\nOutput:\n\n\`\`\`\n${gateOutput}\n\`\`\`\n\nFix ALL reported issues.`;

            // A replacer function keeps "$" sequences in the gate command
            // literal; a plain string replacement would expand "$$", "$&", ...
            const fixSystemPrompt = GATE_FIX_SYSTEM_PROMPT.replace(
              "{GATE_COMMAND}",
              () => gateCommand,
            );

            const fixResult = await runAgent(
              ctx.cwd,
              fixSystemPrompt,
              fixTask,
              ctx.signal,
              ({ turns }) => {
                ctx.ui.setStatus(
                  "plan-executor",
                  `Fixing gate (${fixAttempt}/${MAX_FIX_ATTEMPTS}, ${turns} turns)...`,
                );
              },
            );

            result.fixOutput = getFinalOutput(fixResult.messages);
            result.turns = (result.turns ?? 0) + fixResult.turns;
            result.tokensIn = (result.tokensIn ?? 0) + fixResult.tokensIn;
            result.tokensOut = (result.tokensOut ?? 0) + fixResult.tokensOut;
            result.cost = (result.cost ?? 0) + fixResult.cost;

            if (aborted()) break;

            // Re-run gate
            gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
            result.gateAttempts++;
            result.gateOutput = gateResult.stdout + gateResult.stderr;
          }

          if (gateResult.exitCode !== 0) {
            const reason = aborted()
              ? "aborted while the gate was failing"
              : `gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`;
            result.status = "failed";
            result.error = reason.charAt(0).toUpperCase() + reason.slice(1);
            updateUI();
            ctx.ui.notify(`Phase ${phase.number} blocked: ${reason}`, "error");
            break;
          }

          // Phase complete
          result.status = "done";
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} complete!`, "success");
        } catch (err: unknown) {
          result.status = "failed";
          result.error = describeError(err);
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} failed: ${result.error}`, "error");
          break;
        }
      }

      // Final summary
      const completed = results.filter((r) => r.status === "done").length;
      const failed = results.filter((r) => r.status === "failed").length;
      const notRun = results.length - completed - failed;
      const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
      const totalTurns = results.reduce((sum, r) => sum + (r.turns ?? 0), 0);

      ctx.ui.setStatus(
        "plan-executor",
        `Done: ${completed}/${results.length} phases complete, ${failed} failed, $${totalCost.toFixed(4)}`,
      );

      const verdict =
        failed > 0
          ? `## Failed phases need manual attention`
          : notRun > 0
            ? `## ${notRun} phase(s) not run`
            : `## All phases passed ✅`;

      // Show final summary widget
      ctx.ui.setWidget("plan-executor", [
        `## Plan Execution Complete`,
        ``,
        `**Phases:** ${completed}/${results.length} complete${failed > 0 ? `, ${failed} failed` : ""}`,
        `**Total turns:** ${totalTurns}`,
        `**Total cost:** $${totalCost.toFixed(4)}`,
        ``,
        ...results.map((r) => {
          const usage = formatUsage(r);
          const usageInfo = usage ? ` — ${usage}` : "";
          const gateRuns = r.gateAttempts > 1 ? ` (gate: ${r.gateAttempts} runs)` : "";
          return `${statusIcon(r.status)} Phase ${r.phase}: ${r.title}${usageInfo}${gateRuns}`;
        }),
        ``,
        verdict,
      ]);
    },
  });
}