/**
 * Plan Executor - Execute multi-phase plans deterministically
 *
 * Reads a plan file with numbered phases and executes each phase
 * in a clean, isolated pi process. After each phase, runs the
 * quality gate and auto-fixes any regressions before proceeding.
 *
 * Usage:
 *   /execute-plan plans/fix-readme-issues.md
 *   /execute-plan plans/fix-readme-issues.md --phase 2
 *   /execute-plan plans/fix-readme-issues.md --gate "mix test"
 *   /execute-plan plans/fix-readme-issues.md --dry-run
 */

import { spawn } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import type { Message } from "@mariozechner/pi-ai";
import {
	type ExtensionAPI,
	getMarkdownTheme,
	withFileMutationQueue,
} from "@mariozechner/pi-coding-agent";
import { Container, Markdown, Spacer, Text } from "@mariozechner/pi-tui";

// ─── Phase Executor System Prompt ────────────────────────────────────────────

const PHASE_SYSTEM_PROMPT = `You are executing a single phase of a larger plan. Your job is to complete ONLY this phase.

## Instructions

1. Read the phase description carefully
2. Make all the changes described in the phase
3. Run the verification command(s) listed in the phase
4. When ALL changes are done and verification passes, respond with:
DONE: <brief summary of what was changed>

Do NOT proceed to any other phases. Do NOT make changes outside this phase's scope.

If verification fails, diagnose and fix the issue within this phase's scope, then re-verify.

## Important

- If the phase says "remove lines X-Y", do exactly that
- If the phase says "run mix format", do that
- Always verify before declaring DONE
- If you cannot complete the phase, explain what is blocking you instead of saying DONE`;

// ─── Gate Fix System Prompt ──────────────────────────────────────────────────

const GATE_FIX_SYSTEM_PROMPT = `You are fixing issues found by the quality gate.

The following command failed:

    {GATE_COMMAND}

Here is the output showing what failed.
Fix ALL reported issues, then verify by running the check command(s) mentioned in the output.

When ALL issues are fixed and verification passes, respond with:
DONE: <brief summary of the fixes>

If verification still fails after your fixes, diagnose and fix remaining issues. Keep iterating until clean.`;

// ─── Constants ───────────────────────────────────────────────────────────────

/** Gate command used when `--gate` is not supplied. */
const DEFAULT_GATE_COMMAND = "make precommit";

/** Maximum number of agent fix runs per phase when the gate keeps failing. */
const MAX_FIX_ATTEMPTS = 3;

/** Grace period between SIGTERM and SIGKILL when aborting an agent process. */
const KILL_GRACE_MS = 5000;

const USAGE = "Usage: /execute-plan <plan-file> [--phase N] [--gate CMD] [--dry-run]";

/** Flags that terminate an unquoted multi-word `--gate` value. */
const KNOWN_FLAGS: ReadonlySet<string> = new Set(["--phase", "--gate", "--dry-run"]);

// ─── Types ───────────────────────────────────────────────────────────────────

interface Phase {
	number: number;
	title: string;
	content: string;
	verifyCommand?: string;
}

type PhaseStatus = "pending" | "running" | "done" | "failed" | "fixing";

interface PhaseResult {
	phase: number;
	title: string;
	status: PhaseStatus;
	agentOutput?: string;
	agentMessages?: Message[];
	gateAttempts: number;
	gateOutput?: string;
	fixOutput?: string;
	turns?: number;
	tokensIn?: number;
	tokensOut?: number;
	cost?: number;
	model?: string;
	error?: string;
}

/** Result of parsing the raw `/execute-plan` argument string. */
interface PlanArgs {
	planFile?: string;
	dryRun: boolean;
	startPhase: number;
	gateCommand: string;
	/** Set when the arguments are invalid; the other fields are then unreliable. */
	error?: string;
}

// ─── Argument Parsing ────────────────────────────────────────────────────────

/**
 * Split an argument string on whitespace while keeping "double" or 'single'
 * quoted segments together (quotes are removed from the resulting token).
 */
function tokenizeArgs(input: string): string[] {
	const tokens: string[] = [];
	for (const match of input.matchAll(/"([^"]*)"|'([^']*)'|(\S+)/g)) {
		tokens.push(match[1] ?? match[2] ?? match[3] ?? "");
	}
	return tokens;
}

/**
 * Parse the `/execute-plan` argument string.
 *
 * Supported: `<plan-file>`, `--dry-run`, `--phase N` (positive integer) and
 * `--gate CMD`. The gate command may be quoted (`--gate "mix test"`) or
 * unquoted, in which case it extends up to the next known flag.
 *
 * @returns the parsed options; `error` is set when the input is invalid.
 */
function parsePlanArgs(rawArgs: string): PlanArgs {
	const tokens = tokenizeArgs(rawArgs);
	const parsed: PlanArgs = {
		dryRun: false,
		startPhase: 1,
		gateCommand: DEFAULT_GATE_COMMAND,
	};

	let i = 0;
	while (i < tokens.length) {
		const token = tokens[i++];
		if (token === undefined) break;

		if (token === "--dry-run") {
			parsed.dryRun = true;
		} else if (token === "--phase") {
			const value = tokens[i++];
			const phaseNumber = Number(value);
			// A missing or non-numeric value used to become NaN and silently skip every phase.
			if (value === undefined || !Number.isInteger(phaseNumber) || phaseNumber < 1) {
				parsed.error = `--phase expects a positive integer, got: ${value ?? "(nothing)"}`;
				return parsed;
			}
			parsed.startPhase = phaseNumber;
		} else if (token === "--gate") {
			const words: string[] = [];
			while (i < tokens.length) {
				const next = tokens[i];
				if (next === undefined || KNOWN_FLAGS.has(next)) break;
				words.push(next);
				i++;
			}
			if (words.length === 0) {
				parsed.error = "--gate expects a command";
				return parsed;
			}
			parsed.gateCommand = words.join(" ");
		} else if (token.startsWith("--")) {
			// Reject typos such as --dryrun instead of executing the plan for real.
			parsed.error = `Unknown option: ${token}`;
			return parsed;
		} else if (parsed.planFile === undefined) {
			parsed.planFile = token;
		} else {
			parsed.error = `Unexpected argument: ${token}`;
			return parsed;
		}
	}

	return parsed;
}

// ─── Plan Parsing ────────────────────────────────────────────────────────────

// Phase headers like "## 1. Fix test failure" or "1. Fix test failure".
const PHASE_HEADER_REGEX = /^(?:##\s*)?(\d+)\.\s+(.+)$/;
// Any markdown heading that is not itself a numbered phase header
// (this also covers "## Execution ..." and "## Risk ..." sections).
const SECTION_HEADER_REGEX = /^#{1,6}\s+[^#\d]/;
const SEPARATOR_REGEX = /^---+$/;

/**
 * Extract the numbered phases from a plan document.
 *
 * A phase starts at a numbered header and collects the following lines as its
 * content. A non-phase markdown heading closes the current phase when it
 * already has content; if the phase has no content yet, the heading line is
 * dropped and the phase stays open. Horizontal rules are skipped until the
 * phase has some content. A trailing phase without content is discarded.
 *
 * @param planContent full text of the plan file (LF or CRLF line endings)
 * @returns phases in document order
 */
function parsePhases(planContent: string): Phase[] {
	const phases: Phase[] = [];
	let currentPhase: Phase | null = null;

	// Split on CRLF as well: a trailing "\r" would stop PHASE_HEADER_REGEX from matching.
	for (const line of planContent.split(/\r?\n/)) {
		const match = line.match(PHASE_HEADER_REGEX);

		if (match) {
			if (currentPhase) phases.push(currentPhase);
			currentPhase = {
				number: parseInt(match[1] ?? "", 10),
				title: (match[2] ?? "").trim(),
				content: "",
			};
			continue;
		}

		if (!currentPhase) continue;

		if (SECTION_HEADER_REGEX.test(line)) {
			if (currentPhase.content.trim()) {
				phases.push(currentPhase);
				currentPhase = null;
			}
			continue;
		}

		const isSeparator = SEPARATOR_REGEX.test(line.trim());
		if (!isSeparator || currentPhase.content.length > 0) {
			currentPhase.content += line + "\n";
		}
	}

	if (currentPhase && currentPhase.content.trim()) {
		phases.push(currentPhase);
	}

	return phases;
}

// ─── Process Helpers ─────────────────────────────────────────────────────────

/**
 * Build the command line used to start a nested pi process.
 *
 * Re-invokes the currently running script through `process.execPath` when
 * `process.argv[1]` is an existing file outside the "/$bunfs/root/" prefix;
 * otherwise falls back to the `pi` command.
 */
function getPiInvocation(args: string[]): { command: string; args: string[] } {
	const currentScript = process.argv[1];
	const isBunVirtualScript = currentScript?.startsWith("/$bunfs/root/");
	if (currentScript && !isBunVirtualScript && fs.existsSync(currentScript)) {
		return { command: process.execPath, args: [currentScript, ...args] };
	}
	return { command: "pi", args };
}

/**
 * Write `prompt` to a file inside a freshly created temp directory.
 *
 * @returns the directory (for later cleanup) and the prompt file path
 */
async function writePromptToTempFile(
	name: string,
	prompt: string,
): Promise<{ dir: string; filePath: string }> {
	const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-plan-exec-"));
	const safeName = name.replace(/[^\w.-]+/g, "_");
	const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
	await withFileMutationQueue(filePath, () =>
		fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 }),
	);
	return { dir: tmpDir, filePath };
}

/** Best-effort removal of a temp directory; failures are deliberately ignored. */
async function cleanupTemp(dir: string | null): Promise<void> {
	if (!dir) return;
	try {
		await fs.promises.rm(dir, { recursive: true, force: true });
	} catch {
		/* ignore */
	}
}

interface AgentRunResult {
	exitCode: number;
	messages: Message[];
	stderr: string;
	turns: number;
	tokensIn: number;
	tokensOut: number;
	cost: number;
	model?: string;
	stopReason?: string;
	errorMessage?: string;
}

/** Return the first text part of the most recent assistant message that has one, or "". */
function getFinalOutput(messages: Message[]): string {
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg && msg.role === "assistant") {
			for (const part of msg.content) {
				if (part.type === "text") return part.text;
			}
		}
	}
	return "";
}

/** True when any line of `output` starts with "DONE:" (case-insensitive, leading blanks allowed). */
function isDone(output: string): boolean {
	return /^[ \t]*DONE:/im.test(output);
}

/**
 * Run one task in a separate pi process in JSON mode and collect its messages.
 *
 * The system prompt is passed through a temp file that is removed afterwards.
 * The returned promise never rejects because of the child process: spawn
 * failures and non-zero exits are reported through `exitCode`, `stderr` and
 * `errorMessage`. It can still reject if the prompt file cannot be written.
 *
 * @param cwd working directory of the child process
 * @param systemPrompt text appended to the child's system prompt
 * @param task the user prompt for the child
 * @param signal aborting sends SIGTERM, then SIGKILL after a grace period
 * @param onUpdate called after every assistant message with the latest output
 */
async function runAgent(
	cwd: string,
	systemPrompt: string,
	task: string,
	signal?: AbortSignal,
	onUpdate?: (partial: { output: string; turns: number }) => void,
): Promise<AgentRunResult> {
	const { dir: promptDir, filePath: promptPath } = await writePromptToTempFile(
		"phase-executor",
		systemPrompt,
	);
	const args: string[] = [
		"--mode",
		"json",
		"-p",
		"--no-session",
		"--append-system-prompt",
		promptPath,
		task,
	];

	const result: AgentRunResult = {
		exitCode: 0,
		messages: [],
		stderr: "",
		turns: 0,
		tokensIn: 0,
		tokensOut: 0,
		cost: 0,
	};

	try {
		return await new Promise<AgentRunResult>((resolve) => {
			const invocation = getPiInvocation(args);
			const proc = spawn(invocation.command, invocation.args, {
				cwd,
				shell: false,
				stdio: ["ignore", "pipe", "pipe"],
			});
			// Decode at the stream level so multi-byte characters split across chunks survive.
			proc.stdout.setEncoding("utf8");
			proc.stderr.setEncoding("utf8");

			let buffer = "";
			let settled = false;
			let killTimer: ReturnType<typeof setTimeout> | undefined;

			const onAbort = () => {
				proc.kill("SIGTERM");
				killTimer = setTimeout(() => {
					// `proc.killed` only says a signal was sent, so test for an actual exit instead.
					if (proc.exitCode === null && proc.signalCode === null) proc.kill("SIGKILL");
				}, KILL_GRACE_MS);
			};

			const settle = () => {
				if (settled) return;
				settled = true;
				if (killTimer) clearTimeout(killTimer);
				// The signal outlives this run; do not leave a listener behind per invocation.
				signal?.removeEventListener("abort", onAbort);
				resolve(result);
			};

			const processLine = (line: string) => {
				if (!line.trim()) return;
				let event: { type?: unknown; message?: unknown };
				try {
					const parsed: unknown = JSON.parse(line);
					// Valid JSON that is not an object (null, numbers, strings) is not an event.
					if (typeof parsed !== "object" || parsed === null) return;
					event = parsed;
				} catch {
					return;
				}

				if (event.type === "message_end" && event.message) {
					const msg = event.message as Message;
					result.messages.push(msg);
					if (msg.role === "assistant") {
						result.turns++;
						const usage = msg.usage;
						if (usage) {
							result.tokensIn += usage.input || 0;
							result.tokensOut += usage.output || 0;
							result.cost += usage.cost?.total || 0;
						}
						if (!result.model && msg.model) result.model = msg.model;
						if (msg.stopReason) result.stopReason = msg.stopReason;
						if (msg.errorMessage) result.errorMessage = msg.errorMessage;
						// Stream progress
						onUpdate?.({ output: getFinalOutput(result.messages), turns: result.turns });
					}
				}

				if (event.type === "tool_result_end" && event.message) {
					result.messages.push(event.message as Message);
				}
			};

			proc.stdout.on("data", (chunk: string) => {
				buffer += chunk;
				const lines = buffer.split("\n");
				buffer = lines.pop() ?? "";
				for (const line of lines) processLine(line);
			});

			proc.stderr.on("data", (chunk: string) => {
				result.stderr += chunk;
			});

			proc.on("close", (code) => {
				if (buffer.trim()) processLine(buffer);
				// A null code means the process was terminated by a signal: not a success.
				result.exitCode = code ?? 1;
				settle();
			});

			proc.on("error", (err) => {
				result.exitCode = 1;
				result.stderr += `Failed to start agent process: ${err.message}\n`;
				if (!result.errorMessage) result.errorMessage = err.message;
				settle();
			});

			if (signal) {
				if (signal.aborted) onAbort();
				else signal.addEventListener("abort", onAbort, { once: true });
			}
		});
	} finally {
		await cleanupTemp(promptDir);
	}
}

/**
 * Run a shell command and capture its output.
 *
 * Never rejects: a spawn failure or termination by signal is reported as
 * exit code 1. Aborting `signal` sends SIGTERM to the shell.
 */
async function runCommand(
	cwd: string,
	command: string,
	signal?: AbortSignal,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
	return await new Promise((resolve) => {
		const proc = spawn(command, {
			cwd,
			shell: true,
			stdio: ["ignore", "pipe", "pipe"],
		});
		proc.stdout.setEncoding("utf8");
		proc.stderr.setEncoding("utf8");

		let stdout = "";
		let stderr = "";
		let settled = false;

		const onAbort = () => {
			proc.kill("SIGTERM");
		};

		const settle = (exitCode: number) => {
			if (settled) return;
			settled = true;
			signal?.removeEventListener("abort", onAbort);
			resolve({ exitCode, stdout, stderr });
		};

		proc.stdout.on("data", (chunk: string) => {
			stdout += chunk;
		});
		proc.stderr.on("data", (chunk: string) => {
			stderr += chunk;
		});
		proc.on("close", (code) => settle(code ?? 1));
		proc.on("error", (err) => {
			stderr += `Command failed to start: ${err.message}`;
			settle(1);
		});

		if (signal) {
			if (signal.aborted) onAbort();
			else signal.addEventListener("abort", onAbort, { once: true });
		}
	});
}

// ─── Format Helpers ──────────────────────────────────────────────────────────

/** Format a token count compactly: "950", "1.2k", "15k". */
function formatTokens(count: number): string {
	if (count < 1000) return count.toString();
	if (count < 10000) return `${(count / 1000).toFixed(1)}k`;
	return `${Math.round(count / 1000)}k`;
}

/** One-line usage summary for a phase; fields that are zero or unset are omitted. */
function formatUsage(result: PhaseResult): string {
	const parts: string[] = [];
	if (result.turns) parts.push(`${result.turns} turns`);
	if (result.tokensIn) parts.push(`↑${formatTokens(result.tokensIn)}`);
	if (result.tokensOut) parts.push(`↓${formatTokens(result.tokensOut)}`);
	if (result.cost) parts.push(`$${result.cost.toFixed(4)}`);
	if (result.model) parts.push(result.model);
	return parts.join(" ");
}

/** Icon shown in front of a phase for the given status. */
function statusIcon(status: PhaseStatus): string {
	switch (status) {
		case "done":
			return "✅";
		case "failed":
			return "❌";
		case "running":
		case "fixing":
			return "🔄";
		default:
			return "⏳";
	}
}

/** Extract a readable message from an unknown thrown value. */
function errorText(err: unknown): string {
	return err instanceof Error ? err.message : String(err);
}

// ─── Extension ───────────────────────────────────────────────────────────────

/**
 * Registers the `/execute-plan` command.
 *
 * The command parses the plan file into phases, runs each phase (from
 * `--phase N` onwards) in its own pi process, and after every phase that
 * reports DONE runs the gate command. A failing gate triggers up to
 * MAX_FIX_ATTEMPTS agent fix runs; if the gate still fails, execution stops.
 */
export default function (pi: ExtensionAPI) {
	pi.registerCommand("execute-plan", {
		description: "Execute a multi-phase plan file deterministically",
		handler: async (args, ctx) => {
			if (!ctx.hasUI) {
				ctx.ui.notify("execute-plan requires interactive mode", "error");
				return;
			}

			// Parse arguments
			const parsed = parsePlanArgs(args);
			if (parsed.error !== undefined) {
				ctx.ui.notify(`${parsed.error}. ${USAGE}`, "error");
				return;
			}
			const planFile = parsed.planFile;
			if (!planFile) {
				ctx.ui.notify(USAGE, "error");
				return;
			}
			const { dryRun, startPhase, gateCommand } = parsed;

			// Resolve path
			const resolvedPath = path.isAbsolute(planFile) ? planFile : path.join(ctx.cwd, planFile);
			if (!fs.existsSync(resolvedPath)) {
				ctx.ui.notify(`Plan file not found: ${resolvedPath}`, "error");
				return;
			}

			let planContent: string;
			try {
				planContent = fs.readFileSync(resolvedPath, "utf-8");
			} catch (err: unknown) {
				ctx.ui.notify(`Cannot read plan file ${resolvedPath}: ${errorText(err)}`, "error");
				return;
			}

			const phases = parsePhases(planContent);
			if (phases.length === 0) {
				ctx.ui.notify("No phases found in plan file", "error");
				return;
			}

			// Dry run: show plan and exit
			if (dryRun) {
				const lines = [
					`## Plan: ${path.basename(resolvedPath)}`,
					``,
					`Gate command: ${gateCommand}`,
					`${phases.length} phase(s) detected:`,
					``,
					...phases.map((p) => `### Phase ${p.number}: ${p.title}`),
					``,
					...phases.map((p) => p.content.trim()),
				];
				ctx.ui.setWidget("plan-executor", lines);
				return;
			}

			if (!phases.some((p) => p.number >= startPhase)) {
				ctx.ui.notify(`No phases numbered ${startPhase} or higher in plan file`, "error");
				return;
			}

			// Initialize results; results[i] always belongs to phases[i].
			const results: PhaseResult[] = phases.map((p) => ({
				phase: p.number,
				title: p.title,
				status: "pending",
				gateAttempts: 0,
			}));

			const updateUI = () => {
				const lines = [
					`## Plan Executor: ${path.basename(resolvedPath)}`,
					``,
					...results.map((r) => {
						const usage = r.turns ? ` (${formatUsage(r)})` : "";
						const gateInfo = r.gateAttempts > 0 ? ` [gate: ${r.gateAttempts}]` : "";
						return `${statusIcon(r.status)} Phase ${r.phase}: ${r.title}${usage}${gateInfo}`;
					}),
					``,
				];
				ctx.ui.setWidget("plan-executor", lines);
			};

			const isAborted = () => ctx.signal?.aborted === true;
			let aborted = false;

			// Show initial state
			updateUI();

			// Execute phases sequentially
			for (const [index, phase] of phases.entries()) {
				if (phase.number < startPhase) continue;
				if (isAborted()) {
					aborted = true;
					break;
				}

				// Look up by position: phase numbers are not guaranteed to be 1..N without gaps.
				const result = results[index];
				if (!result) continue;

				result.status = "running";
				updateUI();
				ctx.ui.setStatus("plan-executor", `Executing Phase ${phase.number}: ${phase.title}`);

				// Build task prompt from phase content
				const task = `## Phase ${phase.number}: ${phase.title}\n\n${phase.content.trim()}`;

				// Run phase in isolated context
				let agentResult: AgentRunResult;
				try {
					agentResult = await runAgent(
						ctx.cwd,
						PHASE_SYSTEM_PROMPT,
						task,
						ctx.signal,
						({ output, turns }) => {
							result.turns = turns;
							const status = isDone(output) ? "✅" : "🔄";
							ctx.ui.setStatus(
								"plan-executor",
								`${status} Phase ${phase.number} (${turns} turns): ${phase.title}`,
							);
						},
					);
				} catch (err: unknown) {
					const message = errorText(err);
					result.status = "failed";
					result.error = message;
					updateUI();
					ctx.ui.notify(`Phase ${phase.number} failed: ${message}`, "error");
					break;
				}

				result.agentOutput = getFinalOutput(agentResult.messages);
				result.agentMessages = agentResult.messages;
				result.turns = agentResult.turns;
				result.tokensIn = agentResult.tokensIn;
				result.tokensOut = agentResult.tokensOut;
				result.cost = agentResult.cost;
				result.model = agentResult.model;

				// Check if agent said DONE
				if (!isDone(result.agentOutput ?? "")) {
					result.status = "failed";
					if (isAborted()) {
						// Do not keep spawning the remaining phases once the user has cancelled.
						aborted = true;
						result.error = "Aborted";
						updateUI();
						ctx.ui.notify(`Phase ${phase.number} aborted`, "warning");
						break;
					}
					result.error = "Agent did not signal completion (no DONE: message)";
					updateUI();
					ctx.ui.notify(
						`Phase ${phase.number} incomplete: agent did not signal DONE`,
						"warning",
					);
					continue;
				}

				// Run gate
				ctx.ui.setStatus(
					"plan-executor",
					`Phase ${phase.number} done, running gate: ${gateCommand}...`,
				);
				let gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
				result.gateAttempts++;
				result.gateOutput = gateResult.stdout + gateResult.stderr;

				// If gate fails, fix in a loop
				let fixAttempt = 0;
				let fixError: string | undefined;
				while (gateResult.exitCode !== 0 && fixAttempt < MAX_FIX_ATTEMPTS && !isAborted()) {
					fixAttempt++;
					result.status = "fixing";
					updateUI();
					ctx.ui.setStatus(
						"plan-executor",
						`Fixing gate issues (attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS})...`,
					);

					const fixTask = `## Gate Failed\n\nThe following command failed after completing Phase ${phase.number}:\n\n    ${gateCommand}\n\nOutput:\n\n\`\`\`\n${gateResult.stdout}\n${gateResult.stderr}\n\`\`\`\n\nFix ALL reported issues.`;
					// Use a replacer function so "$&", "$'" etc. inside the command are inserted literally.
					const fixSystemPrompt = GATE_FIX_SYSTEM_PROMPT.replace(
						"{GATE_COMMAND}",
						() => gateCommand,
					);

					let fixResult: AgentRunResult;
					try {
						fixResult = await runAgent(
							ctx.cwd,
							fixSystemPrompt,
							fixTask,
							ctx.signal,
							({ turns }) => {
								ctx.ui.setStatus(
									"plan-executor",
									`Fixing gate (${fixAttempt}/${MAX_FIX_ATTEMPTS}, ${turns} turns)...`,
								);
							},
						);
					} catch (err: unknown) {
						fixError = errorText(err);
						break;
					}

					result.fixOutput = getFinalOutput(fixResult.messages);
					result.turns = (result.turns ?? 0) + fixResult.turns;
					result.tokensIn = (result.tokensIn ?? 0) + fixResult.tokensIn;
					result.tokensOut = (result.tokensOut ?? 0) + fixResult.tokensOut;
					result.cost = (result.cost ?? 0) + fixResult.cost;

					if (isAborted()) break;

					// Re-run gate
					gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
					result.gateAttempts++;
					result.gateOutput = gateResult.stdout + gateResult.stderr;
				}

				if (gateResult.exitCode !== 0) {
					result.status = "failed";
					if (isAborted()) {
						aborted = true;
						result.error = "Aborted";
						updateUI();
						ctx.ui.notify(`Phase ${phase.number} aborted`, "warning");
					} else if (fixError !== undefined) {
						result.error = `Gate fix failed: ${fixError}`;
						updateUI();
						ctx.ui.notify(`Phase ${phase.number} blocked: ${result.error}`, "error");
					} else {
						result.error = `Gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`;
						updateUI();
						ctx.ui.notify(
							`Phase ${phase.number} blocked: gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`,
							"error",
						);
					}
					break;
				}

				// Phase complete
				result.status = "done";
				updateUI();
				ctx.ui.notify(`Phase ${phase.number} complete!`, "success");
			}

			// Final summary
			const completed = results.filter((r) => r.status === "done").length;
			const failed = results.filter((r) => r.status === "failed").length;
			const notRun = results.filter((r) => r.status === "pending").length;
			const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
			const totalTurns = results.reduce((sum, r) => sum + (r.turns ?? 0), 0);

			ctx.ui.setStatus(
				"plan-executor",
				`Done: ${completed}/${results.length} phases complete, ${failed} failed, $${totalCost.toFixed(4)}`,
			);

			let closingLine: string;
			if (failed > 0) closingLine = `## Failed phases need manual attention`;
			else if (notRun > 0) closingLine = `## ${notRun} phase(s) were not run`;
			else closingLine = `## All phases passed ✅`;

			// Show final summary widget
			const summaryLines = [
				aborted ? `## Plan Execution Aborted` : `## Plan Execution Complete`,
				``,
				`**Phases:** ${completed}/${results.length} complete${failed > 0 ? `, ${failed} failed` : ""}${notRun > 0 ? `, ${notRun} not run` : ""}`,
				`**Total turns:** ${totalTurns}`,
				`**Total cost:** $${totalCost.toFixed(4)}`,
				``,
				...results.map((r) => {
					// Phases that were never attempted keep the pending icon instead of a failure mark.
					const usage = formatUsage(r);
					const usageInfo = usage ? ` — ${usage}` : "";
					const gateRuns = r.gateAttempts > 1 ? ` (gate: ${r.gateAttempts} runs)` : "";
					return `${statusIcon(r.status)} Phase ${r.phase}: ${r.title}${usageInfo}${gateRuns}`;
				}),
				``,
				closingLine,
			];
			ctx.ui.setWidget("plan-executor", summaryLines);
		},
	});
}