diff --git a/.pi/extensions/plan-executor/README.md b/.pi/extensions/plan-executor/README.md new file mode 100644 index 0000000..0b66c82 --- /dev/null +++ b/.pi/extensions/plan-executor/README.md @@ -0,0 +1,104 @@ +# Plan Executor + +Execute multi-phase plan files deterministically, one phase at a time, in isolated contexts. + +## Concept + +Instead of dumping an entire plan into one conversation (where context bloats and the agent loses focus), this extension: + +1. **Parses** your plan file into numbered phases +2. **Executes** each phase in a **clean, isolated** `pi` subprocess (fresh context window) +3. **Runs the quality gate** (default: `make precommit`) after each phase +4. **Auto-fixes** any gate failures in another clean context (up to 3 attempts) +5. **Only proceeds** to the next phase when the gate passes + +This gives you deterministic, phase-by-phase execution with automatic quality gates. + +## Usage + +```bash +# Execute entire plan (default gate: make precommit) +/execute-plan plans/fix-readme-issues.md + +# Start from a specific phase +/execute-plan plans/fix-readme-issues.md --phase 2 + +# Use a custom gate command +/execute-plan plans/fix-readme-issues.md --gate "mix test && mix credo --strict" + +# Dry run — show detected phases without executing +/execute-plan plans/fix-readme-issues.md --dry-run +``` + +## Plan File Format + +Phases are detected by numbered headings: + +```markdown +## 1. Fix test failure + +Remove the fragile describe block... + +Verify: `mix test` + +--- + +## 2. Fix credo issues + +Fix all 41 credo issues... + +Verify: `mix credo --strict` +``` + +Each phase's content includes everything between its heading and the next phase heading (or a non-phase section like "Execution order" or "Risk assessment"). + +## Execution Flow + +``` +┌─────────────────────┐ +│ Parse plan file │ +│ extract phases │ +└────────┬────────────┘ + ▼ +┌─────────────────────┐ +│ For each phase: │ +│ │ +│ 1. 
Spawn clean pi │──► Agent executes phase +│ subprocess │ in isolated context +│ │ +│ 2. Agent says DONE │──► Phase changes applied +│ │ +│ 3. Run quality │──► Quality gate +│ gate command │ +│ │ +│ 4a. Gate passes? │──✅ Yes → Next phase +│ │ +│ 4b. Gate fails? │──❌ No → Spawn fix agent +│ (up to 3x) │ in another clean context +│ │ +│ Fix agent runs │──► Re-check gate +│ & re-verify │ +└─────────────────────┘ +``` + +## Widget + +While running, a widget in the TUI shows: +- Phase status (⏳ pending, 🔄 running, ✅ done, ❌ failed) +- Turn count and token usage per phase +- Gate attempt count +- Final summary with total cost + +## Configuration + +The extension uses your current pi model and tool configuration for spawned agents. Each phase runs with: +- Full tool access (bash, read, write, edit) +- Your default model +- A custom system prompt scoped to the phase + +## Safety + +- Each phase runs in a separate `pi` process (no shared state) +- `Ctrl+C` aborts the current agent and cleans up +- Quality gate prevents broken state from propagating between phases +- Max 3 fix attempts per phase before failing diff --git a/.pi/extensions/plan-executor/index.ts b/.pi/extensions/plan-executor/index.ts new file mode 100644 index 0000000..f366e74 --- /dev/null +++ b/.pi/extensions/plan-executor/index.ts @@ -0,0 +1,624 @@ +/** + * Plan Executor - Execute multi-phase plans deterministically + * + * Reads a plan file with numbered phases and executes each phase + * in a clean, isolated pi process. After each phase, runs the + * quality gate and auto-fixes any regressions before proceeding. 
+ * + * Usage: + * /execute-plan plans/fix-readme-issues.md + * /execute-plan plans/fix-readme-issues.md --phase 2 + * /execute-plan plans/fix-readme-issues.md --gate "mix test" + * /execute-plan plans/fix-readme-issues.md --dry-run + */ + +import { spawn } from "node:child_process"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import type { Message } from "@mariozechner/pi-ai"; +import { + type ExtensionAPI, + getMarkdownTheme, + withFileMutationQueue, +} from "@mariozechner/pi-coding-agent"; +import { Container, Markdown, Spacer, Text } from "@mariozechner/pi-tui"; + +// ─── Phase Executor System Prompt ──────────────────────────────────────────── + +const PHASE_SYSTEM_PROMPT = `You are executing a single phase of a larger plan. Your job is to complete ONLY this phase. + +## Instructions +1. Read the phase description carefully +2. Make all the changes described in the phase +3. Run the verification command(s) listed in the phase +4. When ALL changes are done and verification passes, respond with: + +DONE: + +Do NOT proceed to any other phases. Do NOT make changes outside this phase's scope. +If verification fails, diagnose and fix the issue within this phase's scope, then re-verify. + +## Important +- If the phase says "remove lines X-Y", do exactly that +- If the phase says "run mix format", do that +- Always verify before declaring DONE +- If you cannot complete the phase, explain what is blocking you instead of saying DONE`; + +// ─── Gate Fix System Prompt ────────────────────────────────────────────────── + +const GATE_FIX_SYSTEM_PROMPT = `You are fixing issues found by the quality gate. The following command failed: + + {GATE_COMMAND} + +Here is the output showing what failed. Fix ALL reported issues, then verify by running the check command(s) mentioned in the output. 
// ─── Types ───────────────────────────────────────────────────────────────────

/** One executable phase extracted from a plan file. */
interface Phase {
  /** Phase number as written in the plan (`2` for `## 2. Title`). */
  number: number;
  /** Text following the `N.` prefix on the phase line. */
  title: string;
  /** Phase body; empty string when the phase consists of a title only. */
  content: string;
  /** Command named on the phase's first `Verify:` line, if there is one. */
  verifyCommand?: string;
}

/** Per-phase bookkeeping rendered in the widget and the final summary. */
interface PhaseResult {
  phase: number;
  title: string;
  status: "pending" | "running" | "done" | "failed" | "fixing";
  /** Final assistant text of the phase agent. */
  agentOutput?: string;
  agentMessages?: Message[];
  /** Number of times the gate command has been run for this phase. */
  gateAttempts: number;
  /** Combined stdout + stderr of the most recent gate run. */
  gateOutput?: string;
  /** Final assistant text of the most recent fix agent. */
  fixOutput?: string;
  turns?: number;
  tokensIn?: number;
  tokensOut?: number;
  cost?: number;
  model?: string;
  error?: string;
}

// ─── Plan Parsing ────────────────────────────────────────────────────────────

/** Heading form of a phase line: `## 1. Title` (level-2 headings only). */
const HEADING_PHASE_RE = /^##\s*(\d+)\.\s+(.+)$/;
/** Bare form of a phase line: `1. Title`. */
const BARE_PHASE_RE = /^(\d+)\.\s+(.+)$/;
/** Any ATX markdown heading; group 1 is the run of `#` characters. */
const ANY_HEADING_RE = /^(#{1,6})\s+\S/;
/** Opening or closing marker of a fenced code block. */
const FENCE_RE = /^\s*(?:`{3,}|~{3,})/;
/** Horizontal rule used as a visual separator between phases. */
const SEPARATOR_RE = /^-{3,}$/;
/** `Verify: <command>` line inside a phase body. */
const VERIFY_RE = /^\s*Verify:\s*(.+?)\s*$/i;
/** Heading depth of `## N. Title` phase lines. */
const PHASE_HEADING_LEVEL = 2;

/** Phase under construction while the plan is being scanned. */
interface PhaseDraft {
  number: number;
  title: string;
  body: string[];
}

/** True for lines that carry no phase content (blank lines and `---` rules). */
function isPaddingLine(line: string): boolean {
  const trimmed = line.trim();
  return trimmed === "" || SEPARATOR_RE.test(trimmed);
}

/** Return the command from the first `Verify:` line, unwrapping `backticks`. */
function findVerifyCommand(body: readonly string[]): string | undefined {
  for (const line of body) {
    const match = VERIFY_RE.exec(line);
    if (!match) continue;
    const raw = match[1] ?? "";
    const ticked = /^`([^`]+)`$/.exec(raw);
    const command = (ticked ? ticked[1] ?? "" : raw).trim();
    if (command) return command;
  }
  return undefined;
}

/** Turn a draft into a `Phase`, trimming leading/trailing padding lines. */
function finishPhase(draft: PhaseDraft): Phase {
  let start = 0;
  let end = draft.body.length;
  while (start < end && isPaddingLine(draft.body[start] ?? "")) start++;
  while (end > start && isPaddingLine(draft.body[end - 1] ?? "")) end--;
  const body = draft.body.slice(start, end);

  const phase: Phase = {
    number: draft.number,
    title: draft.title,
    content: body.length > 0 ? `${body.join("\n")}\n` : "",
  };
  const verifyCommand = findVerifyCommand(body);
  if (verifyCommand !== undefined) phase.verifyCommand = verifyCommand;
  return phase;
}

/**
 * Split a plan document into its numbered phases.
 *
 * Phase lines are `## N. Title` headings. Plans that contain no such heading
 * fall back to bare `N. Title` lines, so heading-style plans may freely use
 * numbered lists inside a phase or in trailing sections.
 *
 * Rules:
 * - Both LF and CRLF line endings are accepted.
 * - Fenced code blocks are copied verbatim; `# comment` or `1. step` lines
 *   inside a fence never start or end a phase.
 * - In heading mode a phase ends at the next phase heading or at any other
 *   heading of level 2 or shallower; deeper headings (`###`) stay in the phase.
 *   In bare mode any heading ends the phase.
 * - Text outside a phase (preamble, "Execution order", ...) is ignored.
 * - Title-only phases are kept, wherever they appear, with empty `content`.
 * - Leading/trailing blank lines and `---` separators are stripped.
 *
 * @param planContent Raw text of the plan file.
 * @returns Phases in document order; empty when no phase line is found.
 */
function parsePhases(planContent: string): Phase[] {
  const lines = planContent.split(/\r?\n/);

  // Pass 1: mark every line that belongs to a fenced code block (markers included).
  const fenced: boolean[] = [];
  let insideFence = false;
  for (const line of lines) {
    const isMarker = FENCE_RE.test(line);
    fenced.push(insideFence || isMarker);
    if (isMarker) insideFence = !insideFence;
  }

  // Heading-style plans must not treat numbered list items as phases.
  const usesHeadings = lines.some((line, i) => !fenced[i] && HEADING_PHASE_RE.test(line));
  const phaseRe = usesHeadings ? HEADING_PHASE_RE : BARE_PHASE_RE;

  // Pass 2: collect phases.
  const phases: Phase[] = [];
  let open: PhaseDraft | null = null;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";

    if (fenced[i]) {
      open?.body.push(line);
      continue;
    }

    const header = phaseRe.exec(line);
    if (header) {
      if (open) phases.push(finishPhase(open));
      open = {
        number: parseInt(header[1] ?? "", 10),
        title: (header[2] ?? "").trim(),
        body: [],
      };
      continue;
    }

    if (!open) continue; // text outside any phase

    const heading = ANY_HEADING_RE.exec(line);
    const endsPhase =
      heading !== null && (!usesHeadings || (heading[1] ?? "").length <= PHASE_HEADING_LEVEL);
    if (endsPhase) {
      phases.push(finishPhase(open));
      open = null;
      continue;
    }

    open.body.push(line);
  }

  if (open) phases.push(finishPhase(open));
  return phases;
}

// ─── Process Helpers ─────────────────────────────────────────────────────────

/**
 * Build the command line used to launch a child `pi` process.
 *
 * When this process was started from a script that exists on disk, the same
 * runtime (`process.execPath`) is re-invoked with that script; script paths
 * under `/$bunfs/root/` are skipped. Otherwise `pi` is resolved via PATH.
 *
 * @param args Arguments to pass to `pi`.
 */
function getPiInvocation(args: string[]): { command: string; args: string[] } {
  const entryScript = process.argv[1];
  const isBunVirtualScript = entryScript?.startsWith("/$bunfs/root/") ?? false;
  const canReuseScript =
    entryScript !== undefined &&
    entryScript !== "" &&
    !isBunVirtualScript &&
    fs.existsSync(entryScript);

  return canReuseScript
    ? { command: process.execPath, args: [entryScript, ...args] }
    : { command: "pi", args };
}
/**
 * Write `prompt` to a file inside a freshly created temp directory.
 *
 * The file is created with mode 0o600. If the write fails the directory is
 * removed again before the error is rethrown, because the caller would
 * otherwise never learn its path.
 *
 * @param name Label used in the file name; characters outside `[\w.-]` become `_`.
 * @param prompt Text to store.
 * @returns The temp directory (pass to `cleanupTemp`) and the file path.
 */
async function writePromptToTempFile(
  name: string,
  prompt: string,
): Promise<{ dir: string; filePath: string }> {
  const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-plan-exec-"));
  const safeName = name.replace(/[^\w.-]+/g, "_");
  const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
  try {
    await withFileMutationQueue(filePath, () =>
      fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 }),
    );
  } catch (err) {
    await cleanupTemp(tmpDir);
    throw err;
  }
  return { dir: tmpDir, filePath };
}

/** Best-effort recursive removal of a temp directory; failures are ignored. */
async function cleanupTemp(dir: string | null): Promise<void> {
  if (!dir) return;
  try {
    await fs.promises.rm(dir, { recursive: true, force: true });
  } catch {
    /* ignore: cleanup must never mask the real outcome */
  }
}

/** Aggregated outcome of one `pi` subprocess run. */
interface AgentRunResult {
  /** Process exit code; 1 when the process failed to start or was killed by a signal. */
  exitCode: number;
  /** Messages from `message_end` and `tool_result_end` events, in arrival order. */
  messages: Message[];
  stderr: string;
  /** Number of assistant messages received. */
  turns: number;
  tokensIn: number;
  tokensOut: number;
  cost: number;
  /** Model reported by the first assistant message that names one. */
  model?: string;
  /** Stop reason of the most recent assistant message that reported one. */
  stopReason?: string;
  /** Most recent assistant error message, or the spawn error. */
  errorMessage?: string;
}

/**
 * Return the first text part of the most recent assistant message that has
 * one, or `""` when no assistant text exists.
 */
function getFinalOutput(messages: Message[]): string {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (msg === undefined || msg.role !== "assistant") continue;
    for (const part of msg.content) {
      if (part.type === "text") return part.text;
    }
  }
  return "";
}

/** True when any line of `output` starts with `DONE:` (case-insensitive). */
function isDone(output: string): boolean {
  return /^DONE:\s*/im.test(output);
}

/**
 * Run one task in a fresh `pi` subprocess and collect its JSON event stream.
 *
 * The system prompt is passed via a temp file (`--append-system-prompt`) that
 * is removed when the run ends. The returned promise never rejects because of
 * the subprocess itself: spawn failures and signal kills resolve with
 * `exitCode` 1. It can reject if the prompt file cannot be written.
 *
 * @param cwd Working directory for the subprocess.
 * @param systemPrompt Text appended to the agent's system prompt.
 * @param task User task passed as the final CLI argument.
 * @param signal When aborted, the subprocess gets SIGTERM, then SIGKILL after 5 s.
 * @param onUpdate Called after every assistant message with the latest output and turn count.
 */
async function runAgent(
  cwd: string,
  systemPrompt: string,
  task: string,
  signal?: AbortSignal,
  onUpdate?: (partial: { output: string; turns: number }) => void,
): Promise<AgentRunResult> {
  const { dir: promptDir, filePath: promptPath } = await writePromptToTempFile(
    "phase-executor",
    systemPrompt,
  );
  const args = [
    "--mode",
    "json",
    "-p",
    "--no-session",
    "--append-system-prompt",
    promptPath,
    task,
  ];

  const result: AgentRunResult = {
    exitCode: 0,
    messages: [],
    stderr: "",
    turns: 0,
    tokensIn: 0,
    tokensOut: 0,
    cost: 0,
  };

  // Consume one line of the JSON event stream; non-JSON noise is skipped.
  const handleLine = (line: string): void => {
    if (!line.trim()) return;

    let event: { type?: unknown; message?: unknown };
    try {
      const parsed: unknown = JSON.parse(line);
      // `null`, numbers, strings are valid JSON but not events.
      if (typeof parsed !== "object" || parsed === null) return;
      event = parsed as { type?: unknown; message?: unknown };
    } catch {
      return;
    }
    if (!event.message) return;

    if (event.type === "message_end") {
      const msg = event.message as Message;
      result.messages.push(msg);
      if (msg.role !== "assistant") return;

      result.turns++;
      const usage = msg.usage;
      if (usage) {
        result.tokensIn += usage.input ?? 0;
        result.tokensOut += usage.output ?? 0;
        result.cost += usage.cost?.total ?? 0;
      }
      if (!result.model && msg.model) result.model = msg.model;
      if (msg.stopReason) result.stopReason = msg.stopReason;
      if (msg.errorMessage) result.errorMessage = msg.errorMessage;

      onUpdate?.({ output: getFinalOutput(result.messages), turns: result.turns });
    } else if (event.type === "tool_result_end") {
      result.messages.push(event.message as Message);
    }
  };

  try {
    return await new Promise<AgentRunResult>((resolve) => {
      const invocation = getPiInvocation(args);
      const proc = spawn(invocation.command, invocation.args, {
        cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
      });
      // Decode at the stream level so multi-byte characters split across
      // chunks are reassembled instead of being corrupted.
      proc.stdout.setEncoding("utf8");
      proc.stderr.setEncoding("utf8");

      let pending = "";
      let settled = false;
      let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

      const onAbort = (): void => {
        proc.kill("SIGTERM");
        forceKillTimer = setTimeout(() => {
          // `proc.killed` only says a signal was sent, so test for actual exit.
          if (proc.exitCode === null && proc.signalCode === null) proc.kill("SIGKILL");
        }, 5000);
      };

      const finish = (exitCode: number): void => {
        if (settled) return; // "error" may be followed by "close"
        settled = true;
        if (forceKillTimer !== undefined) clearTimeout(forceKillTimer);
        signal?.removeEventListener("abort", onAbort);
        result.exitCode = exitCode;
        resolve(result);
      };

      proc.stdout.on("data", (chunk: string) => {
        pending += chunk;
        const lines = pending.split("\n");
        pending = lines.pop() ?? "";
        for (const line of lines) handleLine(line);
      });

      proc.stderr.on("data", (chunk: string) => {
        result.stderr += chunk;
      });

      proc.on("close", (code) => {
        if (pending.trim()) handleLine(pending);
        // `code` is null when the process was terminated by a signal.
        finish(code ?? 1);
      });

      proc.on("error", (err) => {
        result.stderr += `${result.stderr ? "\n" : ""}${err.message}`;
        if (!result.errorMessage) result.errorMessage = err.message;
        finish(1);
      });

      if (signal) {
        if (signal.aborted) onAbort();
        else signal.addEventListener("abort", onAbort, { once: true });
      }
    });
  } finally {
    await cleanupTemp(promptDir);
  }
}

/** Captured outcome of a shell command. */
type CommandResult = { exitCode: number; stdout: string; stderr: string };

/**
 * Run `command` through the shell in `cwd` and capture its output.
 *
 * Never rejects: a failure to start resolves with `exitCode` 1 and the reason
 * in `stderr`; termination by signal also resolves with `exitCode` 1.
 *
 * @param signal When aborted, the shell process receives SIGTERM.
 */
async function runCommand(
  cwd: string,
  command: string,
  signal?: AbortSignal,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  return await new Promise<CommandResult>((resolve) => {
    const proc = spawn(command, {
      cwd,
      shell: true,
      stdio: ["ignore", "pipe", "pipe"],
    });
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    let settled = false;

    const onAbort = (): void => {
      proc.kill("SIGTERM");
    };

    const finish = (outcome: CommandResult): void => {
      if (settled) return;
      settled = true;
      signal?.removeEventListener("abort", onAbort);
      resolve(outcome);
    };

    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    proc.on("close", (code) => {
      finish({ exitCode: code ?? 1, stdout, stderr });
    });

    proc.on("error", (err) => {
      finish({ exitCode: 1, stdout, stderr: `Command failed to start: ${err.message}` });
    });

    if (signal) {
      if (signal.aborted) onAbort();
      else signal.addEventListener("abort", onAbort, { once: true });
    }
  });
}

// ─── Format Helpers ──────────────────────────────────────────────────────────

/**
 * Compact token count: `999`, `1.5k`, `12k`, `1.2M`.
 *
 * Rounding is done before the bucket is chosen so boundary values such as
 * 9999 render as `10k` rather than `10.0k`.
 */
function formatTokens(count: number): string {
  if (count < 1000) return count.toString();

  const tenthsOfK = Math.round(count / 100);
  if (tenthsOfK < 100) return `${(tenthsOfK / 10).toFixed(1)}k`;

  const wholeK = Math.round(count / 1000);
  if (wholeK < 1000) return `${wholeK}k`;

  return `${(count / 1e6).toFixed(1)}M`;
}

/** One-line usage summary; fields that are zero or missing are omitted. */
function formatUsage(result: PhaseResult): string {
  const parts: string[] = [];
  if (result.turns) parts.push(`${result.turns} turns`);
  if (result.tokensIn) parts.push(`↑${formatTokens(result.tokensIn)}`);
  if (result.tokensOut) parts.push(`↓${formatTokens(result.tokensOut)}`);
  if (result.cost) parts.push(`$${result.cost.toFixed(4)}`);
  if (result.model) parts.push(result.model);
  return parts.join(" ");
}
// ─── Extension ───────────────────────────────────────────────────────────────

/** Gate command used when `--gate` is not given. */
const DEFAULT_GATE_COMMAND = "make precommit";
/** Maximum number of fix-agent runs per phase before the phase fails. */
const MAX_FIX_ATTEMPTS = 3;
const EXECUTE_PLAN_USAGE =
  "Usage: /execute-plan <plan-file> [--phase N] [--gate CMD] [--dry-run]";

/** Parsed `/execute-plan` arguments; `error` is set when they are invalid. */
interface ExecutePlanOptions {
  planFile?: string;
  dryRun: boolean;
  startPhase: number;
  gateCommand: string;
  error?: string;
}

/**
 * Split a command line into tokens, honouring single and double quotes so
 * that `--gate "mix test && mix credo --strict"` stays one token.
 * Backslash escapes are not interpreted; an unterminated quote runs to the end.
 */
function tokenizeArgs(input: string): string[] {
  const tokens: string[] = [];
  let current = "";
  let started = false;
  let quote: string | null = null;

  for (const ch of input) {
    if (quote !== null) {
      if (ch === quote) quote = null;
      else current += ch;
    } else if (ch === '"' || ch === "'") {
      quote = ch;
      started = true; // `""` is a valid (empty) token
    } else if (/\s/.test(ch)) {
      if (started) {
        tokens.push(current);
        current = "";
        started = false;
      }
    } else {
      current += ch;
      started = true;
    }
  }
  if (started) tokens.push(current);
  return tokens;
}

/**
 * Parse the argument string of `/execute-plan`.
 *
 * Accepts `--dry-run`, `--phase N` / `--phase=N` and `--gate CMD` / `--gate=CMD`
 * in any order around a single positional plan file. Problems are reported
 * through `error` instead of being silently ignored.
 */
function parseExecutePlanArgs(raw: string): ExecutePlanOptions {
  const options: ExecutePlanOptions = {
    dryRun: false,
    startPhase: 1,
    gateCommand: DEFAULT_GATE_COMMAND,
  };
  const tokens = tokenizeArgs(raw);

  let i = 0;
  while (i < tokens.length) {
    const token = tokens[i] ?? "";
    i++;

    const eq = token.startsWith("--") ? token.indexOf("=") : -1;
    const flag = eq === -1 ? token : token.slice(0, eq);
    let value: string | undefined = eq === -1 ? undefined : token.slice(eq + 1);

    if (flag === "--dry-run") {
      options.dryRun = true;
    } else if (flag === "--phase" || flag === "--gate") {
      if (value === undefined) {
        value = tokens[i];
        i++;
      }
      if (flag === "--phase") {
        if (value === undefined || !/^\d+$/.test(value)) {
          options.error = "--phase requires a non-negative integer";
          return options;
        }
        options.startPhase = parseInt(value, 10);
      } else {
        if (value === undefined || value.trim() === "") {
          options.error = "--gate requires a command";
          return options;
        }
        options.gateCommand = value;
      }
    } else if (token.startsWith("--")) {
      options.error = `Unknown option: ${flag}`;
      return options;
    } else if (options.planFile === undefined) {
      options.planFile = token;
    } else {
      options.error = `Unexpected argument: ${token}`;
      return options;
    }
  }
  return options;
}

/** Human-readable text for a caught value of unknown type. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Widget icon for a phase status. */
function statusIcon(status: PhaseResult["status"]): string {
  switch (status) {
    case "done":
      return "✅";
    case "failed":
      return "❌";
    case "running":
    case "fixing":
      return "🔄";
    default:
      return "⏳";
  }
}

/**
 * Register the `/execute-plan` command.
 *
 * For each selected phase the handler runs an agent subprocess, requires a
 * `DONE:` reply, then runs the gate command; on gate failure it runs up to
 * `MAX_FIX_ATTEMPTS` fix agents, re-checking the gate after each. Execution
 * stops at the first phase that fails, errors, or is aborted, so later phases
 * never run on top of an unverified state.
 */
export default function (pi: ExtensionAPI) {
  pi.registerCommand("execute-plan", {
    description: "Execute a multi-phase plan file deterministically",
    handler: async (args, ctx) => {
      if (!ctx.hasUI) {
        ctx.ui.notify("execute-plan requires interactive mode", "error");
        return;
      }

      const options = parseExecutePlanArgs(args);
      if (options.error !== undefined || options.planFile === undefined) {
        const prefix = options.error !== undefined ? `${options.error}. ` : "";
        ctx.ui.notify(`${prefix}${EXECUTE_PLAN_USAGE}`, "error");
        return;
      }
      const planFile = options.planFile;
      const { dryRun, startPhase, gateCommand } = options;

      const resolvedPath = path.isAbsolute(planFile) ? planFile : path.join(ctx.cwd, planFile);
      if (!fs.existsSync(resolvedPath)) {
        ctx.ui.notify(`Plan file not found: ${resolvedPath}`, "error");
        return;
      }

      let planContent: string;
      try {
        planContent = fs.readFileSync(resolvedPath, "utf-8");
      } catch (err: unknown) {
        ctx.ui.notify(`Cannot read plan file ${resolvedPath}: ${describeError(err)}`, "error");
        return;
      }

      const phases = parsePhases(planContent);
      if (phases.length === 0) {
        ctx.ui.notify("No phases found in plan file", "error");
        return;
      }

      // Dry run: show each phase heading followed by its content, then exit.
      if (dryRun) {
        ctx.ui.setWidget("plan-executor", [
          `## Plan: ${path.basename(resolvedPath)}`,
          ``,
          `Gate command: ${gateCommand}`,
          `${phases.length} phase(s) detected:`,
          ``,
          ...phases.flatMap((p) => [
            `### Phase ${p.number}: ${p.title}`,
            ...p.content.trim().split("\n"),
            ``,
          ]),
        ]);
        return;
      }

      // Pair every phase with its own result record. Pairing by position keeps
      // this correct even when plan numbering has gaps or does not start at 1.
      const entries = phases.map((phase) => {
        const result: PhaseResult = {
          phase: phase.number,
          title: phase.title,
          status: "pending",
          gateAttempts: 0,
        };
        return { phase, result };
      });
      const results = entries.map((entry) => entry.result);
      const selected = entries.filter((entry) => entry.phase.number >= startPhase);

      if (selected.length === 0) {
        ctx.ui.notify(`No phase numbered ${startPhase} or higher in plan file`, "error");
        return;
      }

      const isAborted = (): boolean => ctx.signal?.aborted === true;

      const updateUI = () => {
        ctx.ui.setWidget("plan-executor", [
          `## Plan Executor: ${path.basename(resolvedPath)}`,
          ``,
          ...results.map((r) => {
            const usage = r.turns ? ` (${formatUsage(r)})` : "";
            const gateInfo = r.gateAttempts > 0 ? ` [gate: ${r.gateAttempts}]` : "";
            return `${statusIcon(r.status)} Phase ${r.phase}: ${r.title}${usage}${gateInfo}`;
          }),
          ``,
        ]);
      };

      updateUI();

      for (const { phase, result } of selected) {
        if (isAborted()) break;

        result.status = "running";
        updateUI();
        ctx.ui.setStatus("plan-executor", `Executing Phase ${phase.number}: ${phase.title}`);

        const task = `## Phase ${phase.number}: ${phase.title}\n\n${phase.content.trim()}`;

        // Run the phase in an isolated context.
        let agentResult: AgentRunResult;
        try {
          agentResult = await runAgent(
            ctx.cwd,
            PHASE_SYSTEM_PROMPT,
            task,
            ctx.signal,
            ({ output, turns }) => {
              result.turns = turns;
              const status = isDone(output) ? "✅" : "🔄";
              ctx.ui.setStatus(
                "plan-executor",
                `${status} Phase ${phase.number} (${turns} turns): ${phase.title}`,
              );
            },
          );
        } catch (err: unknown) {
          const reason = describeError(err);
          result.status = "failed";
          result.error = reason;
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} failed: ${reason}`, "error");
          break;
        }

        const agentOutput = getFinalOutput(agentResult.messages);
        result.agentOutput = agentOutput;
        result.agentMessages = agentResult.messages;
        result.turns = agentResult.turns;
        result.tokensIn = agentResult.tokensIn;
        result.tokensOut = agentResult.tokensOut;
        result.cost = agentResult.cost;
        result.model = agentResult.model;

        if (isAborted()) {
          result.status = "failed";
          result.error = "Aborted";
          updateUI();
          break;
        }

        // Without a DONE signal the phase is unverified; running later phases
        // on top of it would bypass the gate, so stop here.
        if (!isDone(agentOutput)) {
          result.status = "failed";
          result.error = agentResult.errorMessage
            ? `Agent error: ${agentResult.errorMessage}`
            : "Agent did not signal completion (no DONE: message)";
          updateUI();
          ctx.ui.notify(
            `Phase ${phase.number} incomplete: agent did not signal DONE`,
            "warning",
          );
          break;
        }

        // Run the gate.
        ctx.ui.setStatus(
          "plan-executor",
          `Phase ${phase.number} done, running gate: ${gateCommand}...`,
        );
        let gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
        result.gateAttempts++;
        result.gateOutput = gateResult.stdout + gateResult.stderr;

        // Gate failed: let fix agents repair it, re-checking after each one.
        let fixAttempt = 0;
        let fixError: string | undefined;

        while (gateResult.exitCode !== 0 && fixAttempt < MAX_FIX_ATTEMPTS && !isAborted()) {
          fixAttempt++;
          result.status = "fixing";
          updateUI();
          ctx.ui.setStatus(
            "plan-executor",
            `Fixing gate issues (attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS})...`,
          );

          const fixTask = `## Gate Failed\n\nThe following command failed after completing Phase ${phase.number}:\n\n ${gateCommand}\n\nOutput:\n\n\`\`\`\n${gateResult.stdout}\n${gateResult.stderr}\n\`\`\`\n\nFix ALL reported issues.`;

          // A replacer function keeps `$&`, `$1`, `$$` in the gate command
          // literal; a plain string replacement would expand them.
          const fixSystemPrompt = GATE_FIX_SYSTEM_PROMPT.replace(
            "{GATE_COMMAND}",
            () => gateCommand,
          );

          let fixResult: AgentRunResult;
          try {
            fixResult = await runAgent(
              ctx.cwd,
              fixSystemPrompt,
              fixTask,
              ctx.signal,
              ({ turns }) => {
                ctx.ui.setStatus(
                  "plan-executor",
                  `Fixing gate (${fixAttempt}/${MAX_FIX_ATTEMPTS}, ${turns} turns)...`,
                );
              },
            );
          } catch (err: unknown) {
            fixError = describeError(err);
            break;
          }

          result.fixOutput = getFinalOutput(fixResult.messages);
          result.turns = (result.turns ?? 0) + fixResult.turns;
          result.tokensIn = (result.tokensIn ?? 0) + fixResult.tokensIn;
          result.tokensOut = (result.tokensOut ?? 0) + fixResult.tokensOut;
          result.cost = (result.cost ?? 0) + fixResult.cost;

          if (isAborted()) break;

          // Re-run the gate.
          gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
          result.gateAttempts++;
          result.gateOutput = gateResult.stdout + gateResult.stderr;
        }

        if (gateResult.exitCode !== 0) {
          const reason = isAborted()
            ? "Aborted"
            : fixError !== undefined
              ? `Fix agent could not run: ${fixError}`
              : `Gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`;
          result.status = "failed";
          result.error = reason;
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} blocked: ${reason}`, "error");
          break;
        }

        result.status = "done";
        updateUI();
        ctx.ui.notify(`Phase ${phase.number} complete!`, "success");
      }

      // Final summary.
      const completed = results.filter((r) => r.status === "done").length;
      const failed = results.filter((r) => r.status === "failed").length;
      const notRun = selected.filter((entry) => entry.result.status === "pending").length;
      const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
      const totalTurns = results.reduce((sum, r) => sum + (r.turns ?? 0), 0);

      ctx.ui.setStatus(
        "plan-executor",
        `Done: ${completed}/${results.length} phases complete, ${failed} failed, $${totalCost.toFixed(4)}`,
      );

      const verdict =
        failed > 0
          ? `## Failed phases need manual attention`
          : notRun > 0
            ? `## Execution stopped before all phases ran`
            : `## All phases passed ✅`;

      ctx.ui.setWidget("plan-executor", [
        `## Plan Execution Complete`,
        ``,
        `**Phases:** ${completed}/${results.length} complete${failed > 0 ? `, ${failed} failed` : ""}`,
        `**Total turns:** ${totalTurns}`,
        `**Total cost:** $${totalCost.toFixed(4)}`,
        ``,
        ...entries.map(({ phase, result: r }) => {
          // Phases below --phase were skipped on purpose; do not show them as failures.
          const skipped = phase.number < startPhase && r.status === "pending";
          const icon = skipped ? "⏭" : statusIcon(r.status);
          const usage = formatUsage(r);
          const usageText = usage ? ` — ${usage}` : "";
          const gateRuns = r.gateAttempts > 1 ? ` (gate: ${r.gateAttempts} runs)` : "";
          return `${icon} Phase ${r.phase}: ${r.title}${usageText}${gateRuns}`;
        }),
        ``,
        verdict,
      ]);
    },
  });
}