2026-05-05 14:43:44 +01:00

625 lines
20 KiB
TypeScript

/**
* Plan Executor - Execute multi-phase plans deterministically
*
* Reads a plan file with numbered phases and executes each phase
* in a clean, isolated pi process. After each phase, runs the
* quality gate and auto-fixes any regressions before proceeding.
*
* Usage:
* /execute-plan plans/fix-readme-issues.md
* /execute-plan plans/fix-readme-issues.md --phase 2
* /execute-plan plans/fix-readme-issues.md --gate "mix test"
* /execute-plan plans/fix-readme-issues.md --dry-run
*/
import { spawn } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import type { Message } from "@mariozechner/pi-ai";
import {
type ExtensionAPI,
getMarkdownTheme,
withFileMutationQueue,
} from "@mariozechner/pi-coding-agent";
import { Container, Markdown, Spacer, Text } from "@mariozechner/pi-tui";
// ─── Phase Executor System Prompt ────────────────────────────────────────────
/**
 * System prompt used for the agent that executes a single plan phase.
 *
 * It is written to a temp file by `runAgent` and passed to the child process
 * via `--append-system-prompt`. The prompt asks the agent to finish with a
 * line starting with `DONE:`; the executor looks for that marker with
 * `isDone`, so the completion wording here and that check must stay in sync.
 */
const PHASE_SYSTEM_PROMPT = `You are executing a single phase of a larger plan. Your job is to complete ONLY this phase.
## Instructions
1. Read the phase description carefully
2. Make all the changes described in the phase
3. Run the verification command(s) listed in the phase
4. When ALL changes are done and verification passes, respond with:
DONE: <brief summary of what was done>
Do NOT proceed to any other phases. Do NOT make changes outside this phase's scope.
If verification fails, diagnose and fix the issue within this phase's scope, then re-verify.
## Important
- If the phase says "remove lines X-Y", do exactly that
- If the phase says "run mix format", do that
- Always verify before declaring DONE
- If you cannot complete the phase, explain what is blocking you instead of saying DONE`;
// ─── Gate Fix System Prompt ──────────────────────────────────────────────────
/**
 * System prompt template for the agent that repairs quality-gate failures.
 *
 * The literal `{GATE_COMMAND}` placeholder is substituted with the actual gate
 * command by the command handler before the prompt is handed to `runAgent`.
 */
const GATE_FIX_SYSTEM_PROMPT = `You are fixing issues found by the quality gate. The following command failed:
{GATE_COMMAND}
Here is the output showing what failed. Fix ALL reported issues, then verify by running the check command(s) mentioned in the output.
When ALL issues are fixed and verification passes, respond with:
DONE: <brief summary of fixes>
If verification still fails after your fixes, diagnose and fix remaining issues. Keep iterating until clean.`;
// ─── Types ───────────────────────────────────────────────────────────────────
/** One numbered phase extracted from a plan file by `parsePhases`. */
interface Phase {
/** Phase number as written in the plan header (e.g. the `1` in `## 1. Title`). */
number: number;
/** Header text following the number, trimmed. */
title: string;
/** Body lines of the phase, each terminated with a newline. */
content: string;
/** Optional verification command. NOTE(review): `parsePhases` in this file never sets it. */
verifyCommand?: string;
}
/** Mutable execution record for one phase, rendered in the progress widget and the final summary. */
interface PhaseResult {
/** Phase number copied from the parsed `Phase`. */
phase: number;
/** Phase title copied from the parsed `Phase`. */
title: string;
/** Lifecycle state; `fixing` means a gate-fix agent is running for this phase. */
status: "pending" | "running" | "done" | "failed" | "fixing";
/** Final assistant text of the phase agent run. */
agentOutput?: string;
/** All messages collected from the phase agent run. */
agentMessages?: Message[];
/** Number of times the gate command has been run for this phase. */
gateAttempts: number;
/** Combined stdout + stderr of the most recent gate run. */
gateOutput?: string;
/** Final assistant text of the most recent gate-fix agent run. */
fixOutput?: string;
/** Assistant turns, summed over the phase agent and any fix agents. */
turns?: number;
/** Input tokens, summed over the phase agent and any fix agents. */
tokensIn?: number;
/** Output tokens, summed over the phase agent and any fix agents. */
tokensOut?: number;
/** Cost, summed over the phase agent and any fix agents. */
cost?: number;
/** Model reported by the phase agent run. */
model?: string;
/** Human-readable failure reason when `status` is `failed`. */
error?: string;
}
// ─── Plan Parsing ────────────────────────────────────────────────────────────
/**
 * Split a markdown plan into its numbered phases.
 *
 * A phase starts at a line of the form `## 1. Title` or `1. Title`. Every
 * following line is appended to that phase's `content` until either the next
 * phase header or a markdown heading whose text does not start with a digit
 * or `#` (for example `## Risk Assessment`).
 *
 * Rules worth knowing:
 * - A non-phase heading met before the phase has any content is dropped and
 *   the phase stays open; once the phase has content, the heading closes it
 *   and subsequent lines are ignored until the next phase header.
 * - `---` separator lines are skipped only while the phase is still empty.
 * - A phase that is still open at end of input is kept only if it has
 *   non-blank content.
 *
 * Both LF and CRLF line endings are accepted and a leading UTF-8 BOM is
 * ignored. Without that, the trailing `\r` (or the BOM) prevents the header
 * pattern from matching and the plan appears to contain no phases.
 *
 * @param planContent Full text of the plan file.
 * @returns Phases in the order they appear in the plan.
 */
function parsePhases(planContent: string): Phase[] {
  // Phase headers look like "## 1. Fix test failure" or "1. Fix test failure".
  const phaseHeader = /^(?:##\s*)?(\d+)\.\s+(.+)$/;
  // Any markdown heading that is not itself a numbered phase header.
  const sectionHeader = /^#{1,6}\s+[^#\d]/;
  const separator = /^---+$/;

  const phases: Phase[] = [];
  let currentPhase: Phase | null = null;

  const lines = planContent.replace(/^\uFEFF/, "").split(/\r?\n/);
  for (const line of lines) {
    const match = phaseHeader.exec(line);
    if (match) {
      // A new header always closes the previous phase, even an empty one.
      if (currentPhase) phases.push(currentPhase);
      currentPhase = {
        number: parseInt(match[1], 10),
        title: match[2].trim(),
        content: "",
      };
      continue;
    }

    if (!currentPhase) continue;

    if (sectionHeader.test(line)) {
      // Close the phase only if it already collected something; otherwise the
      // heading is treated as decoration directly under the phase title.
      if (currentPhase.content.trim()) {
        phases.push(currentPhase);
        currentPhase = null;
      }
      continue;
    }

    // Leading separators are noise; later ones are kept as part of the body.
    if (!separator.test(line.trim()) || currentPhase.content.length > 0) {
      currentPhase.content += line + "\n";
    }
  }

  if (currentPhase && currentPhase.content.trim()) {
    phases.push(currentPhase);
  }
  return phases;
}
// ─── Process Helpers ─────────────────────────────────────────────────────────
/**
 * Decide how to launch a child `pi` process.
 *
 * When the current process was started from a script that exists on disk
 * (and is not a Bun virtual-filesystem path), that same script is re-run with
 * the current runtime. Otherwise the `pi` executable on PATH is used.
 *
 * @param args Arguments to forward to the child process.
 * @returns The command to spawn and its full argument list.
 */
function getPiInvocation(args: string[]): { command: string; args: string[] } {
  const viaPath = { command: "pi", args };

  const entryScript = process.argv[1];
  if (!entryScript) return viaPath;
  if (entryScript.startsWith("/$bunfs/root/")) return viaPath;
  if (!fs.existsSync(entryScript)) return viaPath;

  return { command: process.execPath, args: [entryScript, ...args] };
}
/**
 * Write a prompt to a file inside a freshly created private temp directory.
 *
 * The file is created with mode 0o600. If the write fails, the temp directory
 * is removed again before the error is rethrown, so a failed call does not
 * leave an orphaned directory behind.
 *
 * @param name Label used in the file name; characters outside `[\w.-]` are replaced with `_`.
 * @param prompt Text to write.
 * @returns The temp directory (for later cleanup) and the prompt file path.
 * @throws Whatever `mkdtemp` or the write rejects with.
 */
async function writePromptToTempFile(
  name: string,
  prompt: string,
): Promise<{ dir: string; filePath: string }> {
  const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-plan-exec-"));
  const safeName = name.replace(/[^\w.-]+/g, "_");
  const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
  try {
    await withFileMutationQueue(filePath, () =>
      fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 }),
    );
  } catch (err) {
    // The caller never learns about tmpDir on failure, so clean it up here.
    try {
      await fs.promises.rm(tmpDir, { recursive: true, force: true });
    } catch {
      /* best effort: the original write error is the one worth reporting */
    }
    throw err;
  }
  return { dir: tmpDir, filePath };
}
/**
 * Best-effort recursive removal of a temp directory.
 *
 * Does nothing for `null` or an empty path, and deliberately ignores any
 * removal error: cleanup must never mask the result of the real work.
 *
 * @param dir Directory to delete, or `null` when there is nothing to clean up.
 */
async function cleanupTemp(dir: string | null): Promise<void> {
  if (!dir) return;
  await fs.promises.rm(dir, { recursive: true, force: true }).catch(() => undefined);
}
/** Aggregated outcome of one child agent process started by `runAgent`. */
interface AgentRunResult {
/** Exit code of the child process (1 when the process could not be spawned). */
exitCode: number;
/** Messages collected from `message_end` and `tool_result_end` events, in arrival order. */
messages: Message[];
/** Everything the child wrote to stderr. */
stderr: string;
/** Number of assistant messages received. */
turns: number;
/** Sum of `usage.input` over all assistant messages. */
tokensIn: number;
/** Sum of `usage.output` over all assistant messages. */
tokensOut: number;
/** Sum of `usage.cost.total` over all assistant messages. */
cost: number;
/** Model of the first assistant message that reported one. */
model?: string;
/** Stop reason of the most recent assistant message that reported one. */
stopReason?: string;
/** Error message of the most recent assistant message that reported one. */
errorMessage?: string;
}
/**
 * Return the text the agent most recently produced.
 *
 * Walks the conversation backwards and, for the newest assistant message that
 * contains a text part, returns that message's first text part. Assistant
 * messages without any text part are skipped.
 *
 * @param messages Conversation messages in chronological order.
 * @returns The text found, or an empty string when there is none.
 */
function getFinalOutput(messages: Message[]): string {
  let idx = messages.length;
  while (idx-- > 0) {
    const candidate = messages[idx];
    if (candidate.role !== "assistant") continue;
    for (const piece of candidate.content) {
      if (piece.type === "text") return piece.text;
    }
  }
  return "";
}
/**
 * Tell whether agent output contains the completion marker.
 *
 * The marker is `DONE:` at the very start of any line, matched
 * case-insensitively.
 *
 * @param output Final assistant text of an agent run.
 * @returns `true` when some line begins with `DONE:`.
 */
function isDone(output: string): boolean {
  const completionMarker = /^DONE:\s*/im;
  return output.search(completionMarker) !== -1;
}
/**
 * Run one isolated `pi` agent process and collect its results.
 *
 * The system prompt is written to a temp file and passed with
 * `--append-system-prompt`; the child runs in JSON mode and its stdout is
 * parsed as one JSON event per line. `message_end` and `tool_result_end`
 * events are collected; usage, model, stop reason and error message are
 * aggregated from assistant messages. Lines that are not JSON objects are
 * ignored.
 *
 * Differences from a naive implementation, each fixing a concrete defect:
 * - stdout/stderr are decoded as UTF-8 streams, so multi-byte characters that
 *   straddle a chunk boundary are not corrupted.
 * - After an abort, escalation to SIGKILL is based on whether the child has
 *   actually exited. `proc.killed` only says a signal was sent, so checking
 *   it would mean SIGKILL is never issued.
 * - A child terminated by a signal (exit code `null`) is reported with exit
 *   code 1 rather than 0.
 * - The abort listener and the escalation timer are released once the child
 *   is done.
 *
 * The promise never rejects because of the child process; spawn failures are
 * reported through `exitCode`, `stderr` and `errorMessage`.
 *
 * @param cwd Working directory for the child process.
 * @param systemPrompt Text appended to the agent's system prompt.
 * @param task Task prompt passed as the final CLI argument.
 * @param signal Optional abort signal; aborting sends SIGTERM, then SIGKILL after 5 s.
 * @param onUpdate Optional callback invoked after each assistant message with the latest text and turn count.
 * @returns Aggregated result of the run.
 * @throws If the prompt temp file cannot be created.
 */
async function runAgent(
  cwd: string,
  systemPrompt: string,
  task: string,
  signal?: AbortSignal,
  onUpdate?: (partial: { output: string; turns: number }) => void,
): Promise<AgentRunResult> {
  const args: string[] = ["--mode", "json", "-p", "--no-session"];

  const { dir: promptDir, filePath: promptPath } = await writePromptToTempFile(
    "phase-executor",
    systemPrompt,
  );
  args.push("--append-system-prompt", promptPath);
  args.push(task);

  const result: AgentRunResult = {
    exitCode: 0,
    messages: [],
    stderr: "",
    turns: 0,
    tokensIn: 0,
    tokensOut: 0,
    cost: 0,
  };

  try {
    return await new Promise<AgentRunResult>((resolve) => {
      const invocation = getPiInvocation(args);
      const proc = spawn(invocation.command, invocation.args, {
        cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
      });

      let buffer = "";
      let killTimer: ReturnType<typeof setTimeout> | undefined;
      let onAbort: (() => void) | undefined;

      // Single exit path: release abort resources, then resolve.
      const finish = (exitCode: number) => {
        if (killTimer !== undefined) clearTimeout(killTimer);
        if (signal && onAbort) signal.removeEventListener("abort", onAbort);
        result.exitCode = exitCode;
        resolve(result);
      };

      const processLine = (line: string) => {
        if (!line.trim()) return;
        let event: { type?: unknown; message?: Message } | null;
        try {
          event = JSON.parse(line);
        } catch {
          return; // not a JSON event line
        }
        // Valid JSON that is not an object (e.g. `null`, a number) carries no event.
        if (event === null || typeof event !== "object") return;

        if (event.type === "message_end" && event.message) {
          const msg = event.message;
          result.messages.push(msg);
          if (msg.role === "assistant") {
            result.turns++;
            const usage = msg.usage;
            if (usage) {
              result.tokensIn += usage.input || 0;
              result.tokensOut += usage.output || 0;
              result.cost += usage.cost?.total || 0;
            }
            if (!result.model && msg.model) result.model = msg.model;
            if (msg.stopReason) result.stopReason = msg.stopReason;
            if (msg.errorMessage) result.errorMessage = msg.errorMessage;
            // Stream progress to the caller.
            onUpdate?.({ output: getFinalOutput(result.messages), turns: result.turns });
          }
        }
        if (event.type === "tool_result_end" && event.message) {
          result.messages.push(event.message);
        }
      };

      proc.stdout.setEncoding("utf8");
      proc.stderr.setEncoding("utf8");

      proc.stdout.on("data", (chunk: string) => {
        buffer += chunk;
        const lines = buffer.split("\n");
        // The last element is an incomplete line (or ""); keep it for the next chunk.
        buffer = lines.pop() || "";
        for (const line of lines) processLine(line);
      });
      proc.stderr.on("data", (chunk: string) => {
        result.stderr += chunk;
      });

      proc.on("close", (code) => {
        if (buffer.trim()) processLine(buffer);
        // `code` is null when the child was terminated by a signal.
        finish(code ?? 1);
      });
      proc.on("error", (err) => {
        result.stderr += `${result.stderr ? "\n" : ""}${err.message}`;
        if (!result.errorMessage) result.errorMessage = err.message;
        finish(1);
      });

      if (signal) {
        onAbort = () => {
          proc.kill("SIGTERM");
          killTimer = setTimeout(() => {
            // Escalate only if the child is still alive.
            if (proc.exitCode === null && proc.signalCode === null) proc.kill("SIGKILL");
          }, 5000);
        };
        if (signal.aborted) onAbort();
        else signal.addEventListener("abort", onAbort, { once: true });
      }
    });
  } finally {
    await cleanupTemp(promptDir);
  }
}
/**
 * Run a shell command and capture its output.
 *
 * The command is executed through the system shell, so it may contain pipes,
 * `&&`, quoting and so on. The promise never rejects: a command that cannot
 * be started resolves with exit code 1 and a `Command failed to start: …`
 * message (including the underlying reason) in `stderr`.
 *
 * Output is decoded as a UTF-8 stream so multi-byte characters split across
 * chunks are preserved, and the abort listener is removed once the command
 * has finished.
 *
 * @param cwd Working directory for the command.
 * @param command Shell command line to execute.
 * @param signal Optional abort signal; aborting sends SIGTERM to the shell.
 * @returns Exit code (1 when terminated by a signal) plus captured stdout and stderr.
 */
async function runCommand(
  cwd: string,
  command: string,
  signal?: AbortSignal,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  return await new Promise((resolve) => {
    const proc = spawn(command, {
      cwd,
      shell: true,
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");
    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    const killProc = () => {
      proc.kill("SIGTERM");
    };
    const settle = (outcome: { exitCode: number; stdout: string; stderr: string }) => {
      signal?.removeEventListener("abort", killProc);
      resolve(outcome);
    };

    proc.on("close", (code) => {
      // `code` is null when the shell was terminated by a signal.
      settle({ exitCode: code ?? 1, stdout, stderr });
    });
    proc.on("error", (err) => {
      settle({ exitCode: 1, stdout, stderr: `Command failed to start: ${err.message}` });
    });

    if (signal) {
      if (signal.aborted) killProc();
      else signal.addEventListener("abort", killProc, { once: true });
    }
  });
}
// ─── Format Helpers ──────────────────────────────────────────────────────────
/**
 * Render a token count compactly.
 *
 * Counts below 1000 are shown as-is, counts below 10000 with one decimal and
 * a `k` suffix (`1.5k`), and anything larger as whole thousands (`12k`).
 *
 * @param count Number of tokens.
 * @returns Short human-readable representation.
 */
function formatTokens(count: number): string {
  if (count < 1000) return String(count);
  const thousands = count / 1000;
  return count < 10000 ? `${thousands.toFixed(1)}k` : `${Math.round(thousands)}k`;
}
/**
 * Build the one-line usage summary for a phase.
 *
 * Emits, in order and separated by single spaces: turn count, input tokens,
 * output tokens, cost (four decimals) and model. Any value that is missing
 * or zero/empty is left out.
 *
 * @param result Phase record to summarise.
 * @returns The summary, or an empty string when nothing is recorded.
 */
function formatUsage(result: PhaseResult): string {
  const { turns, tokensIn, tokensOut, cost, model } = result;
  const segments = [
    turns ? `${turns} turns` : "",
    tokensIn ? formatTokens(tokensIn) : "",
    tokensOut ? formatTokens(tokensOut) : "",
    cost ? `$${cost.toFixed(4)}` : "",
    model ? model : "",
  ];
  return segments.filter((segment) => segment.length > 0).join(" ");
}
// ─── Extension ───────────────────────────────────────────────────────────────
/** Parsed form of the `/execute-plan` argument string. */
interface ExecutePlanArgs {
  /** First argument; empty when no arguments were given. */
  planFile: string;
  /** `true` when `--dry-run` is present. */
  dryRun: boolean;
  /** Value of `--phase` (default 1); `NaN` when the value is missing or not a number. */
  startPhase: number;
  /** Value of `--gate` (default `make precommit`). */
  gateCommand: string;
}

/** Split an argument string on whitespace, keeping "double" and 'single' quoted spans together. */
function tokenizeCommandArgs(input: string): { value: string; quoted: boolean }[] {
  const tokens: { value: string; quoted: boolean }[] = [];
  for (const match of input.matchAll(/"([^"]*)"|'([^']*)'|(\S+)/g)) {
    const quotedValue = match[1] ?? match[2];
    tokens.push(
      quotedValue !== undefined
        ? { value: quotedValue, quoted: true }
        : { value: match[3] ?? "", quoted: false },
    );
  }
  return tokens;
}

/**
 * Parse `<plan-file> [--phase N] [--gate CMD] [--dry-run]`.
 *
 * Options may appear in any order after the plan file. `--gate` accepts
 * either one quoted command (`--gate "mix test"`) or a run of unquoted words
 * up to the next recognised option (`--gate mix test --dry-run`).
 */
function parseExecutePlanArgs(args: string): ExecutePlanArgs {
  const tokens = tokenizeCommandArgs(args);
  const optionNames = new Set(["--dry-run", "--phase", "--gate"]);
  const isOption = (token: { value: string; quoted: boolean }) =>
    !token.quoted && optionNames.has(token.value);

  const parsed: ExecutePlanArgs = {
    planFile: tokens[0]?.value ?? "",
    dryRun: false,
    startPhase: 1,
    gateCommand: "make precommit",
  };

  let i = 1;
  while (i < tokens.length) {
    const token = tokens[i++];
    if (token.quoted) continue; // a quoted value is never an option name

    if (token.value === "--dry-run") {
      parsed.dryRun = true;
    } else if (token.value === "--phase") {
      const next = tokens[i];
      if (next && !isOption(next)) {
        parsed.startPhase = parseInt(next.value, 10);
        i++;
      } else {
        parsed.startPhase = NaN;
      }
    } else if (token.value === "--gate") {
      let command = "";
      const next = tokens[i];
      if (next?.quoted) {
        command = next.value;
        i++;
      } else {
        const words: string[] = [];
        while (i < tokens.length && !tokens[i].quoted && !isOption(tokens[i])) {
          words.push(tokens[i++].value);
        }
        command = words.join(" ");
      }
      if (command.trim()) parsed.gateCommand = command;
    }
  }
  return parsed;
}

/** Icon shown next to a phase for each lifecycle state. */
function phaseStatusIcon(status: PhaseResult["status"]): string {
  switch (status) {
    case "done":
      return "✅";
    case "failed":
      return "❌";
    case "running":
    case "fixing":
      return "🔄";
    default:
      return "⏳";
  }
}

/** Best-effort message extraction from an unknown thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Extension entry point: registers the `/execute-plan` command.
 *
 * The command parses a plan file into phases and runs each phase (from
 * `--phase` onwards) in its own agent process. After a phase reports `DONE:`,
 * the gate command is run; on failure a fix agent is run and the gate
 * retried, up to three times. Behaviour on problems:
 * - a phase whose agent does not report `DONE:` is marked failed and the
 *   executor moves on to the next phase;
 * - a gate that still fails after the fix attempts, an error while running an
 *   agent or the gate, or an abort stops the whole run.
 *
 * Progress is shown in the `plan-executor` widget and status line, followed
 * by a final summary.
 */
export default function (pi: ExtensionAPI) {
  pi.registerCommand("execute-plan", {
    description: "Execute a multi-phase plan file deterministically",
    handler: async (args, ctx) => {
      if (!ctx.hasUI) {
        ctx.ui.notify("execute-plan requires interactive mode", "error");
        return;
      }

      // ── Arguments ──────────────────────────────────────────────────────
      const { planFile, dryRun, startPhase, gateCommand } = parseExecutePlanArgs(args);
      if (!planFile) {
        ctx.ui.notify("Usage: /execute-plan <plan-file> [--phase N] [--gate CMD] [--dry-run]", "error");
        return;
      }
      if (Number.isNaN(startPhase)) {
        ctx.ui.notify("Invalid --phase value: expected a phase number", "error");
        return;
      }

      // ── Plan file ──────────────────────────────────────────────────────
      const resolvedPath = path.isAbsolute(planFile) ? planFile : path.join(ctx.cwd, planFile);
      if (!fs.existsSync(resolvedPath)) {
        ctx.ui.notify(`Plan file not found: ${resolvedPath}`, "error");
        return;
      }
      let planContent: string;
      try {
        planContent = fs.readFileSync(resolvedPath, "utf-8");
      } catch (err: unknown) {
        // e.g. the path is a directory or is not readable
        ctx.ui.notify(`Could not read plan file ${resolvedPath}: ${describeError(err)}`, "error");
        return;
      }
      const phases = parsePhases(planContent);
      if (phases.length === 0) {
        ctx.ui.notify("No phases found in plan file", "error");
        return;
      }

      // ── Dry run: show the plan and exit ────────────────────────────────
      if (dryRun) {
        const lines = [
          `## Plan: ${path.basename(resolvedPath)}`,
          ``,
          `Gate command: ${gateCommand}`,
          `${phases.length} phase(s) detected:`,
          ``,
          ...phases.map((p) => `### Phase ${p.number}: ${p.title}`),
          ``,
          ...phases.map((p) => p.content.trim()),
        ];
        ctx.ui.setWidget("plan-executor", lines);
        return;
      }

      // Pair every phase with its result record so lookups never depend on
      // the plan using contiguous 1-based numbering.
      const runs = phases.map((phase) => {
        const result: PhaseResult = {
          phase: phase.number,
          title: phase.title,
          status: "pending",
          gateAttempts: 0,
        };
        return { phase, result };
      });
      const results = runs.map((run) => run.result);

      if (!phases.some((p) => p.number >= startPhase)) {
        ctx.ui.notify(`No phases numbered ${startPhase} or higher in plan file`, "error");
        return;
      }

      const updateUI = () => {
        const lines = [
          `## Plan Executor: ${path.basename(resolvedPath)}`,
          ``,
          ...results.map((r) => {
            const usage = r.turns ? ` (${formatUsage(r)})` : "";
            const gateInfo = r.gateAttempts > 0 ? ` [gate: ${r.gateAttempts}]` : "";
            return `${phaseStatusIcon(r.status)} Phase ${r.phase}: ${r.title}${usage}${gateInfo}`;
          }),
          ``,
        ];
        ctx.ui.setWidget("plan-executor", lines);
      };
      const isAborted = () => ctx.signal?.aborted === true;
      const failPhase = (result: PhaseResult, message: string) => {
        result.status = "failed";
        result.error = message;
        updateUI();
      };

      updateUI();

      // ── Execute phases sequentially ────────────────────────────────────
      const MAX_FIX_ATTEMPTS = 3;
      for (const { phase, result } of runs) {
        if (phase.number < startPhase) continue;
        if (isAborted()) {
          ctx.ui.notify("Plan execution aborted", "warning");
          break;
        }

        result.status = "running";
        updateUI();
        ctx.ui.setStatus("plan-executor", `Executing Phase ${phase.number}: ${phase.title}`);

        const task = `## Phase ${phase.number}: ${phase.title}\n\n${phase.content.trim()}`;

        // Run the phase in an isolated agent process.
        let agentResult: AgentRunResult;
        try {
          agentResult = await runAgent(
            ctx.cwd,
            PHASE_SYSTEM_PROMPT,
            task,
            ctx.signal,
            ({ output, turns }) => {
              result.turns = turns;
              const status = isDone(output) ? "✅" : "🔄";
              ctx.ui.setStatus(
                "plan-executor",
                `${status} Phase ${phase.number} (${turns} turns): ${phase.title}`,
              );
            },
          );
        } catch (err: unknown) {
          const message = describeError(err);
          failPhase(result, message);
          ctx.ui.notify(`Phase ${phase.number} failed: ${message}`, "error");
          break;
        }

        result.agentOutput = getFinalOutput(agentResult.messages);
        result.agentMessages = agentResult.messages;
        result.turns = agentResult.turns;
        result.tokensIn = agentResult.tokensIn;
        result.tokensOut = agentResult.tokensOut;
        result.cost = agentResult.cost;
        result.model = agentResult.model;

        if (isAborted()) {
          failPhase(result, "Aborted");
          ctx.ui.notify(`Phase ${phase.number} aborted`, "warning");
          break;
        }

        if (!isDone(result.agentOutput ?? "")) {
          failPhase(result, "Agent did not signal completion (no DONE: message)");
          ctx.ui.notify(
            `Phase ${phase.number} incomplete: agent did not signal DONE`,
            "warning",
          );
          continue;
        }

        // ── Quality gate, with bounded auto-fix loop ─────────────────────
        let gatePassed = false;
        try {
          ctx.ui.setStatus("plan-executor", `Phase ${phase.number} done, running gate: ${gateCommand}...`);
          let gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
          result.gateAttempts++;
          result.gateOutput = gateResult.stdout + gateResult.stderr;

          let fixAttempt = 0;
          while (gateResult.exitCode !== 0 && fixAttempt < MAX_FIX_ATTEMPTS && !isAborted()) {
            fixAttempt++;
            result.status = "fixing";
            updateUI();
            ctx.ui.setStatus(
              "plan-executor",
              `Fixing gate issues (attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS})...`,
            );

            const fixTask = `## Gate Failed\n\nThe following command failed after completing Phase ${phase.number}:\n\n ${gateCommand}\n\nOutput:\n\n\`\`\`\n${gateResult.stdout}\n${gateResult.stderr}\n\`\`\`\n\nFix ALL reported issues.`;
            // Function replacer: a plain string would have `$&`, `$1`, `$$`
            // in the gate command interpreted as replacement patterns.
            const fixSystemPrompt = GATE_FIX_SYSTEM_PROMPT.replace("{GATE_COMMAND}", () => gateCommand);
            const attemptLabel = fixAttempt;
            const fixResult = await runAgent(
              ctx.cwd,
              fixSystemPrompt,
              fixTask,
              ctx.signal,
              ({ turns }) => {
                ctx.ui.setStatus(
                  "plan-executor",
                  `Fixing gate (${attemptLabel}/${MAX_FIX_ATTEMPTS}, ${turns} turns)...`,
                );
              },
            );
            result.fixOutput = getFinalOutput(fixResult.messages);
            result.turns = (result.turns ?? 0) + fixResult.turns;
            result.tokensIn = (result.tokensIn ?? 0) + fixResult.tokensIn;
            result.tokensOut = (result.tokensOut ?? 0) + fixResult.tokensOut;
            result.cost = (result.cost ?? 0) + fixResult.cost;

            if (isAborted()) break;

            // Re-run the gate after the fix attempt.
            gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
            result.gateAttempts++;
            result.gateOutput = gateResult.stdout + gateResult.stderr;
          }
          gatePassed = gateResult.exitCode === 0;
        } catch (err: unknown) {
          const message = describeError(err);
          failPhase(result, message);
          ctx.ui.notify(`Phase ${phase.number} failed: ${message}`, "error");
          break;
        }

        if (!gatePassed) {
          if (isAborted()) {
            failPhase(result, "Aborted");
            ctx.ui.notify(`Phase ${phase.number} aborted`, "warning");
          } else {
            failPhase(result, `Gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`);
            ctx.ui.notify(
              `Phase ${phase.number} blocked: gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`,
              "error",
            );
          }
          break;
        }

        result.status = "done";
        updateUI();
        ctx.ui.notify(`Phase ${phase.number} complete!`, "success");
      }

      // ── Final summary ──────────────────────────────────────────────────
      const completed = results.filter((r) => r.status === "done").length;
      const failed = results.filter((r) => r.status === "failed").length;
      const notRun = results.length - completed - failed;
      const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
      const totalTurns = results.reduce((sum, r) => sum + (r.turns ?? 0), 0);

      ctx.ui.setStatus(
        "plan-executor",
        `Done: ${completed}/${results.length} phases complete, ${failed} failed, $${totalCost.toFixed(4)}`,
      );

      let verdict = `## All phases passed ✅`;
      if (failed > 0) verdict = `## Failed phases need manual attention`;
      else if (notRun > 0) verdict = `## ${notRun} phase(s) were not run`;

      const summaryLines = [
        `## Plan Execution Complete`,
        ``,
        `**Phases:** ${completed}/${results.length} complete${failed > 0 ? `, ${failed} failed` : ""}`,
        `**Total turns:** ${totalTurns}`,
        `**Total cost:** $${totalCost.toFixed(4)}`,
        ``,
        ...results.map((r) => {
          const usageText = formatUsage(r);
          const usage = usageText ? ` (${usageText})` : "";
          const gateRuns = r.gateAttempts > 1 ? ` (gate: ${r.gateAttempts} runs)` : "";
          return `${phaseStatusIcon(r.status)} Phase ${r.phase}: ${r.title}${usage}${gateRuns}`;
        }),
        ``,
        verdict,
      ];
      ctx.ui.setWidget("plan-executor", summaryLines);
    },
  });
}