/**
 * Plan Executor - Execute multi-phase plans deterministically
 *
 * Reads a plan file with numbered phases and executes each phase
 * in a clean, isolated pi process. After each phase, runs the
 * quality gate and auto-fixes any regressions before proceeding.
 *
 * Usage:
 *   /execute-plan plans/fix-readme-issues.md
 *   /execute-plan plans/fix-readme-issues.md --phase 2
 *   /execute-plan plans/fix-readme-issues.md --gate "mix test"
 *   /execute-plan plans/fix-readme-issues.md --dry-run
 */

import { spawn } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import type { Message } from "@mariozechner/pi-ai";
import {
  type ExtensionAPI,
  getMarkdownTheme,
  withFileMutationQueue,
} from "@mariozechner/pi-coding-agent";
import { Container, Markdown, Spacer, Text } from "@mariozechner/pi-tui";

// ─── Phase Executor System Prompt ────────────────────────────────────────────

/**
 * System prompt appended to the agent that executes a single plan phase.
 *
 * The agent is told to finish with a line starting with `DONE:`; `isDone()`
 * looks for that marker to decide whether the phase completed.
 */
const PHASE_SYSTEM_PROMPT = `You are executing a single phase of a larger plan. Your job is to complete ONLY this phase.

## Instructions
1. Read the phase description carefully
2. Make all the changes described in the phase
3. Run the verification command(s) listed in the phase
4. When ALL changes are done and verification passes, respond with:

DONE: <brief summary of what was done>

Do NOT proceed to any other phases. Do NOT make changes outside this phase's scope.
If verification fails, diagnose and fix the issue within this phase's scope, then re-verify.

## Important
- If the phase says "remove lines X-Y", do exactly that
- If the phase says "run mix format", do that
- Always verify before declaring DONE
- If you cannot complete the phase, explain what is blocking you instead of saying DONE`;

// ─── Gate Fix System Prompt ──────────────────────────────────────────────────

/**
 * System prompt template for the agent that repairs quality-gate failures.
 *
 * The literal `{GATE_COMMAND}` placeholder is substituted with the actual
 * gate command before the prompt is used.
 */
const GATE_FIX_SYSTEM_PROMPT = `You are fixing issues found by the quality gate. The following command failed:

{GATE_COMMAND}

Here is the output showing what failed. Fix ALL reported issues, then verify by running the check command(s) mentioned in the output.

When ALL issues are fixed and verification passes, respond with:

DONE: <brief summary of fixes>

If verification still fails after your fixes, diagnose and fix remaining issues. Keep iterating until clean.`;

// ─── Types ───────────────────────────────────────────────────────────────────

/** A single numbered phase parsed out of a plan file. */
interface Phase {
  /** Phase number exactly as written in the plan header. */
  number: number;
  /** Header text that follows the number (trimmed). */
  title: string;
  /** Body lines collected under the header, each terminated by "\n". */
  content: string;
  /** Optional verification command. Not set by any code visible in this file. */
  verifyCommand?: string;
}

/** Execution state and accumulated usage for one phase of the plan. */
interface PhaseResult {
  /** Phase number copied from the parsed plan. */
  phase: number;
  /** Phase title copied from the parsed plan. */
  title: string;
  /** Lifecycle state; "fixing" means a gate-fix agent is currently running. */
  status: "pending" | "running" | "done" | "failed" | "fixing";
  /** Final assistant text of the phase agent. */
  agentOutput?: string;
  /** All messages collected from the phase agent run. */
  agentMessages?: Message[];
  /** Number of times the gate command has been run for this phase. */
  gateAttempts: number;
  /** stdout + stderr of the most recent gate run. */
  gateOutput?: string;
  /** Final assistant text of the most recent gate-fix agent. */
  fixOutput?: string;
  /** Assistant turns, summed over the phase agent and any fix agents. */
  turns?: number;
  /** Input tokens, summed over the phase agent and any fix agents. */
  tokensIn?: number;
  /** Output tokens, summed over the phase agent and any fix agents. */
  tokensOut?: number;
  /** Cost, summed over the phase agent and any fix agents. */
  cost?: number;
  /** Model reported by the phase agent. */
  model?: string;
  /** Human-readable failure reason when status is "failed". */
  error?: string;
}

// ─── Plan Parsing ────────────────────────────────────────────────────────────

/**
 * Split a plan document into its numbered phases.
 *
 * A phase starts at a line like "## 1. Fix test failure" or "1. Fix test
 * failure" and collects the following lines as its content until the next
 * phase header or the next non-numbered markdown heading.
 *
 * NOTE(review): because the "##" prefix is optional, an ordinary numbered list
 * item inside a phase body ("1. do this") also starts a new phase.
 *
 * @param planContent Raw text of the plan file (LF or CRLF line endings).
 * @returns Phases in document order. A trailing phase with blank content is dropped.
 */
function parsePhases(planContent: string): Phase[] {
  const phases: Phase[] = [];

  // Phase headers like "## 1. Fix test failure" or "1. Fix test failure".
  const phaseHeader = /^(?:##\s*)?(\d+)\.\s+(.+)$/;
  // Any markdown heading whose text does not start with '#' or a digit. This
  // already covers "## Execution ..." and "## Risk ..." sections.
  const sectionHeader = /^#{1,6}\s+[^#\d]/;
  const separator = /^---+$/;

  let current: Phase | null = null;

  // Split on CRLF as well as LF: `.` never matches "\r", so a trailing "\r"
  // would stop every header line from matching and yield zero phases.
  for (const line of planContent.split(/\r?\n/)) {
    const header = phaseHeader.exec(line);
    if (header) {
      if (current) phases.push(current);
      current = {
        number: parseInt(header[1], 10),
        title: header[2].trim(),
        content: "",
      };
      continue;
    }

    if (!current) continue;

    if (sectionHeader.test(line)) {
      // A non-phase section ends the current phase once it has real content;
      // the heading line itself is never part of the phase body.
      if (current.content.trim()) {
        phases.push(current);
        current = null;
      }
      continue;
    }

    // Drop "---" rules that appear before any content; keep them afterwards.
    if (!separator.test(line.trim()) || current.content.length > 0) {
      current.content += `${line}\n`;
    }
  }

  // Don't forget the last phase.
  if (current && current.content.trim()) {
    phases.push(current);
  }

  return phases;
}

// ─── Process Helpers ─────────────────────────────────────────────────────────

/**
 * Build the command line used to launch a child pi process.
 *
 * When the current process was started from a script file that exists on disk,
 * that script is re-run with the current runtime (`process.execPath`).
 * Otherwise, including when the script path lives under "/$bunfs/root/"
 * (presumably Bun's virtual filesystem for compiled binaries; confirm), the
 * `pi` executable on PATH is used.
 *
 * @param args Arguments to pass to pi.
 * @returns Executable and full argument list for `spawn`.
 */
function getPiInvocation(args: string[]): { command: string; args: string[] } {
  const currentScript = process.argv[1];
  const isBunVirtualScript = currentScript?.startsWith("/$bunfs/root/");
  if (currentScript && !isBunVirtualScript && fs.existsSync(currentScript)) {
    return { command: process.execPath, args: [currentScript, ...args] };
  }
  return { command: "pi", args };
}

/**
 * Write a prompt to a file inside a freshly created temporary directory.
 *
 * @param name Label used in the file name; characters other than word
 *   characters, "." and "-" are replaced with "_".
 * @param prompt Text to write.
 * @returns The temp directory (to remove afterwards) and the prompt file path.
 */
async function writePromptToTempFile(
  name: string,
  prompt: string,
): Promise<{ dir: string; filePath: string }> {
  const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-plan-exec-"));
  const safeName = name.replace(/[^\w.-]+/g, "_");
  const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
  // Owner-only permissions: the prompt may contain project details.
  await withFileMutationQueue(filePath, () =>
    fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 }),
  );
  return { dir: tmpDir, filePath };
}

/**
 * Best-effort recursive removal of a temporary directory.
 *
 * @param dir Directory to remove; `null` is a no-op.
 */
async function cleanupTemp(dir: string | null): Promise<void> {
  if (!dir) return;
  try {
    await fs.promises.rm(dir, { recursive: true, force: true });
  } catch {
    /* ignore: failing to clean a temp dir must not fail the run */
  }
}

/** Outcome of one child pi process run, as collected by `runAgent`. */
interface AgentRunResult {
  /** Exit code of the child process (1 if it failed to start). */
  exitCode: number;
  /** Messages carried by "message_end" and "tool_result_end" events, in arrival order. */
  messages: Message[];
  /** Everything the child wrote to stderr. */
  stderr: string;
  /** Number of assistant messages received. */
  turns: number;
  /** Sum of `usage.input` over assistant messages. */
  tokensIn: number;
  /** Sum of `usage.output` over assistant messages. */
  tokensOut: number;
  /** Sum of `usage.cost.total` over assistant messages. */
  cost: number;
  /** Model reported by the first assistant message that carried one. */
  model?: string;
  /** Stop reason from the most recent assistant message that carried one. */
  stopReason?: string;
  /** Error message from the most recent assistant message that carried one. */
  errorMessage?: string;
}

/**
 * Return the agent's latest textual answer.
 *
 * Scans messages from newest to oldest and returns the first text part of the
 * first assistant message that has one. An assistant message without any text
 * part is skipped in favour of an older one.
 *
 * @param messages Messages collected from an agent run.
 * @returns The text found, or "" when no assistant message contains text.
 */
function getFinalOutput(messages: Message[]): string {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (msg.role !== "assistant") continue;
    for (const part of msg.content) {
      if (part.type === "text") return part.text;
    }
  }
  return "";
}

/**
 * Whether the agent output contains a completion marker: any line that starts
 * with "DONE:" (case-insensitive).
 */
function isDone(output: string): boolean {
  return /^DONE:\s*/im.test(output);
}

/**
 * Run one task in a separate pi process and collect its streamed results.
 *
 * The system prompt is written to a temp file and passed with
 * `--append-system-prompt`; the child is started with
 * `--mode json -p --no-session` and its stdout is parsed as one JSON event per
 * line. The temp directory is removed when the run ends.
 *
 * @param cwd Working directory for the child process.
 * @param systemPrompt Prompt text written to the temp file.
 * @param task Task text passed as the final CLI argument.
 * @param signal Optional abort signal; on abort the child gets SIGTERM and,
 *   if it is still running 5 seconds later, SIGKILL.
 * @param onUpdate Called after every assistant message with the latest text
 *   output and the number of turns so far.
 * @returns Collected messages, usage totals and exit code. Spawn failures are
 *   reported through `exitCode: 1` rather than by rejecting.
 */
async function runAgent(
  cwd: string,
  systemPrompt: string,
  task: string,
  signal?: AbortSignal,
  onUpdate?: (partial: { output: string; turns: number }) => void,
): Promise<AgentRunResult> {
  const args: string[] = ["--mode", "json", "-p", "--no-session"];

  // Write system prompt to temp file
  const { dir: promptDir, filePath: promptPath } = await writePromptToTempFile(
    "phase-executor",
    systemPrompt,
  );
  args.push("--append-system-prompt", promptPath);
  args.push(task);

  const result: AgentRunResult = {
    exitCode: 0,
    messages: [],
    stderr: "",
    turns: 0,
    tokensIn: 0,
    tokensOut: 0,
    cost: 0,
  };

  try {
    return await new Promise<AgentRunResult>((resolve) => {
      const invocation = getPiInvocation(args);
      const proc = spawn(invocation.command, invocation.args, {
        cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
      });

      // Decode at the stream level so a multibyte character split across two
      // chunks is not corrupted.
      proc.stdout.setEncoding("utf8");
      proc.stderr.setEncoding("utf8");

      let buffer = "";
      let settled = false;
      let killTimer: ReturnType<typeof setTimeout> | undefined;

      const killProc = () => {
        proc.kill("SIGTERM");
        killTimer = setTimeout(() => {
          // `proc.killed` only says a signal was sent, so it is already true
          // here. Check whether the process has actually exited instead.
          if (proc.exitCode === null && proc.signalCode === null) proc.kill("SIGKILL");
        }, 5000);
      };

      // Resolve exactly once ("error" and "close" can both fire) and release
      // the abort listener so a long-lived signal does not accumulate handlers.
      const finish = (exitCode: number) => {
        if (settled) return;
        settled = true;
        signal?.removeEventListener("abort", killProc);
        result.exitCode = exitCode;
        resolve(result);
      };

      const processLine = (line: string) => {
        if (!line.trim()) return;
        let event: { type?: unknown; message?: Message } | null;
        try {
          event = JSON.parse(line);
        } catch {
          return; // not a JSON event line
        }
        // JSON.parse accepts "null" and primitives; ignore anything that is
        // not an object instead of throwing inside the stream handler.
        if (event === null || typeof event !== "object") return;

        if (event.type === "message_end" && event.message) {
          const msg = event.message;
          result.messages.push(msg);

          if (msg.role === "assistant") {
            result.turns++;
            const usage = msg.usage;
            if (usage) {
              result.tokensIn += usage.input || 0;
              result.tokensOut += usage.output || 0;
              result.cost += usage.cost?.total || 0;
            }
            if (!result.model && msg.model) result.model = msg.model;
            if (msg.stopReason) result.stopReason = msg.stopReason;
            if (msg.errorMessage) result.errorMessage = msg.errorMessage;

            // Stream progress
            const output = getFinalOutput(result.messages);
            onUpdate?.({ output, turns: result.turns });
          }
        }

        if (event.type === "tool_result_end" && event.message) {
          result.messages.push(event.message);
        }
      };

      proc.stdout.on("data", (chunk: string) => {
        buffer += chunk;
        const lines = buffer.split("\n");
        // The last element is an incomplete line (or ""); keep it for later.
        buffer = lines.pop() ?? "";
        for (const line of lines) processLine(line);
      });

      proc.stderr.on("data", (chunk: string) => {
        result.stderr += chunk;
      });

      proc.on("close", (code) => {
        if (killTimer) clearTimeout(killTimer);
        if (buffer.trim()) processLine(buffer);
        // A null code means the child was terminated by a signal; report that
        // as a failure, the same way runCommand does.
        finish(code ?? 1);
      });

      proc.on("error", (err) => {
        // Keep the reason (e.g. ENOENT) instead of discarding it.
        result.stderr += `${err.message}\n`;
        if (!result.errorMessage) result.errorMessage = err.message;
        finish(1);
      });

      if (signal) {
        if (signal.aborted) killProc();
        else signal.addEventListener("abort", killProc, { once: true });
      }
    });
  } finally {
    await cleanupTemp(promptDir);
  }
}

/**
 * Run a shell command and capture its output.
 *
 * @param cwd Working directory for the command.
 * @param command Command line, executed through the platform shell.
 * @param signal Optional abort signal; on abort the process is sent SIGTERM.
 * @returns Exit code (1 when terminated by a signal or when the command could
 *   not be started), plus captured stdout and stderr. Never rejects.
 */
async function runCommand(
  cwd: string,
  command: string,
  signal?: AbortSignal,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  return await new Promise((resolve) => {
    const proc = spawn(command, {
      cwd,
      shell: true,
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Decode at the stream level so multibyte characters split across chunks
    // are not corrupted.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    let settled = false;

    const killProc = () => {
      proc.kill("SIGTERM");
    };

    // Resolve once ("error" and "close" can both fire) and drop the abort
    // listener so a reused signal does not accumulate handlers.
    const finish = (exitCode: number) => {
      if (settled) return;
      settled = true;
      signal?.removeEventListener("abort", killProc);
      resolve({ exitCode, stdout, stderr });
    };

    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    proc.on("close", (code) => {
      finish(code ?? 1);
    });

    proc.on("error", (err) => {
      stderr = `Command failed to start: ${err.message}`;
      finish(1);
    });

    if (signal) {
      if (signal.aborted) killProc();
      else signal.addEventListener("abort", killProc, { once: true });
    }
  });
}

// ─── Format Helpers ──────────────────────────────────────────────────────────

/**
 * Format a token count compactly: "999", "1.5k", "25k", "2.5M".
 *
 * @param count Non-negative token count.
 */
function formatTokens(count: number): string {
  if (count < 1000) return count.toString();
  if (count < 10000) return `${(count / 1000).toFixed(1)}k`;
  if (count < 1000000) return `${Math.round(count / 1000)}k`;
  // Totals summed over many turns reach millions; "2.5M" reads better than "2500k".
  return `${(count / 1000000).toFixed(1)}M`;
}

/**
 * Build a one-line usage summary for a phase, e.g.
 * "3 turns ↑12k ↓1.5k $0.0421 model-name".
 *
 * Fields that are missing or zero are left out.
 */
function formatUsage(result: PhaseResult): string {
  const parts: string[] = [];
  if (result.turns) parts.push(`${result.turns} turns`);
  if (result.tokensIn) parts.push(`↑${formatTokens(result.tokensIn)}`);
  if (result.tokensOut) parts.push(`↓${formatTokens(result.tokensOut)}`);
  if (result.cost) parts.push(`$${result.cost.toFixed(4)}`);
  if (result.model) parts.push(result.model);
  return parts.join(" ");
}

// ─── Extension ───────────────────────────────────────────────────────────────

/** Gate command used when `--gate` is not given. */
const DEFAULT_GATE_COMMAND = "make precommit";

/** Maximum number of fix-agent runs per phase before the gate is declared failed. */
const MAX_FIX_ATTEMPTS = 3;

/** Parsed form of the `/execute-plan` argument string. */
interface ExecutePlanArgs {
  planFile?: string;
  dryRun: boolean;
  startPhase: number;
  gateCommand: string;
  /** Set when a flag is malformed; the command should report it and stop. */
  error?: string;
}

/** Split an argument string on whitespace, keeping "double" and 'single' quoted spans together. */
function tokenizeArgs(input: string): string[] {
  const tokens: string[] = [];
  for (const m of input.matchAll(/"([^"]*)"|'([^']*)'|(\S+)/g)) {
    tokens.push(m[1] ?? m[2] ?? m[3]);
  }
  return tokens;
}

/** Parse `<plan-file> [--phase N] [--gate CMD] [--dry-run]`; flags may appear in any order. */
function parseExecutePlanArgs(raw: string): ExecutePlanArgs {
  const parsed: ExecutePlanArgs = {
    dryRun: false,
    startPhase: 1,
    gateCommand: DEFAULT_GATE_COMMAND,
  };
  const tokens = tokenizeArgs(raw);

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    if (token === "--dry-run") {
      parsed.dryRun = true;
    } else if (token === "--phase") {
      const value = tokens[++i];
      const n = Number.parseInt(value ?? "", 10);
      if (!Number.isInteger(n) || n < 1) {
        parsed.error = `--phase expects a positive integer, got: ${value ?? "(nothing)"}`;
      } else {
        parsed.startPhase = n;
      }
    } else if (token === "--gate") {
      const value = tokens[++i];
      if (!value) parsed.error = "--gate expects a command (quote it if it contains spaces)";
      else parsed.gateCommand = value;
    } else if (parsed.planFile === undefined && !token.startsWith("--")) {
      parsed.planFile = token;
    }
  }
  return parsed;
}

/** Icon shown next to a phase for a given status. */
function phaseStatusIcon(status: PhaseResult["status"]): string {
  switch (status) {
    case "done":
      return "✅";
    case "failed":
      return "❌";
    case "running":
    case "fixing":
      return "🔄";
    default:
      return "⏳";
  }
}

/** Turn an unknown thrown value into a displayable message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Register the `/execute-plan` command.
 *
 * The command parses a plan file into phases and, for each phase from the
 * start phase onwards: runs the phase in a child pi process, requires a
 * "DONE:" marker in the agent's final output, runs the gate command, and on
 * gate failure runs up to MAX_FIX_ATTEMPTS fix agents, re-running the gate
 * after each. Progress is shown through the "plan-executor" widget and status.
 */
export default function (pi: ExtensionAPI) {
  pi.registerCommand("execute-plan", {
    description: "Execute a multi-phase plan file deterministically",
    handler: async (args, ctx) => {
      if (!ctx.hasUI) {
        ctx.ui.notify("execute-plan requires interactive mode", "error");
        return;
      }

      // Parse arguments. Quoted values are honoured, so the documented
      // `--gate "mix test"` form works and flags may appear in any order.
      const { planFile, dryRun, startPhase, gateCommand, error: argError } = parseExecutePlanArgs(args);

      if (!planFile) {
        ctx.ui.notify("Usage: /execute-plan <plan-file> [--phase N] [--gate CMD] [--dry-run]", "error");
        return;
      }
      if (argError) {
        ctx.ui.notify(argError, "error");
        return;
      }

      // Resolve path
      const resolvedPath = path.isAbsolute(planFile) ? planFile : path.join(ctx.cwd, planFile);

      if (!fs.existsSync(resolvedPath)) {
        ctx.ui.notify(`Plan file not found: ${resolvedPath}`, "error");
        return;
      }

      let planContent: string;
      try {
        planContent = fs.readFileSync(resolvedPath, "utf-8");
      } catch (err: unknown) {
        // e.g. the path is a directory or is not readable
        ctx.ui.notify(`Could not read plan file: ${describeError(err)}`, "error");
        return;
      }

      const phases = parsePhases(planContent);
      if (phases.length === 0) {
        ctx.ui.notify("No phases found in plan file", "error");
        return;
      }

      // Dry run: show plan and exit
      if (dryRun) {
        const lines = [
          `## Plan: ${path.basename(resolvedPath)}`,
          ``,
          `Gate command: ${gateCommand}`,
          `${phases.length} phase(s) detected:`,
          ``,
          ...phases.map((p) => `### Phase ${p.number}: ${p.title}`),
          ``,
          ...phases.map((p) => p.content.trim()),
        ];
        ctx.ui.setWidget("plan-executor", lines);
        return;
      }

      // Initialize results; results[i] always belongs to phases[i].
      const results: PhaseResult[] = phases.map((p) => ({
        phase: p.number,
        title: p.title,
        status: "pending",
        gateAttempts: 0,
      }));

      const updateUI = () => {
        const lines = [
          `## Plan Executor: ${path.basename(resolvedPath)}`,
          ``,
          ...results.map((r) => {
            const usage = r.turns ? ` (${formatUsage(r)})` : "";
            const gateInfo = r.gateAttempts > 0 ? ` [gate: ${r.gateAttempts}]` : "";
            return `${phaseStatusIcon(r.status)} Phase ${r.phase}: ${r.title}${usage}${gateInfo}`;
          }),
          ``,
        ];
        ctx.ui.setWidget("plan-executor", lines);
      };

      // Show initial state
      updateUI();

      // Execute phases sequentially
      for (const [index, phase] of phases.entries()) {
        if (phase.number < startPhase) continue;

        // Once aborted, every further child would be killed immediately.
        if (ctx.signal?.aborted) {
          ctx.ui.notify("Plan execution aborted", "warning");
          break;
        }

        // Look the result up by position, not by phase number: plan numbering
        // may have gaps or repeats.
        const result = results[index];
        result.status = "running";
        updateUI();
        ctx.ui.setStatus("plan-executor", `Executing Phase ${phase.number}: ${phase.title}`);

        // Build task prompt from phase content
        const task = `## Phase ${phase.number}: ${phase.title}\n\n${phase.content.trim()}`;

        // Run phase in isolated context
        let agentResult: AgentRunResult;
        try {
          agentResult = await runAgent(
            ctx.cwd,
            PHASE_SYSTEM_PROMPT,
            task,
            ctx.signal,
            ({ output, turns }) => {
              result.turns = turns;
              const status = isDone(output) ? "✅" : "🔄";
              ctx.ui.setStatus(
                "plan-executor",
                `${status} Phase ${phase.number} (${turns} turns): ${phase.title}`,
              );
            },
          );
        } catch (err: unknown) {
          const message = describeError(err);
          result.status = "failed";
          result.error = message;
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} failed: ${message}`, "error");
          break;
        }

        result.agentOutput = getFinalOutput(agentResult.messages);
        result.agentMessages = agentResult.messages;
        result.turns = agentResult.turns;
        result.tokensIn = agentResult.tokensIn;
        result.tokensOut = agentResult.tokensOut;
        result.cost = agentResult.cost;
        result.model = agentResult.model;

        // Check if agent said DONE
        if (!isDone(result.agentOutput ?? "")) {
          result.status = "failed";
          result.error = "Agent did not signal completion (no DONE: message)";
          updateUI();
          ctx.ui.notify(
            `Phase ${phase.number} incomplete: agent did not signal DONE`,
            "warning",
          );
          // NOTE(review): an incomplete phase moves on to the next phase
          // instead of stopping, unlike the other failure paths. Preserved
          // as-is; confirm this is intended.
          continue;
        }

        // Run gate
        ctx.ui.setStatus("plan-executor", `Phase ${phase.number} done, running gate: ${gateCommand}...`);
        let gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
        result.gateAttempts++;
        result.gateOutput = gateResult.stdout + gateResult.stderr;

        // If gate fails, fix in a loop
        let fixAttempt = 0;
        let fixError: string | undefined;

        while (gateResult.exitCode !== 0 && fixAttempt < MAX_FIX_ATTEMPTS && !ctx.signal?.aborted) {
          fixAttempt++;
          result.status = "fixing";
          updateUI();
          ctx.ui.setStatus(
            "plan-executor",
            `Fixing gate issues (attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS})...`,
          );

          const fixTask = `## Gate Failed\n\nThe following command failed after completing Phase ${phase.number}:\n\n ${gateCommand}\n\nOutput:\n\n\`\`\`\n${gateResult.stdout}\n${gateResult.stderr}\n\`\`\`\n\nFix ALL reported issues.`;

          // Use a replacer function: a plain replacement string would
          // interpret "$&", "$$" etc. inside the gate command.
          const fixSystemPrompt = GATE_FIX_SYSTEM_PROMPT.replace("{GATE_COMMAND}", () => gateCommand);

          let fixResult: AgentRunResult;
          try {
            fixResult = await runAgent(
              ctx.cwd,
              fixSystemPrompt,
              fixTask,
              ctx.signal,
              ({ turns }) => {
                ctx.ui.setStatus(
                  "plan-executor",
                  `Fixing gate (${fixAttempt}/${MAX_FIX_ATTEMPTS}, ${turns} turns)...`,
                );
              },
            );
          } catch (err: unknown) {
            fixError = describeError(err);
            break;
          }

          result.fixOutput = getFinalOutput(fixResult.messages);
          result.turns = (result.turns ?? 0) + fixResult.turns;
          result.tokensIn = (result.tokensIn ?? 0) + fixResult.tokensIn;
          result.tokensOut = (result.tokensOut ?? 0) + fixResult.tokensOut;
          result.cost = (result.cost ?? 0) + fixResult.cost;

          // Re-run gate
          gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
          result.gateAttempts++;
          result.gateOutput = gateResult.stdout + gateResult.stderr;
        }

        if (fixError !== undefined) {
          result.status = "failed";
          result.error = fixError;
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} gate fix failed: ${fixError}`, "error");
          break;
        }

        if (gateResult.exitCode !== 0) {
          result.status = "failed";
          result.error = `Gate failed after ${fixAttempt} fix attempts`;
          updateUI();
          ctx.ui.notify(
            `Phase ${phase.number} blocked: gate failed after ${fixAttempt} fix attempts`,
            "error",
          );
          break;
        }

        // Phase complete
        result.status = "done";
        updateUI();
        ctx.ui.notify(`Phase ${phase.number} complete!`, "success");
      }

      // Final summary
      const completed = results.filter((r) => r.status === "done").length;
      const failed = results.filter((r) => r.status === "failed").length;
      const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
      const totalTurns = results.reduce((sum, r) => sum + (r.turns ?? 0), 0);

      ctx.ui.setStatus(
        "plan-executor",
        `Done: ${completed}/${results.length} phases complete, ${failed} failed, $${totalCost.toFixed(4)}`,
      );

      // Only claim success when every phase actually ran and passed; phases
      // skipped via --phase or left pending after a stop are not "passed".
      let verdict: string;
      if (failed > 0) verdict = `## Failed phases need manual attention`;
      else if (completed === results.length) verdict = `## All phases passed ✅`;
      else verdict = `## ${results.length - completed} phase(s) were not run`;

      // Show final summary widget
      const summaryLines = [
        `## Plan Execution Complete`,
        ``,
        `**Phases:** ${completed}/${results.length} complete${failed > 0 ? `, ${failed} failed` : ""}`,
        `**Total turns:** ${totalTurns}`,
        `**Total cost:** $${totalCost.toFixed(4)}`,
        ``,
        ...results.map((r) => {
          const usage = formatUsage(r);
          const usageInfo = usage ? ` — ${usage}` : "";
          const gateRuns = r.gateAttempts > 1 ? ` (gate: ${r.gateAttempts} runs)` : "";
          return `${phaseStatusIcon(r.status)} Phase ${r.phase}: ${r.title}${usageInfo}${gateRuns}`;
        }),
        ``,
        verdict,
      ];
      ctx.ui.setWidget("plan-executor", summaryLines);
    },
  });
}
