Remove failed plan executor

This commit is contained in:
Willem van den Ende 2026-05-05 15:44:01 +01:00
parent a181c0e814
commit a5acf21395
2 changed files with 0 additions and 728 deletions

View File

@ -1,104 +0,0 @@
# Plan Executor
Execute multi-phase plan files deterministically, one phase at a time, in isolated contexts.
## Concept
Instead of dumping an entire plan into one conversation (where context bloats and the agent loses focus), this extension:
1. **Parses** your plan file into numbered phases
2. **Executes** each phase in a **clean, isolated** `pi` subprocess (fresh context window)
3. **Runs the quality gate** (default: `make precommit`) after each phase
4. **Auto-fixes** any gate failures in another clean context (up to 3 attempts)
5. **Only proceeds** to the next phase when the gate passes
This gives you deterministic, phase-by-phase execution with automatic quality gates.
## Usage
```bash
# Execute entire plan (default gate: make precommit)
/execute-plan plans/fix-readme-issues.md
# Start from a specific phase
/execute-plan plans/fix-readme-issues.md --phase 2
# Use a custom gate command
/execute-plan plans/fix-readme-issues.md --gate "mix test && mix credo --strict"
# Dry run — show detected phases without executing
/execute-plan plans/fix-readme-issues.md --dry-run
```
## Plan File Format
Phases are detected by numbered headings:
```markdown
## 1. Fix test failure
Remove the fragile describe block...
Verify: `mix test`
---
## 2. Fix credo issues
Fix all 41 credo issues...
Verify: `mix credo --strict`
```
Each phase's content includes everything between its heading and the next phase heading (or a non-phase section like "Execution order" or "Risk assessment").
## Execution Flow
```
┌─────────────────────┐
│ Parse plan file │
│ extract phases │
└────────┬────────────┘
┌─────────────────────┐
│ For each phase: │
│ │
│ 1. Spawn clean pi │──► Agent executes phase
│ subprocess │ in isolated context
│ │
│ 2. Agent says DONE │──► Phase changes applied
│ │
│ 3. Run quality │──► Quality gate
│ gate command │
│ │
│ 4a. Gate passes? │──✅ Yes → Next phase
│ │
│ 4b. Gate fails? │──❌ No → Spawn fix agent
│ (up to 3x) │ in another clean context
│ │
│ Fix agent runs │──► Re-check gate
│ & re-verify │
└─────────────────────┘
```
## Widget
While running, a widget in the TUI shows:
- Phase status (⏳ pending, 🔄 running, ✅ done, ❌ failed)
- Turn count and token usage per phase
- Gate attempt count
- Final summary with total cost
## Configuration
The extension uses your current pi model and tool configuration for spawned agents. Each phase runs with:
- Full tool access (bash, read, write, edit)
- Your default model
- A custom system prompt scoped to the phase
## Safety
- Each phase runs in a separate `pi` process (no shared state)
- `Ctrl+C` aborts the current agent and cleans up
- Quality gate prevents broken state from propagating between phases
- Max 3 fix attempts per phase before failing

View File

@ -1,624 +0,0 @@
/**
* Plan Executor - Execute multi-phase plans deterministically
*
* Reads a plan file with numbered phases and executes each phase
* in a clean, isolated pi process. After each phase, runs the
* quality gate and auto-fixes any regressions before proceeding.
*
* Usage:
* /execute-plan plans/fix-readme-issues.md
* /execute-plan plans/fix-readme-issues.md --phase 2
* /execute-plan plans/fix-readme-issues.md --gate "mix test"
* /execute-plan plans/fix-readme-issues.md --dry-run
*/
import { spawn } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import type { Message } from "@mariozechner/pi-ai";
import {
type ExtensionAPI,
getMarkdownTheme,
withFileMutationQueue,
} from "@mariozechner/pi-coding-agent";
import { Container, Markdown, Spacer, Text } from "@mariozechner/pi-tui";
// ─── Phase Executor System Prompt ────────────────────────────────────────────
/**
 * System prompt appended to the `pi` subprocess that executes a single phase.
 *
 * It asks the agent to finish with a line starting with `DONE:`; the command
 * handler checks the agent's final text output for that marker (via `isDone`)
 * before running the quality gate.
 */
const PHASE_SYSTEM_PROMPT = `You are executing a single phase of a larger plan. Your job is to complete ONLY this phase.
## Instructions
1. Read the phase description carefully
2. Make all the changes described in the phase
3. Run the verification command(s) listed in the phase
4. When ALL changes are done and verification passes, respond with:
DONE: <brief summary of what was done>
Do NOT proceed to any other phases. Do NOT make changes outside this phase's scope.
If verification fails, diagnose and fix the issue within this phase's scope, then re-verify.
## Important
- If the phase says "remove lines X-Y", do exactly that
- If the phase says "run mix format", do that
- Always verify before declaring DONE
- If you cannot complete the phase, explain what is blocking you instead of saying DONE`;
// ─── Gate Fix System Prompt ──────────────────────────────────────────────────
/**
 * System prompt template for the agent that repairs quality-gate failures.
 *
 * `{GATE_COMMAND}` is a placeholder: the command handler substitutes the
 * actual gate command into it before passing the prompt to `runAgent`.
 */
const GATE_FIX_SYSTEM_PROMPT = `You are fixing issues found by the quality gate. The following command failed:
{GATE_COMMAND}
Here is the output showing what failed. Fix ALL reported issues, then verify by running the check command(s) mentioned in the output.
When ALL issues are fixed and verification passes, respond with:
DONE: <brief summary of fixes>
If verification still fails after your fixes, diagnose and fix remaining issues. Keep iterating until clean.`;
// ─── Types ───────────────────────────────────────────────────────────────────
/** One numbered phase extracted from a plan file by `parsePhases`. */
interface Phase {
// Number taken from the phase heading (the digits before the dot).
number: number;
// Heading text after the number, trimmed.
title: string;
// Body lines collected under the heading, each terminated by "\n".
content: string;
// NOTE(review): declared but never assigned by `parsePhases` in this file — confirm whether it is still needed.
verifyCommand?: string;
}
/** Progress and outcome record for one phase, rendered in the TUI widget and the final summary. */
interface PhaseResult {
// Phase number and title, copied from the parsed Phase.
phase: number;
title: string;
// Lifecycle state; "fixing" means a gate-fix agent is running for this phase.
status: "pending" | "running" | "done" | "failed" | "fixing";
// Final text output of the phase agent.
agentOutput?: string;
// All messages collected from the phase agent run.
agentMessages?: Message[];
// Number of times the gate command has been run for this phase (initial run plus re-runs after fixes).
gateAttempts: number;
// stdout + stderr of the most recent gate run.
gateOutput?: string;
// Final text output of the most recent fix agent.
fixOutput?: string;
// Usage figures: set from the phase agent run, then increased by each fix agent run.
turns?: number;
tokensIn?: number;
tokensOut?: number;
cost?: number;
// Model reported by the phase agent run.
model?: string;
// Human-readable reason when status is "failed".
error?: string;
}
// ─── Plan Parsing ────────────────────────────────────────────────────────────
/**
 * Split a plan document into its numbered phases.
 *
 * A phase starts at a line of the form `## 1. Title` or `1. Title`. Every
 * following line is appended to that phase's `content` until the next phase
 * header or until a non-numbered Markdown heading (e.g. `## Execution order`,
 * `## Risk assessment`) closes the phase. Text after such a heading is
 * ignored until the next phase header.
 *
 * Behaviour worth knowing:
 * - Both LF and CRLF line endings are accepted; collected content always uses "\n".
 * - A `---` separator directly after a phase header (before any content) is dropped.
 * - A heading met while the phase has no content yet is skipped without closing the phase.
 * - A trailing phase whose content is blank is dropped, whereas a blank phase
 *   that is followed by another phase header is kept.
 * - Any line matching `N. text` is treated as a phase header, including
 *   numbered list items inside a phase body.
 *
 * @param planContent Raw text of the plan file.
 * @returns Phases in document order.
 */
function parsePhases(planContent: string): Phase[] {
  // Phase headers like "## 1. Fix test failure" or "1. Fix test failure".
  const phaseHeader = /^(?:##\s*)?(\d+)\.\s+(.+)$/;
  // Any Markdown heading whose text does not start with '#' or a digit.
  const sectionHeader = /^#{1,6}\s+[^#\d]/;
  const separator = /^---+$/;

  const phases: Phase[] = [];
  let currentPhase: Phase | null = null;

  // Split on \r?\n: with a plain "\n" split, CRLF files keep a trailing "\r"
  // on every line, which `.` cannot match, so no header would ever be found.
  for (const line of planContent.split(/\r?\n/)) {
    const match = phaseHeader.exec(line);
    if (match) {
      if (currentPhase) {
        phases.push(currentPhase);
      }
      currentPhase = {
        number: parseInt(match[1], 10),
        title: match[2].trim(),
        content: "",
      };
      continue;
    }

    if (!currentPhase) {
      continue;
    }

    if (sectionHeader.test(line)) {
      // A non-phase heading ends the current phase once it has content.
      if (currentPhase.content.trim()) {
        phases.push(currentPhase);
        currentPhase = null;
      }
      continue;
    }

    // Leading separators are dropped; separators inside the body are kept.
    if (!separator.test(line.trim()) || currentPhase.content.length > 0) {
      currentPhase.content += line + "\n";
    }
  }

  if (currentPhase && currentPhase.content.trim()) {
    phases.push(currentPhase);
  }
  return phases;
}
// ─── Process Helpers ─────────────────────────────────────────────────────────
/**
 * Work out how to launch a child `pi` process.
 *
 * When the current entry script (`process.argv[1]`) is a real file on disk,
 * the child is started with the same runtime binary and script. Otherwise
 * (no script, a Bun virtual path under `/$bunfs/root/`, or a path that does
 * not exist) the `pi` executable is used and resolved by the OS.
 *
 * @param args Arguments to pass to pi.
 * @returns The command and argument list to hand to `spawn`.
 */
function getPiInvocation(args: string[]): { command: string; args: string[] } {
  const viaPath = { command: "pi", args };
  const entryScript = process.argv[1];

  if (!entryScript) return viaPath;
  if (entryScript.startsWith("/$bunfs/root/")) return viaPath;
  if (!fs.existsSync(entryScript)) return viaPath;

  return { command: process.execPath, args: [entryScript, ...args] };
}
/**
 * Persist a prompt to a freshly created temporary directory.
 *
 * The file is named `prompt-<name>.md`, where any run of characters in
 * `name` other than word characters, dots and dashes is replaced by `_`.
 * It is written as UTF-8 with mode 0o600 through `withFileMutationQueue`.
 *
 * @param name Label used to build the file name.
 * @param prompt Text to write.
 * @returns The temp directory (for later cleanup) and the prompt file path.
 */
async function writePromptToTempFile(
  name: string,
  prompt: string,
): Promise<{ dir: string; filePath: string }> {
  const dir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-plan-exec-"));
  const fileName = `prompt-${name.replace(/[^\w.-]+/g, "_")}.md`;
  const filePath = path.join(dir, fileName);

  const writePrompt = () =>
    fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
  await withFileMutationQueue(filePath, writePrompt);

  return { dir, filePath };
}
/**
 * Best-effort recursive removal of a temporary directory.
 *
 * Does nothing for a null/empty path, and deliberately swallows any removal
 * error so that cleanup can never fail the caller.
 *
 * @param dir Directory to remove, or null when there is nothing to clean up.
 */
async function cleanupTemp(dir: string | null): Promise<void> {
  if (!dir) return;
  await fs.promises.rm(dir, { recursive: true, force: true }).catch(() => {
    /* ignore */
  });
}
/** Everything `runAgent` collects from one `pi` subprocess run. */
interface AgentRunResult {
// Exit status reported for the subprocess (1 when it could not be spawned).
exitCode: number;
// Messages taken from `message_end` and `tool_result_end` events, in arrival order.
messages: Message[];
// Everything the subprocess wrote to stderr.
stderr: string;
// Number of assistant messages received.
turns: number;
// Sums of the per-message usage figures (input tokens, output tokens, total cost).
tokensIn: number;
tokensOut: number;
cost: number;
// Model reported by the first assistant message that names one.
model?: string;
// Stop reason / error message as last reported by an assistant message.
stopReason?: string;
errorMessage?: string;
}
/**
 * Return the text of the most recent assistant message that has a text part.
 *
 * Messages are scanned newest-first; within a message the first part of type
 * "text" wins. Assistant messages without any text part are skipped.
 *
 * @param messages Conversation messages in chronological order.
 * @returns The text found, or "" when no assistant text exists.
 */
function getFinalOutput(messages: Message[]): string {
  const newestFirst = messages.slice().reverse();
  for (const candidate of newestFirst) {
    if (candidate.role !== "assistant") continue;
    for (const part of candidate.content) {
      if (part.type === "text") {
        return part.text;
      }
    }
  }
  return "";
}
/**
 * Tell whether agent output contains the completion marker: any line that
 * begins with `DONE:` (case-insensitive).
 *
 * @param output Final text output of an agent.
 */
function isDone(output: string): boolean {
  const completionMarker = /^DONE:\s*/im;
  return output.search(completionMarker) !== -1;
}
/**
 * Run one `pi` subprocess in JSON mode and collect its event stream.
 *
 * The system prompt is written to a temporary file and passed with
 * `--append-system-prompt`; the temp directory is removed when the run ends.
 * stdout is read as newline-delimited JSON: `message_end` events contribute
 * messages and usage figures, `tool_result_end` events contribute messages.
 * Lines that are not valid JSON objects are ignored.
 *
 * The returned promise never rejects because of the subprocess: spawn
 * failures and non-zero exits are reported through `exitCode`,
 * `errorMessage` and `stderr`. It can still reject if the prompt file cannot
 * be written.
 *
 * @param cwd Working directory for the subprocess.
 * @param systemPrompt Prompt appended to the agent's system prompt.
 * @param task User task passed as the final CLI argument.
 * @param signal When aborted, the subprocess gets SIGTERM, then SIGKILL if it
 *   is still running five seconds later.
 * @param onUpdate Called after every assistant message with the latest text
 *   output and the turn count so far.
 * @returns Messages, usage totals and exit information for the run.
 */
async function runAgent(
  cwd: string,
  systemPrompt: string,
  task: string,
  signal?: AbortSignal,
  onUpdate?: (partial: { output: string; turns: number }) => void,
): Promise<AgentRunResult> {
  const args: string[] = ["--mode", "json", "-p", "--no-session"];

  // Write system prompt to temp file
  const { dir: promptDir, filePath: promptPath } = await writePromptToTempFile(
    "phase-executor",
    systemPrompt,
  );
  args.push("--append-system-prompt", promptPath);
  args.push(task);

  const result: AgentRunResult = {
    exitCode: 0,
    messages: [],
    stderr: "",
    turns: 0,
    tokensIn: 0,
    tokensOut: 0,
    cost: 0,
  };

  try {
    return await new Promise<AgentRunResult>((resolve) => {
      const invocation = getPiInvocation(args);
      const proc = spawn(invocation.command, invocation.args, {
        cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
      });

      // Decode on the stream so a multi-byte character that straddles two
      // chunks is not turned into replacement characters.
      proc.stdout.setEncoding("utf8");
      proc.stderr.setEncoding("utf8");

      let forceKillTimer: ReturnType<typeof setTimeout> | undefined;
      const killProc = () => {
        proc.kill("SIGTERM");
        if (forceKillTimer) return;
        forceKillTimer = setTimeout(() => {
          // `proc.killed` only says a signal was sent, not that the process
          // ended, so check the exit state to decide whether to escalate.
          if (proc.exitCode === null && proc.signalCode === null) {
            proc.kill("SIGKILL");
          }
        }, 5000);
      };

      // Release the timer and abort listener so nothing outlives the run.
      const finish = () => {
        if (forceKillTimer) clearTimeout(forceKillTimer);
        signal?.removeEventListener("abort", killProc);
        resolve(result);
      };

      let buffer = "";
      const processLine = (line: string) => {
        if (!line.trim()) return;
        let event: any;
        try {
          event = JSON.parse(line);
        } catch {
          return;
        }
        // Valid JSON that is not an object (null, numbers, strings) is not an event.
        if (typeof event !== "object" || event === null) return;

        if (event.type === "message_end" && event.message) {
          const msg = event.message as Message;
          result.messages.push(msg);
          if (msg.role === "assistant") {
            result.turns++;
            const usage = msg.usage;
            if (usage) {
              result.tokensIn += usage.input || 0;
              result.tokensOut += usage.output || 0;
              result.cost += usage.cost?.total || 0;
            }
            if (!result.model && msg.model) result.model = msg.model;
            if (msg.stopReason) result.stopReason = msg.stopReason;
            if (msg.errorMessage) result.errorMessage = msg.errorMessage;
            // Stream progress
            const output = getFinalOutput(result.messages);
            onUpdate?.({ output, turns: result.turns });
          }
        }
        if (event.type === "tool_result_end" && event.message) {
          result.messages.push(event.message as Message);
        }
      };

      proc.stdout.on("data", (chunk: string) => {
        buffer += chunk;
        const lines = buffer.split("\n");
        // The last element is an incomplete line; keep it for the next chunk.
        buffer = lines.pop() || "";
        for (const line of lines) processLine(line);
      });
      proc.stderr.on("data", (chunk: string) => {
        result.stderr += chunk;
      });

      proc.on("close", (code) => {
        if (buffer.trim()) processLine(buffer);
        // `code` is null when the process was ended by a signal; that is not a success.
        result.exitCode = code ?? 1;
        finish();
      });
      proc.on("error", (err) => {
        result.exitCode = 1;
        if (!result.errorMessage) result.errorMessage = err.message;
        finish();
      });

      if (signal) {
        if (signal.aborted) killProc();
        else signal.addEventListener("abort", killProc, { once: true });
      }
    });
  } finally {
    await cleanupTemp(promptDir);
  }
}
/**
 * Run a shell command and capture its output.
 *
 * The command string is executed through the system shell. The promise never
 * rejects: failures are reported through `exitCode` (1 when the process could
 * not be started or was ended by a signal).
 *
 * @param cwd Working directory for the command.
 * @param command Shell command line to run.
 * @param signal When aborted, the spawned shell process is sent SIGTERM.
 * @returns Exit code plus everything written to stdout and stderr.
 */
async function runCommand(
  cwd: string,
  command: string,
  signal?: AbortSignal,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  return await new Promise((resolve) => {
    const proc = spawn(command, {
      cwd,
      shell: true,
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Decode on the stream so a multi-byte character that straddles two
    // chunks is not turned into replacement characters.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";

    const killProc = () => proc.kill("SIGTERM");
    // Detach the abort listener once the command is over.
    const finish = (outcome: { exitCode: number; stdout: string; stderr: string }) => {
      signal?.removeEventListener("abort", killProc);
      resolve(outcome);
    };

    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    proc.on("close", (code) => {
      finish({ exitCode: code ?? 1, stdout, stderr });
    });
    proc.on("error", (err) => {
      finish({ exitCode: 1, stdout, stderr: `Command failed to start: ${err.message}` });
    });

    if (signal) {
      if (signal.aborted) killProc();
      else signal.addEventListener("abort", killProc, { once: true });
    }
  });
}
// ─── Format Helpers ──────────────────────────────────────────────────────────
/**
 * Render a token count compactly: the plain number below 1000, one decimal
 * of thousands (e.g. "1.5k") below 10000, and whole thousands (e.g. "13k")
 * from there on.
 *
 * @param count Number of tokens.
 */
function formatTokens(count: number): string {
  const thousands = count / 1000;
  let label: string;
  if (count < 1000) {
    label = count.toString();
  } else if (count < 10000) {
    label = `${thousands.toFixed(1)}k`;
  } else {
    label = `${Math.round(thousands)}k`;
  }
  return label;
}
/**
 * Build a one-line usage summary for a phase: turn count, input tokens,
 * output tokens, cost and model, separated by single spaces. Fields that are
 * missing or zero are left out.
 *
 * @param result Phase record to summarise.
 */
function formatUsage(result: PhaseResult): string {
  const { turns, tokensIn, tokensOut, cost, model } = result;
  const segments: Array<string | undefined> = [
    turns ? `${turns} turns` : undefined,
    tokensIn ? `${formatTokens(tokensIn)}` : undefined,
    tokensOut ? `${formatTokens(tokensOut)}` : undefined,
    cost ? `$${cost.toFixed(4)}` : undefined,
    model ? model : undefined,
  ];
  return segments.filter((segment): segment is string => segment !== undefined).join(" ");
}
// ─── Extension ───────────────────────────────────────────────────────────────
/** Flags understood by /execute-plan; also marks where an unquoted --gate command ends. */
const EXECUTE_PLAN_FLAGS = new Set(["--phase", "--gate", "--dry-run"]);

/** One whitespace- or quote-delimited word of the command line. */
interface ExecutePlanArgToken {
  /** Text with surrounding quotes removed. */
  value: string;
  /** Text exactly as typed, including any quotes. */
  raw: string;
  /** True when the word was wrapped in single or double quotes. */
  quoted: boolean;
}

/** Parsed /execute-plan options; `error` is set when an option value is unusable. */
interface ExecutePlanArgs {
  planFile?: string;
  startPhase: number;
  gateCommand: string;
  dryRun: boolean;
  error?: string;
}

/** Split a command line into words, keeping quoted strings together. */
function tokenizeExecutePlanArgs(input: string): ExecutePlanArgToken[] {
  const tokens: ExecutePlanArgToken[] = [];
  const pattern = /"([^"]*)"|'([^']*)'|(\S+)/g;
  let match: RegExpExecArray | null;
  while ((match = pattern.exec(input)) !== null) {
    const quotedValue = match[1] ?? match[2];
    if (quotedValue !== undefined) {
      tokens.push({ value: quotedValue, raw: match[0], quoted: true });
    } else {
      tokens.push({ value: match[3], raw: match[0], quoted: false });
    }
  }
  return tokens;
}

/** Interpret `<plan-file> [--phase N] [--gate CMD] [--dry-run]` in any flag order. */
function parseExecutePlanArgs(input: string): ExecutePlanArgs {
  const tokens = tokenizeExecutePlanArgs(input);
  const parsed: ExecutePlanArgs = {
    startPhase: 1,
    gateCommand: "make precommit",
    dryRun: false,
  };
  const isFlag = (token: ExecutePlanArgToken | undefined) =>
    token !== undefined && !token.quoted && EXECUTE_PLAN_FLAGS.has(token.value);

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    if (!isFlag(token)) {
      // First positional word is the plan file; further ones are ignored.
      if (parsed.planFile === undefined) parsed.planFile = token.value;
      continue;
    }
    if (token.value === "--dry-run") {
      parsed.dryRun = true;
    } else if (token.value === "--phase") {
      const value = isFlag(tokens[i + 1]) ? "" : (tokens[++i]?.value ?? "");
      if (/^\d+$/.test(value)) {
        parsed.startPhase = parseInt(value, 10);
      } else {
        parsed.error = `--phase expects a phase number, got "${value}"`;
      }
    } else {
      // --gate: a quoted word is the whole command; otherwise take every
      // word up to the next recognised flag so `--gate mix test` works too.
      const next = tokens[i + 1];
      if (next?.quoted) {
        parsed.gateCommand = next.value;
        i++;
      } else {
        const words: string[] = [];
        while (i + 1 < tokens.length && !isFlag(tokens[i + 1])) {
          words.push(tokens[++i].raw);
        }
        if (words.length > 0) parsed.gateCommand = words.join(" ");
        else parsed.error = "--gate expects a command";
      }
    }
  }
  return parsed;
}

/** Icon shown for a phase status in the widget and the final summary. */
function phaseStatusIcon(status: PhaseResult["status"]): string {
  switch (status) {
    case "done":
      return "✅";
    case "failed":
      return "❌";
    case "running":
    case "fixing":
      return "🔄";
    default:
      return "⏳";
  }
}

/**
 * Extension entry point: registers the `/execute-plan` command.
 *
 * The command parses a plan file into phases and, for each phase at or after
 * `--phase` (default 1):
 *   1. runs the phase in a fresh `pi` subprocess and requires a `DONE:` reply,
 *   2. runs the gate command (`--gate`, default `make precommit`),
 *   3. on gate failure, runs up to three fix agents, re-running the gate after each.
 * Execution stops at the first phase that fails, is left incomplete, or is
 * aborted, so later phases never run on top of a broken state. Progress is
 * shown in the "plan-executor" widget and status line; `--dry-run` only lists
 * the detected phases.
 *
 * @param pi Extension API used to register the command.
 */
export default function (pi: ExtensionAPI) {
  pi.registerCommand("execute-plan", {
    description: "Execute a multi-phase plan file deterministically",
    handler: async (args, ctx) => {
      if (!ctx.hasUI) {
        ctx.ui.notify("execute-plan requires interactive mode", "error");
        return;
      }

      // Parse arguments
      const {
        planFile,
        startPhase,
        gateCommand,
        dryRun,
        error: argError,
      } = parseExecutePlanArgs(args);
      if (!planFile) {
        ctx.ui.notify("Usage: /execute-plan <plan-file> [--phase N] [--gate CMD] [--dry-run]", "error");
        return;
      }
      if (argError) {
        ctx.ui.notify(argError, "error");
        return;
      }

      // Resolve path
      const resolvedPath = path.isAbsolute(planFile)
        ? planFile
        : path.join(ctx.cwd, planFile);
      if (!fs.existsSync(resolvedPath)) {
        ctx.ui.notify(`Plan file not found: ${resolvedPath}`, "error");
        return;
      }
      const planContent = fs.readFileSync(resolvedPath, "utf-8");
      const phases = parsePhases(planContent);
      if (phases.length === 0) {
        ctx.ui.notify("No phases found in plan file", "error");
        return;
      }

      // Dry run: show plan and exit
      if (dryRun) {
        const lines = [
          `## Plan: ${path.basename(resolvedPath)}`,
          ``,
          `Gate command: ${gateCommand}`,
          `${phases.length} phase(s) detected:`,
          ``,
          ...phases.map((p) => `### Phase ${p.number}: ${p.title}`),
          ``,
          ...phases.map((p) => p.content.trim()),
        ];
        ctx.ui.setWidget("plan-executor", lines);
        return;
      }

      if (!phases.some((p) => p.number >= startPhase)) {
        ctx.ui.notify(`No phase numbered ${startPhase} or higher in plan file`, "error");
        return;
      }

      // Initialize results (results[i] always belongs to phases[i])
      const results: PhaseResult[] = phases.map((p) => ({
        phase: p.number,
        title: p.title,
        status: "pending",
        gateAttempts: 0,
      }));

      const updateUI = () => {
        const lines = [
          `## Plan Executor: ${path.basename(resolvedPath)}`,
          ``,
          ...results.map((r) => {
            const usage = r.turns ? ` (${formatUsage(r)})` : "";
            const gateInfo = r.gateAttempts > 0 ? ` [gate: ${r.gateAttempts}]` : "";
            return `${phaseStatusIcon(r.status)} Phase ${r.phase}: ${r.title}${usage}${gateInfo}`;
          }),
          ``,
        ];
        ctx.ui.setWidget("plan-executor", lines);
      };
      const isAborted = () => ctx.signal?.aborted === true;
      const describeError = (err: unknown) => (err instanceof Error ? err.message : String(err));
      const MAX_FIX_ATTEMPTS = 3;

      // Show initial state
      updateUI();

      // Execute phases sequentially
      for (const [index, phase] of phases.entries()) {
        if (phase.number < startPhase) continue;
        if (isAborted()) break;

        // Index by position, not by phase number: plans need not be numbered 1..n.
        const result = results[index];
        result.status = "running";
        updateUI();
        ctx.ui.setStatus("plan-executor", `Executing Phase ${phase.number}: ${phase.title}`);

        // Build task prompt from phase content
        const task = `## Phase ${phase.number}: ${phase.title}\n\n${phase.content.trim()}`;

        // Run phase in isolated context
        let agentResult: AgentRunResult;
        try {
          agentResult = await runAgent(
            ctx.cwd,
            PHASE_SYSTEM_PROMPT,
            task,
            ctx.signal,
            ({ output, turns }) => {
              result.turns = turns;
              const status = isDone(output) ? "✅" : "🔄";
              ctx.ui.setStatus(
                "plan-executor",
                `${status} Phase ${phase.number} (${turns} turns): ${phase.title}`,
              );
            },
          );
        } catch (err: unknown) {
          const message = describeError(err);
          result.status = "failed";
          result.error = message;
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} failed: ${message}`, "error");
          break;
        }

        result.agentOutput = getFinalOutput(agentResult.messages);
        result.agentMessages = agentResult.messages;
        result.turns = agentResult.turns;
        result.tokensIn = agentResult.tokensIn;
        result.tokensOut = agentResult.tokensOut;
        result.cost = agentResult.cost;
        result.model = agentResult.model;

        if (isAborted()) {
          result.status = "failed";
          result.error = "Aborted";
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} aborted`, "warning");
          break;
        }

        // Check if agent said DONE
        if (!isDone(result.agentOutput ?? "")) {
          result.status = "failed";
          result.error = agentResult.errorMessage
            ? `Agent did not signal completion: ${agentResult.errorMessage}`
            : "Agent did not signal completion (no DONE: message)";
          updateUI();
          ctx.ui.notify(
            `Phase ${phase.number} incomplete: agent did not signal DONE`,
            "warning",
          );
          // Stop here: later phases must not run on top of an unfinished one.
          break;
        }

        // Run gate
        ctx.ui.setStatus("plan-executor", `Phase ${phase.number} done, running gate: ${gateCommand}...`);
        let gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
        result.gateAttempts++;
        result.gateOutput = gateResult.stdout + gateResult.stderr;

        // If gate fails, fix in a loop
        let fixAttempt = 0;
        let fixError: string | undefined;
        while (gateResult.exitCode !== 0 && fixAttempt < MAX_FIX_ATTEMPTS && !isAborted()) {
          fixAttempt++;
          result.status = "fixing";
          updateUI();
          ctx.ui.setStatus(
            "plan-executor",
            `Fixing gate issues (attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS})...`,
          );
          const fixTask = `## Gate Failed\n\nThe following command failed after completing Phase ${phase.number}:\n\n ${gateCommand}\n\nOutput:\n\n\`\`\`\n${gateResult.stdout}\n${gateResult.stderr}\n\`\`\`\n\nFix ALL reported issues.`;
          // Function replacer: a plain string would have "$&", "$1", "$$" in
          // the gate command interpreted as replacement patterns.
          const fixSystemPrompt = GATE_FIX_SYSTEM_PROMPT.replace("{GATE_COMMAND}", () => gateCommand);

          let fixResult: AgentRunResult;
          try {
            fixResult = await runAgent(
              ctx.cwd,
              fixSystemPrompt,
              fixTask,
              ctx.signal,
              ({ turns }) => {
                ctx.ui.setStatus(
                  "plan-executor",
                  `Fixing gate (${fixAttempt}/${MAX_FIX_ATTEMPTS}, ${turns} turns)...`,
                );
              },
            );
          } catch (err: unknown) {
            fixError = describeError(err);
            break;
          }

          result.fixOutput = getFinalOutput(fixResult.messages);
          result.turns = (result.turns ?? 0) + fixResult.turns;
          result.tokensIn = (result.tokensIn ?? 0) + fixResult.tokensIn;
          result.tokensOut = (result.tokensOut ?? 0) + fixResult.tokensOut;
          result.cost = (result.cost ?? 0) + fixResult.cost;

          // Re-run gate
          gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
          result.gateAttempts++;
          result.gateOutput = gateResult.stdout + gateResult.stderr;
        }

        if (gateResult.exitCode !== 0) {
          let failure = `gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`;
          if (isAborted()) failure = "aborted";
          else if (fixError !== undefined) failure = `fix agent could not run: ${fixError}`;

          result.status = "failed";
          result.error = failure.charAt(0).toUpperCase() + failure.slice(1);
          updateUI();
          ctx.ui.notify(`Phase ${phase.number} blocked: ${failure}`, "error");
          break;
        }

        // Phase complete
        result.status = "done";
        updateUI();
        ctx.ui.notify(`Phase ${phase.number} complete!`, "success");
      }

      // Final summary
      const completed = results.filter((r) => r.status === "done").length;
      const failed = results.filter((r) => r.status === "failed").length;
      const notRun = results.length - completed - failed;
      const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
      const totalTurns = results.reduce((sum, r) => sum + (r.turns ?? 0), 0);
      ctx.ui.setStatus(
        "plan-executor",
        `Done: ${completed}/${results.length} phases complete, ${failed} failed, $${totalCost.toFixed(4)}`,
      );

      let verdict = `## All phases passed ✅`;
      if (failed > 0) verdict = `## Failed phases need manual attention`;
      else if (notRun > 0) verdict = `## No failures, but ${notRun} phase(s) were not run`;

      // Show final summary widget
      const summaryLines = [
        `## Plan Execution Complete`,
        ``,
        `**Phases:** ${completed}/${results.length} complete${failed > 0 ? `, ${failed} failed` : ""}`,
        `**Total turns:** ${totalTurns}`,
        `**Total cost:** $${totalCost.toFixed(4)}`,
        ``,
        ...results.map((r) => {
          const usage = formatUsage(r);
          const usageInfo = usage ? ` (${usage})` : "";
          const gateRuns = r.gateAttempts > 1 ? ` (gate: ${r.gateAttempts} runs)` : "";
          return `${phaseStatusIcon(r.status)} Phase ${r.phase}: ${r.title}${usageInfo}${gateRuns}`;
        }),
        ``,
        verdict,
      ];
      ctx.ui.setWidget("plan-executor", summaryLines);
    },
  });
}