DEADEND: a work-the-plan agent that is too complicated

Kept for reference — dead end.
2026-05-05 14:43:44 +01:00 · 2026-05-05 14:43:44 +01:00 · 8cc1524fec
commit 8cc1524fec
parent 7be68af01e
2 changed files with 728 additions and 0 deletions
--- a/.pi/extensions/plan-executor/README.md
+++ b/.pi/extensions/plan-executor/README.md
@ -0,0 +1,104 @@
+# Plan Executor
+
+Execute multi-phase plan files deterministically, one phase at a time, in isolated contexts.
+
+## Concept
+
+Instead of dumping an entire plan into one conversation (where context bloats and the agent loses focus), this extension:
+
+1. **Parses** your plan file into numbered phases
+2. **Executes** each phase in a **clean, isolated** `pi` subprocess (fresh context window)
+3. **Runs the quality gate** (default: `make precommit`) after each phase
+4. **Auto-fixes** any gate failures in another clean context (up to 3 attempts)
+5. **Only proceeds** to the next phase when the gate passes
+
+This gives you deterministic, phase-by-phase execution with automatic quality gates.
+
+## Usage
+
+```bash
+# Execute entire plan (default gate: make precommit)
+/execute-plan plans/fix-readme-issues.md
+
+# Start from a specific phase
+/execute-plan plans/fix-readme-issues.md --phase 2
+
+# Use a custom gate command
+/execute-plan plans/fix-readme-issues.md --gate "mix test && mix credo --strict"
+
+# Dry run — show detected phases without executing
+/execute-plan plans/fix-readme-issues.md --dry-run
+```
+
+## Plan File Format
+
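+Phases are detected by numbered headings (`## 1. Title`). A bare numbered line at the start of a line (`1. Title`) is also treated as a phase heading, so avoid top-level numbered lists inside a phase body: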
+
+```markdown
+## 1. Fix test failure
+
+Remove the fragile describe block...
+
+Verify: `mix test`
+
+---
+
+## 2. Fix credo issues
+
+Fix all 41 credo issues...
+
+Verify: `mix credo --strict`
+```
+
+Each phase's content includes everything between its heading and the next phase heading (or a non-phase section like "Execution order" or "Risk assessment").
+
+## Execution Flow
+
+```
+┌─────────────────────┐
+│ Parse plan file     │
+│ extract phases      │
+└────────┬────────────┘
+         ▼
+┌─────────────────────┐
+│ For each phase:     │
+│                     │
+│  1. Spawn clean pi  │──► Agent executes phase
+│     subprocess      │    in isolated context
+│                     │
+│  2. Agent says DONE │──► Phase changes applied
+│                     │
+│  3. Run quality     │──► Quality gate
+│     gate command    │
+│                     │
+│  4a. Gate passes?   │──✅ Yes → Next phase
+│                     │
+│  4b. Gate fails?    │──❌ No → Spawn fix agent
+│     (up to 3x)      │    in another clean context
+│                     │
+│     Fix agent runs  │──► Re-check gate
+│     & re-verify     │
+└─────────────────────┘
+```
+
+## Widget
+
+While running, a widget in the TUI shows:
+- Phase status (⏳ pending, 🔄 running, ✅ done, ❌ failed)
+- Turn count and token usage per phase
+- Gate attempt count
+- Final summary with total cost
+
+## Configuration
+
+The extension uses your current pi model and tool configuration for spawned agents. Each phase runs with:
+- Full tool access (bash, read, write, edit)
+- Your default model
+- A custom system prompt scoped to the phase
+
+## Safety
+
+- Each phase runs in a separate `pi` process (no shared state)
+- `Ctrl+C` aborts the current agent and cleans up
+- Quality gate prevents broken state from propagating between phases
+- Max 3 fix attempts per phase before failing
--- a/.pi/extensions/plan-executor/index.ts
+++ b/.pi/extensions/plan-executor/index.ts
@ -0,0 +1,624 @@
+/**
+ * Plan Executor - Execute multi-phase plans deterministically
+ *
+ * Reads a plan file with numbered phases and executes each phase
+ * in a clean, isolated pi process. After each phase, runs the
+ * quality gate and auto-fixes any regressions before proceeding.
+ *
+ * Usage:
+ *   /execute-plan plans/fix-readme-issues.md
+ *   /execute-plan plans/fix-readme-issues.md --phase 2
+ *   /execute-plan plans/fix-readme-issues.md --gate "mix test"
+ *   /execute-plan plans/fix-readme-issues.md --dry-run
+ */
+
+import { spawn } from "node:child_process";
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import type { Message } from "@mariozechner/pi-ai";
+import {
+  type ExtensionAPI,
+  getMarkdownTheme,
+  withFileMutationQueue,
+} from "@mariozechner/pi-coding-agent";
+import { Container, Markdown, Spacer, Text } from "@mariozechner/pi-tui";
+
+// ─── Phase Executor System Prompt ────────────────────────────────────────────
+
+/**
+ * System prompt for the agent that executes one plan phase.
+ *
+ * It is passed to `runAgent` as the system prompt for every phase. The
+ * `DONE: <summary>` line it requests is the completion marker that `isDone`
+ * searches for in the agent's final output.
+ */
+const PHASE_SYSTEM_PROMPT = `You are executing a single phase of a larger plan. Your job is to complete ONLY this phase.
+
+## Instructions
+1. Read the phase description carefully
+2. Make all the changes described in the phase
+3. Run the verification command(s) listed in the phase
+4. When ALL changes are done and verification passes, respond with:
+
+DONE: <brief summary of what was done>
+
+Do NOT proceed to any other phases. Do NOT make changes outside this phase's scope.
+If verification fails, diagnose and fix the issue within this phase's scope, then re-verify.
+
+## Important
+- If the phase says "remove lines X-Y", do exactly that
+- If the phase says "run mix format", do that
+- Always verify before declaring DONE
+- If you cannot complete the phase, explain what is blocking you instead of saying DONE`;
+
+// ─── Gate Fix System Prompt ──────────────────────────────────────────────────
+
+/**
+ * System prompt template for the agent that repairs quality-gate failures.
+ *
+ * The literal `{GATE_COMMAND}` placeholder is substituted with the actual gate
+ * command by the command handler before the prompt is handed to `runAgent`.
+ */
+const GATE_FIX_SYSTEM_PROMPT = `You are fixing issues found by the quality gate. The following command failed:
+
+    {GATE_COMMAND}
+
+Here is the output showing what failed. Fix ALL reported issues, then verify by running the check command(s) mentioned in the output.
+
+When ALL issues are fixed and verification passes, respond with:
+
+DONE: <brief summary of fixes>
+
+If verification still fails after your fixes, diagnose and fix remaining issues. Keep iterating until clean.`;
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+/** One numbered phase extracted from a plan file by `parsePhases`. */
+interface Phase {
+  /** Phase number as written in the plan heading (e.g. `2` for "## 2. ..."). */
+  number: number;
+  /** Heading text that follows the number, trimmed. */
+  title: string;
+  /** Raw body lines of the phase, each terminated by "\n". */
+  content: string;
+  /** Optional verification command; `parsePhases` in this file never sets it. */
+  verifyCommand?: string;
+}
+
+/** Mutable execution record for one phase; drives the widget and the summary. */
+interface PhaseResult {
+  /** Phase number copied from the parsed `Phase`. */
+  phase: number;
+  /** Phase title copied from the parsed `Phase`. */
+  title: string;
+  /** Lifecycle state; "fixing" is set while a gate-fix agent is running. */
+  status: "pending" | "running" | "done" | "failed" | "fixing";
+  /** Final text output of the phase agent (see `getFinalOutput`). */
+  agentOutput?: string;
+  /** All messages collected from the phase agent run. */
+  agentMessages?: Message[];
+  /** Number of times the gate command has been run for this phase. */
+  gateAttempts: number;
+  /** stdout + stderr of the most recent gate run. */
+  gateOutput?: string;
+  /** Final text output of the most recent gate-fix agent. */
+  fixOutput?: string;
+  /** Assistant turns, accumulated over the phase agent and any fix agents. */
+  turns?: number;
+  /** Input tokens, accumulated over the phase agent and any fix agents. */
+  tokensIn?: number;
+  /** Output tokens, accumulated over the phase agent and any fix agents. */
+  tokensOut?: number;
+  /** Cost, accumulated over the phase agent and any fix agents. */
+  cost?: number;
+  /** Model reported by the phase agent run. */
+  model?: string;
+  /** Human-readable failure reason, set when the phase is marked "failed". */
+  error?: string;
+}
+
+// ─── Plan Parsing ────────────────────────────────────────────────────────────
+
+/**
+ * Split a markdown plan into its numbered phases.
+ *
+ * A phase starts at a line of the form "## N. Title" or "N. Title". Its content
+ * is every following line up to the next phase header, or up to the first
+ * markdown heading whose text does not start with a digit or "#" (for example
+ * "## Execution order" or "## Risk assessment"). After such a heading, lines
+ * are ignored until the next phase header.
+ *
+ * Notes on edge cases:
+ * - Both LF and CRLF line endings are accepted.
+ * - Lines inside fenced code blocks (``` or ~~~) are always treated as phase
+ *   content, so a shell comment such as "# run tests" or a list item such as
+ *   "1. foo" inside a fence neither ends nor starts a phase.
+ * - A non-phase heading that directly follows a phase header (before any
+ *   content) is skipped and the phase stays open.
+ * - A leading "---" separator is dropped; later separators are kept as content.
+ * - A trailing phase with no content is discarded.
+ *
+ * @param planContent Full text of the plan file.
+ * @returns Phases in document order.
+ */
+function parsePhases(planContent: string): Phase[] {
+  const phases: Phase[] = [];
+  // A trailing "\r" would stop `(.+)$` from matching, so strip it while splitting.
+  const lines = planContent.split(/\r?\n/);
+
+  // Match phase headers like "## 1. Fix test failure" or "1. Fix test failure"
+  const phaseRegex = /^(?:##\s*)?(\d+)\.\s+(.+)$/;
+  // Any heading whose text does not begin with "#" or a digit ends the phase.
+  const sectionHeaderRegex = /^#{1,6}\s+[^#\d]/;
+  const separatorRegex = /^---+$/;
+  // Opening or closing marker of a fenced code block.
+  const fenceRegex = /^\s*(```|~~~)/;
+
+  let currentPhase: Phase | null = null;
+  // Marker ("```" or "~~~") of the fence we are currently inside, if any.
+  let openFence: string | null = null;
+
+  for (const line of lines) {
+    const fenceMatch = line.match(fenceRegex);
+
+    if (openFence !== null) {
+      // Inside a fence nothing is structural; only the matching marker closes it.
+      if (fenceMatch && fenceMatch[1] === openFence) openFence = null;
+      if (currentPhase) currentPhase.content += `${line}\n`;
+      continue;
+    }
+
+    if (fenceMatch) {
+      openFence = fenceMatch[1];
+      if (currentPhase) currentPhase.content += `${line}\n`;
+      continue;
+    }
+
+    const phaseMatch = line.match(phaseRegex);
+    if (phaseMatch) {
+      // Save previous phase
+      if (currentPhase) phases.push(currentPhase);
+      currentPhase = {
+        number: parseInt(phaseMatch[1], 10),
+        title: phaseMatch[2].trim(),
+        content: "",
+      };
+      continue;
+    }
+
+    if (!currentPhase) continue;
+
+    if (sectionHeaderRegex.test(line)) {
+      // A non-phase section closes the phase, but only once it has content;
+      // a sub-heading right under the phase title is skipped instead.
+      if (currentPhase.content.trim()) {
+        phases.push(currentPhase);
+        currentPhase = null;
+      }
+      continue;
+    }
+
+    const isSeparator = separatorRegex.test(line.trim());
+    if (!isSeparator || currentPhase.content.length > 0) {
+      currentPhase.content += `${line}\n`;
+    }
+  }
+
+  // Don't forget the last phase
+  if (currentPhase && currentPhase.content.trim()) {
+    phases.push(currentPhase);
+  }
+
+  return phases;
+}
+
+// ─── Process Helpers ─────────────────────────────────────────────────────────
+
+/**
+ * Work out how to launch a nested `pi` process.
+ *
+ * When the current process was started from a script file that exists on disk
+ * (and is not a path under "/$bunfs/root/"), re-run that script with the
+ * current executable; otherwise fall back to the `pi` command.
+ *
+ * @param args Arguments to hand to the nested pi process.
+ * @returns The command to spawn and its full argument list.
+ */
+function getPiInvocation(args: string[]): { command: string; args: string[] } {
+  const scriptPath = process.argv[1];
+
+  if (!scriptPath || scriptPath.startsWith("/$bunfs/root/") || !fs.existsSync(scriptPath)) {
+    return { command: "pi", args };
+  }
+
+  return { command: process.execPath, args: [scriptPath, ...args] };
+}
+
+/**
+ * Write a prompt to a file inside a freshly created temporary directory.
+ *
+ * The file is created with mode 0o600. If the write fails, the new directory
+ * is removed again before the error is rethrown, so a failed call leaves
+ * nothing behind.
+ *
+ * @param name Label used in the file name; characters outside `[\w.-]` are
+ *   replaced with "_".
+ * @param prompt Text to write.
+ * @returns The temporary directory (for later cleanup) and the file path.
+ * @throws Whatever `mkdtemp` or the write rejects with.
+ */
+async function writePromptToTempFile(
+  name: string,
+  prompt: string,
+): Promise<{ dir: string; filePath: string }> {
+  const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-plan-exec-"));
+  const safeName = name.replace(/[^\w.-]+/g, "_");
+  const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
+  try {
+    await withFileMutationQueue(filePath, () =>
+      fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 }),
+    );
+  } catch (err) {
+    // The caller never learns about tmpDir on failure, so remove it here.
+    await fs.promises.rm(tmpDir, { recursive: true, force: true }).catch(() => {
+      /* best effort; the original write error is the one worth reporting */
+    });
+    throw err;
+  }
+  return { dir: tmpDir, filePath };
+}
+
+/**
+ * Best-effort recursive removal of a temporary directory.
+ *
+ * Does nothing for a null/empty path and never rejects: removal errors are
+ * deliberately ignored.
+ *
+ * @param dir Directory to delete, or null when there is nothing to clean up.
+ */
+async function cleanupTemp(dir: string | null): Promise<void> {
+  if (!dir) return;
+
+  await fs.promises.rm(dir, { recursive: true, force: true }).catch(() => {
+    /* ignore */
+  });
+}
+
+/** Outcome of one nested pi agent run, as collected by `runAgent`. */
+interface AgentRunResult {
+  /** Exit code of the pi subprocess (1 when the process fails to start). */
+  exitCode: number;
+  /** Messages taken from `message_end` and `tool_result_end` events, in order. */
+  messages: Message[];
+  /** Everything the subprocess wrote to stderr. */
+  stderr: string;
+  /** Number of assistant messages received. */
+  turns: number;
+  /** Sum of `usage.input` over all assistant messages. */
+  tokensIn: number;
+  /** Sum of `usage.output` over all assistant messages. */
+  tokensOut: number;
+  /** Sum of `usage.cost.total` over all assistant messages. */
+  cost: number;
+  /** Model named by the first assistant message that reports one. */
+  model?: string;
+  /** Stop reason of the most recent assistant message that reports one. */
+  stopReason?: string;
+  /** Error message of the most recent assistant message that reports one. */
+  errorMessage?: string;
+}
+
+/**
+ * Find the agent's latest textual answer.
+ *
+ * Walks the messages from newest to oldest and returns the first text part of
+ * the first assistant message that has one.
+ *
+ * @param messages Messages collected from an agent run.
+ * @returns That text, or "" when no assistant message contains a text part.
+ */
+function getFinalOutput(messages: Message[]): string {
+  let index = messages.length;
+
+  while (index-- > 0) {
+    const candidate = messages[index];
+    if (candidate.role !== "assistant") continue;
+
+    for (const part of candidate.content) {
+      if (part.type === "text") return part.text;
+    }
+  }
+
+  return "";
+}
+
+/**
+ * Tell whether agent output contains the completion marker.
+ *
+ * @param output Text to inspect.
+ * @returns true when any line starts with "DONE:" (case-insensitive).
+ */
+function isDone(output: string): boolean {
+  const doneMarker = /^DONE:\s*/im;
+  return doneMarker.test(output);
+}
+
+/**
+ * Run one task in a fresh `pi` subprocess and collect its JSON event stream.
+ *
+ * The system prompt is written to a temporary file and passed with
+ * `--append-system-prompt`; the temporary directory is removed when the run
+ * ends. stdout is parsed line by line as JSON events; lines that are not valid
+ * JSON are ignored.
+ *
+ * The returned promise never rejects because of the subprocess itself: spawn
+ * errors and non-zero exits are reported through `exitCode`, `stderr` and
+ * `errorMessage`. A process that ends through a signal (for example after an
+ * abort) is reported with exit code 1.
+ *
+ * @param cwd Working directory for the subprocess.
+ * @param systemPrompt Text appended to the agent's system prompt.
+ * @param task Task prompt passed as the final CLI argument.
+ * @param signal Optional abort signal. On abort the process receives SIGTERM,
+ *   then SIGKILL if it is still running five seconds later.
+ * @param onUpdate Optional callback invoked after each assistant message with
+ *   the latest text output and the turn count so far.
+ * @returns Collected messages, usage totals and process status.
+ * @throws If the temporary prompt file cannot be written.
+ */
+async function runAgent(
+  cwd: string,
+  systemPrompt: string,
+  task: string,
+  signal?: AbortSignal,
+  onUpdate?: (partial: { output: string; turns: number }) => void,
+): Promise<AgentRunResult> {
+  const args: string[] = ["--mode", "json", "-p", "--no-session"];
+
+  // Write system prompt to temp file
+  const { dir: promptDir, filePath: promptPath } = await writePromptToTempFile(
+    "phase-executor",
+    systemPrompt,
+  );
+  args.push("--append-system-prompt", promptPath);
+  args.push(task);
+
+  const result: AgentRunResult = {
+    exitCode: 0,
+    messages: [],
+    stderr: "",
+    turns: 0,
+    tokensIn: 0,
+    tokensOut: 0,
+    cost: 0,
+  };
+
+  try {
+    return await new Promise<AgentRunResult>((resolve) => {
+      const invocation = getPiInvocation(args);
+      const proc = spawn(invocation.command, invocation.args, {
+        cwd,
+        shell: false,
+        stdio: ["ignore", "pipe", "pipe"],
+      });
+
+      // Decode at the stream level so a multi-byte character split across two
+      // chunks is not corrupted by per-chunk toString().
+      proc.stdout.setEncoding("utf8");
+      proc.stderr.setEncoding("utf8");
+
+      let buffer = "";
+      let settled = false;
+      let killTimer: ReturnType<typeof setTimeout> | undefined;
+
+      const killProc = () => {
+        proc.kill("SIGTERM");
+        killTimer = setTimeout(() => {
+          // `proc.killed` only says a signal was sent, so it is already true
+          // here. Check whether the process has actually ended instead.
+          if (proc.exitCode === null && proc.signalCode === null) {
+            proc.kill("SIGKILL");
+          }
+        }, 5000);
+      };
+
+      // Resolve exactly once and release the timer and abort listener.
+      const finish = (exitCode: number) => {
+        if (settled) return;
+        settled = true;
+        if (killTimer) clearTimeout(killTimer);
+        signal?.removeEventListener("abort", killProc);
+        result.exitCode = exitCode;
+        resolve(result);
+      };
+
+      const processLine = (line: string) => {
+        if (!line.trim()) return;
+        let event: any;
+        try {
+          event = JSON.parse(line);
+        } catch {
+          // Non-JSON output on stdout is not part of the event stream.
+          return;
+        }
+
+        if (event.type === "message_end" && event.message) {
+          const msg = event.message as Message;
+          result.messages.push(msg);
+
+          if (msg.role === "assistant") {
+            result.turns++;
+            const usage = msg.usage;
+            if (usage) {
+              result.tokensIn += usage.input || 0;
+              result.tokensOut += usage.output || 0;
+              result.cost += usage.cost?.total || 0;
+            }
+            if (!result.model && msg.model) result.model = msg.model;
+            if (msg.stopReason) result.stopReason = msg.stopReason;
+            if (msg.errorMessage) result.errorMessage = msg.errorMessage;
+
+            // Stream progress
+            const output = getFinalOutput(result.messages);
+            onUpdate?.({ output, turns: result.turns });
+          }
+        }
+
+        if (event.type === "tool_result_end" && event.message) {
+          result.messages.push(event.message as Message);
+        }
+      };
+
+      proc.stdout.on("data", (data) => {
+        buffer += data.toString();
+        const lines = buffer.split("\n");
+        // The last element is an incomplete line (or ""); keep it for later.
+        buffer = lines.pop() || "";
+        for (const line of lines) processLine(line);
+      });
+
+      proc.stderr.on("data", (data) => {
+        result.stderr += data.toString();
+      });
+
+      proc.on("close", (code) => {
+        if (buffer.trim()) processLine(buffer);
+        // `code` is null when the process was ended by a signal; that is not success.
+        finish(code ?? 1);
+      });
+
+      proc.on("error", (err) => {
+        // Keep the reason (e.g. ENOENT for a missing `pi` binary) for the caller.
+        if (!result.errorMessage) result.errorMessage = err.message;
+        result.stderr += `${err.message}\n`;
+        finish(1);
+      });
+
+      if (signal) {
+        if (signal.aborted) killProc();
+        else signal.addEventListener("abort", killProc, { once: true });
+      }
+    });
+  } finally {
+    await cleanupTemp(promptDir);
+  }
+}
+
+/**
+ * Run a shell command and capture its output.
+ *
+ * The command string is executed through the shell (`shell: true`), so it may
+ * contain operators such as `&&`. The returned promise never rejects: a
+ * command that cannot be started, or that ends through a signal, is reported
+ * with exit code 1.
+ *
+ * @param cwd Working directory for the command.
+ * @param command Shell command line to run.
+ * @param signal Optional abort signal; on abort the process receives SIGTERM.
+ * @returns Exit code plus captured stdout and stderr. When the command fails
+ *   to start, `stderr` holds a "Command failed to start" message with the
+ *   underlying reason.
+ */
+async function runCommand(
+  cwd: string,
+  command: string,
+  signal?: AbortSignal,
+): Promise<{ exitCode: number; stdout: string; stderr: string }> {
+  return await new Promise((resolve) => {
+    const proc = spawn(command, {
+      cwd,
+      shell: true,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+
+    // Decode at the stream level so multi-byte characters split across chunks
+    // are not corrupted.
+    proc.stdout.setEncoding("utf8");
+    proc.stderr.setEncoding("utf8");
+
+    let stdout = "";
+    let stderr = "";
+    let settled = false;
+
+    const killProc = () => {
+      proc.kill("SIGTERM");
+    };
+
+    // Resolve exactly once and drop the abort listener so it does not pile up
+    // on a long-lived signal across many gate runs.
+    const finish = (exitCode: number, finalStderr: string) => {
+      if (settled) return;
+      settled = true;
+      signal?.removeEventListener("abort", killProc);
+      resolve({ exitCode, stdout, stderr: finalStderr });
+    };
+
+    proc.stdout.on("data", (data) => (stdout += data.toString()));
+    proc.stderr.on("data", (data) => (stderr += data.toString()));
+
+    proc.on("close", (code) => {
+      finish(code ?? 1, stderr);
+    });
+
+    proc.on("error", (err) => {
+      // Include the reason so the caller (and the fix agent) can see it.
+      finish(1, `Command failed to start: ${err.message}`);
+    });
+
+    if (signal) {
+      if (signal.aborted) killProc();
+      else signal.addEventListener("abort", killProc, { once: true });
+    }
+  });
+}
+
+// ─── Format Helpers ──────────────────────────────────────────────────────────
+
+/**
+ * Render a token count compactly.
+ *
+ * Counts below 1000 are printed as-is, counts below 10000 with one decimal
+ * and a "k" suffix (e.g. "1.5k"), larger counts as rounded thousands ("12k").
+ *
+ * @param count Number of tokens.
+ * @returns The formatted count.
+ */
+function formatTokens(count: number): string {
+  if (count < 1000) return String(count);
+
+  const thousands = count / 1000;
+  return count < 10000 ? `${thousands.toFixed(1)}k` : `${Math.round(thousands)}k`;
+}
+
+/**
+ * Build the one-line usage summary for a phase.
+ *
+ * Emits, in this order and only for values that are set and non-zero: turn
+ * count, input tokens (↑), output tokens (↓), cost in dollars with four
+ * decimals, and the model name.
+ *
+ * @param result Phase record to summarise.
+ * @returns The parts joined by single spaces, or "" when nothing is set.
+ */
+function formatUsage(result: PhaseResult): string {
+  const { turns, tokensIn, tokensOut, cost, model } = result;
+
+  const segments: string[] = [
+    turns ? `${turns} turns` : "",
+    tokensIn ? `↑${formatTokens(tokensIn)}` : "",
+    tokensOut ? `↓${formatTokens(tokensOut)}` : "",
+    cost ? `$${cost.toFixed(4)}` : "",
+    model ? model : "",
+  ];
+
+  return segments.filter((segment) => segment !== "").join(" ");
+}
+
+// ─── Extension ───────────────────────────────────────────────────────────────
+
+/** Parsed form of the `/execute-plan` argument string. */
+interface ExecutePlanArgs {
+  /** First non-flag token, if any. */
+  planFile?: string;
+  /** Value of `--phase` (default 1); NaN when the value is missing or not a number. */
+  startPhase: number;
+  /** Value of `--gate`, or the default gate command. */
+  gateCommand: string;
+  /** Whether `--dry-run` was given. */
+  dryRun: boolean;
+}
+
+const DEFAULT_GATE_COMMAND = "make precommit";
+const EXECUTE_PLAN_FLAGS = new Set(["--phase", "--gate", "--dry-run"]);
+
+/** Split an argument string on whitespace, keeping "double" or 'single' quoted spans as one token. */
+function tokenizeArgs(input: string): string[] {
+  const tokens: string[] = [];
+  const tokenRegex = /"([^"]*)"|'([^']*)'|(\S+)/g;
+  let match: RegExpExecArray | null;
+  while ((match = tokenRegex.exec(input)) !== null) {
+    tokens.push(match[1] ?? match[2] ?? match[3]);
+  }
+  return tokens;
+}
+
+/** Interpret the `/execute-plan` arguments: plan file, `--phase N`, `--gate CMD`, `--dry-run`. */
+function parseExecutePlanArgs(rawArgs: string): ExecutePlanArgs {
+  const tokens = tokenizeArgs(rawArgs);
+  const parsed: ExecutePlanArgs = {
+    startPhase: 1,
+    gateCommand: DEFAULT_GATE_COMMAND,
+    dryRun: false,
+  };
+
+  for (let i = 0; i < tokens.length; i++) {
+    const token = tokens[i];
+    if (token === "--dry-run") {
+      parsed.dryRun = true;
+    } else if (token === "--phase") {
+      const value = tokens[++i] ?? "";
+      parsed.startPhase = /^\d+$/.test(value) ? parseInt(value, 10) : NaN;
+    } else if (token === "--gate") {
+      // The gate command is everything up to the next known flag, so both
+      // `--gate "mix test && mix credo --strict"` and an unquoted form work.
+      const gateTokens: string[] = [];
+      while (i + 1 < tokens.length && !EXECUTE_PLAN_FLAGS.has(tokens[i + 1])) {
+        gateTokens.push(tokens[++i]);
+      }
+      if (gateTokens.length > 0) parsed.gateCommand = gateTokens.join(" ");
+    } else if (parsed.planFile === undefined) {
+      parsed.planFile = token;
+    }
+  }
+
+  return parsed;
+}
+
+/**
+ * Extension entry point: registers the `/execute-plan` command.
+ *
+ * The command parses the plan file into phases and runs them in order. Each
+ * phase is executed by a fresh agent (`runAgent`); once the agent signals
+ * DONE, the gate command is run, and on failure a fix agent is spawned up to
+ * three times, re-running the gate after each attempt. Execution stops at the
+ * first phase that does not complete (no DONE, gate still failing, abort, or
+ * an agent that could not be started), so later phases never run on top of an
+ * unverified state. Progress is shown through a widget and a status line, and
+ * a summary widget is shown at the end.
+ *
+ * @param pi Extension API used to register the command.
+ */
+export default function (pi: ExtensionAPI) {
+  pi.registerCommand("execute-plan", {
+    description: "Execute a multi-phase plan file deterministically",
+    handler: async (args, ctx) => {
+      if (!ctx.hasUI) {
+        ctx.ui.notify("execute-plan requires interactive mode", "error");
+        return;
+      }
+
+      // Parse arguments
+      const { planFile, startPhase, gateCommand, dryRun } = parseExecutePlanArgs(args);
+
+      if (!planFile) {
+        ctx.ui.notify("Usage: /execute-plan <plan-file> [--phase N] [--gate CMD] [--dry-run]", "error");
+        return;
+      }
+
+      // A NaN start phase would silently select no phases at all.
+      if (Number.isNaN(startPhase)) {
+        ctx.ui.notify("--phase expects a phase number, e.g. --phase 2", "error");
+        return;
+      }
+
+      // Resolve path
+      const resolvedPath = path.isAbsolute(planFile)
+        ? planFile
+        : path.join(ctx.cwd, planFile);
+
+      if (!fs.existsSync(resolvedPath)) {
+        ctx.ui.notify(`Plan file not found: ${resolvedPath}`, "error");
+        return;
+      }
+
+      const planContent = fs.readFileSync(resolvedPath, "utf-8");
+      const phases = parsePhases(planContent);
+
+      if (phases.length === 0) {
+        ctx.ui.notify("No phases found in plan file", "error");
+        return;
+      }
+
+      // Dry run: show plan and exit
+      if (dryRun) {
+        const lines = [
+          `## Plan: ${path.basename(resolvedPath)}`,
+          ``,
+          `Gate command: ${gateCommand}`,
+          `${phases.length} phase(s) detected:`,
+          ``,
+          ...phases.map((p) => `### Phase ${p.number}: ${p.title}`),
+          ``,
+          ...phases.map((p) => p.content.trim()),
+        ];
+        ctx.ui.setWidget("plan-executor", lines);
+        return;
+      }
+
+      // Initialize results (results[i] belongs to phases[i])
+      const results: PhaseResult[] = phases.map((p) => ({
+        phase: p.number,
+        title: p.title,
+        status: "pending",
+        gateAttempts: 0,
+      }));
+
+      // Pair each phase with its result by position. Phase numbers come from
+      // the plan text and need not be contiguous or start at 1.
+      const phasesToRun = phases
+        .map((phase, index) => ({ phase, result: results[index] }))
+        .filter(({ phase }) => phase.number >= startPhase);
+
+      if (phasesToRun.length === 0) {
+        ctx.ui.notify(`No phase numbered ${startPhase} or higher in plan file`, "error");
+        return;
+      }
+
+      const updateUI = () => {
+        const lines = [
+          `## Plan Executor: ${path.basename(resolvedPath)}`,
+          ``,
+          ...results.map((r) => {
+            const icon =
+              r.status === "done"
+                ? "✅"
+                : r.status === "failed"
+                  ? "❌"
+                  : r.status === "running" || r.status === "fixing"
+                    ? "🔄"
+                    : "⏳";
+            const usage = r.turns ? ` (${formatUsage(r)})` : "";
+            const gateInfo = r.gateAttempts > 0 ? ` [gate: ${r.gateAttempts}]` : "";
+            return `${icon} Phase ${r.phase}: ${r.title}${usage}${gateInfo}`;
+          }),
+          ``,
+        ];
+        ctx.ui.setWidget("plan-executor", lines);
+      };
+
+      // Show initial state
+      updateUI();
+
+      const MAX_FIX_ATTEMPTS = 3;
+
+      // Execute phases sequentially
+      for (const { phase, result } of phasesToRun) {
+        // Do not start another agent once the user has aborted.
+        if (ctx.signal?.aborted) break;
+
+        result.status = "running";
+        updateUI();
+        ctx.ui.setStatus("plan-executor", `Executing Phase ${phase.number}: ${phase.title}`);
+
+        // Build task prompt from phase content
+        const task = `## Phase ${phase.number}: ${phase.title}\n\n${phase.content.trim()}`;
+
+        // Run phase in isolated context
+        let agentResult: AgentRunResult;
+        try {
+          agentResult = await runAgent(
+            ctx.cwd,
+            PHASE_SYSTEM_PROMPT,
+            task,
+            ctx.signal,
+            ({ output, turns }) => {
+              result.turns = turns;
+              const status = isDone(output) ? "✅" : "🔄";
+              ctx.ui.setStatus(
+                "plan-executor",
+                `${status} Phase ${phase.number} (${turns} turns): ${phase.title}`,
+              );
+            },
+          );
+        } catch (err: any) {
+          result.status = "failed";
+          result.error = err.message;
+          updateUI();
+          ctx.ui.notify(`Phase ${phase.number} failed: ${err.message}`, "error");
+          break;
+        }
+
+        result.agentOutput = getFinalOutput(agentResult.messages);
+        result.agentMessages = agentResult.messages;
+        result.turns = agentResult.turns;
+        result.tokensIn = agentResult.tokensIn;
+        result.tokensOut = agentResult.tokensOut;
+        result.cost = agentResult.cost;
+        result.model = agentResult.model;
+
+        if (ctx.signal?.aborted) {
+          result.status = "failed";
+          result.error = "Aborted";
+          updateUI();
+          ctx.ui.notify(`Phase ${phase.number} aborted`, "warning");
+          break;
+        }
+
+        // Check if agent said DONE
+        if (!isDone(result.agentOutput ?? "")) {
+          result.status = "failed";
+          result.error = "Agent did not signal completion (no DONE: message)";
+          updateUI();
+          ctx.ui.notify(
+            `Phase ${phase.number} incomplete: agent did not signal DONE`,
+            "warning",
+          );
+          // Stop here: later phases must not build on an incomplete, ungated phase.
+          break;
+        }
+
+        // Run gate
+        ctx.ui.setStatus("plan-executor", `Phase ${phase.number} done, running gate: ${gateCommand}...`);
+        let gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
+        result.gateAttempts++;
+        result.gateOutput = gateResult.stdout + gateResult.stderr;
+
+        // If gate fails, fix in a loop
+        let fixAttempt = 0;
+        let fixError: string | undefined;
+
+        while (
+          gateResult.exitCode !== 0 &&
+          fixAttempt < MAX_FIX_ATTEMPTS &&
+          !ctx.signal?.aborted
+        ) {
+          fixAttempt++;
+          result.status = "fixing";
+          updateUI();
+          ctx.ui.setStatus(
+            "plan-executor",
+            `Fixing gate issues (attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS})...`,
+          );
+
+          const fixTask = `## Gate Failed\n\nThe following command failed after completing Phase ${phase.number}:\n\n    ${gateCommand}\n\nOutput:\n\n\`\`\`\n${gateResult.stdout}\n${gateResult.stderr}\n\`\`\`\n\nFix ALL reported issues.`;
+
+          // Use a replacer function so "$&", "$$" etc. in the gate command are
+          // inserted literally instead of being read as replacement patterns.
+          const fixSystemPrompt = GATE_FIX_SYSTEM_PROMPT.replace(
+            "{GATE_COMMAND}",
+            () => gateCommand,
+          );
+
+          let fixResult: AgentRunResult;
+          try {
+            fixResult = await runAgent(
+              ctx.cwd,
+              fixSystemPrompt,
+              fixTask,
+              ctx.signal,
+              ({ turns }) => {
+                ctx.ui.setStatus(
+                  "plan-executor",
+                  `Fixing gate (${fixAttempt}/${MAX_FIX_ATTEMPTS}, ${turns} turns)...`,
+                );
+              },
+            );
+          } catch (err: any) {
+            // Without this the phase would stay in "fixing" and the handler would reject.
+            fixError = err?.message ?? String(err);
+            break;
+          }
+
+          result.fixOutput = getFinalOutput(fixResult.messages);
+          result.turns = (result.turns ?? 0) + fixResult.turns;
+          result.tokensIn = (result.tokensIn ?? 0) + fixResult.tokensIn;
+          result.tokensOut = (result.tokensOut ?? 0) + fixResult.tokensOut;
+          result.cost = (result.cost ?? 0) + fixResult.cost;
+
+          // Re-run gate
+          gateResult = await runCommand(ctx.cwd, gateCommand, ctx.signal);
+          result.gateAttempts++;
+          result.gateOutput = gateResult.stdout + gateResult.stderr;
+        }
+
+        if (gateResult.exitCode !== 0) {
+          result.status = "failed";
+          if (fixError !== undefined) {
+            result.error = `Fix agent failed to run: ${fixError}`;
+          } else if (ctx.signal?.aborted) {
+            result.error = "Aborted while running the quality gate";
+          } else {
+            result.error = `Gate failed after ${MAX_FIX_ATTEMPTS} fix attempts`;
+          }
+          updateUI();
+          ctx.ui.notify(`Phase ${phase.number} blocked: ${result.error}`, "error");
+          break;
+        }
+
+        // Phase complete
+        result.status = "done";
+        updateUI();
+        ctx.ui.notify(`Phase ${phase.number} complete!`, "success");
+      }
+
+      // Final summary
+      const completed = results.filter((r) => r.status === "done").length;
+      const failed = results.filter((r) => r.status === "failed").length;
+      // Selected phases that were never reached (earlier failure or abort).
+      const notRun = phasesToRun.filter(({ result }) => result.status === "pending").length;
+      const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
+      const totalTurns = results.reduce((sum, r) => sum + (r.turns ?? 0), 0);
+
+      ctx.ui.setStatus(
+        "plan-executor",
+        `Done: ${completed}/${results.length} phases complete, ${failed} failed, $${totalCost.toFixed(4)}`,
+      );
+
+      let verdict = `## All phases passed ✅`;
+      if (failed > 0) verdict = `## Failed phases need manual attention`;
+      else if (notRun > 0) verdict = `## Stopped before all phases ran`;
+
+      // Show final summary widget
+      const summaryLines = [
+        `## Plan Execution Complete`,
+        ``,
+        `**Phases:** ${completed}/${results.length} complete${failed > 0 ? `, ${failed} failed` : ""}`,
+        `**Total turns:** ${totalTurns}`,
+        `**Total cost:** $${totalCost.toFixed(4)}`,
+        ``,
+        ...results.map((r) => {
+          // Phases that never ran are shown as pending, not as failures.
+          const icon = r.status === "done" ? "✅" : r.status === "failed" ? "❌" : "⏳";
+          const usage = formatUsage(r);
+          const usageInfo = usage ? ` — ${usage}` : "";
+          const gateRuns = r.gateAttempts > 1 ? ` (gate: ${r.gateAttempts} runs)` : "";
+          return `${icon} Phase ${r.phase}: ${r.title}${usageInfo}${gateRuns}`;
+        }),
+        ``,
+        verdict,
+      ];
+      ctx.ui.setWidget("plan-executor", summaryLines);
+    },
+  });
+}