From fcd296c499f263ed53db024d788cade049e47ded Mon Sep 17 00:00:00 2001
From: YeonGyu-Kim <code.yeon.gyu@gmail.com>
Date: Wed, 24 Jun 2026 15:34:28 +0900
Subject: [PATCH] docs(web): restore skills detail copy

---
 packages/web/content/docs/skills.md        | 67 +++++++++++++++++++++-
 packages/web/lib/docs-content.generated.ts | 27 ++++++++-
 2 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/packages/web/content/docs/skills.md b/packages/web/content/docs/skills.md
index dfe7d49..5e0d82f 100644
--- a/packages/web/content/docs/skills.md
+++ b/packages/web/content/docs/skills.md
@@ -13,6 +13,33 @@ The command pillars stay simple:
 
 Skills add specialist judgment around those pillars. The sections below describe each skill and how it is typically used.
 
+### Skill index
+
+Most skills auto-activate when a request matches their domain, so you do not need to study or manually select every skill before using LazyCodex. When you want to be explicit, put the skill name in the prompt — for example `$visual-qa`, `$git-master`, or `$ulw-research`.
+
+| Skill | Use it for |
+| --- | --- |
+| `init-deep` | Hierarchical `AGENTS.md` context for large or old repos |
+| `ulw-plan` | Explore-first planning before coding |
+| `ulw-loop` | Evidence-bound loop until verified completion |
+| `start-work` | Execute a plan with durable Boulder progress |
+| `review-work` | Five-lane parallel post-implementation review |
+| `remove-ai-slops` | Behavior-preserving cleanup of AI-looking code |
+| `frontend` | Designed UI work instead of generic layout filling |
+| `programming` | Strict TypeScript, Rust, Python, or Go discipline, TDD-first |
+| `git-master` | Atomic commits, rebase/squash, push safety, history investigation |
+| `visual-qa` | Screenshot/TUI diff plus dual-oracle visual QA |
+| `debugging` | Evidence-led root-cause investigation |
+| `refactor` | Behavior-preserving restructure of existing code |
+| `ulw-research` | Maximum-saturation research with codebase, web, official-docs, and OSS-repo swarms |
+| `LSP` | Diagnostics, definitions, references, symbols, and renames |
+| `lsp-setup` | Configure language servers for a project |
+| `AST-grep` | Structural search and rewrite across 25 languages |
+| `rules` | Project instructions from `AGENTS.md`, rule directories, and instruction files |
+| `comment-checker` | Feedback after edit-like operations |
+
+### Skill highlights
+
 ---
 
 ### review-work
@@ -176,9 +203,47 @@ Finds code by syntactic shape rather than text — every function call matching
 
 ---
 
+### lsp-setup
+
+Language-server installation and workspace wiring.
+
+Configures language servers when a project does not already expose reliable diagnostics, definitions, references, and safe renames. It detects the language stack, installs or points to the right server, and validates that LSP calls work before higher-level coding or refactor skills depend on them.
+
+**When it activates:** When diagnostics are missing, definitions cannot be resolved, or a project needs LSP support before a refactor or programming task.
+
+---
+
+### rules
+
+Project instruction injection from repository and user rule files.
+
+Automatically loads project instructions from sources such as `AGENTS.md`, `CONTEXT.md`, `.omo/rules/`, `.claude/rules/`, `.github/instructions/`, and `.github/copilot-instructions.md`. There is no command to run — the harness treats these rules as active context when the plugin is enabled.
+
+**When it activates:** At session start and prompt submission, so agents inherit project constraints before planning or editing.
+
+---
+
+### comment-checker
+
+Immediate feedback after edit-like operations.
+
+After code changes, `comment-checker` inspects comments near the edited lines. If it flags comment drift — a comment that no longer matches the code below it — the agent must fix or justify the comment before proceeding. This catches stale comments at the moment they are introduced rather than during a later review.
+
+**When it activates:** After write, edit, patch, or other edit-like tool calls when the plugin has the guardrail enabled.
+
+---
+
 ### Where skills live
 
-LazyCodex installs skills as part of the OmO plugin. OmO can also load skills from project and user locations such as `.codex/skills`, `~/.codex/skills`, `.agents/skills`, and `~/.agents/skills`.
+LazyCodex installs skills as part of the OmO plugin. OmO can also load skills from project and user locations such as `.codex/skills`, `~/.codex/skills`, `.opencode/skills`, `~/.config/opencode/skills`, `.claude/skills`, `.agents/skills`, and `~/.agents/skills`.
+
+To install the Codex Light setup, run:
+
+```bash
+npx lazycodex-ai install
+```
+
+That installer wires the Codex marketplace plugin as `omo@sisyphuslabs` while keeping the public package alias easy to remember.
 
 Each skill carries deep internal references — detailed playbooks, language-specific recipes, and per-phase instructions — but none of that is something you need to read. The harness reads it for you when the skill activates.
 
diff --git a/packages/web/lib/docs-content.generated.ts b/packages/web/lib/docs-content.generated.ts
index 92798bc..e342c12 100644
--- a/packages/web/lib/docs-content.generated.ts
+++ b/packages/web/lib/docs-content.generated.ts
@@ -9,7 +9,7 @@ export const DOC_SOURCES: Record<string, string> = {
   "ulw-plan.md": "<p><code>$ulw-plan</code> is the strategic planning consultant (Prometheus). It turns an idea into a decision-complete work plan. It is a planner, NOT an implementer. When you say &quot;do X&quot; it produces a plan for X and never writes product code.</p>\n<h3 id=\"the-flow\">The flow</h3>\n<ol>\n<li><strong>Socratic interview</strong> — ask only the forks that exploration cannot resolve. When intent is fuzzy, research to best practice instead of interrogating.</li>\n<li><strong>Parallel codebase exploration</strong> — fan out read-only subagents to ground every decision in the actual code, never in memory.</li>\n<li><strong>Metis gap analysis</strong> — name every unknown the plan depends on and either close it or surface it as an explicit fork.</li>\n<li><strong>Write the plan</strong> to <code>plans/&lt;slug&gt;.md</code> — one decision-complete plan a worker executes with zero further interview.</li>\n<li><strong>Optional Momus high-accuracy review</strong> — an adversarial pass that tries to break the plan before it ships.</li>\n</ol>\n<h3 id=\"output\">Output</h3>\n<p>Questions, research, and a work plan whose every todo carries references, acceptance criteria, a QA plan, and a commit boundary. The plan records <code>status: awaiting-approval</code> and waits — it never begins execution itself.</p>\n<h3 id=\"handoff\">Handoff</h3>\n<p>Once you approve, hand the plan to <a href=\"#start-work\"><code>$start-work</code></a>, which executes it against durable Boulder state with the five evidence gates.</p>\n",
   "start-work.md": "<p><code>$start-work</code> executes a Prometheus work plan until every top-level checkbox is done.</p>\n<h3 id=\"how-it-works\">How it works</h3>\n<ul>\n<li>Durable Boulder state in <code>.omo/boulder.json</code> survives across turns and sessions</li>\n<li>A Stop-hook re-injects the next turn until the plan is complete</li>\n<li>Independent sub-tasks fan out to parallel subagents</li>\n<li>Strict TDD plus five evidence gates: plan reread, automated verification, manual-QA, adversarial QA, cleanup</li>\n<li>Progress is recorded to a ledger</li>\n</ul>\n<h3 id=\"syntax\">Syntax</h3>\n<pre><code class=\"language-bash\">$start-work [plan-name] [--worktree &lt;absolute-path&gt;]\n</code></pre>\n<h3 id=\"done\">Done</h3>\n<p>It prints an <code>ORCHESTRATION COMPLETE</code> block when every checkbox is checked.</p>\n",
   "ulw-loop.md": "<p><code>$ulw-loop</code> is a self-referential development loop that decomposes work into systematic, evidence-bound steps and runs until verified completion.</p>\n<h3 id=\"how-it-works\">How it works</h3>\n<p>The agent works continuously and emits <code>&lt;promise&gt;DONE&lt;/promise&gt;</code> when it believes the task is complete, but that does NOT end the loop. An Oracle must verify the result first. The loop ends only after the system confirms the Oracle verified it. If verification fails, it continues with the message: &quot;Oracle verification failed. Continuing ULTRAWORK loop.&quot;</p>\n<p>Each step carries its own evidence: a real artifact, not a dry-run claim. Progress is checkpointed, so a long run survives restarts without losing what was already proven.</p>\n<h3 id=\"bootstrap\">Bootstrap</h3>\n<p>Before the first run, the loop reads its full workflow reference (Bootstrap tier triage, the Execution Loop, and the Manual-QA channels table) so every later phase executes the same way. It only reads the sections the current phase needs.</p>\n<h3 id=\"manual-qa-channels\">Manual-QA channels</h3>\n<p>A step does not close on a status string. It closes on a captured artifact from a real surface — an HTTP call, a tmux session, or a browser — plus an adversarial pass and a cleanup receipt. See <a href=\"#manual-qa\">manual QA</a>.</p>\n<h3 id=\"syntax\">Syntax</h3>\n<pre><code class=\"language-bash\">$ulw-loop &quot;task description&quot; [--completion-promise=TEXT] [--strategy=reset|continue]\n</code></pre>\n<h3 id=\"limits\">Limits</h3>\n<p>The iteration cap is 500 in ultrawork mode (100 in normal mode).</p>\n<h3 id=\"reading-more\">Reading more</h3>\n<ul>\n<li><a href=\"#ultrawork\">ultrawork mode</a> — the mode that turns the loop into a binding verified run.</li>\n<li><a href=\"#hooks-lifecycle\">Hooks &amp; Lifecycle</a> — how the Stop-hook re-injects the next turn.</li>\n</ul>\n",
-  "skills.md": "<p>Skills are specialist playbooks that LazyCodex loads on top of the command pillars. They auto-activate when a task matches their domain — you do not need to study or memorize them. Include <code>ultrawork</code> (or the short alias <code>ulw</code>) in your prompt and the harness picks the right skills internally.</p>\n<p>When you want to call a skill explicitly, put its name in the prompt: <code>$review-work</code>, <code>$remove-ai-slops</code>, <code>$ulw-research</code>, and so on.</p>\n<h3 id=\"commands\">Commands</h3>\n<p>The command pillars stay simple:</p>\n<ul>\n<li><code>$init-deep</code> — project memory</li>\n<li><code>$ulw-plan</code> — decision-complete planning before coding</li>\n<li><code>$start-work</code> — execute a plan with durable Boulder progress</li>\n<li><code>$ulw-loop</code> — evidence-bound loop until verified completion</li>\n</ul>\n<p>Skills add specialist judgment around those pillars. The sections below describe each skill and how it is typically used.</p>\n<hr>\n<h3 id=\"review-work\">review-work</h3>\n<p>Five-lane parallel post-implementation review.</p>\n<p>After significant work, <code>review-work</code> launches five sub-agents in parallel — each covering a different angle: goal/constraint verification, hands-on QA execution, code quality, security, and context mining from git history and issues. All five must pass for the review to pass. One failure means the review fails.</p>\n<p><strong>When it activates:</strong> After completing any meaningful implementation — especially when the change touches 3+ files or runs for 20+ minutes.</p>\n<p><strong>Example:</strong> After finishing a PR, the user says:</p>\n<pre><code class=\"language-text\">review my work\n</code></pre>\n<p>The harness spawns five parallel reviewers in separate threads, each with a focused lens. 
The final verdict is PASS only when every lane agrees.</p>\n<hr>\n<h3 id=\"remove-ai-slops\">remove-ai-slops</h3>\n<p>Behavior-preserving cleanup of AI-generated code smells.</p>\n<p>The safety invariant: regression tests lock behavior <em>before</em> a single line is deleted. Covers obvious comments, excessive defensive code, unnecessary abstractions, dead code, duplicates, and oversized modules (250+ pure LOC triggers a full modular refactoring). Workers run in parallel batches of five, and any test failure triggers an immediate revert.</p>\n<p><strong>When it activates:</strong> When asked to clean, deslop, or remove AI-generated patterns.</p>\n<p><strong>Example:</strong> Combining with <code>refactor</code> and <code>programming</code> for a full cleanup pass:</p>\n<pre><code class=\"language-text\">ulw plan and manual qa, no behaviour changes, no regressions\n/refactor /remove-ai-slops through /programming\n</code></pre>\n<p>The harness plans the cleanup first, locks behavior with tests, then dispatches parallel workers by slop category — safe to dangerous order.</p>\n<hr>\n<h3 id=\"frontend\">frontend</h3>\n<p>UI, UX, design, performance, accessibility, and visual QA — all in one router.</p>\n<p>Not a single rule file but a router. It reads design, perfection, and ui-ux-db references based on the task, then builds and verifies against the actual browser. 
Covers UI implementation, styling, layout, animation, Lighthouse 100, Core Web Vitals, accessibility, SEO, and React dev tools like <code>react-scan</code> and <code>react-doctor</code>.</p>\n<p><strong>When it activates:</strong> Any task involving UI, styling, layout, animation, design, or performance auditing.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">redesign the sidebar with better spacing and hit Lighthouse 100\n</code></pre>\n<p>The skill routes to the right design references, builds to match the existing design system, then runs a real Playwright Chromium Lighthouse audit — never the Lighthouse CLI, never by weakening UX.</p>\n<hr>\n<h3 id=\"programming\">programming</h3>\n<p>One philosophy across four languages: strict types, modern stacks, TDD.</p>\n<p>Applies to every <code>.py</code>, <code>.pyi</code>, <code>.rs</code>, <code>.ts</code>, <code>.tsx</code>, <code>.mts</code>, <code>.cts</code>, <code>.go</code> file. The skill gates on language, loads the matching reference set, and enforces: parse-don&#39;t-validate at boundaries, exhaustive variant matching, typed errors, no escape hatches (<code>any</code>, <code>unwrap</code>, <code>@ts-ignore</code>), 250 pure LOC ceiling per file, and mandatory TDD (RED → GREEN → REFACTOR).</p>\n<p><strong>When it activates:</strong> Automatically on any code file edit in the supported languages.</p>\n<p><strong>Example:</strong> The skill is always on. When editing TypeScript, it loads the TypeScript reference (Bun + Biome + strict tsconfig), enforces branded types and discriminated unions, and runs the post-write review loop: measure pure LOC, self-review seven questions, refactor if over 250 LOC.</p>\n<hr>\n<h3 id=\"debugging\">debugging</h3>\n<p>Hypothesis-driven runtime debugging across any language or binary.</p>\n<p>Every claim about why a bug happens must come from observed runtime state, not code reading. 
The skill runs a phased loop: setup and journal, form 3+ orthogonal hypotheses, investigate in parallel, escalate to independent verifiers after 2 failed rounds, confirm root cause by toggling, lock with a failing test, fix minimally, QA on the real surface, then clean up every debug artifact.</p>\n<p><strong>When it activates:</strong> Crashes, silent failures, wrong responses, stuck processes, memory leaks, async misbehavior, or reverse engineering.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">debug this — the API returns 200 but the body is empty\n</code></pre>\n<p>The skill fires parallel investigation lanes, attaches real debuggers (pdb, node inspect, lldb, dlv), and does not close the bug until the root cause is confirmed by toggling and a failing test goes GREEN.</p>\n<hr>\n<h3 id=\"refactor\">refactor</h3>\n<p>Codemap-aware, LSP- and AST-grep-powered restructuring.</p>\n<p>Maps the codebase before touching anything, evaluates test coverage to set the verification strategy, plans atomic steps with rollback points, then executes with LSP renames and AST-grep structural rewrites. Any test failure during execution triggers an immediate stop and revert.</p>\n<p><strong>When it activates:</strong> Requests to refactor, restructure, extract, simplify, or modernize code.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">refactor the validation logic into its own module --scope=module\n</code></pre>\n<p>The skill builds a dependency graph of the target, runs characterization tests to pin current behavior, then executes the restructuring step by step — verifying after each step.</p>\n<hr>\n<h3 id=\"visual-qa\">visual-qa</h3>\n<p>Screenshot and TUI diff plus dual-oracle visual QA.</p>\n<p>Captures reference and actual evidence — screenshots for web UIs, <code>tmux capture-pane</code> for terminal UIs — then runs a bundled pixel-diff or column-width script. 
Two parallel read-only oracle passes evaluate: one for design-system and functional integrity, one for visual fidelity and CJK text precision. The final verdict is a single good/bad score.</p>\n<p><strong>When it activates:</strong> After building or changing any UI, or when asked to verify visual correctness.</p>\n<hr>\n<h3 id=\"git-master\">git-master</h3>\n<p>Atomic commits, rebase/squash, push safety, history investigation.</p>\n<p>Handles commit message style detection, semantic grouping, fixup autosquash, blame, bisect, <code>log -S</code>, and questions like &quot;who wrote this&quot; or &quot;when was this added.&quot;</p>\n<p><strong>When it activates:</strong> Any git operation — committing, rebasing, squashing, history search.</p>\n<hr>\n<h3 id=\"ulw-research\">ulw-research</h3>\n<p>Maximum-saturation research mode (formerly <code>ultraresearch</code>).</p>\n<p>Orchestrates parallel explore and librarian swarms across the codebase, the web, official documentation, and OSS repositories. Runs a recursive EXPAND loop driven by leads that workers return, verifies findings empirically by running code, and produces cited synthesis with optional reports.</p>\n<p><strong>When it activates:</strong> Only on explicit demand — the word <code>ulw-research</code>, the legacy alias <code>ultraresearch</code>, or any request for deep research or an ultra-precise investigation.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">ulw-research the typeclaw architecture — map every module and find the official docs\n</code></pre>\n<p>The skill fans out 10+ parallel search lanes across GitHub, official docs, and web sources, recursively expands promising leads, then synthesizes a cited report.</p>\n<hr>\n<h3 id=\"lsp\">LSP</h3>\n<p>Language-server diagnostics, definitions, references, symbols, and safe renames.</p>\n<p>Gives the agent language-server precision via MCP tool calls. 
Runs diagnostics after every edit, finds definitions and references across the workspace, and performs safe renames through the language server&#39;s own workspace edit — not text find-and-replace.</p>\n<p><strong>When it activates:</strong> Automatically after edit-like tool calls (diagnostics), and on demand for navigation and renames.</p>\n<hr>\n<h3 id=\"ast-grep\">AST-grep</h3>\n<p>Structural search and rewrite across 25 languages.</p>\n<p>Finds code by syntactic shape rather than text — every function call matching a pattern, every import shaped like X. Rewrites are deterministic and always previewed with <code>dryRun=true</code> before applying. Pairs with the <code>refactor</code> skill for safe, large-scale codemods.</p>\n<p><strong>When it activates:</strong> Structural code matching, pattern-based search, or deterministic rewrites (strip <code>as any</code>, migrate <code>require()</code> to <code>import</code>, find empty catch blocks).</p>\n<hr>\n<h3 id=\"where-skills-live\">Where skills live</h3>\n<p>LazyCodex installs skills as part of the OmO plugin. OmO can also load skills from project and user locations such as <code>.codex/skills</code>, <code>~/.codex/skills</code>, <code>.agents/skills</code>, and <code>~/.agents/skills</code>.</p>\n<p>Each skill carries deep internal references — detailed playbooks, language-specific recipes, and per-phase instructions — but none of that is something you need to read. The harness reads it for you when the skill activates.</p>\n<p>The command pillars and the disciplines behind them are covered in depth: <a href=\"#ulw-plan\">ulw-plan</a>, <a href=\"#ulw-loop\">ulw-loop</a>, <a href=\"#start-work\">start-work</a>, <a href=\"#tdd\">TDD</a>, <a href=\"#manual-qa\">manual QA</a>, and <a href=\"#git-workflow\">git workflow</a>.</p>\n",
+  "skills.md": "<p>Skills are specialist playbooks that LazyCodex loads on top of the command pillars. They auto-activate when a task matches their domain — you do not need to study or memorize them. Include <code>ultrawork</code> (or the short alias <code>ulw</code>) in your prompt and the harness picks the right skills internally.</p>\n<p>When you want to call a skill explicitly, put its name in the prompt: <code>$review-work</code>, <code>$remove-ai-slops</code>, <code>$ulw-research</code>, and so on.</p>\n<h3 id=\"commands\">Commands</h3>\n<p>The command pillars stay simple:</p>\n<ul>\n<li><code>$init-deep</code> — project memory</li>\n<li><code>$ulw-plan</code> — decision-complete planning before coding</li>\n<li><code>$start-work</code> — execute a plan with durable Boulder progress</li>\n<li><code>$ulw-loop</code> — evidence-bound loop until verified completion</li>\n</ul>\n<p>Skills add specialist judgment around those pillars. The sections below describe each skill and how it is typically used.</p>\n<h3 id=\"skill-index\">Skill index</h3>\n<p>Most skills auto-activate when a request matches their domain, so you do not need to study or manually select every skill before using LazyCodex. 
When you want to be explicit, put the skill name in the prompt — for example <code>$visual-qa</code>, <code>$git-master</code>, or <code>$ulw-research</code>.</p>\n<table>\n<thead>\n<tr>\n<th>Skill</th>\n<th>Use it for</th>\n</tr>\n</thead>\n<tbody><tr>\n<td><code>init-deep</code></td>\n<td>Hierarchical <code>AGENTS.md</code> context for large or old repos</td>\n</tr>\n<tr>\n<td><code>ulw-plan</code></td>\n<td>Explore-first planning before coding</td>\n</tr>\n<tr>\n<td><code>ulw-loop</code></td>\n<td>Evidence-bound loop until verified completion</td>\n</tr>\n<tr>\n<td><code>start-work</code></td>\n<td>Execute a plan with durable Boulder progress</td>\n</tr>\n<tr>\n<td><code>review-work</code></td>\n<td>Five-lane parallel post-implementation review</td>\n</tr>\n<tr>\n<td><code>remove-ai-slops</code></td>\n<td>Behavior-preserving cleanup of AI-looking code</td>\n</tr>\n<tr>\n<td><code>frontend</code></td>\n<td>Designed UI work instead of generic layout filling</td>\n</tr>\n<tr>\n<td><code>programming</code></td>\n<td>Strict TypeScript, Rust, Python, or Go discipline, TDD-first</td>\n</tr>\n<tr>\n<td><code>git-master</code></td>\n<td>Atomic commits, rebase/squash, push safety, history investigation</td>\n</tr>\n<tr>\n<td><code>visual-qa</code></td>\n<td>Screenshot/TUI diff plus dual-oracle visual QA</td>\n</tr>\n<tr>\n<td><code>debugging</code></td>\n<td>Evidence-led root-cause investigation</td>\n</tr>\n<tr>\n<td><code>refactor</code></td>\n<td>Behavior-preserving restructure of existing code</td>\n</tr>\n<tr>\n<td><code>ulw-research</code></td>\n<td>Maximum-saturation research with codebase, web, official-docs, and OSS-repo swarms</td>\n</tr>\n<tr>\n<td><code>LSP</code></td>\n<td>Diagnostics, definitions, references, symbols, and renames</td>\n</tr>\n<tr>\n<td><code>lsp-setup</code></td>\n<td>Configure language servers for a project</td>\n</tr>\n<tr>\n<td><code>AST-grep</code></td>\n<td>Structural search and rewrite across 
code</td>\n</tr>\n<tr>\n<td><code>rules</code></td>\n<td>Project instructions from AGENTS, rules, and instruction files</td>\n</tr>\n<tr>\n<td><code>comment-checker</code></td>\n<td>Feedback after edit-like operations</td>\n</tr>\n</tbody></table>\n<h3 id=\"skill-highlights\">Skill highlights</h3>\n<hr>\n<h3 id=\"review-work\">review-work</h3>\n<p>Five-lane parallel post-implementation review.</p>\n<p>After significant work, <code>review-work</code> launches five sub-agents in parallel — each covering a different angle: goal/constraint verification, hands-on QA execution, code quality, security, and context mining from git history and issues. All five must pass for the review to pass. One failure means the review fails.</p>\n<p><strong>When it activates:</strong> After completing any meaningful implementation — especially when the change touches 3+ files or runs for 20+ minutes.</p>\n<p><strong>Example:</strong> After finishing a PR, the user says:</p>\n<pre><code class=\"language-text\">review my work\n</code></pre>\n<p>The harness spawns five parallel reviewers in separate threads, each with a focused lens. The final verdict is PASS only when every lane agrees.</p>\n<hr>\n<h3 id=\"remove-ai-slops\">remove-ai-slops</h3>\n<p>Behavior-preserving cleanup of AI-generated code smells.</p>\n<p>The safety invariant: regression tests lock behavior <em>before</em> a single line is deleted. Covers obvious comments, excessive defensive code, unnecessary abstractions, dead code, duplicates, and oversized modules (250+ pure LOC triggers a full modular refactoring). 
Workers run in parallel batches of five, and any test failure triggers an immediate revert.</p>\n<p><strong>When it activates:</strong> When asked to clean, deslop, or remove AI-generated patterns.</p>\n<p><strong>Example:</strong> Combining with <code>refactor</code> and <code>programming</code> for a full cleanup pass:</p>\n<pre><code class=\"language-text\">ulw plan and manual qa, no behaviour changes, no regressions\n/refactor /remove-ai-slops through /programming\n</code></pre>\n<p>The harness plans the cleanup first, locks behavior with tests, then dispatches parallel workers by slop category — safe to dangerous order.</p>\n<hr>\n<h3 id=\"frontend\">frontend</h3>\n<p>UI, UX, design, performance, accessibility, and visual QA — all in one router.</p>\n<p>Not a single rule file but a router. It reads design, perfection, and ui-ux-db references based on the task, then builds and verifies against the actual browser. Covers UI implementation, styling, layout, animation, Lighthouse 100, Core Web Vitals, accessibility, SEO, and React dev tools like <code>react-scan</code> and <code>react-doctor</code>.</p>\n<p><strong>When it activates:</strong> Any task involving UI, styling, layout, animation, design, or performance auditing.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">redesign the sidebar with better spacing and hit Lighthouse 100\n</code></pre>\n<p>The skill routes to the right design references, builds to match the existing design system, then runs a real Playwright Chromium Lighthouse audit — never the Lighthouse CLI, never by weakening UX.</p>\n<hr>\n<h3 id=\"programming\">programming</h3>\n<p>One philosophy across four languages: strict types, modern stacks, TDD.</p>\n<p>Applies to every <code>.py</code>, <code>.pyi</code>, <code>.rs</code>, <code>.ts</code>, <code>.tsx</code>, <code>.mts</code>, <code>.cts</code>, <code>.go</code> file. 
The skill gates on language, loads the matching reference set, and enforces: parse-don&#39;t-validate at boundaries, exhaustive variant matching, typed errors, no escape hatches (<code>any</code>, <code>unwrap</code>, <code>@ts-ignore</code>), 250 pure LOC ceiling per file, and mandatory TDD (RED → GREEN → REFACTOR).</p>\n<p><strong>When it activates:</strong> Automatically on any code file edit in the supported languages.</p>\n<p><strong>Example:</strong> The skill is always on. When editing TypeScript, it loads the TypeScript reference (Bun + Biome + strict tsconfig), enforces branded types and discriminated unions, and runs the post-write review loop: measure pure LOC, self-review seven questions, refactor if over 250 LOC.</p>\n<hr>\n<h3 id=\"debugging\">debugging</h3>\n<p>Hypothesis-driven runtime debugging across any language or binary.</p>\n<p>Every claim about why a bug happens must come from observed runtime state, not code reading. The skill runs a phased loop: setup and journal, form 3+ orthogonal hypotheses, investigate in parallel, escalate to independent verifiers after 2 failed rounds, confirm root cause by toggling, lock with a failing test, fix minimally, QA on the real surface, then clean up every debug artifact.</p>\n<p><strong>When it activates:</strong> Crashes, silent failures, wrong responses, stuck processes, memory leaks, async misbehavior, or reverse engineering.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">debug this — the API returns 200 but the body is empty\n</code></pre>\n<p>The skill fires parallel investigation lanes, attaches real debuggers (pdb, node inspect, lldb, dlv), and does not close the bug until the root cause is confirmed by toggling and a failing test goes GREEN.</p>\n<hr>\n<h3 id=\"refactor\">refactor</h3>\n<p>Codemap-aware, LSP- and AST-grep-powered restructuring.</p>\n<p>Maps the codebase before touching anything, evaluates test coverage to set the verification strategy, plans atomic steps 
with rollback points, then executes with LSP renames and AST-grep structural rewrites. Any test failure during execution triggers an immediate stop and revert.</p>\n<p><strong>When it activates:</strong> Requests to refactor, restructure, extract, simplify, or modernize code.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">refactor the validation logic into its own module --scope=module\n</code></pre>\n<p>The skill builds a dependency graph of the target, runs characterization tests to pin current behavior, then executes the restructuring step by step — verifying after each step.</p>\n<hr>\n<h3 id=\"visual-qa\">visual-qa</h3>\n<p>Screenshot and TUI diff plus dual-oracle visual QA.</p>\n<p>Captures reference and actual evidence — screenshots for web UIs, <code>tmux capture-pane</code> for terminal UIs — then runs a bundled pixel-diff or column-width script. Two parallel read-only oracle passes evaluate: one for design-system and functional integrity, one for visual fidelity and CJK text precision. The final verdict is a single good/bad score.</p>\n<p><strong>When it activates:</strong> After building or changing any UI, or when asked to verify visual correctness.</p>\n<hr>\n<h3 id=\"git-master\">git-master</h3>\n<p>Atomic commits, rebase/squash, push safety, history investigation.</p>\n<p>Handles commit message style detection, semantic grouping, fixup autosquash, blame, bisect, <code>log -S</code>, and questions like &quot;who wrote this&quot; or &quot;when was this added.&quot;</p>\n<p><strong>When it activates:</strong> Any git operation — committing, rebasing, squashing, history search.</p>\n<hr>\n<h3 id=\"ulw-research\">ulw-research</h3>\n<p>Maximum-saturation research mode (formerly <code>ultraresearch</code>).</p>\n<p>Orchestrates parallel explore and librarian swarms across the codebase, the web, official documentation, and OSS repositories. 
Runs a recursive EXPAND loop driven by leads that workers return, verifies findings empirically by running code, and produces cited synthesis with optional reports.</p>\n<p><strong>When it activates:</strong> Only on explicit demand — the word <code>ulw-research</code>, the legacy alias <code>ultraresearch</code>, or any request for deep research or an ultra-precise investigation.</p>\n<p><strong>Example:</strong></p>\n<pre><code class=\"language-text\">ulw-research the typeclaw architecture — map every module and find the official docs\n</code></pre>\n<p>The skill fans out 10+ parallel search lanes across GitHub, official docs, and web sources, recursively expands promising leads, then synthesizes a cited report.</p>\n<hr>\n<h3 id=\"lsp\">LSP</h3>\n<p>Language-server diagnostics, definitions, references, symbols, and safe renames.</p>\n<p>Gives the agent language-server precision via MCP tool calls. Runs diagnostics after every edit, finds definitions and references across the workspace, and performs safe renames through the language server&#39;s own workspace edit — not text find-and-replace.</p>\n<p><strong>When it activates:</strong> Automatically after edit-like tool calls (diagnostics), and on demand for navigation and renames.</p>\n<hr>\n<h3 id=\"ast-grep\">AST-grep</h3>\n<p>Structural search and rewrite across 25 languages.</p>\n<p>Finds code by syntactic shape rather than text — every function call matching a pattern, every import shaped like X. Rewrites are deterministic and always previewed with <code>dryRun=true</code> before applying. 
Pairs with the <code>refactor</code> skill for safe, large-scale codemods.</p>\n<p><strong>When it activates:</strong> Structural code matching, pattern-based search, or deterministic rewrites (strip <code>as any</code>, migrate <code>require()</code> to <code>import</code>, find empty catch blocks).</p>\n<hr>\n<h3 id=\"lsp-setup\">lsp-setup</h3>\n<p>Language-server installation and workspace wiring.</p>\n<p>Configures language servers when a project does not already expose reliable diagnostics, definitions, references, and safe renames. It detects the language stack, installs or points to the right server, and validates that LSP calls work before higher-level coding or refactor skills depend on them.</p>\n<p><strong>When it activates:</strong> When diagnostics are missing, definitions cannot be resolved, or a project needs LSP support before a refactor or programming task.</p>\n<hr>\n<h3 id=\"rules\">rules</h3>\n<p>Project instruction injection from repository and user rule files.</p>\n<p>Automatically loads project instructions from sources such as <code>AGENTS.md</code>, <code>CONTEXT.md</code>, <code>.omo/rules/</code>, <code>.claude/rules/</code>, <code>.github/instructions/</code>, and <code>.github/copilot-instructions.md</code>. There is no command to run — the harness treats these rules as active context when the plugin is enabled.</p>\n<p><strong>When it activates:</strong> At session start and prompt submission, so agents inherit project constraints before planning or editing.</p>\n<hr>\n<h3 id=\"comment-checker\">comment-checker</h3>\n<p>Immediate feedback after edit-like operations.</p>\n<p>After code changes, <code>comment-checker</code> inspects comments near the edited lines. If it flags comment drift — a comment that no longer matches the code below it — the agent must fix or justify the comment before proceeding. 
This catches stale comments at the moment they are introduced rather than during a later review.</p>\n<p><strong>When it activates:</strong> After write, edit, patch, or other edit-like tool calls when the plugin has the guardrail enabled.</p>\n<hr>\n<h3 id=\"where-skills-live\">Where skills live</h3>\n<p>LazyCodex installs skills as part of the OmO plugin. OmO can also load skills from project and user locations such as <code>.codex/skills</code>, <code>~/.codex/skills</code>, <code>.opencode/skills</code>, <code>~/.config/opencode/skills</code>, <code>.claude/skills</code>, <code>.agents/skills</code>, and <code>~/.agents/skills</code>.</p>\n<p>LazyCodex installs the Codex Light setup with:</p>\n<pre><code class=\"language-bash\">npx lazycodex-ai install\n</code></pre>\n<p>That installer wires the Codex marketplace plugin as <code>omo@sisyphuslabs</code> while keeping the public package alias easy to remember.</p>\n<p>Each skill carries deep internal references — detailed playbooks, language-specific recipes, and per-phase instructions — but none of that is something you need to read. The harness reads it for you when the skill activates.</p>\n<p>The command pillars and the disciplines behind them are covered in depth: <a href=\"#ulw-plan\">ulw-plan</a>, <a href=\"#ulw-loop\">ulw-loop</a>, <a href=\"#start-work\">start-work</a>, <a href=\"#tdd\">TDD</a>, <a href=\"#manual-qa\">manual QA</a>, and <a href=\"#git-workflow\">git workflow</a>.</p>\n",
   "ultrawork.md": "<p>ultrawork is the headline mode. Include <code>ultrawork</code> (or the short alias <code>ulw</code>) anywhere in your prompt — like adding <code>ultrathink</code> — and the harness switches to maximum-precision, outcome-first, evidence-driven orchestration. Skills activate internally; you do not need to name them.</p>\n<blockquote>\n<p>&quot;Plan, execute, verify, and keep the evidence attached.&quot;</p>\n</blockquote>\n<p>The principle is simple. An agent saying it is done does not mean the work is done. The work is done when <strong>observable evidence verifies</strong> it.</p>\n<h3 id=\"usage\">Usage</h3>\n<p>Just include the word in your prompt. Nothing else to configure.</p>\n<pre><code class=\"language-text\">ulw add authentication\n</code></pre>\n<pre><code class=\"language-text\">fix the flaky checkout test ultrawork\n</code></pre>\n<p>The harness reads the task, picks the right skills (programming, debugging, refactor, etc.), and runs the evidence-bound loop automatically. You do not choose skills yourself unless you want to be explicit — for example <code>$review-work</code> or <code>$ulw-research</code>.</p>\n<h3 id=\"what-it-enforces\">What it enforces</h3>\n<ul>\n<li>Strict TDD: RED → GREEN → SURFACE → CLEAN</li>\n<li>At least 3 realistic QA scenarios scaled to the risk of the task</li>\n<li>Real manual-QA channels (HTTP call, tmux, browser, computer use, CLI stdout, data diff)</li>\n<li>A binding verification gate that loops until the work is genuinely done</li>\n</ul>\n<h3 id=\"relationship-to-ulw-loop\">Relationship to <code>$ulw-loop</code></h3>\n<p><code>$ulw-loop</code> is the command form of ultrawork discipline. 
The latest flow stores request, goals, success criteria, and an evidence ledger under <code>.omo/ulw-loop</code>:</p>\n<table>\n<thead>\n<tr>\n<th>File</th>\n<th>Role</th>\n</tr>\n</thead>\n<tbody><tr>\n<td><code>.omo/ulw-loop/brief.md</code></td>\n<td>Original request and persistent constraints</td>\n</tr>\n<tr>\n<td><code>.omo/ulw-loop/goals.json</code></td>\n<td>Goals and success criteria</td>\n</tr>\n<tr>\n<td><code>.omo/ulw-loop/ledger.jsonl</code></td>\n<td>pass, fail, block, steering, checkpoint records</td>\n</tr>\n</tbody></table>\n<p>Saying &quot;done&quot; is not enough. Each success criterion requires evidence captured from a real surface, and that evidence must pass before the loop stops.</p>\n<p>The exact syntax and flags live in the <a href=\"#ulw-loop\"><code>$ulw-loop</code> command docs</a>.</p>\n<h3 id=\"failure-limits\">Failure limits</h3>\n<p>The loop does not run forever. The latest <code>$ulw-loop</code> workflow uses these caps:</p>\n<table>\n<thead>\n<tr>\n<th>Condition</th>\n<th>Limit</th>\n</tr>\n</thead>\n<tbody><tr>\n<td>Iterations on one goal without a full pass</td>\n<td>5 cycles</td>\n</tr>\n<tr>\n<td>Same failure on the same criterion</td>\n<td>3 times</td>\n</tr>\n</tbody></table>\n<h3 id=\"evidence-over-hope\">Evidence over hope</h3>\n<p>The loop does not stop at &quot;I wrote some code.&quot; It stops when the result is confirmed by evidence — what check ran and what it showed — not by the agent&#39;s expected status report.</p>\n<h3 id=\"position-among-commands\">Position among commands</h3>\n<p><code>$ulw-loop</code> is one of several commands, each for a different shape of work.</p>\n<p>The typical flow: <code>$ulw-plan</code> produces a decision-complete plan, <code>$start-work</code> executes it checkpoint by checkpoint, and <code>$ulw-loop</code> keeps open-ended work running until a verifier approves. Detailed syntax for each command is in the <a href=\"#ulw-plan\">Commands</a> section.</p>\n",
   "discipline-agents.md": "<p>LazyCodex ports a single discipline agent from OmO into Codex: <strong>Hephaestus</strong>, the autonomous deep worker. There is no Sisyphus orchestrator in the Codex package — Hephaestus is the one role, and it carries the whole run itself with read-only subagents for parallel exploration.</p>\n<h3 id=\"what-hephaestus-is\">What Hephaestus is</h3>\n<p>Named after the Greek god of the forge. Goal-oriented: you give it objectives, not step-by-step recipes, and it executes them end-to-end. &quot;The Legitimate Craftsman.&quot; Methodical, thorough, obsessive — built for deep architectural reasoning, complex debugging, and cross-domain synthesis.</p>\n<h3 id=\"installed-roles\">Installed roles</h3>\n<p>As of <code>4.12.1</code>, the following roles are installed. When Codex exposes <code>agent_type</code>, the role is set directly; otherwise the role description is included in the message as a fallback.</p>\n<table>\n<thead>\n<tr>\n<th>Role</th>\n<th>Primary use</th>\n</tr>\n</thead>\n<tbody><tr>\n<td><code>explorer</code></td>\n<td>Internal codebase context: structure, call flows, test locations.</td>\n</tr>\n<tr>\n<td><code>librarian</code></td>\n<td>External docs, library contracts, latest API research.</td>\n</tr>\n<tr>\n<td><code>plan</code></td>\n<td>Plan drafting and task decomposition.</td>\n</tr>\n<tr>\n<td><code>momus</code> / <code>metis</code></td>\n<td>Missing decisions, edge cases, risk review.</td>\n</tr>\n<tr>\n<td><code>lazycodex-executor</code></td>\n<td>Executing specific task units from a plan.</td>\n</tr>\n<tr>\n<td><code>lazycodex-code-reviewer</code></td>\n<td>Post-implementation code quality review.</td>\n</tr>\n<tr>\n<td><code>lazycodex-qa-executor</code></td>\n<td>Real-execution-based QA.</td>\n</tr>\n<tr>\n<td><code>lazycodex-gate-reviewer</code></td>\n<td>Pre-completion verification gates.</td>\n</tr>\n<tr>\n<td><code>lazycodex-clone-fidelity-reviewer</code></td>\n<td>Clone and sync operation fidelity 
checks.</td>\n</tr>\n</tbody></table>\n<h3 id=\"parent-session-ownership\">Parent session ownership</h3>\n<p>Even with multiple roles, completion judgment is never handed wholesale to a sub-agent. The parent Codex session keeps ownership of goals, constraints, and final judgment. Sub-agents are used to read terrain, find gaps, or assist review.</p>\n<h3 id=\"the-operating-loop\">The operating loop</h3>\n<p>Hephaestus runs a short, tight loop on every unit of work:</p>\n<ol>\n<li><strong>Explore</strong> — map the terrain. Read the code with tools, never speculate. Fire 2-5 parallel explore subagents before writing anything.</li>\n<li><strong>Plan</strong> — chart the course. Record files to modify, specific changes, and dependencies via <code>update_plan</code>.</li>\n<li><strong>Implement</strong> — build with precision. Surgical edits that match codebase style (naming, indentation, imports, error handling) even when a greenfield would read differently.</li>\n<li><strong>Verify</strong> — prove it works. 
LSP diagnostics on changed files, related tests, and build — in parallel where possible.</li>\n<li><strong>Manually QA</strong> — drive the artifact through its real surface (HTTP call, tmux, browser), then write the final message.</li>\n</ol>\n<h3 id=\"non-goals\">Non-goals</h3>\n<ul>\n<li><strong>Never trusts subagent self-reports.</strong> Verification is independent; a child saying &quot;done&quot; does not close the work.</li>\n<li><strong>Never proposes when you asked for code.</strong> Unless you explicitly want a plan or a brainstorm, it implements.</li>\n<li><strong>Never speculates about code it has not read.</strong> Exploration is cheap; assumption is expensive.</li>\n<li><strong>Never leaves work unresolved at end of turn.</strong> Every plan step is reconciled: <code>completed</code>, blocked (one-line reason), or removed (one-line reason).</li>\n</ul>\n<h3 id=\"delegation-not-orchestration\">Delegation, not orchestration</h3>\n<p>Hephaestus stays the parent. For parallel exploration it spawns read-only Codex subagent roles (<code>multi_agent_v1.spawn_agent</code>) and keeps the parent session live with brief status updates while children run. It does not hand the run off to a separate orchestrator — it owns the goal, delegates the grunt work, and verifies the results itself.</p>\n<h3 id=\"boulder-state\">Boulder state</h3>\n<p><code>$start-work</code> uses <code>.omo/boulder.json</code> to persist progress and the Stop-hook continuation to keep plan execution moving. This is the core visible behavior: checkboxes advance, and when all are done it prints <strong>ORCHESTRATION COMPLETE</strong>.</p>\n<h3 id=\"where-the-boulder-comes-from\">Where the boulder comes from</h3>\n<p>The full OmO has a second primary agent, <strong>Sisyphus</strong>, the orchestrator with <code>.omo/boulder.json</code> session continuity. 
The Codex package is the Hephaestus-only light port, so on Codex the durable progress state lives in <code>.omo/boulder.json</code> as written by <a href=\"#start-work\"><code>$start-work</code></a> and the Stop-hook continuation — without the Sisyphus orchestration layer.</p>\n<h3 id=\"reading-more\">Reading more</h3>\n<ul>\n<li><a href=\"#ultrawork\">ultrawork mode</a> — the mode that turns the loop into a binding verified run.</li>\n<li><a href=\"#hooks-lifecycle\">Hooks &amp; Lifecycle</a> — how the Stop-hook re-injects the next turn until the plan is complete.</li>\n</ul>\n",
   "model-routing.md": "<p>Multi-model routing sends each part of a run to the model that fits it best, instead of running everything on one model. LazyCodex installs OmO&#39;s routing defaults into Codex so a serious repository is not bottlenecked by a single context window or price point.</p>\n<h3 id=\"current-baseline\">Current baseline</h3>\n<p>The <code>4.12.1</code> bundled <code>model-catalog.json</code> centers the default profile on <code>gpt-5.5</code>:</p>\n<table>\n<thead>\n<tr>\n<th>Profile</th>\n<th>Model</th>\n<th>Reasoning</th>\n</tr>\n</thead>\n<tbody><tr>\n<td>Default</td>\n<td><code>gpt-5.5</code></td>\n<td><code>high</code></td>\n</tr>\n<tr>\n<td>Plan mode</td>\n<td><code>gpt-5.5</code></td>\n<td><code>xhigh</code></td>\n</tr>\n<tr>\n<td>Worker</td>\n<td><code>gpt-5.5</code></td>\n<td><code>high</code></td>\n</tr>\n<tr>\n<td>Verifier</td>\n<td><code>gpt-5.5</code></td>\n<td><code>high</code></td>\n</tr>\n</tbody></table>\n<p>The actual model name you see may differ as Codex and OpenAI update their model lineup. 
This doc focuses on <em>how</em> LazyCodex uses model profiles, not on comparing specific models.</p>\n<h3 id=\"what-gets-routed\">What gets routed</h3>\n<ul>\n<li><strong>Planning and exploration</strong> go to a strong reasoning model that can hold a large context and weigh trade-offs.</li>\n<li><strong>Implementation turns</strong> go to a fast, capable coding model for the bulk of the edit loop.</li>\n<li><strong>Verification</strong> goes to a model used as an oracle, chosen for judgment rather than throughput.</li>\n<li><strong>Specialist skills</strong> can target their own model when a skill benefits from a specific profile.</li>\n</ul>\n<h3 id=\"why-role-profiles-exist\">Why role profiles exist</h3>\n<p>Role-based profiles separate work by nature:</p>\n<ul>\n<li>General tasks follow the default model setting.</li>\n<li>Plan mode may demand stronger reasoning.</li>\n<li>Worker and verifier are kept separate so the same result is checked from a different angle.</li>\n</ul>\n<p>This pairs with <a href=\"#discipline-agents\">Agent Roles</a>. Even when multiple roles move in parallel, each role&#39;s model profile is tracked in the Codex configuration.</p>\n<h3 id=\"how-it-fits-the-harness\">How it fits the harness</h3>\n<p>Routing is part of the harness setup that <code>npx lazycodex-ai install</code> wires into Codex. It detects the available subscriptions and provider auth, then maps roles to models so you do not hand-configure each one.</p>\n<h3 id=\"provider-auth\">Provider auth</h3>\n<p>Auth targets Codex itself, not LazyCodex. Once Codex is logged in, the installer&#39;s subscription detection and provider routing take over. If you let an LLM agent run the install, it walks the same detection and selection for you.</p>\n<h3 id=\"user-notes\">User notes</h3>\n<ul>\n<li>The model name you see after install may differ from what the docs list. 
The installed <code>model-catalog.json</code> and the models your Codex build supports take precedence.</li>\n<li>Model settings balance quality and speed. Lowering them arbitrarily can degrade planning, review, and QA quality together.</li>\n<li>When in doubt, check install state and Codex config first.</li>\n</ul>\n<h3 id=\"customizing-it\">Customizing it</h3>\n<p>Routing and provider settings live in the configuration. See <a href=\"#configuration\">Configuration</a> for the fields that control which model handles which role, and how to override the defaults per project.</p>\n",
@@ -263,6 +263,16 @@ export const DOC_TOC: Record<string, DocHeading[]> = {
       "id": "commands",
       "text": "Commands"
     },
+    {
+      "level": 3,
+      "id": "skill-index",
+      "text": "Skill index"
+    },
+    {
+      "level": 3,
+      "id": "skill-highlights",
+      "text": "Skill highlights"
+    },
     {
       "level": 3,
       "id": "review-work",
@@ -318,6 +328,21 @@ export const DOC_TOC: Record<string, DocHeading[]> = {
       "id": "ast-grep",
       "text": "AST-grep"
     },
+    {
+      "level": 3,
+      "id": "lsp-setup",
+      "text": "lsp-setup"
+    },
+    {
+      "level": 3,
+      "id": "rules",
+      "text": "rules"
+    },
+    {
+      "level": 3,
+      "id": "comment-checker",
+      "text": "comment-checker"
+    },
     {
       "level": 3,
       "id": "where-skills-live",