diff --git a/plugins/omo/skills/ultimate-browsing/SKILL.md b/plugins/omo/skills/ultimate-browsing/SKILL.md index 019d9ea..b3f6237 100644 --- a/plugins/omo/skills/ultimate-browsing/SKILL.md +++ b/plugins/omo/skills/ultimate-browsing/SKILL.md @@ -54,6 +54,10 @@ yt-dlp --write-sub --write-auto-sub --sub-lang "en,ko" --skip-download -o "/tmp/ The full engine harness (rules R1-R7, the Phase 0 official-API index, the no-site-name rule, and the `references/insane-search/*.md` deep-dives for TLS, Playwright routing, Naver, media, etc.) is in [`references/insane-search/README.md`](references/insane-search/README.md). Read it before tuning the engine or adding a WAF profile. +### Research-source handoff + +When a higher-level research skill (especially `ultraresearch`) asks for blocked-source evidence, return an `INSANE_SOURCE_RECORD` instead of only prose. Include `url`, `final_url`, `title`, `access_method`, `waf_profile`, `verdict`, `selector_proof`, `fetched_at`, `trace_summary`, `source_quality_hint`, and extracted text or a pointer to it. This keeps WAF-bypassed sources eligible for the research source registry, citation ledger, and later claim verification. + ### Escalate to Tier 1.5 or Tier 2 when - The target is a Chinese / social platform with a native reader -> Tier 1.5. - insane-search returns empty/partial, or the page needs JS interaction, a screenshot, a persistent login, or media playback -> Tier 2. diff --git a/plugins/omo/skills/ultraresearch/SKILL.md b/plugins/omo/skills/ultraresearch/SKILL.md index 82dd391..aa81216 100644 --- a/plugins/omo/skills/ultraresearch/SKILL.md +++ b/plugins/omo/skills/ultraresearch/SKILL.md @@ -128,7 +128,7 @@ Role protocols — embed the relevant one in each spawn message; every worker ge - **Codebase (explore), 2-4 workers.** Grep with 3+ keyword variations; structural/AST search; LSP definitions and references; file-name globs; `git log --all -S ''` and `--grep` for history including deleted code. 
Cross-validate hits across tools. Report absolute file paths, patterns with `file:line`, and how findings connect. - **Web (librarian), 3-6 workers.** At least 10 distinct websearch queries per worker, each with a different operator or angle (see Search craft); fetch the full page for every result that matters — snippets lie. Context7 with 3+ queries per known library. grep.app and `gh search code|repos|issues` for real-world usage. Official docs via sitemap discovery (`/sitemap.xml`), then targeted pages. -- **Browsing, 0-3 workers.** Pages plain fetch cannot read (WAF, 403, Cloudflare, dynamic rendering, login): the worker loads the `ultimate-browsing` skill and escalates through its tiers — Tier-1 insane-search engine first, then Tier-2 Chrome stealth — rather than abandoning the source. Capture screenshots when visual context matters. When one blocked territory hides many leads, fan out more browsing subagents in parallel for breadth instead of serializing one worker through them. +- **Browsing, 0-3 workers.** Pages plain fetch cannot read (WAF, 403, Cloudflare, dynamic rendering, login): the worker loads the `ultimate-browsing` skill and escalates through its tiers — Tier-1 insane-search engine first, then Tier-2 Chrome stealth — rather than abandoning the source. Capture screenshots when visual context matters. When one blocked territory hides many leads, fan out more browsing subagents in parallel for breadth instead of serializing one worker through them. The browsing worker must return an `INSANE_SOURCE_RECORD` for every blocked source it resolves through insane-search or its fallback chain: `url`, `final_url`, `title`, `access_method`, `waf_profile`, `verdict`, `selector_proof`, `fetched_at`, `trace_summary`, `source_quality_hint`, and the extracted text or a pointer to it. The orchestrator imports those records into the source registry before synthesis, so blocked-source evidence is cited and auditable instead of becoming an uncited note. 
- **Repo deep-dive (librarian), 0-2 workers.** Shallow-clone the most relevant repos to `${TMPDIR:-/tmp}`, pin the HEAD SHA, read core modules, follow call chains, return SHA-pinned permalinks. Example spawn (codebase axis; librarian, browsing, and repo-dive follow the same contract with their own protocol):
diff --git a/plugins/omo/test/ultimate-browsing-research-contract.test.mjs b/plugins/omo/test/ultimate-browsing-research-contract.test.mjs
new file mode 100644
index 0000000..08746a1
--- /dev/null
+++ b/plugins/omo/test/ultimate-browsing-research-contract.test.mjs
@@ -0,0 +1,56 @@
+import assert from "node:assert/strict";
+import { readFile } from "node:fs/promises";
+import { dirname, join } from "node:path";
+import test from "node:test";
+import { fileURLToPath } from "node:url";
+
+// Plugin root: this file sits in the plugin's test/ directory, so climb two levels.
+const root = dirname(dirname(fileURLToPath(import.meta.url)));
+
+// Fields the handoff section must name for every INSANE_SOURCE_RECORD.
+const RECORD_FIELDS = [
+  "url",
+  "final_url",
+  "title",
+  "access_method",
+  "waf_profile",
+  "verdict",
+  "selector_proof",
+  "fetched_at",
+  "trace_summary",
+  "source_quality_hint",
+];
+
+/**
+ * Returns the "Research-source handoff" section of a skill document.
+ *
+ * @param {string} content - Full text of SKILL.md.
+ * @returns {string} Text from the section heading up to the next "### "
+ *   heading (or end of file); empty string when the heading is absent.
+ */
+function extractHandoffSection(content) {
+  const heading = "### Research-source handoff";
+  const start = content.indexOf(heading);
+  if (start === -1) {
+    return "";
+  }
+  const end = content.indexOf("\n### ", start + heading.length);
+  return end === -1 ? content.slice(start) : content.slice(start, end);
+}
+
+test("#given ultimate-browsing #when used by research #then insane-search exposes a source-record handoff", async () => {
+  const content = await readFile(join(root, "skills", "ultimate-browsing", "SKILL.md"), "utf8");
+
+  assert.match(content, /insane-search/i);
+  assert.match(content, /Research-source handoff/);
+
+  // Check the contract inside the handoff section only: short names such as
+  // "url" or "title" could otherwise be satisfied by unrelated text in the file.
+  const handoff = extractHandoffSection(content);
+  assert.match(handoff, /INSANE_SOURCE_RECORD/);
+  for (const field of RECORD_FIELDS) {
+    assert.match(handoff, new RegExp(`\\b${field}\\b`), `missing ${field} in handoff contract`);
+  }
+  assert.match(handoff, /source registry/i);
+  assert.match(handoff, /claim verification/i);
+});
diff --git a/plugins/omo/test/ultraresearch-skill-contract.test.mjs b/plugins/omo/test/ultraresearch-skill-contract.test.mjs
index c29c5de..76e9c75 100644
--- a/plugins/omo/test/ultraresearch-skill-contract.test.mjs
+++ b/plugins/omo/test/ultraresearch-skill-contract.test.mjs
@@ -202,3 +202,27 @@ test("#given ultraresearch blocked sources #when escalation is inspected #then i
     );
   }
 });
+
+// Contract: every ultraresearch copy must tell browsing workers to return an
+// INSANE_SOURCE_RECORD, name all ten record fields, and route the records into
+// the source registry.
+test("#given ultraresearch blocked sources #when insane-search resolves them #then records flow into the source registry", async () => {
+  for (const copy of await readUltraresearchCopies()) {
+    assert.match(copy.content, /INSANE_SOURCE_RECORD/, `${copy.label}: browsing workers must return source records`);
+    assert.match(copy.content, /source registry/i, `${copy.label}: insane-search records must feed the source registry`);
+    for (const field of [
+      "url",
+      "final_url",
+      "title",
+      "access_method",
+      "waf_profile",
+      "verdict",
+      "selector_proof",
+      "fetched_at",
+      "trace_summary",
+      "source_quality_hint",
+    ]) {
+      assert.match(copy.content, new RegExp(`\\b${field}\\b`), `${copy.label}: missing ${field} in source record contract`);
+    }
+  }
+});