diff --git a/CLAUDE.md b/CLAUDE.md index 1d94adff..221de9de 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -358,7 +358,7 @@ Verify after every deploy with `docker exec phantom sh -c 'touch /app/public/_w ## Known Bugs -1. **Onboarding re-fires on restart (LOW):** When evolution generation is 0, the intro DM sends again on restart. Needs an "intro_sent" flag in SQLite. +1. ~~**Onboarding re-fires on restart (LOW):**~~ Fixed in Phase 12 (`feat/2026-05-01-phase12-user-research-enrichment`). The firstboot ledger (`firstboot_state` table) now stamps `intro_sent_at` after a successful intro DM, and `startOnboarding` short-circuits with `skipped: true` on every later boot. Process restarts before the first evolution generation no longer re-fire the DM. ## Key Files to Read First diff --git a/src/agent/research/__tests__/enrich-owner.test.ts b/src/agent/research/__tests__/enrich-owner.test.ts new file mode 100644 index 00000000..69154027 --- /dev/null +++ b/src/agent/research/__tests__/enrich-owner.test.ts @@ -0,0 +1,285 @@ +import { describe, expect, test } from "bun:test"; +import { enrichOwner } from "../enrich-owner.ts"; + +function jsonResponse(body: unknown, init: ResponseInit = {}): Response { + return new Response(JSON.stringify(body), { + status: 200, + headers: { "content-type": "application/json" }, + ...init, + }); +} + +function htmlResponse(body: string, init: ResponseInit = {}): Response { + return new Response(body, { + status: 200, + headers: { "content-type": "text/html; charset=utf-8" }, + ...init, + }); +} + +function notFound(): Response { + return new Response("not found", { status: 404 }); +} + +// A handler-style fetch double that routes by URL prefix so a single +// research run can hit github + a personal site + linkedin and we can +// assert on the bullets that got composed. 
+function makeFetch( + handlers: Array<{ match: (url: string) => boolean; respond: () => Response | Promise }>, +): typeof fetch { + return (async (url: string | URL | Request) => { + const u = typeof url === "string" ? url : url instanceof URL ? url.toString() : url.url; + for (const h of handlers) { + if (h.match(u)) return h.respond(); + } + return notFound(); + }) as unknown as typeof fetch; +} + +describe("enrichOwner happy path", () => { + test("returns three bullets when github + linkedin + site all answer", async () => { + const fetchImpl = makeFetch([ + { + match: (u) => u.startsWith("https://api.github.com/users/"), + respond: () => + jsonResponse({ + login: "matt", + name: "Matt Example", + bio: "Engineer building developer tools.", + public_repos: 23, + followers: 100, + company: "Acme", + html_url: "https://github.com/matt", + }), + }, + { + match: (u) => u.startsWith("https://www.linkedin.com/"), + respond: () => + htmlResponse( + ``, + ), + }, + { + match: (u) => u.startsWith("https://acme.com"), + respond: () => + htmlResponse( + ` + + + `, + ), + }, + ]); + + const result = await enrichOwner( + { email: "matt@acme.com", name: "Matt Example", linkedinUrl: "https://www.linkedin.com/in/matt/" }, + { fetchImpl, perFetchTimeoutMs: 1_000 }, + ); + + expect(result.outcome).toBe("ok"); + expect(result.bullets).not.toBeNull(); + const bulletCount = result.bullets?.length ?? 
0; + expect(bulletCount).toBeGreaterThanOrEqual(2); + expect(bulletCount).toBeLessThanOrEqual(3); + expect(result.bullets?.some((b) => b.includes("@matt"))).toBe(true); + expect(result.sources.length).toBe(bulletCount); + }); + + test("returns linkedin bullet from headline parse", async () => { + const fetchImpl = makeFetch([ + { match: (u) => u.includes("api.github.com"), respond: () => notFound() }, + { + match: (u) => u.includes("linkedin.com"), + respond: () => + htmlResponse( + ``, + ), + }, + ]); + + const result = await enrichOwner( + { email: "sara@gmail.com", linkedinUrl: "https://www.linkedin.com/in/sara/" }, + { fetchImpl, perFetchTimeoutMs: 1_000 }, + ); + + expect(result.bullets).not.toBeNull(); + expect(result.bullets?.[0]).toContain("VP Eng at Foo"); + expect(result.sources[0]?.kind).toBe("linkedin_public"); + }); + + test("personal site bullet uses og:description over og:title", async () => { + const fetchImpl = makeFetch([ + { match: (u) => u.includes("api.github.com"), respond: () => notFound() }, + { + match: (u) => u.includes("acme.dev"), + respond: () => + htmlResponse( + ` + + + `, + ), + }, + ]); + + const result = await enrichOwner({ email: "founder@acme.dev" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + + expect(result.bullets).not.toBeNull(); + expect(result.bullets?.[0]).toContain("acme.dev"); + expect(result.bullets?.[0]).toContain("distributed systems"); + }); + + test("github structural fallback when bio is missing", async () => { + const fetchImpl = makeFetch([ + { + match: (u) => u.includes("api.github.com"), + respond: () => + jsonResponse({ + login: "carla", + name: "Carla Q", + bio: null, + public_repos: 12, + company: "Foo Co", + html_url: "https://github.com/carla", + }), + }, + ]); + + const result = await enrichOwner({ email: "carla@example.org" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + + expect(result.bullets).not.toBeNull(); + expect(result.bullets?.[0]).toContain("@carla"); + 
expect(result.bullets?.[0]).toContain("Carla Q"); + expect(result.bullets?.[0]).toContain("Foo Co"); + expect(result.bullets?.[0]).toContain("12"); + }); +}); + +describe("enrichOwner empty paths", () => { + test("returns null bullets when nothing answers", async () => { + const fetchImpl = makeFetch([{ match: () => true, respond: () => notFound() }]); + const result = await enrichOwner({ email: "nobody@example.com" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + expect(result.bullets).toBeNull(); + expect(result.outcome).toBe("empty"); + expect(result.sources).toEqual([]); + }); + + test("returns disabled outcome when deps.disabled is true", async () => { + const result = await enrichOwner({ email: "x@y.com" }, { disabled: true }); + expect(result.outcome).toBe("disabled"); + expect(result.bullets).toBeNull(); + }); + + test("returns empty when email is blank", async () => { + const fetchImpl = makeFetch([]); + const result = await enrichOwner({ email: " " }, { fetchImpl }); + expect(result.outcome).toBe("empty"); + expect(result.bullets).toBeNull(); + }); + + test("skips personal-site probe when email domain is a public mailbox", async () => { + const calls: string[] = []; + const fetchImpl = (async (url: string | URL | Request) => { + const u = typeof url === "string" ? url : url instanceof URL ? url.toString() : url.url; + calls.push(u); + return notFound(); + }) as unknown as typeof fetch; + await enrichOwner({ email: "person@gmail.com" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + // We should not see a fetch to https://gmail.com. 
+ expect(calls.some((u) => u === "https://gmail.com" || u.startsWith("https://gmail.com/"))).toBe(false); + }); +}); + +describe("enrichOwner network failure tolerance", () => { + test("returns gracefully when every fetch throws", async () => { + const fetchImpl = (async () => { + throw new Error("ENETUNREACH"); + }) as unknown as typeof fetch; + const result = await enrichOwner({ email: "matt@acme.com" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + expect(result.bullets).toBeNull(); + expect(result.outcome === "empty" || result.outcome === "timeout").toBe(true); + }); + + test("returns partial bullets when only one source answers", async () => { + const fetchImpl = makeFetch([ + { + match: (u) => u.includes("api.github.com"), + respond: () => + jsonResponse({ + login: "kara", + bio: "ML researcher.", + public_repos: 3, + html_url: "https://github.com/kara", + }), + }, + { match: () => true, respond: () => notFound() }, + ]); + + const result = await enrichOwner({ email: "kara@example.org" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + + expect(result.bullets).not.toBeNull(); + expect(result.bullets?.length).toBe(1); + expect(result.bullets?.[0]).toContain("@kara"); + }); +}); + +describe("enrichOwner timeout", () => { + test("global budget elapsed marks outcome timeout", async () => { + // Advance the clock past the budget on the second now() call so + // the post-Promise.all elapsed check trips. + let calls = 0; + const now = () => { + calls += 1; + return calls === 1 ? 
0 : 999_999; + }; + const fetchImpl = makeFetch([{ match: () => true, respond: () => notFound() }]); + const result = await enrichOwner( + { email: "x@example.org" }, + { fetchImpl, perFetchTimeoutMs: 100, budgetMs: 1, now }, + ); + expect(["timeout", "empty"]).toContain(result.outcome); + }); +}); + +describe("enrichOwner safety invariants", () => { + test("never echoes the owner email into a bullet", async () => { + const fetchImpl = makeFetch([ + { + match: (u) => u.includes("api.github.com"), + respond: () => + jsonResponse({ + login: "matt", + bio: "Engineer.", + public_repos: 1, + html_url: "https://github.com/matt", + }), + }, + ]); + const result = await enrichOwner({ email: "matt@acme.com" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + expect(result.bullets).not.toBeNull(); + for (const bullet of result.bullets ?? []) { + expect(bullet).not.toContain("matt@acme.com"); + } + }); + + test("caps each bullet at 280 chars", async () => { + const longBio = "x".repeat(500); + const fetchImpl = makeFetch([ + { + match: (u) => u.includes("api.github.com"), + respond: () => + jsonResponse({ + login: "longbio", + bio: longBio, + public_repos: 1, + html_url: "https://github.com/longbio", + }), + }, + ]); + const result = await enrichOwner({ email: "longbio@example.org" }, { fetchImpl, perFetchTimeoutMs: 1_000 }); + expect(result.bullets).not.toBeNull(); + for (const b of result.bullets ?? 
[]) { + expect(b.length).toBeLessThanOrEqual(280); + } + }); +}); diff --git a/src/agent/research/__tests__/fetchers.test.ts b/src/agent/research/__tests__/fetchers.test.ts new file mode 100644 index 00000000..36d5c89d --- /dev/null +++ b/src/agent/research/__tests__/fetchers.test.ts @@ -0,0 +1,238 @@ +import { describe, expect, test } from "bun:test"; +import { fetchGithubProfile, fetchLinkedinPublic, fetchPageMetadata } from "../fetchers.ts"; + +function jsonResponse(body: unknown, init: ResponseInit = {}): Response { + return new Response(JSON.stringify(body), { + status: 200, + headers: { "content-type": "application/json" }, + ...init, + }); +} + +function htmlResponse(body: string, init: ResponseInit = {}): Response { + return new Response(body, { + status: 200, + headers: { "content-type": "text/html; charset=utf-8" }, + ...init, + }); +} + +describe("fetchGithubProfile", () => { + test("returns null for empty login", async () => { + const result = await fetchGithubProfile("", { + fetchImpl: (async () => jsonResponse({})) as unknown as typeof fetch, + }); + expect(result).toBeNull(); + }); + + test("returns null for malformed login", async () => { + const calls: string[] = []; + const fetchImpl = (async (url: string) => { + calls.push(url); + return jsonResponse({}); + }) as unknown as typeof fetch; + const result = await fetchGithubProfile("not a real login!", { fetchImpl }); + expect(result).toBeNull(); + expect(calls).toEqual([]); + }); + + test("parses a real GitHub user payload into the public shape", async () => { + const fetchImpl = (async () => + jsonResponse({ + login: "octocat", + name: "The Octocat", + company: "@github", + blog: "https://github.blog", + location: "San Francisco", + bio: "There once was...", + public_repos: 8, + followers: 4000, + html_url: "https://github.com/octocat", + })) as unknown as typeof fetch; + const result = await fetchGithubProfile("octocat", { fetchImpl }); + expect(result).not.toBeNull(); + 
expect(result?.login).toBe("octocat"); + expect(result?.bio).toBe("There once was..."); + expect(result?.public_repos).toBe(8); + expect(result?.html_url).toBe("https://github.com/octocat"); + }); + + test("normalizes empty string fields to null", async () => { + const fetchImpl = (async () => + jsonResponse({ + login: "octocat", + name: "", + company: " ", + bio: null, + html_url: "https://github.com/octocat", + })) as unknown as typeof fetch; + const result = await fetchGithubProfile("octocat", { fetchImpl }); + expect(result?.name).toBeNull(); + expect(result?.company).toBeNull(); + expect(result?.bio).toBeNull(); + }); + + test("returns null on non-200 response", async () => { + const fetchImpl = (async () => new Response("not found", { status: 404 })) as unknown as typeof fetch; + const result = await fetchGithubProfile("octocat", { fetchImpl }); + expect(result).toBeNull(); + }); + + test("returns null on malformed JSON", async () => { + const fetchImpl = (async () => + new Response("not json", { + status: 200, + headers: { "content-type": "application/json" }, + })) as unknown as typeof fetch; + const result = await fetchGithubProfile("octocat", { fetchImpl }); + expect(result).toBeNull(); + }); + + test("returns null when network throws", async () => { + const fetchImpl = (async () => { + throw new Error("ENETDOWN"); + }) as unknown as typeof fetch; + const result = await fetchGithubProfile("octocat", { fetchImpl }); + expect(result).toBeNull(); + }); + + test("aborts when external signal already aborted", async () => { + const controller = new AbortController(); + controller.abort(); + const fetchImpl = (async (_u: unknown, init: RequestInit | undefined) => { + if (init?.signal?.aborted) throw new Error("aborted"); + return jsonResponse({ login: "x" }); + }) as unknown as typeof fetch; + const result = await fetchGithubProfile("octocat", { fetchImpl, signal: controller.signal }); + expect(result).toBeNull(); + }); +}); + +describe("fetchPageMetadata", () 
=> { + test("returns null for empty url", async () => { + const result = await fetchPageMetadata("", { + fetchImpl: (async () => htmlResponse("")) as unknown as typeof fetch, + }); + expect(result).toBeNull(); + }); + + test("returns null for non-http url", async () => { + const calls: string[] = []; + const fetchImpl = (async (url: string) => { + calls.push(url); + return htmlResponse(""); + }) as unknown as typeof fetch; + const result = await fetchPageMetadata("file:///etc/passwd", { fetchImpl }); + expect(result).toBeNull(); + expect(calls).toEqual([]); + }); + + test("extracts og:title and og:description", async () => { + const html = ` + + + `; + const fetchImpl = (async () => htmlResponse(html)) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com", { fetchImpl }); + expect(result?.title).toBe("Real Title"); + expect(result?.description).toBe("Real description goes here."); + }); + + test("extracts twitter: tags as fallback", async () => { + const html = ` + + + `; + const fetchImpl = (async () => htmlResponse(html)) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com", { fetchImpl }); + expect(result?.title).toBe("Tw Title"); + expect(result?.description).toBe("Tw Desc"); + }); + + test("falls back to tag when no og: tags exist", async () => { + const html = "<html><head><title>Plain Title"; + const fetchImpl = (async () => htmlResponse(html)) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com", { fetchImpl }); + expect(result?.title).toBe("Plain Title"); + expect(result?.description).toBeNull(); + }); + + test("decodes HTML entities in extracted strings", async () => { + const html = ` + + `; + const fetchImpl = (async () => htmlResponse(html)) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com", { fetchImpl }); + expect(result?.title).toBe("A & B's site"); + }); + + test("handles meta tag with content 
first then property", async () => { + const html = ` + + `; + const fetchImpl = (async () => htmlResponse(html)) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com", { fetchImpl }); + expect(result?.title).toBe("Reverse Order"); + }); + + test("returns null when content-type is not HTML", async () => { + const fetchImpl = (async () => + new Response("PDF binary", { + status: 200, + headers: { "content-type": "application/pdf" }, + })) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com/doc.pdf", { fetchImpl }); + expect(result).toBeNull(); + }); + + test("returns null when no metadata at all", async () => { + const fetchImpl = (async () => htmlResponse("Hi")) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com", { fetchImpl }); + expect(result).toBeNull(); + }); + + test("caps page size at 256KB to defeat memory blowup", async () => { + const giantHtml = `OK${"X".repeat(2_000_000)}`; + const fetchImpl = (async () => htmlResponse(giantHtml)) as unknown as typeof fetch; + const result = await fetchPageMetadata("https://example.com", { fetchImpl }); + // We should still find the title because the cap is post-fetch. 
+ expect(result?.title).toBe("OK"); + }); +}); + +describe("fetchLinkedinPublic", () => { + test("returns null for non-LinkedIn URL", async () => { + const calls: string[] = []; + const fetchImpl = (async (url: string) => { + calls.push(url); + return htmlResponse(""); + }) as unknown as typeof fetch; + const result = await fetchLinkedinPublic("https://example.com/in/x", { fetchImpl }); + expect(result).toBeNull(); + expect(calls).toEqual([]); + }); + + test("accepts www.linkedin.com", async () => { + const html = ` + + `; + const fetchImpl = (async () => htmlResponse(html)) as unknown as typeof fetch; + const result = await fetchLinkedinPublic("https://www.linkedin.com/in/person/", { fetchImpl }); + expect(result).not.toBeNull(); + expect(result?.title).toContain("Engineer"); + }); + + test("accepts bare linkedin.com host", async () => { + const html = ``; + const fetchImpl = (async () => htmlResponse(html)) as unknown as typeof fetch; + const result = await fetchLinkedinPublic("https://linkedin.com/in/person/", { fetchImpl }); + expect(result).not.toBeNull(); + }); + + test("returns null when LinkedIn answers with 999 (rate limit)", async () => { + const fetchImpl = (async () => + new Response("", { status: 999, headers: { "content-type": "text/html" } })) as unknown as typeof fetch; + const result = await fetchLinkedinPublic("https://www.linkedin.com/in/x", { fetchImpl }); + expect(result).toBeNull(); + }); +}); diff --git a/src/agent/research/enrich-owner.ts b/src/agent/research/enrich-owner.ts new file mode 100644 index 00000000..82ed07c0 --- /dev/null +++ b/src/agent/research/enrich-owner.ts @@ -0,0 +1,297 @@ +// Phase 12 user research first-pass. +// +// Runs at firstboot, BEFORE the intro DM is composed. Pulls public, +// anonymously-reachable signals about the agent's owner so the intro DM +// can open with a short, honest "what I learned about you" instead of a +// generic greeting. 
// Architectural invariants (master plan section 3
// Phase 12, builder brief 2026-05-01-phase12-user-research-builder.md):
//
//   1. Public sources only. NO authenticated API calls. NO LinkedIn ToS
//      violations. We fetch the LinkedIn public profile page anonymously
//      and read whatever og: tags it serves; if LinkedIn refuses (HTTP
//      999, 403, login redirect), we move on.
//   2. Time-bounded to RESEARCH_BUDGET_MS (~15s). The intro DM cannot
//      block firstboot for minutes. If the budget elapses, we return
//      whatever we have so far.
//   3. Plaintext discipline. We never log the owner email at any level.
//      The bullets render the owner's NAME at most; the email stays in
//      env vars / SQLite.
//   4. Don't fabricate. If every probe is empty, return null bullets so
//      the intro DM degrades cleanly to today's no-research copy.
//
// Why this lives in src/agent/research/ and not src/onboarding/: the
// research output is also injected into the agent's system-prompt overlay
// (Phase 9 self-knowledge plus a "What I learned about my owner" block).
// Keeping it under src/agent/ makes that wiring obvious. The onboarding
// firstboot step is the FIRST consumer; the prompt overlay is the
// second.

import {
	type GithubProfile,
	type PageMetadata,
	fetchGithubProfile,
	fetchLinkedinPublic,
	fetchPageMetadata,
} from "./fetchers.ts";
import {
	MAX_BULLET_CHARS,
	type OwnerResearchInput,
	type OwnerResearchResult,
	RESEARCH_BUDGET_MS,
	type ResearchOutcome,
	type SourceRef,
} from "./types.ts";

export interface EnrichOwnerDeps {
	/** Fetch implementation handed to every probe. Tests inject a stub. */
	fetchImpl?: typeof fetch;
	/** Override the global research budget. Tests use this to assert
	 *  the timeout path returns a partial result. */
	budgetMs?: number;
	/** Override the per-fetch timeout. Tests use this so they don't
	 *  need to wait the full default. */
	perFetchTimeoutMs?: number;
	/** When true, the function returns immediately with an empty
	 *  outcome="disabled" result. The firstboot caller sets this when the
	 *  operator turns research off via PHANTOM_OWNER_RESEARCH_ENABLED=false
	 *  (operator escape hatch). */
	disabled?: boolean;
	/** Injected clock for testing the timeout path. Defaults to Date.now. */
	now?: () => number;
}

/**
 * Run the first-pass owner research and compose up to three bullets.
 *
 * Probes (GitHub profile, personal site derived from the email domain,
 * LinkedIn public page when a URL is supplied) run in parallel under one
 * budget-wide AbortSignal. Each probe that yields a bullet contributes one
 * entry to `bullets` and the matching entry to `sources`, in the order
 * github, linkedin, personal site.
 *
 * @param input Owner email (required), plus optional name and LinkedIn URL.
 * @param deps  Test/ops overrides; see {@link EnrichOwnerDeps}.
 * @returns `bullets: null` with outcome "disabled" or "empty" (or "timeout"
 *          when the budget elapsed) if nothing was learned; otherwise the
 *          bullets with outcome "ok", or "timeout" when the budget elapsed
 *          but some probes still answered.
 */
export async function enrichOwner(
	input: OwnerResearchInput,
	deps: EnrichOwnerDeps = {},
): Promise<OwnerResearchResult> {
	if (deps.disabled) {
		return { bullets: null, sources: [], outcome: "disabled" };
	}

	const email = input.email.trim();
	if (!email) {
		return { bullets: null, sources: [], outcome: "empty" };
	}

	const name = (input.name?.trim() || deriveNameFromEmail(email)).trim();
	const linkedinUrl = input.linkedinUrl?.trim();
	const personalSiteUrl = derivePersonalSiteUrl(email);
	const githubLogin = deriveGithubLogin(email, name);

	const budgetMs = deps.budgetMs ?? RESEARCH_BUDGET_MS;
	const now = deps.now ?? Date.now;
	const startedAt = now();
	const overall = AbortSignal.timeout(budgetMs);

	const fetchDeps = {
		fetchImpl: deps.fetchImpl,
		timeoutMs: deps.perFetchTimeoutMs,
		signal: overall,
	};

	// Run all probes in parallel. Each tolerates network failure and
	// returns null on its own; the shared signal is handed to every fetcher
	// so they can stop when the budget elapses.
	const [githubResult, siteResult, linkedinResult] = await Promise.all([
		githubLogin ? safeProbe(() => fetchGithubProfile(githubLogin, fetchDeps)) : Promise.resolve(null),
		personalSiteUrl ? safeProbe(() => fetchPageMetadata(personalSiteUrl, fetchDeps)) : Promise.resolve(null),
		linkedinUrl ? safeProbe(() => fetchLinkedinPublic(linkedinUrl, fetchDeps)) : Promise.resolve(null),
	]);

	// Exactly two clock reads per run (start + here); the injected test
	// clock relies on that.
	const timedOut = now() - startedAt >= budgetMs;

	const bullets: string[] = [];
	const sources: SourceRef[] = [];
	// Bullets and sources are parallel arrays: always push both or neither.
	const keep = (bullet: string | null, source: SourceRef | null): void => {
		if (!bullet || !source) return;
		bullets.push(scrubOwnerEmail(bullet, email));
		sources.push(source);
	};

	keep(
		composeGithubBullet(githubResult, name),
		githubResult ? { kind: "github", url: githubResult.html_url } : null,
	);
	keep(
		composeLinkedinBullet(linkedinResult),
		linkedinResult ? { kind: "linkedin_public", url: linkedinResult.url } : null,
	);
	keep(
		composeSiteBullet(siteResult, name),
		siteResult ? { kind: "personal_site", url: siteResult.url } : null,
	);

	// Cap at 3 bullets; pick the first three that survived the probes.
	const finalBullets = bullets.slice(0, 3);
	const finalSources = sources.slice(0, 3);

	if (finalBullets.length === 0) {
		const outcome: ResearchOutcome = timedOut ? "timeout" : "empty";
		return { bullets: null, sources: [], outcome };
	}

	return {
		bullets: finalBullets,
		sources: finalSources,
		outcome: timedOut ? "timeout" : "ok",
	};
}

/**
 * Enforce invariant 3 on composed text: a public bio or site description
 * may itself contain the owner's address, and bullets must never echo it.
 * Matching is case-insensitive; the result is re-capped because the
 * placeholder can be longer than the address it replaces.
 */
function scrubOwnerEmail(bullet: string, email: string): string {
	const literal = email.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	const scrubbed = bullet.replace(new RegExp(literal, "gi"), "[email redacted]");
	return scrubbed === bullet ? bullet : cap(scrubbed);
}

/** Run one probe and turn any thrown error into a null result. */
async function safeProbe<T>(probe: () => Promise<T | null>): Promise<T | null> {
	try {
		return await probe();
	} catch {
		return null;
	}
}

// Derive a likely GitHub login from email or name. We try the local-part
// of the email first (the most common pattern: matt@example.com -> "matt"),
// then the name with whitespace removed. The fetcher rejects malformed
// logins so this is allowed to be slightly optimistic.
// Login shape accepted before we spend a probe on it: alphanumerics with
// single interior hyphens, 1-39 chars, no leading or trailing hyphen. This
// is the same shape isLikelyGithubLogin in fetchers.ts enforces, so a
// candidate the fetcher would reject no longer blocks the name fallback.
const GITHUB_LOGIN_RE = /^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$/;

function deriveGithubLogin(email: string, name: string): string | null {
	// First choice: the email local-part, when it is a plausible login.
	const localPart = email.split("@")[0]?.trim();
	if (localPart && GITHUB_LOGIN_RE.test(localPart)) {
		return localPart;
	}
	// Second choice: the display name with all whitespace removed.
	const compact = name.replace(/\s+/g, "");
	if (compact && GITHUB_LOGIN_RE.test(compact)) {
		return compact;
	}
	return null;
}

// Derive a personal-site URL from the email domain. We skip well-known
// mailbox providers (gmail, outlook, etc.) because their domain is not
// the user's site. For everything else we try https://<domain>.
function derivePersonalSiteUrl(email: string): string | null {
	const domain = email.split("@")[1]?.trim().toLowerCase();
	if (!domain) return null;
	if (PUBLIC_MAILBOX_DOMAINS.has(domain)) return null;
	// Require a dotted host ending in an alphabetic TLD; this also keeps
	// bare hosts ("localhost") and IP literals out of the probe.
	if (!/^[a-z0-9.-]+\.[a-z]{2,}$/.test(domain)) return null;
	return `https://${domain}`;
}

// Derive a fallback name from the email local-part when PHANTOM_OWNER_NAME
// is unset. "matt.j@x.com" -> "matt.j" stays as a single token; we do not
// try to pretty-print here because that easily fabricates ("matt j" is not
// the same person as "matt-j" or "mattj").
function deriveNameFromEmail(email: string): string {
	return email.split("@")[0] ?? "";
}

/**
 * Compose the GitHub bullet, or null when the profile carries no usable
 * signal. `_name` is accepted for signature parity with the other
 * composers and is currently unused.
 */
function composeGithubBullet(profile: GithubProfile | null, _name: string): string | null {
	if (!profile) return null;
	// Prefer bio when present (it is the user's own self-description).
	// Fall back to a structural summary built from name + company +
	// public-repo count. We never fabricate; if all signals are null,
	// return null and let the caller drop the bullet.
	if (profile.bio) {
		return cap(`On GitHub as @${profile.login}: ${profile.bio}`);
	}
	const facts: string[] = [];
	if (profile.name) facts.push(profile.name);
	if (profile.company) facts.push(`at ${profile.company}`);
	if (profile.public_repos > 0) {
		// Singular for exactly one repo ("1 public repo", not "1 public repos").
		facts.push(`${profile.public_repos} public repo${profile.public_repos === 1 ? "" : "s"}`);
	}
	if (facts.length === 0) return null;
	return cap(`On GitHub as @${profile.login}: ${facts.join(", ")}.`);
}

/**
 * Heuristic: did the LinkedIn fetch land on a login / sign-up gate instead
 * of a profile? The fetcher follows redirects, so a gate page that answers
 * 200 with its own og:title would otherwise be rendered as a "headline".
 * NOTE(review): the path and title lists reflect commonly observed LinkedIn
 * gates (e.g. /authwall, "Sign Up | LinkedIn"); confirm against live
 * responses. False positives only cost us one bullet.
 */
function looksLikeLinkedinGate(meta: PageMetadata): boolean {
	try {
		const path = new URL(meta.url).pathname.toLowerCase();
		if (/^\/(?:authwall|login|signup|uas|checkpoint)(?:\/|$)/.test(path)) return true;
	} catch {
		// Unparseable final URL: fall through to the title heuristic.
	}
	const title = meta.title?.trim().toLowerCase() ?? "";
	return /^(?:sign ?up|sign ?in|log ?in|join linkedin|linkedin login|linkedin: log ?in)\b/.test(title);
}

/** Compose the LinkedIn bullet from page metadata, or null when unusable. */
function composeLinkedinBullet(meta: PageMetadata | null): string | null {
	if (!meta) return null;
	// A login redirect counts as "LinkedIn refused": move on, don't fabricate.
	if (looksLikeLinkedinGate(meta)) return null;
	// LinkedIn's og:title is usually "FirstName LastName - Headline | LinkedIn".
	// The headline is the most useful signal; if the title carries a hyphen
	// we keep what comes after it. og:description tends to be a short bio.
	const headline = parseLinkedinHeadline(meta.title);
	if (headline) {
		return cap(`LinkedIn headline: ${headline}.`);
	}
	if (meta.description) {
		return cap(`LinkedIn says: ${meta.description}`);
	}
	return null;
}

/** Compose the personal-site bullet from page metadata, or null when unusable. */
function composeSiteBullet(meta: PageMetadata | null, name: string): string | null {
	if (!meta) return null;
	// Prefer description (richer). If a title exists too, it is prefixed to
	// the description. A title on its own is used only when it does not look
	// generic ("Home", "Welcome", or just the owner's name).
	if (meta.description) {
		const stem = meta.title ? `${meta.title}: ${meta.description}` : meta.description;
		return cap(`Their site at ${shortHost(meta.url)}: ${stem}`);
	}
	if (meta.title && !looksLikeGenericSiteTitle(meta.title, name)) {
		return cap(`Their site at ${shortHost(meta.url)}: ${meta.title}.`);
	}
	return null;
}

/**
 * Pull the headline out of a LinkedIn page title. Returns the text after
 * the first " - " separator when there is one, otherwise the whole title
 * minus the trailing "| LinkedIn" decoration; null for empty input.
 */
function parseLinkedinHeadline(title: string | null): string | null {
	if (!title) return null;
	// Strip trailing "| LinkedIn" decoration.
	const stripped = title.replace(/\s*[|-]\s*linkedin\s*$/i, "").trim();
	if (!stripped) return null;
	// "Name - Headline" pattern: keep the headline.
	const dashSplit = stripped.split(/\s+[-]\s+/);
	if (dashSplit.length >= 2) {
		const tail = dashSplit.slice(1).join(" - ").trim();
		return tail.length > 0 ? tail : null;
	}
	return stripped;
}

/** True for placeholder titles and for a title that is just the owner's name. */
function looksLikeGenericSiteTitle(title: string, name: string): boolean {
	const lower = title.toLowerCase().trim();
	if (["home", "welcome", "index", "untitled"].includes(lower)) return true;
	if (name && lower === name.toLowerCase().trim()) return true;
	return false;
}

/** Hostname without a leading "www."; the raw input when it is not a URL. */
function shortHost(url: string): string {
	try {
		return new URL(url).hostname.replace(/^www\./, "");
	} catch {
		return url;
	}
}

/**
 * Collapse whitespace and clamp to MAX_BULLET_CHARS, ending with an
 * ellipsis when text was dropped.
 */
function cap(text: string): string {
	const collapsed = text.replace(/\s+/g, " ").trim();
	if (collapsed.length <= MAX_BULLET_CHARS) return collapsed;
	let end = MAX_BULLET_CHARS - 1;
	// Never cut between the two halves of a surrogate pair: if the last
	// kept code unit is a high surrogate, drop it too so the bullet does
	// not end in a lone (unrenderable) surrogate.
	const last = collapsed.charCodeAt(end - 1);
	if (last >= 0xd800 && last <= 0xdbff) end -= 1;
	return `${collapsed.slice(0, end)}…`;
}

// Major public mailbox providers we never treat as the user's personal
// site. Lowercase, no subdomain. Keep this list tight: false positives
// here just mean we skip a probe, false negatives mean we waste budget.
const PUBLIC_MAILBOX_DOMAINS: ReadonlySet<string> = new Set([
	"gmail.com",
	"googlemail.com",
	"yahoo.com",
	"yahoo.co.uk",
	"outlook.com",
	"hotmail.com",
	"live.com",
	"icloud.com",
	"me.com",
	"mac.com",
	"protonmail.com",
	"proton.me",
	"pm.me",
	"aol.com",
	"msn.com",
	"fastmail.com",
	"hey.com",
	"zoho.com",
	"yandex.com",
	"yandex.ru",
	"mail.com",
	"gmx.com",
	"gmx.de",
	"web.de",
	"qq.com",
	"163.com",
	"126.com",
	"naver.com",
	"daum.net",
]);

// ---------------------------------------------------------------------------
// diff --git a/src/agent/research/fetchers.ts b/src/agent/research/fetchers.ts
// new file mode 100644
// index 00000000..32341ef9
// --- /dev/null
// +++ b/src/agent/research/fetchers.ts
// @@ -0,0 +1,236 @@
// ---------------------------------------------------------------------------
// Phase 12: lightweight HTTP fetchers for public sources.
//
// We deliberately avoid pulling in puppeteer/playwright for this path.
// The intro DM enrichment is a 15-second one-shot; it cannot afford a
// browser launch.
// Plain fetch + regex pulls the og: tags and the GitHub
// public profile JSON cheaply.
//
// Every fetcher is allowlist-aware in its caller (the caller, e.g.
// enrich-owner.ts, decides which hosts to hit based on the input). Every
// fetch carries an explicit AbortSignal so a slow source does not eat the
// global budget.

import { PER_FETCH_TIMEOUT_MS } from "./types.ts";

const USER_AGENT = "phantom-research-firstboot/1.0 (+https://ghostwright.dev)";

// Upper bound on how much of an HTML body we read and parse. Real og: tags
// live in the first 16KB; the cap defeats multi-MB index.html pages.
const MAX_PAGE_BYTES = 256 * 1024;

/** GitHub public user profile shape (subset we actually use). */
export interface GithubProfile {
	login: string;
	name: string | null;
	bio: string | null;
	company: string | null;
	location: string | null;
	blog: string | null;
	public_repos: number;
	followers: number;
	html_url: string;
}

/** Minimal og:/twitter: meta tags pulled from a personal site index. */
export interface PageMetadata {
	title: string | null;
	description: string | null;
	url: string;
}

export interface FetchDeps {
	/** Defaults to global fetch. Tests inject a deterministic stub. */
	fetchImpl?: typeof fetch;
	/** Max ms to allow per fetch. Defaults to PER_FETCH_TIMEOUT_MS. */
	timeoutMs?: number;
	/** External AbortSignal; if it fires, the fetch is cancelled. */
	signal?: AbortSignal;
}

/**
 * Look up a GitHub user by login. Public REST API, no auth required.
 * Returns null on 404, network error, or non-200 responses. The fetcher
 * does NOT retry; the caller manages parallelism + the global budget.
 *
 * @param login Candidate login; rejected without a network call when it is
 *              empty or not shaped like a GitHub username.
 * @param deps  Fetch implementation, timeout and abort signal overrides.
 * @returns The normalized profile, or null on any failure (including a
 *          body that is not JSON or has no string `login`).
 */
export async function fetchGithubProfile(login: string, deps: FetchDeps = {}): Promise<GithubProfile | null> {
	const trimmed = login.trim();
	if (!trimmed) return null;
	if (!isLikelyGithubLogin(trimmed)) return null;

	const url = `https://api.github.com/users/${encodeURIComponent(trimmed)}`;
	const res = await safeFetch(url, deps);
	if (!res || !res.ok) return null;

	try {
		const body = (await res.json()) as Partial<GithubProfile>;
		if (!body || typeof body.login !== "string") return null;
		// Defensive cleaning: the payload is untrusted JSON. Blank or
		// non-string text fields are normalized to null so the bullet
		// builder can use boolean checks; non-numeric counters become 0.
		return {
			login: body.login,
			name: nonEmpty(body.name),
			bio: nonEmpty(body.bio),
			company: nonEmpty(body.company),
			location: nonEmpty(body.location),
			blog: nonEmpty(body.blog),
			public_repos: typeof body.public_repos === "number" ? body.public_repos : 0,
			followers: typeof body.followers === "number" ? body.followers : 0,
			// Fall back to the canonical profile URL when html_url is
			// missing, blank, or not a string.
			html_url: nonEmpty(body.html_url) ?? `https://github.com/${trimmed}`,
		};
	} catch {
		return null;
	}
}

/**
 * Fetch a page and pull og:title, og:description, twitter:title,
 * twitter:description, the description meta tag and the <title> tag. We
 * deliberately skip authenticated content; only public,
 * anonymously-reachable pages are useful here.
 *
 * @param url  Absolute http(s) URL; anything else returns null without a
 *             network call.
 * @param deps Fetch implementation, timeout and abort signal overrides.
 * @returns Extracted metadata with the final (post-redirect) URL, or null
 *          when the fetch fails, the response is not HTML, or no title or
 *          description could be found.
 */
export async function fetchPageMetadata(url: string, deps: FetchDeps = {}): Promise<PageMetadata | null> {
	const trimmed = url.trim();
	if (!trimmed) return null;
	if (!isHttpUrl(trimmed)) return null;

	const res = await safeFetch(trimmed, deps);
	if (!res || !res.ok) return null;

	const contentType = res.headers.get("content-type") ?? "";
	if (!contentType.toLowerCase().includes("text/html")) return null;

	// Stream at most MAX_PAGE_BYTES instead of buffering the whole body, so
	// the cap bounds memory as well as parsing.
	const head = await readBodyCapped(res, MAX_PAGE_BYTES, deps);
	if (head === null) return null;

	// og: lives on `property=`, twitter: lives on `name=` per OpenGraph + Twitter conventions.
	const title = extractMeta(head, "og:title") ?? extractMetaName(head, "twitter:title") ?? extractTitleTag(head);
	const description =
		extractMeta(head, "og:description") ??
		extractMetaName(head, "twitter:description") ??
		extractMetaName(head, "description");

	if (!title && !description) return null;

	return {
		title: nonEmpty(title),
		description: nonEmpty(description),
		url: res.url || trimmed,
	};
}

/**
 * LinkedIn public profile fetch. LinkedIn aggressively gates anonymous
 * traffic (HTTP 999, 403, redirects to login) so this fetch fails far
 * more often than it succeeds, and that is fine. We never try to
 * authenticate, never use a scraping service, never bypass the gate.
 * When LinkedIn answers, we extract og:title + og:description like any
 * other page.
 *
 * @returns Page metadata, or null when `url` is not on linkedin.com /
 *          www.linkedin.com or the underlying page fetch yields nothing.
 */
export async function fetchLinkedinPublic(url: string, deps: FetchDeps = {}): Promise<PageMetadata | null> {
	const trimmed = url.trim();
	if (!trimmed) return null;
	if (!isLinkedinUrl(trimmed)) return null;
	return fetchPageMetadata(trimmed, deps);
}

// --- internals ---

/**
 * Read a response body as text, stopping after roughly `maxBytes` bytes.
 *
 * safeFetch tears its abort wiring down as soon as headers arrive, so the
 * body phase gets its own guard here: the per-fetch timeout and the
 * external signal both cancel the reader, which keeps a slow-drip body
 * from outliving the budget. Whatever was read before a cancel is still
 * returned (meta tags sit at the top of the document).
 *
 * @returns The decoded (UTF-8) text truncated to `maxBytes` characters, or
 *          null when reading the body throws.
 */
async function readBodyCapped(res: Response, maxBytes: number, deps: FetchDeps): Promise<string | null> {
	const stream = res.body;
	if (!stream) {
		// No stream exposed: fall back to a buffered read, then truncate.
		try {
			return (await res.text()).slice(0, maxBytes);
		} catch {
			return null;
		}
	}

	const reader = stream.getReader();
	const decoder = new TextDecoder();
	// Cancelling the reader settles any pending read() with done=true.
	const stop = () => {
		void reader.cancel().catch(() => undefined);
	};
	const timer = setTimeout(stop, deps.timeoutMs ?? PER_FETCH_TIMEOUT_MS);
	const externalSignal = deps.signal;
	externalSignal?.addEventListener("abort", stop, { once: true });

	let text = "";
	let received = 0;
	try {
		while (received < maxBytes && !externalSignal?.aborted) {
			const { done, value } = await reader.read();
			if (done) break;
			received += value.byteLength;
			text += decoder.decode(value, { stream: true });
		}
		text += decoder.decode();
	} catch {
		return null;
	} finally {
		clearTimeout(timer);
		externalSignal?.removeEventListener("abort", stop);
		// Release the connection whether we finished, hit the cap, or failed.
		stop();
	}
	return text.slice(0, maxBytes);
}

/**
 * GET `url` with the research User-Agent. The request is aborted when the
 * per-fetch timeout elapses or the external signal fires. Resolves to null
 * on any thrown error (network failure, abort); HTTP error statuses are
 * returned as-is for the caller to inspect. The timeout and the signal
 * listener are removed once the call settles, so they cover the request up
 * to response headers, not the body read that follows.
 */
async function safeFetch(url: string, deps: FetchDeps): Promise<Response | null> {
	const fetchImpl = deps.fetchImpl ?? fetch;
	const timeoutMs = deps.timeoutMs ?? PER_FETCH_TIMEOUT_MS;
	const controller = new AbortController();
	const externalSignal = deps.signal;
	const onAbort = () => controller.abort();
	if (externalSignal) {
		if (externalSignal.aborted) {
			// Already cancelled: propagate immediately instead of listening.
			controller.abort();
		} else {
			externalSignal.addEventListener("abort", onAbort, { once: true });
		}
	}
	const timer = setTimeout(() => controller.abort(), timeoutMs);
	try {
		return await fetchImpl(url, {
			method: "GET",
			signal: controller.signal,
			headers: {
				"user-agent": USER_AGENT,
				accept: "application/json,text/html;q=0.9,*/*;q=0.5",
			},
			redirect: "follow",
		});
	} catch {
		return null;
	} finally {
		clearTimeout(timer);
		if (externalSignal) externalSignal.removeEventListener("abort", onAbort);
	}
}

/** Trimmed string, or null for non-strings and blank strings. */
function nonEmpty(value: string | null | undefined): string | null {
	if (typeof value !== "string") return null;
	const trimmed = value.trim();
	return trimmed.length > 0 ? trimmed : null;
}

/** True when `value` parses as an absolute http: or https: URL. */
function isHttpUrl(value: string): boolean {
	try {
		const u = new URL(value);
		return u.protocol === "http:" || u.protocol === "https:";
	} catch {
		return false;
	}
}

/** True when `value` is an http(s) URL on linkedin.com or www.linkedin.com. */
function isLinkedinUrl(value: string): boolean {
	if (!isHttpUrl(value)) return false;
	try {
		const host = new URL(value).hostname.toLowerCase();
		return host === "www.linkedin.com" || host === "linkedin.com";
	} catch {
		return false;
	}
}

// GitHub usernames: alphanumeric + single hyphens, 1-39 chars, no leading
// or trailing hyphen. We want to defeat malformed input and obvious URL
// fragments before we hit the GitHub API.
function isLikelyGithubLogin(value: string): boolean {
	if (value.length < 1 || value.length > 39) return false;
	return /^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?!-))*[a-zA-Z0-9]$|^[a-zA-Z0-9]$/.test(value);
}

function extractMeta(html: string, property: string): string | null {
	// Match either order: property="..." content="..." or content="..." property="...".
+ const escaped = property.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const re1 = new RegExp(`<meta[^>]+property=["']${escaped}["'][^>]+content=["']([^"']+)["']`, "i"); + const re2 = new RegExp(`<meta[^>]+content=["']([^"']+)["'][^>]+property=["']${escaped}["']`, "i"); + const match = html.match(re1) ?? html.match(re2); + return match ? decodeHtml(match[1]) : null; +} + +function extractMetaName(html: string, name: string): string | null { + const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const re1 = new RegExp(`<meta[^>]+name=["']${escaped}["'][^>]+content=["']([^"']+)["']`, "i"); + const re2 = new RegExp(`<meta[^>]+content=["']([^"']+)["'][^>]+name=["']${escaped}["']`, "i"); + const match = html.match(re1) ?? html.match(re2); + return match ? decodeHtml(match[1]) : null; +} + +function extractTitleTag(html: string): string | null { + const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i); + return match ? decodeHtml(match[1].replace(/\s+/g, " ").trim()) : null; +} + +function decodeHtml(value: string): string { + return value + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/'/g, "'"); +} diff --git a/src/agent/research/types.ts b/src/agent/research/types.ts new file mode 100644 index 00000000..7449818d --- /dev/null +++ b/src/agent/research/types.ts @@ -0,0 +1,55 @@ +// Phase 12 user research first-pass: shared types. +// +// The research subroutine collects public-only signals about the agent's +// owner before the firstboot intro DM goes out. The output is a small, +// honest summary the agent can reference later. We never fabricate; if +// every probe comes back empty, we return null bullets and the intro DM +// renders without the "What I learned" section. + +/** A single public-source citation supporting one bullet. 
*/ +export interface SourceRef { + /** "github" | "personal_site" | "linkedin_public" | "domain_search" */ + kind: SourceKind; + /** The URL the bullet was derived from. Used for transparency only; + * never echoed verbatim into the intro DM body unless the bullet text + * already mentions it. */ + url: string; +} + +export type SourceKind = "github" | "personal_site" | "linkedin_public" | "domain_search"; + +/** Result of the owner-research run. Bullets is null when research found + * nothing useful; we never invent placeholder copy. */ +export interface OwnerResearchResult { + /** Up to 3 short bullets, each <= 280 chars. null on empty research. */ + bullets: string[] | null; + /** One SourceRef per bullet (parallel array). Empty when bullets is null. */ + sources: SourceRef[]; + /** Diagnostic outcome for the operator. Never user-facing. */ + outcome: ResearchOutcome; +} + +export type ResearchOutcome = "ok" | "empty" | "timeout" | "disabled" | "error"; + +/** Inputs the firstboot path collects from env vars. linkedinUrl is + * optional; the wizard collects it in Phase 1 once that PR lands. */ +export interface OwnerResearchInput { + /** PHANTOM_OWNER_EMAIL. Required. */ + email: string; + /** PHANTOM_OWNER_NAME. Optional. Falls back to the local-part of email. */ + name?: string; + /** PHANTOM_OWNER_LINKEDIN_URL. Optional, future Phase 12 wizard plumbing. */ + linkedinUrl?: string; +} + +/** Cap a single bullet at 280 chars. Mirrors a Tweet length so the intro + * DM stays readable on mobile. The caller already ensures plain ASCII. */ +export const MAX_BULLET_CHARS = 280; + +/** Hard cap on total research time so the firstboot intro DM is not held + * hostage by a slow site. The architect spec says ~15s. */ +export const RESEARCH_BUDGET_MS = 15_000; + +/** Per-fetch HTTP timeout. We make several fetches in parallel; each + * gets its own AbortController so a single hang does not eat the budget. 
*/ +export const PER_FETCH_TIMEOUT_MS = 4_000; diff --git a/src/db/__tests__/migrate.test.ts b/src/db/__tests__/migrate.test.ts index da227075..f68c1193 100644 --- a/src/db/__tests__/migrate.test.ts +++ b/src/db/__tests__/migrate.test.ts @@ -27,6 +27,7 @@ describe("runMigrations", () => { expect(tables).toContain("secrets"); expect(tables).toContain("secret_requests"); expect(tables).toContain("chat_run_timelines"); + expect(tables).toContain("firstboot_state"); expect(tables).toContain("_migrations"); }); @@ -38,8 +39,9 @@ describe("runMigrations", () => { const migrationCount = db.query("SELECT COUNT(*) as count FROM _migrations").get() as { count: number }; // Migration history: base 28 + chat channel tables 28-39 (12 entries) + // auth/push 40-43 (4 entries) + scheduler audit 44-45 (2 entries) + - // phantom-config audit section column 46 + run timelines 47-50 (4 entries) = 51. - expect(migrationCount.count).toBe(51); + // phantom-config audit section column 46 + run timelines 47-50 (4 entries) + + // Phase 12 firstboot_state 51 = 52. + expect(migrationCount.count).toBe(52); }); test("tracks applied migration indices", () => { @@ -53,7 +55,7 @@ describe("runMigrations", () => { expect(indices).toEqual([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, ]); }); diff --git a/src/db/schema.ts b/src/db/schema.ts index ac20cb45..50f71b89 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -410,4 +410,23 @@ export const MIGRATIONS: string[] = [ `CREATE UNIQUE INDEX IF NOT EXISTS idx_chat_run_timelines_assistant ON chat_run_timelines(assistant_message_id) WHERE assistant_message_id IS NOT NULL`, + + // Phase 12 idempotency fix for the LOW bug at CLAUDE.md:284: + // "Onboarding re-fires on restart when evolution generation is 0". 
+ // Today the firstboot path checks isFirstRun(configDir) which reads + // phantom-config/meta/version.json; that file's `version` stays at 0 + // until the FIRST evolution generation lands, which can be days later. + // Every process restart in that window re-fires the intro DM. The new + // flag is set in the same SQLite transaction as markOnboardingStarted + // once the intro DM has gone out, so a restart that happens before the + // first evolution lands no longer re-introduces the agent. Pre-existing + // installs (where the agent is already past intro but the flag was + // never set) are handled by the inserter only setting intro_sent_at on + // transitions FROM "no row at all" TO "row written"; we never overwrite + // completed onboarding records. + `CREATE TABLE IF NOT EXISTS firstboot_state ( + id INTEGER PRIMARY KEY CHECK (id = 1), + intro_sent_at TEXT, + research_outcome TEXT + )`, ]; diff --git a/src/index.ts b/src/index.ts index 717657dc..f86de278 100644 --- a/src/index.ts +++ b/src/index.ts @@ -870,7 +870,12 @@ async function main(): Promise<void> { }); }); - // Post onboarding intro after channels are connected + // Post onboarding intro after channels are connected. Phase 12 added + // firstboot-ledger idempotency inside startOnboarding so a process + // restart while phantom-config/meta/version.json is still at version + // 0 no longer re-fires the intro DM. The isFirstRun guard here stays + // (cheap pre-check that avoids loading the slack client) but the + // authoritative idempotency check is the SQLite firstboot_state row. 
if (isFirstRun(configDir) && activeRole && slackChannel) { const ownerUserId = channelsConfig?.slack?.owner_user_id; const defaultChannel = channelsConfig?.slack?.default_channel_id; @@ -888,19 +893,46 @@ async function main(): Promise<void> { if (target) { const slackClient = slackChannel.getClient(); - const profile = await startOnboarding(slackChannel, target, config.name, activeRole, db, slackClient); - // Inject owner profile into onboarding prompt for personalized agent conversation - if (profile && needsOnboarding) { - const personalizedPrompt = buildOnboardingPrompt(activeRole, config.name, profile); - runtime.setOnboardingPrompt(personalizedPrompt); - } + // Phase 12 research inputs from env. PHANTOM_OWNER_RESEARCH_ENABLED + // is the operator escape hatch (set to "false" to disable). The + // research subroutine is otherwise on by default whenever an + // owner email is present. + const researchEnabled = (process.env.PHANTOM_OWNER_RESEARCH_ENABLED ?? "").trim().toLowerCase() !== "false"; + const ownerEmail = (process.env.PHANTOM_OWNER_EMAIL ?? "").trim(); + const ownerName = (process.env.PHANTOM_OWNER_NAME ?? "").trim(); + const ownerLinkedinUrl = (process.env.PHANTOM_OWNER_LINKEDIN_URL ?? "").trim(); + + const result = await startOnboarding(slackChannel, target, config.name, activeRole, db, slackClient, { + ownerEmail: ownerEmail || undefined, + ownerName: ownerName || undefined, + ownerLinkedinUrl: ownerLinkedinUrl || undefined, + researchEnabled, + }); - // Also post to channel if owner DM was sent and channel is configured - if (target.type === "dm" && defaultChannel) { - const channelIntro = `Hey team, I'm ${config.name}. I just joined as a ${activeRole.name} co-worker. I'll be working with ${profile?.name ?? 
"the team"} - feel free to @mention me if you need anything.`; - await slackChannel.postToChannel(defaultChannel, channelIntro); - console.log(`[onboarding] Also posted introduction to channel ${defaultChannel}`); + if (result.skipped) { + console.log("[onboarding] firstboot ledger says intro already sent; not re-firing"); + } else { + // Inject owner profile + research bullets into onboarding + // prompt for personalized agent conversation. Both are + // optional; the prompt builder gates each section so a + // missing value silently disappears. + if ((result.profile || result.research?.bullets) && needsOnboarding) { + const personalizedPrompt = buildOnboardingPrompt( + activeRole, + config.name, + result.profile ?? undefined, + result.research, + ); + runtime.setOnboardingPrompt(personalizedPrompt); + } + + // Also post to channel if owner DM was sent and channel is configured + if (target.type === "dm" && defaultChannel) { + const channelIntro = `Hey team, I'm ${config.name}. I just joined as a ${activeRole.name} co-worker. I'll be working with ${result.profile?.name ?? 
"the team"} - feel free to @mention me if you need anything.`; + await slackChannel.postToChannel(defaultChannel, channelIntro); + console.log(`[onboarding] Also posted introduction to channel ${defaultChannel}`); + } } } else { console.warn("[onboarding] No owner, default user, or channel configured, skipping intro message"); diff --git a/src/onboarding/__tests__/firstboot-state.test.ts b/src/onboarding/__tests__/firstboot-state.test.ts new file mode 100644 index 00000000..ac7605c3 --- /dev/null +++ b/src/onboarding/__tests__/firstboot-state.test.ts @@ -0,0 +1,65 @@ +import { Database } from "bun:sqlite"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { runMigrations } from "../../db/migrate.ts"; +import { getFirstbootState, isIntroSent, markIntroSent } from "../state.ts"; + +describe("Phase 12 firstboot state", () => { + let db: Database; + + beforeEach(() => { + db = new Database(":memory:"); + db.run("PRAGMA journal_mode = WAL"); + db.run("PRAGMA foreign_keys = ON"); + runMigrations(db); + }); + + afterEach(() => { + db.close(); + }); + + test("default state has nothing set", () => { + const state = getFirstbootState(db); + expect(state.intro_sent_at).toBeNull(); + expect(state.research_outcome).toBeNull(); + expect(isIntroSent(db)).toBe(false); + }); + + test("markIntroSent stores the outcome and timestamp", () => { + markIntroSent(db, "ok"); + const state = getFirstbootState(db); + expect(state.intro_sent_at).not.toBeNull(); + expect(state.research_outcome).toBe("ok"); + expect(isIntroSent(db)).toBe(true); + }); + + test("markIntroSent is idempotent on the timestamp", () => { + markIntroSent(db, "ok"); + const first = getFirstbootState(db).intro_sent_at; + markIntroSent(db, "ok"); + const second = getFirstbootState(db).intro_sent_at; + expect(second).toBe(first); + }); + + test("markIntroSent updates the outcome on later call but preserves timestamp", () => { + markIntroSent(db, "empty"); + const stamp1 = 
getFirstbootState(db).intro_sent_at; + markIntroSent(db, "ok"); + const stamp2 = getFirstbootState(db).intro_sent_at; + expect(stamp2).toBe(stamp1); + expect(getFirstbootState(db).research_outcome).toBe("ok"); + }); + + test("only one row exists after multiple calls", () => { + markIntroSent(db, "ok"); + markIntroSent(db, "timeout"); + markIntroSent(db, "ok"); + const rows = db.query("SELECT * FROM firstboot_state").all(); + expect(rows).toHaveLength(1); + }); + + test("CHECK constraint enforces id = 1", () => { + expect(() => { + db.run("INSERT INTO firstboot_state (id) VALUES (2)"); + }).toThrow(); + }); +}); diff --git a/src/onboarding/__tests__/flow.test.ts b/src/onboarding/__tests__/flow.test.ts index 03c33514..1bba11c3 100644 --- a/src/onboarding/__tests__/flow.test.ts +++ b/src/onboarding/__tests__/flow.test.ts @@ -2,9 +2,9 @@ import { Database } from "bun:sqlite"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { runMigrations } from "../../db/migrate.ts"; import type { RoleTemplate } from "../../roles/types.ts"; -import { type OnboardingTarget, startOnboarding } from "../flow.ts"; +import { type OnboardingTarget, appendResearchSection, startOnboarding } from "../flow.ts"; import type { SlackProfileClient } from "../profiler.ts"; -import { getOnboardingStatus } from "../state.ts"; +import { getFirstbootState, getOnboardingStatus, isIntroSent } from "../state.ts"; const mockRole: RoleTemplate = { id: "swe", @@ -200,12 +200,13 @@ describe("startOnboarding with profiling", () => { const client = createMockSlackClient(); const target: OnboardingTarget = { type: "dm", userId: "U0A9P3CC5EE" }; - const profile = await startOnboarding(slack as never, target, "Scout", mockRole, db, client); + const result = await startOnboarding(slack as never, target, "Scout", mockRole, db, client); - expect(profile).not.toBeNull(); - expect(profile?.name).toBe("Cheema"); - expect(profile?.title).toBe("Founder"); - 
expect(profile?.teamName).toBe("Ghostwright"); + expect(result.profile).not.toBeNull(); + expect(result.profile?.name).toBe("Cheema"); + expect(result.profile?.title).toBe("Founder"); + expect(result.profile?.teamName).toBe("Ghostwright"); + expect(result.skipped).toBe(false); }); test("falls back to generic intro when profiling fails", async () => { @@ -221,12 +222,11 @@ describe("startOnboarding with profiling", () => { }; const target: OnboardingTarget = { type: "dm", userId: "U04XYZ789" }; - const profile = await startOnboarding(slack as never, target, "Scout", mockRole, db, failingClient); + const result = await startOnboarding(slack as never, target, "Scout", mockRole, db, failingClient); const text = slack.sendDm.mock.calls[0][1] as string; - // Generic fallback when profile has no real data expect(text).toContain("Hey there. I'm Scout"); - expect(profile).toBeNull(); + expect(result.profile).toBeNull(); }); test("does not profile for channel targets", async () => { @@ -239,12 +239,221 @@ describe("startOnboarding with profiling", () => { expect(client.users.info).not.toHaveBeenCalled(); }); - test("returns null when target is channel", async () => { + test("returns null profile when target is channel", async () => { const slack = createMockSlack(); const target: OnboardingTarget = { type: "channel", channelId: "C04ABC123" }; - const profile = await startOnboarding(slack as never, target, "Scout", mockRole, db); + const result = await startOnboarding(slack as never, target, "Scout", mockRole, db); - expect(profile).toBeNull(); + expect(result.profile).toBeNull(); + }); +}); + +describe("appendResearchSection", () => { + test("returns the original message when research is null", () => { + expect(appendResearchSection("hi", null)).toBe("hi"); + }); + + test("returns the original message when bullets is null", () => { + expect(appendResearchSection("hi", { bullets: null, sources: [], outcome: "empty" })).toBe("hi"); + }); + + test("appends bullets when 
present", () => { + const out = appendResearchSection("hi", { + bullets: ["First bullet.", "Second bullet."], + sources: [ + { kind: "github", url: "https://github.com/x" }, + { kind: "personal_site", url: "https://x.com" }, + ], + outcome: "ok", + }); + expect(out).toContain("hi"); + expect(out).toContain("What I learned about you so far"); + expect(out).toContain("- First bullet."); + expect(out).toContain("- Second bullet."); + }); +}); + +describe("Phase 12 idempotency in startOnboarding", () => { + let db: Database; + + beforeEach(() => { + db = new Database(":memory:"); + db.run("PRAGMA journal_mode = WAL"); + db.run("PRAGMA foreign_keys = ON"); + runMigrations(db); + }); + + afterEach(() => { + db.close(); + }); + + test("first call sends the DM and stamps the firstboot ledger", async () => { + const slack = createMockSlack(); + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + + const result = await startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { + researchEnabled: false, + }); + + expect(result.skipped).toBe(false); + expect(slack.sendDm).toHaveBeenCalledTimes(1); + expect(isIntroSent(db)).toBe(true); + }); + + test("second call skips entirely when ledger says intro_sent_at is set", async () => { + const slack = createMockSlack(); + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + + await startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { researchEnabled: false }); + const callsAfterFirst = slack.sendDm.mock.calls.length; + + const second = await startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { + researchEnabled: false, + }); + + expect(second.skipped).toBe(true); + expect(slack.sendDm.mock.calls.length).toBe(callsAfterFirst); + }); + + test("ledger records the research outcome", async () => { + const slack = createMockSlack(); + const target: OnboardingTarget = { type: "channel", channelId: "C001" }; + + await startOnboarding(slack as never, 
target, "Scout", mockRole, db, undefined, { + ownerEmail: "matt@acme.com", + enrichImpl: async () => ({ + bullets: ["b1"], + sources: [{ kind: "github", url: "https://github.com/matt" }], + outcome: "ok", + }), + }); + + expect(getFirstbootState(db).research_outcome).toBe("ok"); + }); + + test("does not stamp ledger when sendDm throws (so a retry can happen)", async () => { + const slack = { + sendDm: mock(() => Promise.reject(new Error("slack down"))), + postToChannel: mock(() => Promise.resolve("1.0")), + }; + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + + await expect( + startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { researchEnabled: false }), + ).rejects.toThrow("slack down"); + + expect(isIntroSent(db)).toBe(false); + }); +}); + +describe("Phase 12 research integration in startOnboarding", () => { + let db: Database; + + beforeEach(() => { + db = new Database(":memory:"); + db.run("PRAGMA journal_mode = WAL"); + db.run("PRAGMA foreign_keys = ON"); + runMigrations(db); + }); + + afterEach(() => { + db.close(); + }); + + test("research bullets are appended to the intro DM", async () => { + const slack = createMockSlack(); + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + + await startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { + ownerEmail: "matt@acme.com", + ownerName: "Matt Example", + enrichImpl: async () => ({ + bullets: ["On GitHub as @matt: building developer tools.", "Their site at acme.com: Acme builds tools."], + sources: [ + { kind: "github", url: "https://github.com/matt" }, + { kind: "personal_site", url: "https://acme.com" }, + ], + outcome: "ok", + }), + }); + + const text = slack.sendDm.mock.calls[0][1] as string; + expect(text).toContain("What I learned about you so far"); + expect(text).toContain("@matt"); + expect(text).toContain("acme.com"); + }); + + test("empty research result does NOT add the section", async () => { + const slack = 
createMockSlack(); + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + + await startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { + ownerEmail: "matt@acme.com", + enrichImpl: async () => ({ bullets: null, sources: [], outcome: "empty" }), + }); + + const text = slack.sendDm.mock.calls[0][1] as string; + expect(text).not.toContain("What I learned about you so far"); + }); + + test("network failure during research still sends the intro DM", async () => { + const slack = createMockSlack(); + const target: OnboardingTarget = { type: "channel", channelId: "C001" }; + + await startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { + ownerEmail: "matt@acme.com", + enrichImpl: async () => { + throw new Error("ENETDOWN"); + }, + }); + + expect(slack.postToChannel).toHaveBeenCalledTimes(1); + expect(getFirstbootState(db).research_outcome).toBe("error"); + }); + + test("researchEnabled=false skips research entirely", async () => { + const slack = createMockSlack(); + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + const enrich = mock(() => Promise.resolve({ bullets: ["b"], sources: [], outcome: "ok" as const })); + + await startOnboarding(slack as never, target, "Scout", mockRole, db, undefined, { + ownerEmail: "matt@acme.com", + researchEnabled: false, + enrichImpl: enrich, + }); + + expect(enrich).not.toHaveBeenCalled(); + }); + + test("Slack profile name is used as fallback for research input", async () => { + const slack = createMockSlack(); + const client = createMockSlackClient(); + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + const enrich = mock(() => Promise.resolve({ bullets: null, sources: [], outcome: "empty" as const })) as never; + + await startOnboarding(slack as never, target, "Scout", mockRole, db, client, { + ownerEmail: "matt@acme.com", + enrichImpl: enrich as never, + }); + + const seen = (enrich as unknown as { mock: { calls: Array<[{ name?: string }]> } 
}).mock.calls[0][0].name; + expect(seen).toBe("Cheema"); + }); + + test("explicit ownerName beats Slack profile name", async () => { + const slack = createMockSlack(); + const client = createMockSlackClient(); + const target: OnboardingTarget = { type: "dm", userId: "U001" }; + const enrich = mock(() => Promise.resolve({ bullets: null, sources: [], outcome: "empty" as const })) as never; + + await startOnboarding(slack as never, target, "Scout", mockRole, db, client, { + ownerEmail: "matt@acme.com", + ownerName: "Override Name", + enrichImpl: enrich as never, + }); + + const seen = (enrich as unknown as { mock: { calls: Array<[{ name?: string }]> } }).mock.calls[0][0].name; + expect(seen).toBe("Override Name"); }); }); diff --git a/src/onboarding/__tests__/prompt.test.ts b/src/onboarding/__tests__/prompt.test.ts index 8dafe477..a5fb7f7d 100644 --- a/src/onboarding/__tests__/prompt.test.ts +++ b/src/onboarding/__tests__/prompt.test.ts @@ -1,7 +1,8 @@ import { describe, expect, test } from "bun:test"; +import type { OwnerResearchResult } from "../../agent/research/types.ts"; import type { RoleTemplate } from "../../roles/types.ts"; import type { OwnerProfile } from "../profiler.ts"; -import { buildOnboardingPrompt } from "../prompt.ts"; +import { buildOnboardingPrompt, buildResearchContext } from "../prompt.ts"; const mockRole: RoleTemplate = { id: "swe", @@ -172,3 +173,81 @@ describe("buildOnboardingPrompt with owner profile", () => { expect(prompt).not.toContain("workspace owner"); }); }); + +describe("buildResearchContext", () => { + const research: OwnerResearchResult = { + bullets: ["On GitHub as @matt: building tools.", "LinkedIn headline: Senior Engineer."], + sources: [ + { kind: "github", url: "https://github.com/matt" }, + { kind: "linkedin_public", url: "https://www.linkedin.com/in/matt" }, + ], + outcome: "ok", + }; + + test("returns empty string when research is null", () => { + expect(buildResearchContext(null)).toBe(""); + }); + + test("returns empty 
string when bullets is null", () => { + expect(buildResearchContext({ bullets: null, sources: [], outcome: "empty" })).toBe(""); + }); + + test("returns empty string when bullets is empty", () => { + expect(buildResearchContext({ bullets: [], sources: [], outcome: "ok" })).toBe(""); + }); + + test("emits the public-sources heading when bullets exist", () => { + const out = buildResearchContext(research); + expect(out).toContain("## What I Learned About Them (Public Sources)"); + }); + + test("includes each bullet with its source kind tag", () => { + const out = buildResearchContext(research); + expect(out).toContain("- On GitHub as @matt"); + expect(out).toContain("[github]"); + expect(out).toContain("[linkedin_public]"); + }); + + test("warns the agent against treating public bullets as deep knowledge", () => { + const out = buildResearchContext(research); + expect(out).toContain("public sources only"); + expect(out).toContain("Verify with the user"); + }); +}); + +describe("buildOnboardingPrompt with research", () => { + const minimalResearch: OwnerResearchResult = { + bullets: ["On GitHub as @matt: building tools."], + sources: [{ kind: "github", url: "https://github.com/matt" }], + outcome: "ok", + }; + + test("research section appears when bullets are present", () => { + const prompt = buildOnboardingPrompt(mockRole, "Scout", undefined, minimalResearch); + expect(prompt).toContain("## What I Learned About Them"); + expect(prompt).toContain("@matt"); + }); + + test("research section is omitted when research is null", () => { + const prompt = buildOnboardingPrompt(mockRole, "Scout", undefined, null); + expect(prompt).not.toContain("## What I Learned About Them"); + }); + + test("research and owner profile coexist", () => { + const profile: OwnerProfile = { + name: "Cheema", + title: "Founder", + timezone: null, + status: null, + isAdmin: true, + isOwner: true, + teamName: "Ghostwright", + channels: [], + }; + const prompt = buildOnboardingPrompt(mockRole, 
"Scout", profile, minimalResearch); + expect(prompt).toContain("## Owner Context"); + expect(prompt).toContain("## What I Learned About Them"); + // Research section appears AFTER owner context. + expect(prompt.indexOf("## What I Learned About Them")).toBeGreaterThan(prompt.indexOf("## Owner Context")); + }); +}); diff --git a/src/onboarding/flow.ts b/src/onboarding/flow.ts index f0eaf478..b32f2e4f 100644 --- a/src/onboarding/flow.ts +++ b/src/onboarding/flow.ts @@ -1,11 +1,22 @@ import type { Database } from "bun:sqlite"; +import { enrichOwner } from "../agent/research/enrich-owner.ts"; +import type { OwnerResearchResult } from "../agent/research/types.ts"; import type { SlackTransport } from "../channels/slack-transport.ts"; import type { RoleTemplate } from "../roles/types.ts"; import { type OwnerProfile, type SlackProfileClient, hasPersonalizationData, profileOwner } from "./profiler.ts"; -import { markOnboardingStarted } from "./state.ts"; +import { isIntroSent, markIntroSent, markOnboardingStarted } from "./state.ts"; export type OnboardingTarget = { type: "channel"; channelId: string } | { type: "dm"; userId: string }; +/** Result of an onboarding run. The caller uses both the OwnerProfile + * (Slack-side personalization) and the OwnerResearchResult (Phase 12 + * public-source enrichment) to compose the system-prompt overlay. */ +export type StartOnboardingResult = { + profile: OwnerProfile | null; + research: OwnerResearchResult | null; + skipped: boolean; +}; + function buildGenericIntro(phantomName: string, _role: RoleTemplate): string { return [ `Hey there. I'm ${phantomName}, just got spun up on my own machine.`, @@ -40,8 +51,44 @@ function buildPersonalizedIntro(phantomName: string, _role: RoleTemplate, profil } /** - * Start the onboarding flow by profiling the owner and sending a personalized DM. - * Falls back to generic intro if profiling fails or no owner is configured. 
+ * Append the Phase 12 "What I learned" research section to an intro + * message. Returns the original message unchanged when research yielded + * no bullets; we never fabricate, so an empty result means the section + * does not render at all (architect invariant: don't fabricate). + */ +export function appendResearchSection(intro: string, research: OwnerResearchResult | null): string { + if (!research || !research.bullets || research.bullets.length === 0) { + return intro; + } + const bulletLines = research.bullets.map((b) => ` - ${b}`); + return [intro, "", "What I learned about you so far:", ...bulletLines].join("\n"); +} + +export interface StartOnboardingOptions { + /** PHANTOM_OWNER_EMAIL. Phase 12 research input. */ + ownerEmail?: string; + /** PHANTOM_OWNER_NAME. Optional. */ + ownerName?: string; + /** PHANTOM_OWNER_LINKEDIN_URL. Optional, future wizard step. */ + ownerLinkedinUrl?: string; + /** When false, skip Phase 12 research entirely (operator escape + * hatch via PHANTOM_OWNER_RESEARCH_ENABLED=false). Defaults to true. */ + researchEnabled?: boolean; + /** Inject a research function for tests. Defaults to enrichOwner. */ + enrichImpl?: typeof enrichOwner; +} + +/** + * Start the onboarding flow by profiling the owner and sending a + * personalized DM. Falls back to generic intro if profiling fails or no + * owner is configured. Phase 12: also runs the public-source research + * subroutine and appends a "What I learned" section to the intro. + * + * Idempotency (Phase 12 fix for the LOW bug at phantom CLAUDE.md:284): + * if the firstboot ledger already records intro_sent_at, the function + * returns immediately with skipped=true. Both the research probe AND + * the DM send are skipped, so a process restart never re-introduces + * the agent. 
*/ export async function startOnboarding( slack: SlackTransport, @@ -50,7 +97,13 @@ export async function startOnboarding( role: RoleTemplate, db: Database, slackClient?: SlackProfileClient, -): Promise<OwnerProfile | null> { + options: StartOnboardingOptions = {}, +): Promise<StartOnboardingResult> { + if (isIntroSent(db)) { + console.log("[onboarding] firstboot ledger says intro_sent_at is set; skipping intro DM and research"); + return { profile: null, research: null, skipped: true }; + } + markOnboardingStarted(db); // If we have a DM target and a slack client, profile the owner for personalization @@ -65,10 +118,17 @@ export async function startOnboarding( } } - const intro = + // Phase 12 research subroutine. Time-bounded inside enrichOwner; + // budget elapsed -> partial result -> we still send the DM. Anything + // thrown by the research path is caught here so a transient network + // failure cannot break onboarding for the owner. + const research = await runResearch(options, profile); + + const baseIntro = profile !== null && hasPersonalizationData(profile) ? buildPersonalizedIntro(phantomName, role, profile) : buildGenericIntro(phantomName, role); + const intro = appendResearchSection(baseIntro, research); const hasUsefulProfile = profile !== null && hasPersonalizationData(profile); if (target.type === "dm") { @@ -79,6 +139,42 @@ export async function startOnboarding( console.log(`[onboarding] Introduction posted to channel ${target.channelId}`); } - // Return profile only if it has useful data for onboarding prompt injection - return hasUsefulProfile ? profile : null; + // Mark the intro as sent only AFTER the Slack call succeeds. If + // sendDm or postToChannel throws, we want the next process start to + // retry the DM, not skip it because of a stale ledger row. + markIntroSent(db, research?.outcome ?? "disabled"); + + return { + profile: hasUsefulProfile ? 
profile : null, + research, + skipped: false, + }; +} + +async function runResearch( + options: StartOnboardingOptions, + profile: OwnerProfile | null, +): Promise<OwnerResearchResult | null> { + const enabled = options.researchEnabled !== false; + const email = (options.ownerEmail ?? "").trim(); + if (!enabled || !email) { + // No email -> nothing to research. We could still surface the + // Slack profile but that is already in the personalized intro. + return null; + } + + const enrichFn = options.enrichImpl ?? enrichOwner; + try { + // Prefer the explicit PHANTOM_OWNER_NAME (operator-set); fall + // back to the Slack profile's real_name when present so the + // research path has something to work with even when phantomd + // has not yet stamped PHANTOM_OWNER_NAME. + const nameFromProfile = profile?.name && profile.name !== "there" ? profile.name : undefined; + const name = options.ownerName?.trim() || nameFromProfile; + return await enrichFn({ email, name, linkedinUrl: options.ownerLinkedinUrl?.trim() || undefined }); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + console.warn(`[onboarding] Phase 12 research subroutine threw: ${msg}. Continuing without bullets.`); + return { bullets: null, sources: [], outcome: "error" }; + } } diff --git a/src/onboarding/prompt.ts b/src/onboarding/prompt.ts index 3dadd06a..20979ebd 100644 --- a/src/onboarding/prompt.ts +++ b/src/onboarding/prompt.ts @@ -1,3 +1,4 @@ +import type { OwnerResearchResult } from "../agent/research/types.ts"; import type { RoleTemplate } from "../roles/types.ts"; import type { OwnerProfile } from "./profiler.ts"; @@ -32,18 +33,54 @@ function buildOwnerContext(profile: OwnerProfile): string { return lines.join("\n"); } +/** + * Phase 12: Build the public-research section for the system prompt. + * The agent reads what we learned about the owner from public sources + * BEFORE the first conversation. 
We mark the source kind on each line + * so the agent can talk about it honestly ("I noticed on your GitHub + * that...") instead of presenting public bullets as deep knowledge. + * Returns the empty string when research is null or yielded no + * bullets, so the assembler skips the section instead of emitting an + * empty stub. + */ +export function buildResearchContext(research: OwnerResearchResult | null | undefined): string { + if (!research || !research.bullets || research.bullets.length === 0) return ""; + + const lines: string[] = []; + lines.push("## What I Learned About Them (Public Sources)"); + lines.push(""); + lines.push( + "The following bullets come from public sources only (GitHub, personal site, LinkedIn public profile). Reference them naturally if it helps; never present them as deep knowledge of the user. Verify with the user when you act on any of this.", + ); + lines.push(""); + for (let i = 0; i < research.bullets.length; i++) { + const bullet = research.bullets[i]; + const source = research.sources[i]; + const tag = source ? ` [${source.kind}]` : ""; + lines.push(`- ${bullet}${tag}`); + } + return lines.join("\n"); +} + /** * Build the system prompt section injected when the agent is onboarding. * Role-agnostic: the agent follows the user's lead instead of running * through a predefined checklist. Cardinal Rule applies here too. */ -export function buildOnboardingPrompt(_role: RoleTemplate, phantomName: string, ownerProfile?: OwnerProfile): string { +export function buildOnboardingPrompt( + _role: RoleTemplate, + phantomName: string, + ownerProfile?: OwnerProfile, + research?: OwnerResearchResult | null, +): string { const ownerSection = ownerProfile ? `\n\n${buildOwnerContext(ownerProfile)}` : ""; + const researchBlock = buildResearchContext(research); + const researchSection = researchBlock ? `\n\n${researchBlock}` : ""; const ownerName = ownerProfile?.name ?? 
"your user"; return `## Onboarding Mode -This is your first real conversation with ${ownerName}. You are ${phantomName}.${ownerSection} +This is your first real conversation with ${ownerName}. You are ${phantomName}.${ownerSection}${researchSection} Your goal: understand their work well enough to be immediately useful. Not "onboard them through a checklist." Understand their work. diff --git a/src/onboarding/state.ts b/src/onboarding/state.ts index a71ba037..6afe9fdc 100644 --- a/src/onboarding/state.ts +++ b/src/onboarding/state.ts @@ -29,3 +29,42 @@ export function markOnboardingComplete(db: Database): void { WHERE status = 'in_progress'`, ); } + +// Phase 12 idempotency state: a single-row firstboot ledger that records +// whether the intro DM has gone out and what the research outcome was. +// We pin id=1 with a CHECK so the table behaves as a singleton; the +// first insert and every later upsert touch the same row. This closes +// the LOW bug at phantom CLAUDE.md:284 where the intro DM re-fired on +// every restart while evolution generation stayed at 0. + +export type FirstbootState = { + intro_sent_at: string | null; + research_outcome: string | null; +}; + +export function getFirstbootState(db: Database): FirstbootState { + const row = db + .query("SELECT intro_sent_at, research_outcome FROM firstboot_state WHERE id = 1") + .get() as FirstbootState | null; + return row ?? { intro_sent_at: null, research_outcome: null }; +} + +/** True when the intro DM has already been sent successfully. The + * firstboot path checks this BEFORE running research or sending the DM. */ +export function isIntroSent(db: Database): boolean { + return getFirstbootState(db).intro_sent_at !== null; +} + +/** Mark the intro DM as sent. Idempotent: the upsert preserves the + * original intro_sent_at on the second call so we keep the truthful + * first-time record. 
/**
 * Record in the single-row firstboot ledger (id = 1) that the intro DM
 * went out, together with the research outcome for that boot.
 *
 * - No row yet: one is inserted, stamped with SQLite's `datetime('now')`.
 * - Row already present: `research_outcome` is overwritten with the new
 *   value, and `intro_sent_at` keeps its existing value when it has one,
 *   so the first-send timestamp never moves once set.
 * - Row already present but with a NULL `intro_sent_at`: the timestamp
 *   is filled in now. Without the COALESCE the conflict branch would
 *   leave it NULL, and `isIntroSent` would keep reporting false on every
 *   later boot.
 *
 * @param db - SQLite handle that owns the `firstboot_state` table.
 * @param researchOutcome - Outcome label stored in `research_outcome`.
 */
export function markIntroSent(db: Database, researchOutcome: string): void {
	db.run(
		`INSERT INTO firstboot_state (id, intro_sent_at, research_outcome)
		 VALUES (1, datetime('now'), ?)
		 ON CONFLICT(id) DO UPDATE SET
		   intro_sent_at = COALESCE(firstboot_state.intro_sent_at, excluded.intro_sent_at),
		   research_outcome = excluded.research_outcome`,
		[researchOutcome],
	);
}