From ce7c8bb4e82372ca4b23946ed4816aa2db41f22f Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Sun, 21 Jun 2026 11:34:01 +0800
Subject: [PATCH 01/27] emit: emit type-checked TypeScript (tsTarget, issue #6
 first step)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

emitParser now emits a standalone TypeScript module that passes `tsc --strict
--noEmit`, replacing the previously untyped JS output. This makes the emitted
parser's type contract explicit and gated by construction — the monomorphic
parse-state struct (Doc), the matcher/runtime signatures, the spare-buffer
mirrors, and the baked op/rule tables all carry types tsc verifies for
consistency. That contract is the part a future Go/Rust target must reproduce,
so surfacing it now (rather than deferring it to the first non-JS target) is the
de-risking first step of issue #6.

The additions are erasable TypeScript only (annotations, optional params, `!`
assertions) — Node runs the emitted parser by stripping types, so the runtime is
unchanged. The arity-looseness the JS output relied on (calling matchers with
omitted trailing diagnostic args) is replaced by explicit optional params, the
one JS-ism that would not survive a typed/Go/Rust target.

Gates:
- new emit-tsc-gate: the emitted parser type-checks under `tsc --strict` for the
  soa + emitted-lexer family (typescript, javascript, typescriptreact,
  javascriptreact). The fallback-lexer / non-soa path (yaml, html) is logged as
  deferred — it carries additional untyped surface and a pre-existing latent
  scope reference (the non-soa editCore branch names cs/ceOld/parenCachePos that
  exist only in the soa branch; unreached at runtime, hence invisible until now).
- emit-parser-verify unchanged: emitted CST stays byte-identical to the
  interpreter (109/109 in-repo + 401/401 external, 0 mismatches).
- bench unchanged (~14x): type-stripping happens once at import, not per parse.

Test harnesses that import the emitted module now write `.mts` so Node strips
types on import. K_ARR/T_ARR column widths are single-sourced in analyze() so
emitRuntime and emitDriver's spare buffers pick the same width.
---
 src/emit-lexer.ts            |  68 +++----
 src/emit-parser.ts           | 373 +++++++++++++++++++----------------
 test/check.ts                |   1 +
 test/cst-match-totality.ts   |   2 +-
 test/emit-lexer-verify.ts    |   2 +-
 test/emit-parser-bench.ts    |   2 +-
 test/emit-parser-verify.ts   |   2 +-
 test/emit-reject-messages.ts |   2 +-
 test/emit-tsc-gate.ts        |  72 +++++++
 test/exhaustive-edits.ts     |   2 +-
 test/head-to-head.ts         |   2 +-
 test/incremental-grammars.ts |   4 +-
 test/incremental-verify.ts   |   2 +-
 test/multi-doc.ts            |   2 +-
 test/recovery-conformance.ts |   2 +-
 test/recovery.ts             |   2 +-
 16 files changed, 319 insertions(+), 221 deletions(-)
 create mode 100644 test/emit-tsc-gate.ts

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index 13e254d..ba09347 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -39,7 +39,7 @@ const resyncRetractLine = (indent: string): string =>
 // loop, so `cc>127 && lxNonAsciiWs(cc)` is EXACTLY "the regex would match here" → byte-
 // identical, minus the wasted exec on the common non-whitespace case (#45 B4).
 const NON_ASCII_WS_FN =
-  `function lxNonAsciiWs(cc) { return cc === 0xa0 || cc === 0x1680 || (cc >= 0x2000 && cc <= 0x200a) || cc === 0x2028 || cc === 0x2029 || cc === 0x202f || cc === 0x205f || cc === 0x3000 || cc === 0xfeff; }`;
+  `function lxNonAsciiWs(cc: number) { return cc === 0xa0 || cc === 0x1680 || (cc >= 0x2000 && cc <= 0x200a) || cc === 0x2028 || cc === 0x2029 || cc === 0x202f || cc === 0x205f || cc === 0x3000 || cc === 0xfeff; }`;
 // The non-ASCII whitespace fallback, emitted at the two sites that need it (after an ASCII run,
 // and as the lead char). `cont` appends the `continue` the lead-char site needs.
 const nonAsciiWsConsume = (v: string, cont: boolean, indent: string): string =>
@@ -134,22 +134,22 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// min paren depth recorded over the old suffix [j, altN) (pop-on-empty = -1),`);
   emit(`// built lazily once per edit (the caller nulls it when the alt stream changes).`);
   emit(`let lexResyncPd = 0;`);
-  emit(`let altSuffMin = null;`);
-  emit(`let altSuffMinBuf = null;`);
+  emit(`let altSuffMin: Int32Array | null = null;`);
+  emit(`let altSuffMinBuf: Int32Array | null = null;`);
   emit(`// ')' pops that found an empty stack, in THIS lexCore call's token indices`);
-  emit(`let lexEmptyPops = [];`);
+  emit(`let lexEmptyPops: number[] = [];`);
   emit(`// Min OLD-stream paren depth over the tokens inside the damage itself (set by the`);
   emit(`// caller before the window lex): the old-side trajectory min starts from here.`);
   emit(`let wndOldMin0 = 0x7fffffff;`);
-  emit(`function buildAltSuffMin(lo) {`);
+  emit(`function buildAltSuffMin(lo: number) {`);
   emit(`  if (altSuffMinBuf === null || altSuffMinBuf.length < altN + 1) altSuffMinBuf = new Int32Array(altN + 1025);`);
   emit(`  altSuffMin = altSuffMinBuf;`);
-  emit(`  altSuffMin[altN] = 0x7fffffff;`);
+  emit(`  altSuffMin![altN] = 0x7fffffff;`);
   emit(`  for (let j = altN - 1; j >= lo; j--) {`);
-  emit(`    let d = altPd[j];`);
-  emit(`    if (d === 0 && altK[j] === K_PUNCT && altT[j] === ${tOf(')')} && (j === 0 || altPd[j - 1] === 0)) d = -1;`);
-  emit(`    const nx = altSuffMin[j + 1];`);
-  emit(`    altSuffMin[j] = d < nx ? d : nx;`);
+  emit(`    let d = altPd![j];`);
+  emit(`    if (d === 0 && altK![j] === K_PUNCT && altT![j] === ${tOf(')')} && (j === 0 || altPd![j - 1] === 0)) d = -1;`);
+  emit(`    const nx = altSuffMin![j + 1];`);
+  emit(`    altSuffMin![j] = d < nx ? d : nx;`);
   emit(`  }`);
   emit(`}`);
   emit(`const LX_UNI_IDENT = /[$_\\p{ID_Start}][$\\u200c\\u200d\\p{ID_Continue}]*/uy;`);
@@ -175,7 +175,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   // Length window → first-charCode switch → per-keyword compare chains (shortest first);
   // returns exactly what LIT_KW.get(source.slice(a, b)) ?? 0 would — the keyword set is
   // enumerated completely and keywords are pure ASCII, so charCode compares are exact.
-  emit(`function lexKwT(source, a, b) {`);
+  emit(`function lexKwT(source: string, a: number, b: number) {`);
   const kwEntries = [...st.kwLitKind.entries()];
   if (kwEntries.length === 0) {
     emit(`  return 0;`);
@@ -205,11 +205,11 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   }
   emit(`}`);
   // identTextValid, with the per-token prefix length baked at the call site.
-  emit(`function lexIdentValid(text, prefixLen) {`);
+  emit(`function lexIdentValid(text: string, prefixLen: number) {`);
   emit(`  const body = prefixLen > 0 ? text.slice(prefixLen) : text;`);
   emit(`  if (!body.includes('\\\\')) return true;`);
   emit(`  let bad = false;`);
-  emit(`  const decoded = body.replace(LX_DECODE_ESC, (_m, braced, fixed) => {`);
+  emit(`  const decoded = body.replace(LX_DECODE_ESC, (_m: string, braced: string, fixed: string) => {`);
   emit(`    const cp = parseInt(braced ?? fixed, 16);`);
   emit(`    if (cp > 0x10FFFF) { bad = true; return ''; }`);
   emit(`    return String.fromCodePoint(cp);`);
@@ -219,7 +219,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  return m !== null && m[0].length === decoded.length;`);
   emit(`}`);
   if (templateToken) {
-    emit(`function lexTplSpan(source, pos, validateEscapes) {`);
+    emit(`function lexTplSpan(source: string, pos: number, validateEscapes: boolean) {`);
     emit(`  const tplFrom = pos;`);
     emit(`  while (pos < source.length) {`);
     emit(`    if (${startsWithExpr('source', 'pos', tplInterpOpen)}) return { endsWithInterp: true, end: pos + ${tplInterpOpen.length} };`);
@@ -256,7 +256,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   // — no per-token object, no text slice: text is materialized from the source span only
   // when a CST leaf is built. Flag bits: 1 = newlineBefore (the only stamp this emitted
   // lexer ever sets; comment/multilineFlow stamps belong to fallback-only grammars).
-  emit(`function tokenize(source) {`);
+  emit(`function tokenize(source: string) {`);
   emit(`  docPieces = [source]; docPieceOff = [0]; docLen = source.length;`);
   emit(`  docFlat = source; docCur = 0;`);
   emit(`  tokN = 0;`);
@@ -281,7 +281,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// old token (same k/t, offsets shifted by wndDelta, both depth records 0) while`);
   emit(`// the window's own stacks are empty — returns that OLD index (the duplicate push`);
   emit(`// is retracted), or -1 when lexing ran to EOF.`);
-  emit(`function lexCore(source, startPos, pvK, pvT, wndPtr0, wndMinOff, wndDelta, wndCs, initParens, srcBase, hasMore) {`);
+  emit(`function lexCore(source: string, startPos: number, pvK: number, pvT: number, wndPtr0: number, wndMinOff: number, wndDelta: number, wndCs?: number, initParens?: boolean[] | null, srcBase?: number, hasMore?: boolean) {`);
   emit(`  if (srcBase === undefined) srcBase = 0;`);
   emit(`  lexWindowMore = hasMore === true;`);
   emit(`  lexSrcBase = srcBase;`);
@@ -291,7 +291,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  let extraFl = 0;`);
   emit(`  let lastBangWasPostfix = false;`);
   emit(`  let lastCloseWasParenHead = false;`);
-  emit(`  const templateStack = [];`);
+  emit(`  const templateStack: number[] = [];`);
   emit(`  const parenHeadStack = initParens !== undefined && initParens !== null ? initParens : [];`);
   emit(`  let wndPtr = wndPtr0;`);
   emit(`  let wndHit = -1;`);
@@ -301,8 +301,8 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  // tokens and stack ops). An entry at depth <= BOTH mins was open at the`);
   emit(`  // divergence point in both lexes - i.e. it is the SAME entry.`);
   emit(`  let dmgMinOld = wndOldMin0, dmgMinNew = -1;`);
-  emit(`  function tkPush(k, t, off, end) {`);
-  emit(`    off += srcBase; end += srcBase;`);
+  emit(`  function tkPush(k: number, t: number, off: number, end: number) {`);
+  emit(`    off += srcBase!; end += srcBase!;`);
   emit(`    if (tokN === tkCap) growTok();`);
   emit(`    tkK[tokN] = k; tkT[tokN] = t; tkOff[tokN] = off; tkEnd[tokN] = end;`);
   emit(`    tkFl[tokN] = (pendingNl ? 1 : 0) | extraFl;`);
@@ -331,20 +331,20 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`    //    adopted tkPd column by lexResyncPd to the new truth.`);
   emit(`    if (wndPtr >= 0) {`);
   emit(`      const pd = tkPd[tokN - 1];`);
-  emit(`      if (dmgMinNew < 0) { if (off >= wndCs) dmgMinNew = pd; }`);
+  emit(`      if (dmgMinNew < 0) { if (off >= wndCs!) dmgMinNew = pd; }`);
   emit(`      else if (pd < dmgMinNew) dmgMinNew = pd;`);
   emit(`      if (off >= wndMinOff) {`);
-  emit(`        while (wndPtr < altN && (altOff[wndPtr] < 0 ? altOff[wndPtr] + srcLenP1 : altOff[wndPtr]) + wndDelta < off) { if (altPd[wndPtr] < dmgMinOld) dmgMinOld = altPd[wndPtr]; wndPtr++; }`);
-  emit(`        if (wndPtr < altN && (altOff[wndPtr] < 0 ? altOff[wndPtr] + srcLenP1 : altOff[wndPtr]) + wndDelta === off && altK[wndPtr] === k && altT[wndPtr] === t`);
-  emit(`            && (altEnd[wndPtr] < 0 ? altEnd[wndPtr] + srcLenP1 : altEnd[wndPtr]) + wndDelta === end`);
+  emit(`        while (wndPtr < altN && (altOff![wndPtr] < 0 ? altOff![wndPtr] + srcLenP1 : altOff![wndPtr]) + wndDelta < off) { if (altPd![wndPtr] < dmgMinOld) dmgMinOld = altPd![wndPtr]; wndPtr++; }`);
+  emit(`        if (wndPtr < altN && (altOff![wndPtr] < 0 ? altOff![wndPtr] + srcLenP1 : altOff![wndPtr]) + wndDelta === off && altK![wndPtr] === k && altT![wndPtr] === t`);
+  emit(`            && (altEnd![wndPtr] < 0 ? altEnd![wndPtr] + srcLenP1 : altEnd![wndPtr]) + wndDelta === end`);
   emit(`            // the candidate's LEADING-TRIVIA flags must match too: the gap before`);
   emit(`            // it may sit inside the edit (newline removed/added without moving any`);
   emit(`            // token bytes), and parsers read these flags (sameLine / commentBefore)`);
-  emit(`            && altFl[wndPtr] === tkFl[tokN - 1]`);
-  emit(`            && templateStack.length === 0 && altDp[wndPtr] === 0`);
+  emit(`            && altFl![wndPtr] === tkFl[tokN - 1]`);
+  emit(`            && templateStack.length === 0 && altDp![wndPtr] === 0`);
   emit(`            && LX_PFXV[t] === 0 && LX_PARENKW[t] === 0`);
   emit(`            && !(k === K_PUNCT && (t === ${tLParen} || t === ${tRParen}))) {`);
-  emit(`          const q = altPd[wndPtr];`);
+  emit(`          const q = altPd![wndPtr];`);
   emit(`          if (q < dmgMinOld) dmgMinOld = q;`);
   emit(`          if (q === pd && pd <= dmgMinOld && pd <= dmgMinNew) {`);
   emit(`            wndHit = wndPtr;`);
@@ -358,7 +358,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`              okTail = docEmptyPops.length === 0 || docEmptyPops[docEmptyPops.length - 1] <= wndPtr;`);
   emit(`            } else {`);
   emit(`              if (altSuffMin === null) buildAltSuffMin(wndPtr0);`);
-  emit(`              okTail = altSuffMin[wndPtr + 1] >= q;`);
+  emit(`              okTail = altSuffMin![wndPtr + 1] >= q;`);
   emit(`            }`);
   emit(`            if (okTail) {`);
   emit(`              wndHit = wndPtr;`);
@@ -495,7 +495,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
       emit(`${ind}  extraFl = _ph ? 8 : 0; }`);
     } else if (lit === ')') {
       emit(`${ind}if (parenHeadStack.length === 0) { lastCloseWasParenHead = false; lexEmptyPops.push(tokN); }`);
-      emit(`${ind}else lastCloseWasParenHead = parenHeadStack.pop();`);
+      emit(`${ind}else lastCloseWasParenHead = parenHeadStack.pop()!;`);
     }
     if (regexCtx?.postfixAfterValueTexts?.includes(lit)) {
       emit(`${ind}lastBangWasPostfix = prevIsValue();`);
@@ -635,7 +635,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// lexer flag live (a control-head ')' or a postfix-ambiguous operator would`);
   emit(`// make the next token's regex-context depend on unrecoverable state). -1 = file`);
   emit(`// head (always sound, degrades to a full re-lex).`);
-  emit(`function findRestart(cs) {`);
+  emit(`function findRestart(cs: number) {`);
   emit(`  let lo = 0, hi = tokN;`);
   // STRICTLY before the damage: a token ENDING exactly at cs can be EXTENDED by
   // the edit under maximal munch ('b' + inserted 'x' = 'bx'; '=' + '=' = '==';
@@ -658,9 +658,9 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// openers at that depth are re-opened later, and the re-opener comes first`);
   emit(`// backward). The '(' records its depth INCLUDING itself, and carries its`);
   emit(`// control-head-ness as tkFl bit 8.`);
-  emit(`function reconstructParens(b) {`);
+  emit(`function reconstructParens(b: number) {`);
   emit(`  let need = b >= 0 ? tkPd[b] : 0;`);
-  emit(`  const out = new Array(need);`);
+  emit(`  const out: boolean[] = new Array(need);`);
   emit(`  for (let i = b; i >= 0 && need > 0; i--) {`);
   emit(`    if (tkK[i] === 1 && tkT[i] === ${tOf('(')} && tkPd[i] === need) { out[need - 1] = (tkFl[i] & 8) !== 0; need--; }`);
   emit(`  }`);
@@ -673,9 +673,9 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// are splice-stable (every splice begins past its own anchor), so the baseline`);
   emit(`// stays exact; a backward jump (b < cached) falls back to the full scan.`);
   emit(`let parenCachePos = -1;`);
-  emit(`let parenCacheStack = [];`);
-  emit(`function reconstructParensCached(b) {`);
-  emit(`  let stack;`);
+  emit(`let parenCacheStack: boolean[] = [];`);
+  emit(`function reconstructParensCached(b: number) {`);
+  emit(`  let stack: boolean[];`);
   emit(`  if (b < 0) stack = [];`);
   emit(`  else if (parenCachePos >= 0 && parenCachePos <= b) {`);
   emit(`    stack = parenCacheStack;`);
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 68923f3..7bd889b 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -346,12 +346,19 @@ function analyze(grammar: CstGrammar) {
     typeKind, kwLitKind, puLitKind, classifyKey,
   };
 
+  // Column element types: Uint8 when the kind/literal id spaces fit a byte (the SoA
+  // token columns and their spare-buffer mirrors). Single-sourced here so every emit
+  // function — emitRuntime's `let tk* = new …`, emitDriver's `let alt* …` — agrees.
+  const tMaxT = Math.max(1, ...kwLitKind.values(), ...puLitKind.values());
+  const kArr = KIND_NAMED_FALLBACK <= 255 ? 'Uint8Array' : 'Uint16Array';
+  const tArr = tMaxT <= 255 ? 'Uint8Array' : 'Uint16Array';
+
   return {
     grammar, tokenNames, opTable, prefixOps, noUnaryLhsOps, postfixOpValues, requireTargetOps, binaryConnectors,
     prattRules, leftRecSet, ruleByName, prattClassified, leftRecClassified,
     maxBp, templateTokenName, templateTokenNames, firstTokenOf, altDeepFirst, altNullable,
     altSecond, ledMeta, contMeta, nudCap, nullableRules, firstSets, symtab, qualKeys,
-    exprFirst, exprNullable,
+    exprFirst, exprNullable, kArr, tArr,
   };
 }
 
@@ -865,7 +872,7 @@ class Emitter {
     if (!nm) {
       nm = `_q${this.memberFns.size}`;
       this.memberFns.set(fnKey, nm);
-      this.helperDefs.push(`function ${nm}(i) { return i >= cap || (${kArr}[tkK[i]] | ${tArr}[tkT[i]]) !== 0; }`);
+      this.helperDefs.push(`function ${nm}(i: number) { return i >= cap || (${kArr}[tkK[i]] | ${tArr}[tkT[i]]) !== 0; }`);
     }
     return nm;
   }
@@ -1052,7 +1059,7 @@ class Emitter {
     let nm = this.u8Consts.get(key);
     if (!nm) {
       if (!this.u8Emitted) {
-        this.helperDefs.push(`function u8(n, ones) { const a = new Uint8Array(n); for (let i = 0; i < ones.length; i++) a[ones[i]] = 1; return a; }`);
+        this.helperDefs.push(`function u8(n: number, ones: number[]) { const a = new Uint8Array(n); for (let i = 0; i < ones.length; i++) a[ones[i]] = 1; return a; }`);
         this.u8Emitted = true;
       }
       nm = `_qb${this.u8Consts.size}`;
@@ -1178,8 +1185,9 @@ export function emitParser(grammar: CstGrammar): string {
       }
       return arr;
     };
-    e.emit(`const OP_BY_T = ${J(byT(a.opTable))};`);
-    e.emit(`const PREFIX_BY_T = ${J(byT(a.prefixOps))};`);
+    e.emit(`type OpInfo = { lbp: number; rbp: number; assoc: string; position: string; requireTarget?: boolean };`);
+    e.emit(`const OP_BY_T: (OpInfo | null)[] = ${J(byT(a.opTable))};`);
+    e.emit(`const PREFIX_BY_T: (OpInfo | null)[] = ${J(byT(a.prefixOps))};`);
   }
   e.emit(`const noUnaryLhsOps = new Set(${J([...a.noUnaryLhsOps])});`);
   {
@@ -1213,7 +1221,7 @@ export function emitParser(grammar: CstGrammar): string {
   // `++x`) — head kid is an operator-tag leaf in prefixOps — or a postfix-update (`x++`) —
   // tail kid is an operator-tag leaf in postfixOpValues. A parenthesized cover / member /
   // element / call / non-null tail has no operator-tag leaf at head or tail, so it passes.
-  e.emit(`function _notTarget(lhs) {`);
+  e.emit(`function _notTarget(lhs: number) {`);
   e.emit(`  const n = rowCount[lhs]; if (n === 0) return false;`);
   e.emit(`  const cs = rowStart[lhs];`);
   e.emit(`  const _h = kids[cs];`);
@@ -1238,7 +1246,7 @@ export function emitParser(grammar: CstGrammar): string {
   // nodes). Drives the notLeftLeaf LED gate: a node whose head leaf text is in the arm's word set
   // (e.g. `void`/`null`/`this` for the type `.` qualification) is not a valid LEFT operand of the
   // arm. A childless ($missing recovery) node returns '' (matches no word → the arm is not blocked).
-  e.emit(`function _headLeafText(id) {`);
+  e.emit(`function _headLeafText(id: number) {`);
   e.emit(`  while (rowCount[id] > 0) {`);
   e.emit(`    const _hh = kids[rowStart[id]];`);
   e.emit(`    if (_hh >= 0) { id = _hh; continue; }`);
@@ -1311,13 +1319,10 @@ function resolveLexerImport(): string { return pathResolve(__dir, 'gen-lexer.ts'
 // ONLY change: where the interpreter called matchExpr(alt)/matchSeq(items) per arm,
 // these call the GENERATED per-arm matcher functions (installed via the rule fns).
 function emitRuntime(e: Emitter) {
-  // Column element type: Uint8 when the kind/literal id spaces fit a byte.
-  const st = e.a.symtab;
-  let tMax = 1;
-  for (const v of st.kwLitKind.values()) tMax = Math.max(tMax, v);
-  for (const v of st.puLitKind.values()) tMax = Math.max(tMax, v);
-  const K_ARR = st.KIND_NAMED_FALLBACK <= 255 ? 'Uint8Array' : 'Uint16Array';
-  const T_ARR = tMax <= 255 ? 'Uint8Array' : 'Uint16Array';
+  // Column element type: Uint8 when the kind/literal id spaces fit a byte (single-
+  // sourced in analyze() so emitDriver's spare-buffer mirrors pick the same width).
+  const K_ARR = e.a.kArr;
+  const T_ARR = e.a.tArr;
   e.emit(String.raw`
 // ── Token stream: struct-of-arrays (no per-token object, no eager text) ──
 // tkK = type kind, tkT = literal kind, tkOff/tkEnd = source span, tkFl = stamp bits
@@ -1345,14 +1350,14 @@ let tokN = 0;
 // joined form for the cold paths that need one (errors, debug views); batch parses
 // set it directly. Reads route through docChar/docText: flat fast path, piece
 // lookup (cursor-cached) otherwise.
-let docPieces = null;
-let docPieceOff = null;
+let docPieces: string[] | null = null;
+let docPieceOff: number[] | null = null;
 let docLen = 0;
-let docFlat = null;
+let docFlat: string | null = null;
 let docCur = 0;
-function docLocate(i) {
+function docLocate(i: number) {
   let k = docCur;
-  const po = docPieceOff;
+  const po = docPieceOff!;
   const n = po.length;
   if (k >= n || po[k] > i || (k + 1 < n && po[k + 1] <= i)) {
     let lo = 0, hi = n;
@@ -1362,57 +1367,57 @@ function docLocate(i) {
   }
   return k;
 }
-function docChar(i) {
+function docChar(i: number) {
   if (docFlat !== null) return docFlat.charCodeAt(i);
   const k = docLocate(i);
-  return docPieces[k].charCodeAt(i - docPieceOff[k]);
+  return docPieces![k].charCodeAt(i - docPieceOff![k]);
 }
-function docText(a, b) {
+function docText(a: number, b: number) {
   if (docFlat !== null) return docFlat.slice(a, b);
   if (b <= a) return '';
   let k = docLocate(a);
-  const first = docPieces[k];
-  const lo = a - docPieceOff[k];
-  if (b - docPieceOff[k] <= first.length) return first.slice(lo, b - docPieceOff[k]);
+  const first = docPieces![k];
+  const lo = a - docPieceOff![k];
+  if (b - docPieceOff![k] <= first.length) return first.slice(lo, b - docPieceOff![k]);
   let out = first.slice(lo);
   k++;
-  while (k < docPieces.length && docPieceOff[k] < b) {
-    const piece = docPieces[k];
-    const need = b - docPieceOff[k];
+  while (k < docPieces!.length && docPieceOff![k] < b) {
+    const piece = docPieces![k];
+    const need = b - docPieceOff![k];
     out += need >= piece.length ? piece : piece.slice(0, need);
     k++;
   }
   return out;
 }
 function flattenDoc() {
-  if (docFlat === null) docFlat = docPieces.join('');
+  if (docFlat === null) docFlat = docPieces!.join('');
   return docFlat;
 }
-function applyChange(start, end, text) {
+function applyChange(start: number, end: number, text: string) {
   const ks = docLocate(start);
   const ke = docLocate(end > start ? end - 1 : start);
-  const head = docPieces[ks].slice(0, start - docPieceOff[ks]);
-  const tailPiece = end > start ? docPieces[ke] : docPieces[ks];
-  const tailOff = end - docPieceOff[end > start ? ke : ks];
+  const head = docPieces![ks].slice(0, start - docPieceOff![ks]);
+  const tailPiece = end > start ? docPieces![ke] : docPieces![ks];
+  const tailOff = end - docPieceOff![end > start ? ke : ks];
   const tail = tailPiece.slice(tailOff);
   const repl = [];
   if (head.length > 0) repl.push(head);
   if (text.length > 0) repl.push(text);
   if (tail.length > 0) repl.push(tail);
-  docPieces.splice(ks, (end > start ? ke : ks) - ks + 1, ...repl);
+  docPieces!.splice(ks, (end > start ? ke : ks) - ks + 1, ...repl);
   // consolidate when fragmenting (amortized: a join every ≥256 edits)
-  if (docPieces.length > 256) {
-    docPieces = [docPieces.join('')];
+  if (docPieces!.length > 256) {
+    docPieces = [docPieces!.join('')];
   }
   docLen += text.length - (end - start);
   // rebuild offsets from the splice point (suffix offsets shifted anyway)
-  if (docPieceOff.length !== docPieces.length) docPieceOff.length = docPieces.length;
-  let off = ks > 0 && ks - 1 < docPieces.length ? docPieceOff[ks - 1] + docPieces[ks - 1].length : 0;
-  for (let k2 = ks > 0 ? ks : 0; k2 < docPieces.length; k2++) {
-    docPieceOff[k2] = off;
-    off += docPieces[k2].length;
+  if (docPieceOff!.length !== docPieces!.length) docPieceOff!.length = docPieces!.length;
+  let off = ks > 0 && ks - 1 < docPieces!.length ? docPieceOff![ks - 1] + docPieces![ks - 1].length : 0;
+  for (let k2 = ks > 0 ? ks : 0; k2 < docPieces!.length; k2++) {
+    docPieceOff![k2] = off;
+    off += docPieces![k2].length;
   }
-  if (docPieces.length === 1) docPieceOff[0] = 0;
+  if (docPieces!.length === 1) docPieceOff![0] = 0;
   docCur = 0;
   docFlat = null;
 }
@@ -1425,8 +1430,8 @@ function applyChange(start, end, text) {
 // parses are all-positive and the decode branch never fires.
 let srcLenP1 = 1;
 let negFrom = 0x7fffffff;
-function toff(i) { const v = tkOff[i]; return v < 0 ? v + srcLenP1 : v; }
-function tend(i) { const v = tkEnd[i]; return v < 0 ? v + srcLenP1 : v; }
+function toff(i: number) { const v = tkOff[i]; return v < 0 ? v + srcLenP1 : v; }
+function tend(i: number) { const v = tkEnd[i]; return v < 0 ? v + srcLenP1 : v; }
 ${e.soa ? '' : 'let tkText = [];   // fallback-lexer text column (synthetic tokens are not source spans)'}
 function growTok() {
   tkCap *= 2;
@@ -1483,8 +1488,8 @@ let rowNF = new Int32Array(8192).fill(0x7fffffff);
 // 'succeed' over broken text and wipe its diagnostics). Recovering passes adopt
 // these rows freely.
 let rowRM = new Uint8Array(8192);
-function ktr(p, k) { const v = kidTokRel[k]; return v < 0 ? v + rowTokLen[p] + 1 : v; }
-function kcr(p, k) { const v = kidRel[k]; return v < 0 ? v + rowLen[p] + 1 : v; }
+function ktr(p: number, k: number) { const v = kidTokRel[k]; return v < 0 ? v + rowTokLen[p] + 1 : v; }
+function kcr(p: number, k: number) { const v = kidRel[k]; return v < 0 ? v + rowLen[p] + 1 : v; }
 // transient BUILD coordinates (absolute), valid for rows completed in the current
 // parse and REFRESHED at memo-hit time for reused roots — parents read them at
 // finishNode to write the children's relative fields; never part of the green tree.
@@ -1531,24 +1536,24 @@ function growRows() {
   const ac = new Int32Array(rowCap); ac.set(absChar); absChar = ac;
   const at = new Int32Array(rowCap); at.set(absTok); absTok = at;
 }
-function growKids(n) {
+function growKids(n: number) {
   while (kidN + n > kidCap) kidCap *= 2;
   const k = new Int32Array(kidCap); k.set(kids.subarray(0, kidN)); kids = k;
   const r = new Int32Array(kidCap); r.set(kidRel.subarray(0, kidN)); kidRel = r;
   const t = new Int32Array(kidCap); t.set(kidTokRel.subarray(0, kidN)); kidTokRel = t;
 }
-function scPush(e) {
+function scPush(e: number) {
   if (scn === scCap) { scCap *= 2; const s = new Int32Array(scCap); s.set(sc); sc = s; }
   sc[scn++] = e;
 }
-function entryOff(e) { return e >= 0 ? absChar[e] : toff((~e) >>> 2); }
-function entryEnd(e) { return e >= 0 ? absChar[e] + rowLen[e] : tend((~e) >>> 2); }
-function entryTok(e) { return e >= 0 ? absTok[e] : (~e) >>> 2; }
-function entryTokEnd(e) { return e >= 0 ? absTok[e] + rowTokLen[e] : ((~e) >>> 2) + 1; }
+function entryOff(e: number) { return e >= 0 ? absChar[e] : toff((~e) >>> 2); }
+function entryEnd(e: number) { return e >= 0 ? absChar[e] + rowLen[e] : tend((~e) >>> 2); }
+function entryTok(e: number) { return e >= 0 ? absTok[e] : (~e) >>> 2; }
+function entryTokEnd(e: number) { return e >= 0 ? absTok[e] + rowTokLen[e] : ((~e) >>> 2) + 1; }
 // Complete a node whose children are scratch[mark..scn): copy them into kids, write
 // the row, truncate scratch, return the id. Empty children = a zero-width node
 // at the current token (the old offset() rule).
-function finishNode(rid, mark) {
+function finishNode(rid: number, mark: number) {
   const n = scn - mark;
   if (nodeN === rowCap) growRows();
   const id = nodeN++;
@@ -1607,7 +1612,7 @@ function finishNode(rid, mark) {
   return id;
 }
 // Complete a LED/continuation wrap: children = [lhs, ...scratch[mark..scn)].
-function finishWrap(rid, lhsId, mark) {
+function finishWrap(rid: number, lhsId: number, mark: number) {
   const n = scn - mark;
   if (nodeN === rowCap) growRows();
   const id = nodeN++;
@@ -1675,22 +1680,22 @@ let _prattCapped = false;
 // be identical between a fresh parse and an adoption re-run. frameMax <= maxPos
 // always, so the hot advance pays one extra compare only at frontier breaches.
 let frameMax = 0;
-let memoNode = [];
-let memoEnd = [];
-let memoExt = [];   // per-entry lookahead extent (see parseRuleEntry)
+let memoNode: number[][] = [];
+let memoEnd: number[][] = [];
+let memoExt: number[][] = [];   // per-entry lookahead extent (see parseRuleEntry)
 // GENERATION-STAMPED memo: the per-rule arrays persist across parses (allocating
 // fresh multi-million-slot arrays per edit cost ~30% of a large-file edit in GC
 // alone); an entry is live iff its stamp equals the current generation — bumping
 // memoGenCur IS the whole reset.
-let memoGen = [];
+let memoGen: Int32Array[] = [];
 let memoGenCur = 0;
 let parseLimit = -1;
 // cap = the exclusive lookahead bound: min(parseLimit-or-∞, tokN), maintained at the
 // parseLimit set/restore sites and the one token-stream mutation (the '>' splice).
 let cap = 0;
-let currentPrattContext = null;
-let suppressNext = null;
-let suppressCur = null;
+let currentPrattContext: string | null = null;
+let suppressNext: Set<string> | null = null;
+let suppressCur: Set<string> | null = null;
 
 function offset() {
   if (pos < cap) return toff(pos);
@@ -1703,7 +1708,7 @@ function offset() {
 // Keyword literal: the interpreter required tok.type !== '' && tokenNames.has(tok.type)
 // && tok.text === value. With interned kinds that is tok.k >= K_NAMED_MIN (a declared
 // token name; '' is PUNCT, templates are below NAMED_MIN) && tok.t === KW(value).
-function matchKwLit(kw, vs) {
+function matchKwLit(kw: number, vs?: number) {
   // A kw-range t can only come from a named token (template spans never intern to a
   // keyword), so the old k >= K_NAMED_MIN guard was redundant — one int compare.
   // vs (optional) = the call site's viable-set id, threaded into the $missing row.
@@ -1715,7 +1720,7 @@ function matchKwLit(kw, vs) {
 // Punct literal: tok.type === '' && tok.text === value, with the gt-splice fallback.
 // tok.t === PU(value) is the exact-text fast path; the splice handles a longer
 // gt-led token matching the gt key. value/pu are baked by the caller.
-function matchPuLit(pu, vs) {
+function matchPuLit(pu: number, vs?: number) {
   // A pu-range t can only come from a punct token, so the old k === K_PUNCT guard was
   // redundant — one int compare. The '>'-split lives only in matchPuLitGT ('>' sites).
   if (pos >= cap || tkT[pos] !== pu) return recovering ? missTok(pu, vs) : false;
@@ -1723,7 +1728,7 @@ function matchPuLit(pu, vs) {
   if (++pos > frameMax) { frameMax = pos; if (pos > maxPos) maxPos = pos; }
   return true;
 }
-function matchPuLitGT(pu, vs) {
+function matchPuLitGT(pu: number, vs?: number) {
   if (pos >= cap) return false;
   const off = toff(pos);
   if (tkT[pos] === pu) {
@@ -1783,7 +1788,7 @@ function matchPuLitGT(pu, vs) {
 }
 // Generic matchLiteral kept for any unspecialized site: classify value via the baked
 // tables (no per-call isKeywordLiteral / string compares) and delegate.
-function matchLiteral(value) {
+function matchLiteral(value: string) {
   const kw = LIT_KW.get(value);
   if (kw !== undefined) return matchKwLit(kw);
   if (value === '>') return matchPuLitGT(LIT_PU.get(value) ?? 0);
@@ -1793,7 +1798,7 @@ function matchLiteral(value) {
 // Match a token ref by its baked TYPE kind: tok.type === name  ⟺  tok.k === nameKind.
 // (No named-token kind equals K_NAMED_FALLBACK, so an unforeseen type never matches.)
 // The materialized tokenType is type-derived (kind 0) — name needs no baking here.
-function matchTokK(nameKind) {
+function matchTokK(nameKind: number) {
   if (pos >= cap || tkK[pos] !== nameKind) return recovering ? missTok(-nameKind) : false;
   scPush(~(pos << 2));
   if (++pos > frameMax) { frameMax = pos; if (pos > maxPos) maxPos = pos; }
@@ -1858,7 +1863,7 @@ function emitRuleFns(e: Emitter, a: ReturnType<typeof analyze>) {
     else emitNonRecRule(e, a, rule, spine.has(rule.name) && !a.prattRules.has(rule.name) && !a.leftRecSet.has(rule.name));
   }
   // Dispatch table (string rule name → fn), for parseTemplateExpr's dynamic interp rule.
-  e.emit(`const RULES = {`);
+  e.emit(`const RULES: Record<string, () => boolean> = {`);
   for (const rule of a.grammar.rules) e.emit(`  ${J(rule.name)}: ${ruleFn(rule.name)},`);
   e.emit(`};`);
 
@@ -1954,7 +1959,7 @@ function emitNonRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDec
   // pratt/left-rec rules.
   if (memoized) {
     e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${rid}, ${J(rule.name)}, ${ruleFn}_core); }`);
-    e.emit(`function ${ruleFn}_core(_minBp) {`);
+    e.emit(`function ${ruleFn}_core(_minBp: number) {`);
   } else {
     e.emit(`function ${ruleFn}() {`);
   }
@@ -2000,7 +2005,7 @@ function emitLeftRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDe
   contNotLeftLeaf.forEach((words, i) => {
     if (words) e.emit(`const _NLLC_${sn}_${i} = new Set(${J(words)});`);
   });
-  e.emit(`function ${ruleFn}_lr(_minBp) {`);
+  e.emit(`function ${ruleFn}_lr(_minBp: number) {`);
   e.emit(`  const saved = pos; const mark = scn;`);
   e.emit(`  let node = -1; let bestAtomPos = saved;`);
   const atomDispatch = e.altMaskDispatch(atoms, '_am');
@@ -2065,7 +2070,7 @@ function emitPrattRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl
   meta.notLeftLeaf.forEach((words, i) => {
     if (words) e.emit(`const _NLL_${sn}_${i} = new Set(${J(words)});`);
   });
-  e.emit(`function ${ruleFn}_pratt(minBp) {`);
+  e.emit(`function ${ruleFn}_pratt(minBp: number) {`);
   e.emit(`  const saved = pos; const mark = scn;`);
   e.emit(`  let lhs = -1; let bestNudPos = saved;`);
   // `capped` becomes true iff the winning NUD is a capped (assignment-level) expression —
@@ -2322,7 +2327,7 @@ function emitDriver(e: Emitter, a: ReturnType<typeof analyze>, entry: string) {
 // and SECOND-token reads past it. Left-to-right parsing keeps the watermark near the
 // current frontier, so the value is tight on the dominant flow and only OVER-
 // invalidates (soundly) near big-backtrack clusters.
-function parseRuleEntry(idx, rid, name, core) {
+function parseRuleEntry(idx: number, rid: number, name: string, core: (minBp: number) => number) {
   const mySup = suppressNext;
   suppressNext = null;
   const capped = parseLimit >= 0;
@@ -2499,14 +2504,14 @@ function parseRuleEntry(idx, rid, name, core) {
 }
 
 // Token text at an arbitrary index (cold paths: errors, the tokenAt debug view).
-function tokTextAt(i) {
+function tokTextAt(i: number) {
   return ${e.soa ? 'docText(toff(i), tend(i))' : 'tkText[i]'};
 }
 // The k → type-name inverse, for reconstructing a token object (tokenAt).
-const K_NAMES = [];
+const K_NAMES: string[] = [];
 for (const [n, k] of TYPE_KIND) K_NAMES[k] = n;
 // A per-token object view over the columns (gates / debugging — the parser never builds these).
-export function tokenAt(i) {
+export function tokenAt(i: number) {
   return {
     type: K_NAMES[tkK[i]] ?? '',
     text: tokTextAt(i),
@@ -2524,7 +2529,7 @@ export function tokenAt(i) {
 // The arena IS the tree: parse() returns the root node id and consumers traverse
 // via visit()/the accessors — nothing is materialized on the parse path. All views
 // are valid until the NEXT parse (the columns are reused).
-function leafTokenType(entry, tokBase) {
+function leafTokenType(entry: number, tokBase: number) {
   const tok = tokBase + ((~entry) >>> 2);
   const kind = (~entry) & 3;
   return kind === 1 ? '$keyword'
@@ -2539,36 +2544,36 @@ function leafTokenType(entry, tokBase) {
 // — the node's own absolute start coordinates. Leaf spans come from the token
 // columns at tokBase + the entry's node-relative token index.
 export const tree = {
-  ruleNameOf: (id) => RULE_DISPLAY[rowRule[id]],
-  ruleIdOf: (id) => rowRule[id],
-  lenOf: (id) => rowLen[id],
-  tokLenOf: (id) => rowTokLen[id],
+  ruleNameOf: (id: number) => RULE_DISPLAY[rowRule[id]],
+  ruleIdOf: (id: number) => rowRule[id],
+  lenOf: (id: number) => rowLen[id],
+  tokLenOf: (id: number) => rowTokLen[id],
   // a node CHILD's relative coordinates live on the parent edge (kids-parallel)
-  childRelAt: (id, i) => kcr(id, rowStart[id] + i),
-  childTokRelAt: (id, i) => ktr(id, rowStart[id] + i),
+  childRelAt: (id: number, i: number) => kcr(id, rowStart[id] + i),
+  childTokRelAt: (id: number, i: number) => ktr(id, rowStart[id] + i),
   // base-threaded spans: nodes from their bases, leaves from the token columns
-  offsetOf: (entry, charBase, tokBase) => entry >= 0 ? charBase : toff(tokBase + ((~entry) >>> 2)),
-  endOf: (entry, charBase, tokBase) => entry >= 0 ? charBase + rowLen[entry] : tend(tokBase + ((~entry) >>> 2)),
-  childCount: (id) => rowCount[id],
-  childAt: (id, i) => kids[rowStart[id] + i],
+  offsetOf: (entry: number, charBase: number, tokBase: number) => entry >= 0 ? charBase : toff(tokBase + ((~entry) >>> 2)),
+  endOf: (entry: number, charBase: number, tokBase: number) => entry >= 0 ? charBase + rowLen[entry] : tend(tokBase + ((~entry) >>> 2)),
+  childCount: (id: number) => rowCount[id],
+  childAt: (id: number, i: number) => kids[rowStart[id] + i],
   // Bulk child load into a caller-owned array; returns the count. One call per node
   // instead of childCount+childAt-per-probe (the generated matchers' hot path).
-  childrenInto: (id, out2) => {
+  childrenInto: (id: number, out2: number[]) => {
     const n2 = rowCount[id];
     const cs2 = rowStart[id];
     for (let i2 = 0; i2 < n2; i2++) out2[i2] = kids[cs2 + i2];
     return n2;
   },
-  isLeaf: (entry) => entry < 0,
-  leafToken: (entry, tokBase) => tokBase + ((~entry) >>> 2),
+  isLeaf: (entry: number) => entry < 0,
+  leafToken: (entry: number, tokBase: number) => tokBase + ((~entry) >>> 2),
   leafTokenType,
   // Int-world leaf accessors (the match-path encoding): kind bits — 0 type-derived,
   // 1 '$keyword', 2 '$operator' — and the token's TYPE kind int (1 = punctuation).
-  leafKindOf: (entry) => (~entry) & 3,
-  leafTokKindOf: (entry, tokBase) => tkK[tokBase + ((~entry) >>> 2)],
-  leafOffsetOf: (entry, tokBase) => toff(tokBase + ((~entry) >>> 2)),
-  leafEndOf: (entry, tokBase) => tend(tokBase + ((~entry) >>> 2)),
-  textOf: (entry, source, charBase, tokBase) => entry >= 0
+  leafKindOf: (entry: number) => (~entry) & 3,
+  leafTokKindOf: (entry: number, tokBase: number) => tkK[tokBase + ((~entry) >>> 2)],
+  leafOffsetOf: (entry: number, tokBase: number) => toff(tokBase + ((~entry) >>> 2)),
+  leafEndOf: (entry: number, tokBase: number) => tend(tokBase + ((~entry) >>> 2)),
+  textOf: (entry: number, source: string, charBase: number, tokBase: number) => entry >= 0
     ? source.slice(charBase, charBase + rowLen[entry])
     : source.slice(toff(tokBase + ((~entry) >>> 2)), tend(tokBase + ((~entry) >>> 2))),
 };
@@ -2579,22 +2584,23 @@ export const tree = {
 // Depth-first traversal threading the RED coordinates: enter/leave receive the
 // node's absolute (charBase, tokBase); leaf receives its absolute token index.
 // Call with the root only — the bases default from the root's rel fields.
-function visitCore(entry, fns, charBase, tokBase) {
+type _VisitFns = { enter?: (id: number, charBase: number, tokBase: number) => boolean | void; leave?: (id: number, charBase: number, tokBase: number) => void; leaf?: (entry: number, tok: number) => void };
+function visitCore(entry: number, fns: _VisitFns, charBase?: number, tokBase?: number) {
   if (charBase === undefined) { charBase = rootCharBase; tokBase = rootTokBase; }
-  if (entry < 0) { if (fns.leaf) fns.leaf(entry, tokBase + ((~entry) >>> 2)); return; }
-  if (fns.enter && fns.enter(entry, charBase, tokBase) === false) return;
+  if (entry < 0) { if (fns.leaf) fns.leaf(entry, tokBase! + ((~entry) >>> 2)); return; }
+  if (fns.enter && fns.enter(entry, charBase, tokBase!) === false) return;
   const n = rowCount[entry];
   const cs = rowStart[entry];
   for (let i = 0; i < n; i++) {
     const e = kids[cs + i];
-    if (e < 0) { if (fns.leaf) fns.leaf(e, tokBase + ((~e) >>> 2)); }
-    else visitCore(e, fns, charBase + kcr(entry, cs + i), tokBase + ktr(entry, cs + i));
+    if (e < 0) { if (fns.leaf) fns.leaf(e, tokBase! + ((~e) >>> 2)); }
+    else visitCore(e, fns, charBase + kcr(entry, cs + i), tokBase! + ktr(entry, cs + i));
   }
-  if (fns.leave) fns.leave(entry, charBase, tokBase);
+  if (fns.leave) fns.leave(entry, charBase, tokBase!);
 }
 
 // Parse to the ARENA: returns the root node id.
-function lexInto(source) {
+function lexInto(source: string) {
 ${e.soa ? `  tokenize(source);
   docEmptyPops = lexEmptyPops.slice();` : String.raw`  docPieces = [source]; docPieceOff = [0]; docLen = source.length; docFlat = source; docCur = 0;
   const _toks = tokenize(source);
@@ -2611,14 +2617,14 @@ ${e.soa ? `  tokenize(source);
   tokN = _n;`}
 }
 
-function farthest(errPos) {
+function farthest(errPos: number) {
   if (maxPos <= errPos || maxPos >= tokN) return '';
   return ' [farthest: offset ' + toff(maxPos) + " near '" + tokTextAt(maxPos).slice(0, 20) + "']";
 }
 
 // Run the entry rule over the CURRENT token stream (shared by parse / parseEdited —
 // everything per-parse EXCEPT the memo and the arena cursor, which parseEdited carries).
-function runParse(entryRule) {
+function runParse(entryRule?: string) {
   pos = 0;
   maxPos = 0;
   frameMax = 0;
@@ -2691,15 +2697,15 @@ let adoptDmgStart = 0;       // damage window in OLD token coords: [adoptDmgStar
 let adoptDmgOldEnd = 0;
 let adoptDelta = 0;          // new-minus-old token delta past the damage
 // cached descent path (top-down): ids + their absolute old token bases
-let adoptPath = [];
-let adoptBase = [];
+let adoptPath: number[] = [];
+let adoptBase: number[] = [];
 // run-extension state: where the last single adoption sat in the old tree (its
 // parent row / kid index / parent token base), published by adoptSeek, plus the
 // (pos, rid, generation) signature a repetition must present to consume it.
 let adoptHitP = -1, adoptHitKid = 0, adoptHitBase = 0;
 let adoptRunPos = -1, adoptRunRid = -1, adoptRunGen = -1;
 let adoptRunP = -1, adoptRunKid = 0, adoptRunOq = 0, adoptRunBase = 0;
-function adoptSeek(q, rid) {
+function adoptSeek(q: number, rid: number) {
   // reuse the cached path while it still CONTAINS q (strictly inside, not at start)
   let depth = 0;
   while (depth < adoptPath.length) {
@@ -2710,7 +2716,7 @@ function adoptSeek(q, rid) {
   }
   adoptPath.length = depth;
   adoptBase.length = depth;
-  let id, base;
+  let id: number, base: number;
   if (depth === 0) {
     if (q < adoptRootTok || q >= adoptRootTok + rowTokLen[adoptRoot]) return -1;
     id = adoptRoot; base = adoptRootTok;
@@ -2779,11 +2785,11 @@ let recovering = false;
 //     adoption reused this pass (a recovering pass adopts error regions wholesale,
 //     so per-pass collection alone would silently drop their diagnostics). docPar
 //     keeps the formatted result for the paths that do not re-parse (surgery).
-let docDiags = [];
-let docLex = [];
-let docPar = [];
+let docDiags: Diag[] = [];
+let docLex: LexDiag[] = [];
+let docPar: Diag[] = [];
 
-function lexMsg(g) {
+function lexMsg(g: LexDiag) {
   if (g.kind === 0) return "Unexpected character at offset " + g.offset + ": '" + g.ch + "'";
   if (g.kind === 1) return 'Invalid escape sequence in template at offset ' + g.offset;
   if (g.kind === 2) return 'Unterminated template literal at offset ' + g.offset;
@@ -2801,7 +2807,7 @@ function lexMsg(g) {
 // past the last bar aborts the attempt, appends the new farthest-fail bar, and the
 // pass re-runs (adoption keeps re-runs cheap). Bars are text-determined, so fresh
 // and incremental recovering parses are byte-identical by construction.
-let recoverBars = [];
+let recoverBars: number[] = [];
 // (rule, pos) frames currently ON THE STACK during a recovering run, keyed to
 // their entry SERIAL. Token synthesis makes zero-width matches possible, so a rule
 // can re-enter itself at the SAME position through a synthesized leading token —
@@ -2828,7 +2834,7 @@ let cycleMinSerial = 0x7fffffff;
 // non-consuming probes, so the frame behaved strictly: a pure function of the
 // window text, stable under any bar list that stays out of the window.
 let memoRecFloor = 0x7fffffff;
-function barFreeWin(s, m) {
+function barFreeWin(s: number, m: number) {
   const hi = m + 2;
   for (let i = 0; i < recoverBars.length; i++) {
     const b = recoverBars[i];
@@ -2855,7 +2861,7 @@ let probing = 0;
 // group is allowed only once the group consumed past this (committed) — failures
 // of an uncommitted probe are ordinary "the optional thing isn't there".
 let probeBase = -1;
-function missAt(p2) {
+function missAt(p2: number) {
   for (let i = 0; i < recoverBars.length; i++) {
     const b = recoverBars[i];
     if (b > p2 + 2) break;
@@ -2863,7 +2869,7 @@ function missAt(p2) {
   }
   return false;
 }
-function missTok(t, vs) {
+function missTok(t: number, vs?: number) {
   if (probing !== 0 || pos <= probeBase || recoverFree || !missAt(pos)) return false;
   const id = finishNode(RID_MISSING, scn);
   rowStart[id] = vs ? t | (vs << 21) : t;
@@ -2881,7 +2887,7 @@ function missTok(t, vs) {
 // row carrying the rule identity. Same purity rules as missTok. Returns the node
 // id (not pushed — call sites differ) or -1.
 const RULE_MISS_BASE = 1 << 20;
-function missRule(rid) {
+function missRule(rid: number) {
   if (probing !== 0 || pos <= probeBase || recoverFree || !missAt(pos)) return -1;
   const id = finishNode(RID_MISSING, scn);
   rowStart[id] = RULE_MISS_BASE + rid;
@@ -2897,11 +2903,11 @@ function missRule(rid) {
 // Decode a $missing row's packed expected identity (see missTok): bits 21+ carry
 // the call site's viable-set id; bit 20 marks a missing nonterminal; else a plain
 // literal int (>0) or a named token kind (<0).
-function missLit(v) {
+function missLit(v: number) {
   if (v >= 1 << 21) return v & 0xFFFFF;
   return v > 0 && v < RULE_MISS_BASE ? v : 0;
 }
-function missEntry(v, kb) {
+function missEntry(v: number, kb: number): Diag {
   let message;
   if (v >= 1 << 21) message = 'expected ' + VSETS[v >>> 21];
   else if (v >= RULE_MISS_BASE) message = 'expected ' + RULE_DISPLAY[v - RULE_MISS_BASE];
@@ -2909,7 +2915,7 @@ function missEntry(v, kb) {
   else message = "expected '" + (K_NAMES[-v] ?? '?') + "'";
   return { offset: kb, end: kb, message };
 }
-function collectErrRows(id, charBase, tokBase) {
+function collectErrRows(id: number, charBase: number, tokBase: number) {
   if (rowRule[id] === RID_MISSING) {
     docPar.push(missEntry(rowStart[id], charBase));
     return;
@@ -2990,16 +2996,16 @@ function rebuildDiagView() {
 // stray closer beyond balance. The shifted lexer resync's dominant q=0 case needs
 // exactly one fact about the whole old suffix ("no pop-on-empty beyond the
 // candidate"), which this list answers O(1) instead of an O(suffix) min-build.
-let docEmptyPops = [];
+let docEmptyPops: number[] = [];
 // Bar list that built lastRoot (that run's token coords); null = free-fire built
 // (free-fire decisions are not bar-pure — such a tree is never adoptable while
 // recovering). Strict trees carry [].
-let lastBars = [];
+let lastBars: number[] | null = [];
 // A row replays identically in a recovering run iff its window sees the SAME bars
 // (shifted) the build run saw there — every recovery decision (hook arming,
 // missTok/missRule, the cycle sentinel) is position-pure, so window text + window
 // bars determine the frame's behavior completely.
-function barsWindowEq(s, q, ext) {
+function barsWindowEq(s: number, q: number, ext: number) {
   if (lastBars === null) return false;
   const hiN = s + ext + 2, hiO = q + ext + 2;
   let i = 0, j = 0;
@@ -3013,7 +3019,7 @@ function barsWindowEq(s, q, ext) {
     i++; j++;
   }
 }
-function recoverArmed(from, reach) {
+function recoverArmed(from: number, reach: number) {
   // armed iff THE FAILING ELEMENT is stuck at a bar: it starts at/before the bar
   // and its OWN farthest probe sits ON it (+2 read slack). The reach is the
   // element's frame-local watermark, NOT the global maxPos — a global frontier
@@ -3028,7 +3034,7 @@ function recoverArmed(from, reach) {
   }
   return false;
 }
-function recoverSkip(canStart, closerT, from0, reach) {
+function recoverSkip(canStart: ((p: number) => boolean) | null, closerT: number, from0: number, reach: number) {
   if (!recoverArmed(from0, reach)) return false;
   if (pos >= cap) return false;
   if (closerT >= 0 && tkK[pos] === K_PUNCT && tkT[pos] === closerT) return false;
@@ -3055,7 +3061,7 @@ function recoverSkip(canStart, closerT, from0, reach) {
 // proves the loop's FIRST-set guard true at its position (its first token starts
 // the rule), and the loop's own continuation checks run again after the run
 // breaks. Members get no memo entries — a backtracking re-probe just re-adopts.
-function runExtend(rid) {
+function runExtend(rid: number) {
   if (rid !== adoptRunRid || memoGenCur !== adoptRunGen) { adoptRunPos = -1; return; }
   adoptRunPos = -1;
   const P = adoptRunP;
@@ -3100,10 +3106,10 @@ function runExtend(rid) {
 // re-parse. Prefix kids are kept under the same watermark rule single adoption
 // uses, made transitive by rowKC: each kid's probe watermark stays at/below the
 // next kid's start, so checking the LAST kept kid bounds them all.
-let surgX = [], surgBase = [], surgA = [], surgB = [];
+let surgX: number[] = [], surgBase: number[] = [], surgA: number[] = [], surgB: number[] = [];
 // composed change envelope handed from the text-application step to the window relex
 let editDmgS = 0, editDmgE = 0;
-function rowKCof(id) {
+function rowKCof(id: number) {
   const c = rowKC[id];
   if (c !== 0) return c;
   const cs = rowStart[id], n = rowCount[id];
@@ -3117,7 +3123,7 @@ function rowKCof(id) {
   rowKC[id] = ok;
   return ok;
 }
-function trySurgery(dmgA, dmgB, tokD, chrD) {
+function trySurgery(dmgA: number, dmgB: number, tokD: number, chrD: number) {
   if (adoptRoot < 0) return -1;
   if (rowRule[adoptRoot] >= RID_ERROR) return -1;
   // A recovery-made tree (rowRM root) CAN take a strict splice when the edit
@@ -3240,8 +3246,8 @@ function trySurgery(dmgA, dmgB, tokD, chrD) {
   if (recTree) {
     // the strict re-parse stands for the fresh recovering parse of this span only
     // if no bar window touches anything it read (probes included)
-    for (let i = 0; i < lastBars.length; i++) {
-      const b = lastBars[i];
+    for (let i = 0; i < lastBars!.length; i++) {
+      const b = lastBars![i];
       const bn = b < dmgA ? b : b + tokD;
       if (bn + 2 >= s0 && bn <= maxPos + 2) return -1;
     }
@@ -3458,7 +3464,7 @@ function trySurgery(dmgA, dmgB, tokD, chrD) {
 
 // The spare token-column buffer set (parseEdited ping-pongs between the live set and
 // this one, so steady-state edits never allocate columns).
-let altK = null, altT = null, altOff = null, altEnd = null, altFl = null, altDp = null, altPd = null;
+let altK: typeof tkK | null = null, altT: typeof tkT | null = null, altOff: typeof tkOff | null = null, altEnd: typeof tkEnd | null = null, altFl: typeof tkFl | null = null, altDp: typeof tkDp | null = null, altPd: typeof tkPd | null = null;
 let altCap = 0;
 let altN = 0;   // old-stream token count while a window lex runs (lexCore's resync bound)
 
@@ -3469,9 +3475,28 @@ let altN = 0;   // old-stream token count while a window lex runs (lexCore's res
 // variables are the truth, and is written back only when another doc activates.
 // Per-PARSE transients (pos/maxPos/scratch/adopt*/surg*) reset on every entry and
 // are shared safely.
-function makeDoc() {
+type Diag = { offset: number; end: number; message: string; related?: { offset: number; end: number; message: string } };
+type LexDiag = { offset: number; end: number; kind: number; ch: string };
+type Edit = { start: number; end: number; text: string };
+type Doc = {
+  tkK: typeof tkK; tkT: typeof tkT; tkOff: typeof tkOff; tkEnd: typeof tkEnd; tkFl: typeof tkFl; tkDp: typeof tkDp; tkPd: typeof tkPd;
+  tkCap: number; tokN: number; srcLenP1: number; negFrom: number;
+  rowRule: typeof rowRule; rowLen: typeof rowLen; rowTokLen: typeof rowTokLen; rowStart: typeof rowStart; rowCount: typeof rowCount; rowExt: typeof rowExt;
+  rowOK: typeof rowOK; rowKC: typeof rowKC; rowNF: typeof rowNF; rowRM: typeof rowRM; absChar: typeof absChar; absTok: typeof absTok;
+  rowCap: number; nodeN: number;
+  kids: typeof kids; kidRel: typeof kidRel; kidTokRel: typeof kidTokRel; kidCap: number; kidN: number;
+  memoNode: number[][]; memoEnd: number[][]; memoExt: number[][]; memoGen: Int32Array[]; memoGenCur: number;
+  docDiags: Diag[]; docLex: LexDiag[]; docPar: Diag[];
+  docPieces: string[] | null; docPieceOff: number[] | null; docLen: number; docFlat: string | null; docCur: number;
+  rootCharBase: number; rootTokBase: number; lastRoot: number; lastRootTok: number; lastBars: number[] | null; docEmptyPops: number[];
+${e.soa ? '  parenCachePos: number; parenCacheStack: boolean[];' : ''}
+  altK: typeof tkK | null; altT: typeof tkT | null; altOff: typeof tkOff | null; altEnd: typeof tkEnd | null; altFl: typeof tkFl | null; altDp: typeof tkDp | null; altPd: typeof tkPd | null;
+  altCap: number; altN: number;
+};
+type Handle = { d: Doc; gen: number; root: number; errors: Diag[] };
+function makeDoc(): Doc {
   return {
-    tkK: new tkK.constructor(4096), tkT: new tkT.constructor(4096),
+    tkK: new (tkK.constructor as any)(4096), tkT: new (tkT.constructor as any)(4096),
     tkOff: new Int32Array(4096), tkEnd: new Int32Array(4096), tkFl: new Uint8Array(4096),
     tkDp: new Uint8Array(4096), tkPd: new Uint16Array(4096),
     tkCap: 4096, tokN: 0, srcLenP1: 1, negFrom: 0x7fffffff,
@@ -3487,13 +3512,13 @@ function makeDoc() {
     memoNode: [], memoEnd: [], memoExt: [], memoGen: [], memoGenCur: 0,
     docDiags: [], docLex: [], docPar: [],
     docPieces: null, docPieceOff: null, docLen: 0, docFlat: null, docCur: 0,
-    rootCharBase: 0, rootTokBase: 0, lastRoot: -1, lastRootTok: 0, docEmptyPops: [],
+    rootCharBase: 0, rootTokBase: 0, lastRoot: -1, lastRootTok: 0, lastBars: null, docEmptyPops: [],
 ${e.soa ? '    parenCachePos: -1, parenCacheStack: [],' : ''}
     altK: null, altT: null, altOff: null, altEnd: null, altFl: null, altDp: null, altPd: null,
     altCap: 0, altN: 0,
   };
 }
-function saveDoc(d) {
+function saveDoc(d: Doc) {
   d.tkK = tkK; d.tkT = tkT; d.tkOff = tkOff; d.tkEnd = tkEnd; d.tkFl = tkFl;
   d.tkDp = tkDp; d.tkPd = tkPd; d.tkCap = tkCap; d.tokN = tokN;
   d.srcLenP1 = srcLenP1; d.negFrom = negFrom;
@@ -3511,7 +3536,7 @@ ${e.soa ? '  d.parenCachePos = parenCachePos; d.parenCacheStack = parenCacheStac
   d.altK = altK; d.altT = altT; d.altOff = altOff; d.altEnd = altEnd; d.altFl = altFl;
   d.altDp = altDp; d.altPd = altPd; d.altCap = altCap; d.altN = altN;
 }
-function loadDoc(d) {
+function loadDoc(d: Doc) {
   tkK = d.tkK; tkT = d.tkT; tkOff = d.tkOff; tkEnd = d.tkEnd; tkFl = d.tkFl;
   tkDp = d.tkDp; tkPd = d.tkPd; tkCap = d.tkCap; tokN = d.tokN;
   srcLenP1 = d.srcLenP1; negFrom = d.negFrom;
@@ -3532,26 +3557,26 @@ ${e.soa ? '  parenCachePos = d.parenCachePos; parenCacheStack = d.parenCacheStac
 const docDefault = makeDoc();
 let curDoc = docDefault;
 loadDoc(docDefault);
-function activate(d) {
+function activate(d: Doc) {
   if (d === curDoc) return;
   saveDoc(curDoc);
   loadDoc(d);
   curDoc = d;
 }
 function swapBuffers() {
-  let x;
-  x = tkK; tkK = altK; altK = x;
-  x = tkT; tkT = altT; altT = x;
-  x = tkOff; tkOff = altOff; altOff = x;
-  x = tkEnd; tkEnd = altEnd; altEnd = x;
-  x = tkFl; tkFl = altFl; altFl = x;
-  x = tkDp; tkDp = altDp; altDp = x;
-  x = tkPd; tkPd = altPd; altPd = x;
+  let x: any;
+  x = tkK; tkK = altK!; altK = x;
+  x = tkT; tkT = altT!; altT = x;
+  x = tkOff; tkOff = altOff!; altOff = x;
+  x = tkEnd; tkEnd = altEnd!; altEnd = x;
+  x = tkFl; tkFl = altFl!; altFl = x;
+  x = tkDp; tkDp = altDp!; altDp = x;
+  x = tkPd; tkPd = altPd!; altPd = x;
   x = tkCap; tkCap = altCap; altCap = x;
 }
 ${e.soa ? '' : 'let altText = [];'}
 
-function parseCore(source, entryRule) {
+function parseCore(source: string, entryRule?: string) {
   adoptRoot = -1;
   adoptRunPos = -1;
   lexInto(source);
@@ -3578,7 +3603,7 @@ function parseCore(source, entryRule) {
 // Parser-diag shift for the LOCALLY-strict paths (surgery / strict success): the
 // LEXER list is maintained by the window block (which already dropped the re-lexed
 // range and shifted the suffix — shifting here would double-apply the delta).
-function shiftDiags(a, b, delta) {
+function shiftDiags(a: number, b: number, delta: number) {
   let w = 0;
   for (let i = 0; i < docPar.length; i++) {
     const g = docPar[i];
@@ -3617,7 +3642,7 @@ function shiftDiags(a, b, delta) {
 // Last-resort totality net: a layer without recovery support threw — the handle
 // API still never crashes. Zero-width $error root + the thrown message as the
 // diagnostic; the next successful parse/edit resumes normal service.
-function totalNet(e) {
+function totalNet(e: any) {
   // the message lives in the SOURCE layer (docLex kind 4) — a later settle rebuilds
   // the view from the sources, and a view-only push would be wiped by it
   docLex.length = 0;
@@ -3633,12 +3658,12 @@ function totalNet(e) {
   rootTokBase = 0;
   return root;
 }
-function apiMisuse(msg) {
-  const e = new Error(msg);
+function apiMisuse(msg: string) {
+  const e: any = new Error(msg);
   e.apiMisuse = true;
   return e;
 }
-function editCore(entryRule, edits) {
+function editCore(entryRule: string | undefined, edits?: Edit[]) {
   if (edits === undefined || edits.length === 0) {
     throw apiMisuse('edit() requires the changes: [{ start, end, text }] (LSP-style - each edit in the coordinates of the document AFTER the preceding edits in the array)');
   }
@@ -3711,7 +3736,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   }
   // Lex the window into the spare buffers (the old stream stays live for resync).
   if (altK === null || altCap < tkCap) {
-    altK = new tkK.constructor(tkCap); altT = new tkT.constructor(tkCap);
+    altK = new (tkK.constructor as any)(tkCap); altT = new (tkT.constructor as any)(tkCap);
     altOff = new Int32Array(tkCap); altEnd = new Int32Array(tkCap); altFl = new Uint8Array(tkCap);
     altDp = new Uint8Array(tkCap); altPd = new Uint16Array(tkCap);
     altCap = tkCap;
@@ -3720,7 +3745,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   altSuffMin = null;          // the old-suffix min-depth cache follows the alt stream
   swapBuffers();              // live = scratch, alt = OLD stream
   tokN = 0;
-  const startOff = B >= 0 ? (altEnd[B] < 0 ? altEnd[B] + srcLenP1 : altEnd[B]) : 0;
+  const startOff = B >= 0 ? (altEnd![B] < 0 ? altEnd![B] + srcLenP1 : altEnd![B]) : 0;
   // Window-materialized relex: lexCore reads a SMALL flat slice of the pieces with
   // an absolute bias; -2 = ran off the window end before resyncing — re-materialize
   // a larger window and retry (the common case fits the first one).
@@ -3736,7 +3761,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
       docLex.length = preLexN;     // an aborted attempt re-lexes: drop its pushes
       tokN = 0;
       try {
-        R0 = lexCore(windowStr, 0, B >= 0 ? altK[B] : -1, B >= 0 ? altT[B] : 0, r0, ceNew, charDelta, cs, initParens.slice(), startOff, wHi < docLen);
+        R0 = lexCore(windowStr, 0, B >= 0 ? altK![B] : -1, B >= 0 ? altT![B] : 0, r0, ceNew, charDelta, cs, initParens.slice(), startOff, wHi < docLen);
       } catch (e2) {
         if (e2 !== LEX_RETRY) {
           if (recovering) throw e2;        // a recovering lexer never throws — a bug
@@ -3796,8 +3821,8 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // p is real damage (compared BEFORE the splice clobbers the old slots).
   let p = B + 1;
   { let i = 0;
-    while (i < W && p < R && altK[i] === tkK[p] && altT[i] === tkT[p] && altOff[i] === tkOff[p]
-        && altEnd[i] === tkEnd[p] && altFl[i] === tkFl[p]) { i++; p++; }
+    while (i < W && p < R && altK![i] === tkK[p] && altT![i] === tkT[p] && altOff![i] === tkOff[p]
+        && altEnd![i] === tkEnd[p] && altFl![i] === tkFl[p]) { i++; p++; }
   }
   const dOldEnd = R;
   const tokenDelta = (B + 1 + W) - R;
@@ -3810,9 +3835,9 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
     tkFl.copyWithin(B + 1 + W, R, oN); tkDp.copyWithin(B + 1 + W, R, oN); tkPd.copyWithin(B + 1 + W, R, oN);
   }
   if (W > 0) {
-    tkK.set(altK.subarray(0, W), B + 1); tkT.set(altT.subarray(0, W), B + 1);
-    tkOff.set(altOff.subarray(0, W), B + 1); tkEnd.set(altEnd.subarray(0, W), B + 1);
-    tkFl.set(altFl.subarray(0, W), B + 1); tkDp.set(altDp.subarray(0, W), B + 1); tkPd.set(altPd.subarray(0, W), B + 1);
+    tkK.set(altK!.subarray(0, W), B + 1); tkT.set(altT!.subarray(0, W), B + 1);
+    tkOff.set(altOff!.subarray(0, W), B + 1); tkEnd.set(altEnd!.subarray(0, W), B + 1);
+    tkFl.set(altFl!.subarray(0, W), B + 1); tkDp.set(altDp!.subarray(0, W), B + 1); tkPd.set(altPd!.subarray(0, W), B + 1);
   }
   negFrom = B + 1 + W;
   srcLenP1 = newLen + 1;
@@ -3837,7 +3862,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   const oK = tkK, oT = tkT, oOff = tkOff, oEnd = tkEnd, oFl = tkFl, oN = tokN;
   const oText = tkText;
   if (altK === null || altK.length !== tkCap) {
-    altK = new tkK.constructor(tkCap); altT = new tkT.constructor(tkCap);
+    altK = new (tkK.constructor as any)(tkCap); altT = new (tkT.constructor as any)(tkCap);
     altOff = new Int32Array(tkCap); altEnd = new Int32Array(tkCap); altFl = new Uint8Array(tkCap);
     altDp = new Uint8Array(tkCap); altPd = new Uint16Array(tkCap);
   }
@@ -3906,7 +3931,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
     shiftDiags(cs, ceOld, charDelta);
     return sroot;
   }
-  let root;
+  let root!: number;
   {
     // recovering may already be true here (the window relex recovered a lex error
     // and pushed its diagnostics): the first attempt then runs with EMPTY bars —
@@ -4008,14 +4033,14 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
 export { tokenize };
 // ── Module-level API: the DEFAULT document (one shared session; tokenize and the
 // raw tree/tokenAt views read the ACTIVE doc — they are gate/debug surfaces) ──
-export function parse(source, entryRule) { activate(docDefault); return parseCore(source, entryRule); }
-export function parseEdited(entryRule, edits) { activate(docDefault); return editCore(entryRule, edits); }
+export function parse(source: string, entryRule?: string) { activate(docDefault); return parseCore(source, entryRule); }
+export function parseEdited(entryRule?: string, edits?: Edit[]) { activate(docDefault); return editCore(entryRule, edits); }
 // Arena reclamation introspection + budget override — TEST HOOKS (issue #45 C1). __arenaStats
 // reports the live arena, the compacted-size baseline, and how many edits re-parsed to reclaim;
 // __setArenaBudget lowers the factor/min so a gate can force compaction deterministically.
 export function __arenaStats() { return { nodeN, kidN, baseline: arenaLiveBaseline, compactions: arenaCompactions, inPlaceShrink: arenaInPlaceShrink }; }
-export function __setArenaBudget(factor, min) { arenaCompactFactor = factor; arenaCompactMin = min; }
-export function visit(entry, fns, charBase, tokBase) { activate(docDefault); return visitCore(entry, fns, charBase, tokBase); }
+export function __setArenaBudget(factor: number, min: number) { arenaCompactFactor = factor; arenaCompactMin = min; }
+export function visit(entry: number, fns: _VisitFns, charBase?: number, tokBase?: number) { activate(docDefault); return visitCore(entry, fns, charBase, tokBase); }
 // ── Handle API: explicit trees over per-instance documents ──
 // const p = createParser(); const cst = p.parse(text); p.edit(cst, next[, edits]);
 // The handle is the STABLE IDENTITY of this document's tree: edit() mutates it in
@@ -4026,25 +4051,25 @@ export function visit(entry, fns, charBase, tokBase) { activate(docDefault); ret
 export function createParser() {
   const d = makeDoc();
   let gen = 0;
-  let entryUsed;
-  const chk = (cst) => {
+  let entryUsed: string | undefined;
+  const chk = (cst: Handle | null | undefined) => {
     if (cst === null || cst === undefined || cst.d !== d) throw new Error('foreign tree handle: it belongs to another parser instance');
     if (cst.gen !== gen) throw new Error('stale tree handle: parse() re-opened this document - use the handle from the latest parse()');
   };
-  const view = {};
+  const view: Record<string, (a: number, b: number) => any> = {};
   for (const k of Object.keys(tree)) {
-    const f = tree[k];
-    view[k] = (a, b) => { activate(d); return f(a, b); };
+    const f = (tree as any)[k];
+    view[k] = (a: number, b: number) => { activate(d); return f(a, b); };
   }
   return {
-    parse(source, entryRule) {
+    parse(source: string, entryRule?: string) {
       activate(d);
       entryUsed = entryRule;
       gen++;   // re-opening resets the arena: old handles die regardless of outcome
       docDiags.length = 0;
       docLex.length = 0;
       docPar.length = 0;
-      let root;
+      let root!: number;
       try {
         root = parseCore(source, entryRule);
         lastBars = [];
@@ -4095,17 +4120,17 @@ export function createParser() {
       }
       return { d, gen, root, errors: docDiags };
     },
-    edit(cst, edits) {
+    edit(cst: Handle, edits?: Edit[]) {
       chk(cst);
       activate(d);
       try {
         cst.root = editCore(entryUsed, edits);
       } catch (e) {
-        if (e instanceof RangeError || (e && e.apiMisuse)) throw e;
+        if (e instanceof RangeError || (e && (e as any).apiMisuse)) throw e;
         cst.root = totalNet(e);
       }
     },
-    visit(cst, fns) { chk(cst); activate(d); return visitCore(cst.root, fns); },
+    visit(cst: Handle, fns: _VisitFns) { chk(cst); activate(d); return visitCore(cst.root, fns); },
     tree: view,
   };
 }
diff --git a/test/check.ts b/test/check.ts
index 1658343..cf61a77 100644
--- a/test/check.ts
+++ b/test/check.ts
@@ -26,6 +26,7 @@ const GATES: Gate[] = [
   { group: 'emit-parity', name: 'emit-parser-verify', args: ['test/emit-parser-verify.ts'] },
   { group: 'emit-parity', name: 'emit-reject-messages', args: ['test/emit-reject-messages.ts'] },
   { group: 'emit-parity', name: 'emit-lexer-verify', args: ['test/emit-lexer-verify.ts'] },
+  { group: 'emit-parity', name: 'emit-tsc-gate', args: ['test/emit-tsc-gate.ts'] },
   { group: 'core', name: 'multi-doc', args: ['test/multi-doc.ts'] },
   { group: 'core', name: 'recovery', args: ['test/recovery.ts'] },
   { group: 'core', name: 'incremental-grammars', args: ['test/incremental-grammars.ts'] },
diff --git a/test/cst-match-totality.ts b/test/cst-match-totality.ts
index d6e382c..25c0d8b 100644
--- a/test/cst-match-totality.ts
+++ b/test/cst-match-totality.ts
@@ -51,7 +51,7 @@ function checkTree(em: Emitted, root: number, src: string, matchers: Record<stri
 for (const name of GRAMMARS) {
   const grammar = (await import(`../${name}.ts`)).default;
   const matchers = (await import(`../${name}.cst-match.ts`)).MATCHERS;
-  const emPath = `/tmp/emitted-totality-${name}.mjs`;
+  const emPath = `/tmp/emitted-totality-${name}.mts`;
   writeFileSync(emPath, emitParser(grammar));
   const em = (await import(emPath + '?v=' + process.pid)) as Emitted;
   let parsed = 0;
diff --git a/test/emit-lexer-verify.ts b/test/emit-lexer-verify.ts
index 44fef62..d4128d0 100644
--- a/test/emit-lexer-verify.ts
+++ b/test/emit-lexer-verify.ts
@@ -15,7 +15,7 @@ import { inRepoCorpus, externalTsFiles } from './emit-corpus.ts';
 const grammar = (await import('../typescript.ts')).default;
 
 // The reference: createLexer with the SAME intern config the emitted parser bakes.
-const EMITTED = '/tmp/emit-lexer-verify-parser.mjs';
+const EMITTED = '/tmp/emit-lexer-verify-parser.mts';
 writeFileSync(EMITTED, emitParser(grammar));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 const src = readFileSync(EMITTED, 'utf-8');
diff --git a/test/emit-parser-bench.ts b/test/emit-parser-bench.ts
index 1680386..5f9a2a3 100644
--- a/test/emit-parser-bench.ts
+++ b/test/emit-parser-bench.ts
@@ -15,7 +15,7 @@ import { readFileSync, writeFileSync } from 'fs';
 const grammar = (await import('../typescript.ts')).default;
 const oracle = createParser(grammar);
 
-const EMITTED = '/tmp/emitted-parser.mjs';
+const EMITTED = '/tmp/emitted-parser.mts';
 writeFileSync(EMITTED, emitParser(grammar));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 
diff --git a/test/emit-parser-verify.ts b/test/emit-parser-verify.ts
index 2f39fe4..63228c6 100644
--- a/test/emit-parser-verify.ts
+++ b/test/emit-parser-verify.ts
@@ -21,7 +21,7 @@ const grammar = (await import('../typescript.ts')).default;
 const oracle = createParser(grammar);
 
 // Emit, write to /tmp, import the standalone module.
-const EMITTED = '/tmp/emitted-parser.mjs';
+const EMITTED = '/tmp/emitted-parser.mts';
 writeFileSync(EMITTED, emitParser(grammar));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 
diff --git a/test/emit-reject-messages.ts b/test/emit-reject-messages.ts
index dd5c0a1..9d549b0 100644
--- a/test/emit-reject-messages.ts
+++ b/test/emit-reject-messages.ts
@@ -23,7 +23,7 @@ import { readFileSync, writeFileSync } from 'fs';
 const grammar = (await import('../typescript.ts')).default;
 const oracle = createParser(grammar);
 
-const EMITTED = '/tmp/emitted-parser-msg.mjs';
+const EMITTED = '/tmp/emitted-parser-msg.mts';
 writeFileSync(EMITTED, emitParser(grammar));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 
diff --git a/test/emit-tsc-gate.ts b/test/emit-tsc-gate.ts
new file mode 100644
index 0000000..713d251
--- /dev/null
+++ b/test/emit-tsc-gate.ts
@@ -0,0 +1,72 @@
+// Gate: the EMITTED parser (emit-parser.ts) is type-checked TypeScript.
+//
+// emitParser produces a standalone TS module — explicit types on every declaration
+// (the monomorphic Doc state struct, the matcher/runtime signatures, the baked op /
+// rule tables). This gate compiles that module under `tsc --strict --noEmit` and
+// fails on ANY diagnostic. Two properties it guards by construction:
+//   - the type CONTRACT is real and consistent (no implicit any, no arity looseness,
+//     no shape drift between the swapped buffers and the doc struct) — the part that
+//     ports to a Go/Rust target;
+//   - the emitted source stays ERASABLE TypeScript (annotations only): Node runs the
+//     emitted parser by stripping types, and the CST-identity gate (emit-parser-verify)
+//     proves the stripped runtime is byte-for-byte the interpreter.
+//
+// SCOPE: the self-contained emit path — soa token columns + an emitted lexer — which
+// is every grammar WITHOUT markup / indent / newline modes (emitLexer covers them).
+// The ts/js family (+ the jsx/tsx variants) goes through it and is enforced here.
+// yaml / html take the FALLBACK path (emitLexer returns null → the parser imports
+// createLexer) plus the non-soa piece-text layer; that path carries additional
+// untyped surface and a pre-existing latent scope issue the gate surfaced (the
+// non-soa editCore branch references cs/ceOld/parenCachePos declared only in the soa
+// branch). Typing it is tracked separately — listed as DEFERRED below, not silently
+// dropped.
+import { emitParser } from '../src/emit-parser.ts';
+import { writeFileSync } from 'node:fs';
+import { execFileSync } from 'node:child_process';
+import type { CstGrammar } from '../src/types.ts';
+
+// Enforced: the self-contained soa + emitted-lexer path.
+const CHECKED: Array<[string, string]> = [
+  ['typescript', '../typescript.ts'],
+  ['javascript', '../javascript.ts'],
+  ['typescriptreact', '../typescriptreact.ts'],
+  ['javascriptreact', '../javascriptreact.ts'],
+];
+// Deferred: the fallback-lexer / non-soa path (logged, not gated yet).
+const DEFERRED = ['yaml', 'html'];
+
+const TSC_FLAGS = [
+  '--strict', '--noEmit', '--target', 'ES2022',
+  '--module', 'ES2022', '--moduleResolution', 'Bundler', '--skipLibCheck',
+];
+
+let failures = 0;
+for (const [name, path] of CHECKED) {
+  let grammar: CstGrammar;
+  try {
+    grammar = (await import(path)).default;
+  } catch {
+    console.log(`  ${name}: (grammar not present — skipped)`);
+    continue;
+  }
+  const out = `/tmp/emit-tsc-gate-${name}.ts`;
+  writeFileSync(out, emitParser(grammar));
+  try {
+    execFileSync('npx', ['tsc', ...TSC_FLAGS, out], { stdio: 'pipe' });
+    console.log(`  ${name}: ✓ emitted parser type-checks (tsc --strict)`);
+  } catch (e: any) {
+    failures++;
+    const log = (e.stdout?.toString() ?? '') + (e.stderr?.toString() ?? '');
+    const errs = log.split('\n').filter((l: string) => l.includes('error TS'));
+    console.log(`  ${name}: ✗ ${errs.length} tsc error(s):`);
+    for (const l of errs.slice(0, 30)) console.log(`      ${l.replace(out, `emit(${name})`)}`);
+    if (errs.length > 30) console.log(`      … and ${errs.length - 30} more`);
+  }
+}
+console.log(`  deferred (fallback-lexer / non-soa path, not yet typed): ${DEFERRED.join(', ')}`);
+
+if (failures > 0) {
+  console.error(`\n✗ emitted parser fails strict type-check for ${failures} grammar(s)`);
+  process.exit(1);
+}
+console.log('\n✓ emitted parser type-checks under tsc --strict (soa + emitted-lexer family)');
diff --git a/test/exhaustive-edits.ts b/test/exhaustive-edits.ts
index 1485a4f..72a8ca9 100644
--- a/test/exhaustive-edits.ts
+++ b/test/exhaustive-edits.ts
@@ -31,7 +31,7 @@ const g = defineGrammar({
   rules: { Expr, Stmt, Program }, entry: Program,
 });
 
-const emPath = '/tmp/emitted-exhaustive.mjs';
+const emPath = '/tmp/emitted-exhaustive.mts';
 writeFileSync(emPath, emitParser(g));
 type Cst = { root: number; errors: object[] };
 type Parser = { parse(s: string): Cst; edit(c: Cst, e: object[]): void; visit(c: Cst, fns: object): void; tree: import('./emitted-obj.ts').TreeView };
diff --git a/test/head-to-head.ts b/test/head-to-head.ts
index 4613e67..15f913b 100644
--- a/test/head-to-head.ts
+++ b/test/head-to-head.ts
@@ -26,7 +26,7 @@ const TreeSitter = require(TS_BENCH + '/node_modules/tree-sitter');
 const TSLang = require(TS_BENCH + '/node_modules/tree-sitter-typescript').typescript;
 
 const grammar = (await import('../typescript.ts')).default;
-const emPath = '/tmp/emitted-h2h.mjs';
+const emPath = '/tmp/emitted-h2h.mts';
 writeFileSync(emPath, emitParser(grammar));
 const { createParser } = await import(emPath + '?v=' + process.pid);
 
diff --git a/test/incremental-grammars.ts b/test/incremental-grammars.ts
index 6c2bbd0..404272b 100644
--- a/test/incremental-grammars.ts
+++ b/test/incremental-grammars.ts
@@ -84,7 +84,7 @@ let fails = 0;
 const failures: string[] = [];
 for (const name of GRAMMARS) {
   const grammar = (await import(`../${name}.ts`)).default;
-  const emPath = `/tmp/emitted-incr-${name}.mjs`;
+  const emPath = `/tmp/emitted-incr-${name}.mts`;
   writeFileSync(emPath, emitParser(grammar));
   const em = (await import(emPath + '?v=' + process.pid)) as Em;
   const session = em.createParser();
@@ -183,7 +183,7 @@ function replaceOnce(text: string, find: string, repl: string): { next: string;
   return { next: text.slice(0, at) + repl + text.slice(at + find.length), edit: { start: at, end: at + find.length, text: repl } };
 }
 for (const name of ['javascript', 'typescript']) {
-  const em = (await import(`/tmp/emitted-incr-${name}.mjs?v=` + process.pid)) as Em;
+  const em = (await import(`/tmp/emitted-incr-${name}.mts?v=` + process.pid)) as Em;
   const session = em.createParser();
   const fresh = em.createParser();
   for (const doc of FORK_DOCS) {
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index 04fdf3b..cd01c17 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -11,7 +11,7 @@ import { existsSync, readFileSync, writeFileSync } from 'node:fs';
 import { emitParser } from '../src/emit-parser.ts';
 
 const grammar = (await import('../typescript.ts')).default;
-const emPath = '/tmp/emitted-incremental.mjs';
+const emPath = '/tmp/emitted-incremental.mts';
 writeFileSync(emPath, emitParser(grammar));
 type Edit = { start: number; end: number; text: string };
 type Cst = { root: number; errors: { offset: number; end: number; message: string }[] };
diff --git a/test/multi-doc.ts b/test/multi-doc.ts
index f5af760..c3d844d 100644
--- a/test/multi-doc.ts
+++ b/test/multi-doc.ts
@@ -13,7 +13,7 @@ import { emitParser } from '../src/emit-parser.ts';
 import { objectify } from './emitted-obj.ts';
 
 const grammar = (await import('../typescript.ts')).default;
-const emPath = '/tmp/emitted-multidoc.mjs';
+const emPath = '/tmp/emitted-multidoc.mts';
 writeFileSync(emPath, emitParser(grammar));
 type Edit = { start: number; end: number; text: string };
 type Cst = { root: number; errors: { offset: number; end: number; message: string }[] };
diff --git a/test/recovery-conformance.ts b/test/recovery-conformance.ts
index 8f1f28c..a2eda45 100644
--- a/test/recovery-conformance.ts
+++ b/test/recovery-conformance.ts
@@ -17,7 +17,7 @@ import { emitParser } from '../src/emit-parser.ts';
 import ts from 'typescript';
 
 const grammar = (await import('../typescript.ts')).default;
-const emPath = '/tmp/emitted-recovery-conf.mjs';
+const emPath = '/tmp/emitted-recovery-conf.mts';
 writeFileSync(emPath, emitParser(grammar));
 type Cst = { root: number; errors: { offset: number; end: number; message: string }[] };
 const em = (await import(emPath + '?v=' + process.pid)) as { createParser(): { parse(s: string): Cst } };
diff --git a/test/recovery.ts b/test/recovery.ts
index 5e1d721..9215c46 100644
--- a/test/recovery.ts
+++ b/test/recovery.ts
@@ -17,7 +17,7 @@ import { emitParser } from '../src/emit-parser.ts';
 import { objectify } from './emitted-obj.ts';
 
 const grammar = (await import('../typescript.ts')).default;
-const emPath = '/tmp/emitted-recovery.mjs';
+const emPath = '/tmp/emitted-recovery.mts';
 writeFileSync(emPath, emitParser(grammar));
 type Edit = { start: number; end: number; text: string };
 type Diag = { offset: number; end: number; message: string; related?: { offset: number; end: number; message: string } };

From 2c87267492133b95cc4a8e4fa9ac24fc8d9eb5ad Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Sun, 21 Jun 2026 11:48:47 +0800
Subject: [PATCH 02/27] emit: extend the tsc gate to the fallback-lexer /
 non-soa path (yaml, html)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Brings the yaml/html emit path under the same strict type-check as the ts/js
family, so the gate now covers every grammar. Three things this required:

- Hoist the edit-damage envelope (newLen/cs/ceNew/ceOld/charDelta) out of the
  e.soa window branch. shiftDiags(cs, ceOld, charDelta) runs in the SHARED
  post-fork settle, but those names were declared only in the soa branch — so
  the non-soa branch referenced undeclared variables. The path is unreached at
  runtime for the fallback grammars (they full-relex), which is why it stayed
  invisible; the tsc gate surfaced it. They derive only from shared inputs, so
  hoisting is behavior-neutral for soa and correct for non-soa. Same fix gates
  the soa-only parenCachePos cache-invalidation in the '>'-split.

- Type the non-soa piece-text columns (tkText/altText: string[]), assert the
  fallback column swap against the nullable spare buffers, and cast the baked
  LEX_GRAMMAR at the createLexer boundary.

- Give every baked Map/Set an explicit element type at emission. They inferred
  correctly only when non-empty (ts/js); an empty vocabulary set (yaml/html)
  collapsed to Map<unknown,unknown> / Set<never>.

emit-lexer-verify's TYPE_KIND/LIT_KW/LIT_PU extraction regex now tolerates the
`new Map<string, number>(` generic. Full suite 41/41; emitted CST byte-identical
across all 6 grammars (incremental-grammars 610/610).
---
 src/emit-parser.ts        | 47 ++++++++++++++++++++-------------------
 test/emit-lexer-verify.ts |  6 ++---
 test/emit-tsc-gate.ts     | 36 +++++++++++++++---------------
 3 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 7bd889b..6368898 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -656,7 +656,7 @@ class Emitter {
         // A suppress-carrying group stages the LED-connector exclusion for the next
         // parseRule, then matches its body (same as matchExpr 'group').
         const pre = (expr.suppress && expr.suppress.length)
-          ? `suppressNext = new Set(${J(expr.suppress)});`
+          ? `suppressNext = new Set<string>(${J(expr.suppress)});`
           : ``;
         return [pre, this.matchInto(expr.body, onFail)].filter(Boolean).join('\n');
       }
@@ -1143,9 +1143,9 @@ export function emitParser(grammar: CstGrammar): string {
   // TYPE_KIND: tok.type → int. LIT_KW / LIT_PU: tok.text → keyword / punct literal int.
   // Every token is BORN with tok.k (type kind) + tok.t (literal kind) and the stamp
   // flags — one monomorphic shape, one allocation, no post-pass.
-  e.emit(`const TYPE_KIND = new Map(${J([...st.typeKind])});`);
-  e.emit(`const LIT_KW = new Map(${J([...st.kwLitKind])});`);
-  e.emit(`const LIT_PU = new Map(${J([...st.puLitKind])});`);
+  e.emit(`const TYPE_KIND = new Map<string, number>(${J([...st.typeKind])});`);
+  e.emit(`const LIT_KW = new Map<string, number>(${J([...st.kwLitKind])});`);
+  e.emit(`const LIT_PU = new Map<string, number>(${J([...st.puLitKind])});`);
   e.emit(`const K_PUNCT = ${st.KIND_PUNCT};`);
   e.emit(`const K_TEMPLATE_HEAD = ${st.KIND_TEMPLATE_HEAD};`);
   e.emit(`const K_TEMPLATE_MIDDLE = ${st.KIND_TEMPLATE_HEAD + 1};`);
@@ -1158,15 +1158,15 @@ export function emitParser(grammar: CstGrammar): string {
   if (lexSrc) {
     e.emit(lexSrc);
   } else {
-    e.emit(`const { tokenize } = createLexer(LEX_GRAMMAR, {`);
+    e.emit(`const { tokenize } = createLexer(LEX_GRAMMAR as any, {`);
     e.emit(`  typeKind: TYPE_KIND, kwLit: LIT_KW, puLit: LIT_PU,`);
     e.emit(`  punctKind: K_PUNCT, namedFallback: K_NAMED_FALLBACK,`);
     e.emit(`});`);
   }
   e.emit(``);
   // Baked maps. Emit as object literals → Map.
-  e.emit(`const opTable = new Map(${J([...a.opTable])});`);
-  e.emit(`const prefixOps = new Map(${J([...a.prefixOps])});`);
+  e.emit(`const opTable = new Map<string, any>(${J([...a.opTable])});`);
+  e.emit(`const prefixOps = new Map<string, any>(${J([...a.prefixOps])});`);
   // The same op tables re-keyed by the literal int (tok.t): the Pratt loops look an
   // operator up for EVERY token they reach, and tok.t is already interned — an array
   // load replaces the string-keyed Map.get. Equivalent because a token's text can equal
@@ -1189,7 +1189,7 @@ export function emitParser(grammar: CstGrammar): string {
     e.emit(`const OP_BY_T: (OpInfo | null)[] = ${J(byT(a.opTable))};`);
     e.emit(`const PREFIX_BY_T: (OpInfo | null)[] = ${J(byT(a.prefixOps))};`);
   }
-  e.emit(`const noUnaryLhsOps = new Set(${J([...a.noUnaryLhsOps])});`);
+  e.emit(`const noUnaryLhsOps = new Set<string>(${J([...a.noUnaryLhsOps])});`);
   {
     let tSize = 1;
     for (const v of st.kwLitKind.values()) tSize = Math.max(tSize, v + 1);
@@ -1214,8 +1214,8 @@ export function emitParser(grammar: CstGrammar): string {
     }
     e.emit(`const REQTGT_T = Uint8Array.from([${rt.join(',')}]);`);
   }
-  e.emit(`const postfixOpValues = new Set(${J([...a.postfixOpValues])});`);
-  e.emit(`const binaryConnectors = new Set(${J([...a.binaryConnectors])});`);
+  e.emit(`const postfixOpValues = new Set<string>(${J([...a.postfixOpValues])});`);
+  e.emit(`const binaryConnectors = new Set<string>(${J([...a.binaryConnectors])});`);
   // Assignment-target shape test (ECMAScript AssignmentTargetType): a node id is NOT a
   // valid LHS target iff its outermost form is a prefix-op (prefix-unary OR prefix-update
   // `++x`) — head kid is an operator-tag leaf in prefixOps — or a postfix-update (`x++`) —
@@ -1255,8 +1255,8 @@ export function emitParser(grammar: CstGrammar): string {
   e.emit(`  }`);
   e.emit(`  return '';`);
   e.emit(`}`);
-  e.emit(`const tokenNames = new Set(${J([...a.tokenNames])});`);
-  e.emit(`const templateTokenNames = new Set(${J([...a.templateTokenNames])});`);
+  e.emit(`const tokenNames = new Set<string>(${J([...a.tokenNames])});`);
+  e.emit(`const templateTokenNames = new Set<string>(${J([...a.templateTokenNames])});`);
   e.emit(`const templateTokenName = ${J(a.templateTokenName ?? null)};`);
   e.emit(`const maxBp = ${a.maxBp};`);
   e.emit(`const ENTRY = ${J(entry)};`);
@@ -1280,7 +1280,7 @@ export function emitParser(grammar: CstGrammar): string {
   }
   // (recovery sync closers are threaded per-loop from the enclosing seq — see
   // quantFollowT; a global closer table froze top-level recovery at any ']'.)
-  e.emit(`const prattRuleNames = new Set(${J([...a.prattRules])});`);
+  e.emit(`const prattRuleNames = new Set<string>(${J([...a.prattRules])});`);
   // The expression rule the template-interpolation fallback (findExprRule) picks:
   // first pratt rule that isn't Type, in declaration order. Bake the resolved name.
   const exprRuleName = (() => {
@@ -1432,7 +1432,7 @@ let srcLenP1 = 1;
 let negFrom = 0x7fffffff;
 function toff(i: number) { const v = tkOff[i]; return v < 0 ? v + srcLenP1 : v; }
 function tend(i: number) { const v = tkEnd[i]; return v < 0 ? v + srcLenP1 : v; }
-${e.soa ? '' : 'let tkText = [];   // fallback-lexer text column (synthetic tokens are not source spans)'}
+${e.soa ? '' : 'let tkText: string[] = [];   // fallback-lexer text column (synthetic tokens are not source spans)'}
 function growTok() {
   tkCap *= 2;
   const k = new ${K_ARR}(tkCap); k.set(tkK); tkK = k;
@@ -1743,7 +1743,7 @@ function matchPuLitGT(pu: number, vs?: number) {
     const end0 = tend(pos);
     ${e.soa ? '' : 'const restText = tkText[pos].slice(1);'}
     if (tokN === tkCap) growTok();
-    parenCachePos = -1;
+    ${e.soa ? 'parenCachePos = -1;' : ''}   // invalidate the paren-stack cache (soa emitted lexer only)
     // token indices shift past this point: the OLD-TREE adoption mapping
     // (adoptDmg*/adoptDelta, frozen at edit start) is no longer valid — turn
     // adoption off for the remainder of this parse (the '>' split is rare; the
@@ -2003,7 +2003,7 @@ function emitLeftRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDe
   e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${rid}, ${J(rule.name)}, ${ruleFn}_lr); }`);
   // notLeftLeaf head-leaf word sets (module-level, built once) for this rule's gated continuations.
   contNotLeftLeaf.forEach((words, i) => {
-    if (words) e.emit(`const _NLLC_${sn}_${i} = new Set(${J(words)});`);
+    if (words) e.emit(`const _NLLC_${sn}_${i} = new Set<string>(${J(words)});`);
   });
   e.emit(`function ${ruleFn}_lr(_minBp: number) {`);
   e.emit(`  const saved = pos; const mark = scn;`);
@@ -2068,7 +2068,7 @@ function emitPrattRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl
   e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${rid}, ${J(rule.name)}, ${ruleFn}_pratt); }`);
   // notLeftLeaf head-leaf word sets (module-level, built once) for this rule's gated LED arms.
   meta.notLeftLeaf.forEach((words, i) => {
-    if (words) e.emit(`const _NLL_${sn}_${i} = new Set(${J(words)});`);
+    if (words) e.emit(`const _NLL_${sn}_${i} = new Set<string>(${J(words)});`);
   });
   e.emit(`function ${ruleFn}_pratt(minBp: number) {`);
   e.emit(`  const saved = pos; const mark = scn;`);
@@ -3574,7 +3574,7 @@ function swapBuffers() {
   x = tkPd; tkPd = altPd!; altPd = x;
   x = tkCap; tkCap = altCap; altCap = x;
 }
-${e.soa ? '' : 'let altText = [];'}
+${e.soa ? '' : 'let altText: string[] = [];'}
 
 function parseCore(source: string, entryRule?: string) {
   adoptRoot = -1;
@@ -3696,14 +3696,16 @@ function editCore(entryRule: string | undefined, edits?: Edit[]) {
     editDmgE = dE;
   }
 
-${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // Damage envelope from the composed changes: prefix coordinates are shared, the
-  // old end comes back through the total delta.
+  // old end comes back through the total delta. The shared post-fork settle
+  // (shiftDiags) and the soa window both read these, so they live OUTSIDE the
+  // lex fork — the non-soa branch reads cs/ceOld/charDelta too.
   const newLen = docLen;
   const cs = editDmgS < newLen ? editDmgS : newLen;
   const ceNew = editDmgE < cs ? cs : editDmgE;
   const ceOld = ceNew - (newLen - oldLen);
   const charDelta = newLen - oldLen;
+${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // Restart anchor: the last token B ending at/before the damage whose recorded
   // depths are zero and whose shape carries no cross-token lexer flag (')' control-
   // head, postfix-ambiguous op). B = -1 restarts at the file head — always sound.
@@ -3866,8 +3868,8 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
     altOff = new Int32Array(tkCap); altEnd = new Int32Array(tkCap); altFl = new Uint8Array(tkCap);
     altDp = new Uint8Array(tkCap); altPd = new Uint16Array(tkCap);
   }
-  tkK = altK; tkT = altT; tkOff = altOff; tkEnd = altEnd; tkFl = altFl;
-  { const _d = tkDp; tkDp = altDp; altDp = _d; const _q = tkPd; tkPd = altPd; altPd = _q; }
+  tkK = altK!; tkT = altT!; tkOff = altOff!; tkEnd = altEnd!; tkFl = altFl!;
+  { const _d = tkDp; tkDp = altDp!; altDp = _d; const _q = tkPd; tkPd = altPd!; altPd = _q; }
   tkText = altText; tkText.length = 0;
   altK = oK; altT = oT; altOff = oOff; altEnd = oEnd; altFl = oFl;
   altText = oText;
@@ -3876,7 +3878,6 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
                        // from an earlier totality-net edit would go stale
   lexInto(flattenDoc());
   const nN = tokN;
-  const charDelta = docLen - oldLen;
   const minN = oN < nN ? oN : nN;
   let p = 0;
   while (p < minN && oK[p] === tkK[p] && oT[p] === tkT[p] && oFl[p] === tkFl[p]
diff --git a/test/emit-lexer-verify.ts b/test/emit-lexer-verify.ts
index d4128d0..e0ab8a5 100644
--- a/test/emit-lexer-verify.ts
+++ b/test/emit-lexer-verify.ts
@@ -26,9 +26,9 @@ if (src.includes('createLexer(')) {
 // Rebuild the intern config from the emitted tables' source of truth: re-emit via the
 // analyzer is private, so read the reference lexer through a tiny probe grammar parse —
 // simplest faithful route: intern maps are exactly the emitted TYPE_KIND/LIT_KW/LIT_PU.
-const tk = new Map<string, number>(JSON.parse(src.match(/const TYPE_KIND = new Map\((.*)\);/)![1]));
-const kw = new Map<string, number>(JSON.parse(src.match(/const LIT_KW = new Map\((.*)\);/)![1]));
-const pu = new Map<string, number>(JSON.parse(src.match(/const LIT_PU = new Map\((.*)\);/)![1]));
+const tk = new Map<string, number>(JSON.parse(src.match(/const TYPE_KIND = new Map(?:<[^>]*>)?\((.*)\);/)![1]));
+const kw = new Map<string, number>(JSON.parse(src.match(/const LIT_KW = new Map(?:<[^>]*>)?\((.*)\);/)![1]));
+const pu = new Map<string, number>(JSON.parse(src.match(/const LIT_PU = new Map(?:<[^>]*>)?\((.*)\);/)![1]));
 const kPunct = Number(src.match(/const K_PUNCT = (\d+);/)![1]);
 const kFallback = Number(src.match(/const K_NAMED_FALLBACK = (\d+);/)![1]);
 const ref = createLexer(grammar, { typeKind: tk, kwLit: kw, puLit: pu, punctKind: kPunct, namedFallback: kFallback });
diff --git a/test/emit-tsc-gate.ts b/test/emit-tsc-gate.ts
index 713d251..e6df929 100644
--- a/test/emit-tsc-gate.ts
+++ b/test/emit-tsc-gate.ts
@@ -11,37 +11,38 @@
 //     emitted parser by stripping types, and the CST-identity gate (emit-parser-verify)
 //     proves the stripped runtime is byte-for-byte the interpreter.
 //
-// SCOPE: the self-contained emit path — soa token columns + an emitted lexer — which
-// is every grammar WITHOUT markup / indent / newline modes (emitLexer covers them).
-// The ts/js family (+ the jsx/tsx variants) goes through it and is enforced here.
-// yaml / html take the FALLBACK path (emitLexer returns null → the parser imports
-// createLexer) plus the non-soa piece-text layer; that path carries additional
-// untyped surface and a pre-existing latent scope issue the gate surfaced (the
-// non-soa editCore branch references cs/ceOld/parenCachePos declared only in the soa
-// branch). Typing it is tracked separately — listed as DEFERRED below, not silently
-// dropped.
+// Both emit paths are covered: the self-contained path (soa columns + an emitted
+// lexer — the ts/js family) and the fallback path (yaml/html: emitLexer returns null
+// so the parser imports createLexer, plus the non-soa piece-text layer). Checking
+// every grammar is what forces grammar-specific emission (token width, soa vs piece
+// layer, empty vocab sets, the fallback createLexer contract) to stay type-sound —
+// and it already paid off: the fallback editCore branch referenced cs/ceOld/
+// parenCachePos declared only in the soa branch (unreached at runtime, invisible
+// until this gate), now hoisted/gated correctly.
 import { emitParser } from '../src/emit-parser.ts';
 import { writeFileSync } from 'node:fs';
 import { execFileSync } from 'node:child_process';
 import type { CstGrammar } from '../src/types.ts';
 
-// Enforced: the self-contained soa + emitted-lexer path.
-const CHECKED: Array<[string, string]> = [
+const GRAMMARS: Array<[string, string]> = [
   ['typescript', '../typescript.ts'],
   ['javascript', '../javascript.ts'],
   ['typescriptreact', '../typescriptreact.ts'],
   ['javascriptreact', '../javascriptreact.ts'],
+  ['yaml', '../yaml.ts'],
+  ['html', '../html.ts'],
 ];
-// Deferred: the fallback-lexer / non-soa path (logged, not gated yet).
-const DEFERRED = ['yaml', 'html'];
 
+// --allowImportingTsExtensions: the fallback-lexer grammars import createLexer from
+// '…/src/gen-lexer.ts' (an absolute path baked at emit time); harmless for the
+// self-contained grammars, which import nothing.
 const TSC_FLAGS = [
-  '--strict', '--noEmit', '--target', 'ES2022',
-  '--module', 'ES2022', '--moduleResolution', 'Bundler', '--skipLibCheck',
+  '--strict', '--noEmit', '--target', 'ES2022', '--module', 'ES2022',
+  '--moduleResolution', 'Bundler', '--skipLibCheck', '--allowImportingTsExtensions',
 ];
 
 let failures = 0;
-for (const [name, path] of CHECKED) {
+for (const [name, path] of GRAMMARS) {
   let grammar: CstGrammar;
   try {
     grammar = (await import(path)).default;
@@ -63,10 +64,9 @@ for (const [name, path] of CHECKED) {
     if (errs.length > 30) console.log(`      … and ${errs.length - 30} more`);
   }
 }
-console.log(`  deferred (fallback-lexer / non-soa path, not yet typed): ${DEFERRED.join(', ')}`);
 
 if (failures > 0) {
   console.error(`\n✗ emitted parser fails strict type-check for ${failures} grammar(s)`);
   process.exit(1);
 }
-console.log('\n✓ emitted parser type-checks under tsc --strict (soa + emitted-lexer family)');
+console.log('\n✓ emitted parser type-checks under tsc --strict for every grammar');

From 7d47ca3a9f77e06f929094e45b90623639b43fb6 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Sun, 21 Jun 2026 12:20:02 +0800
Subject: [PATCH 03/27] =?UTF-8?q?emit:=20target-agnostic=20emitter=20?=
 =?UTF-8?q?=E2=80=94=20derived=20Go=20+=20Rust=20parsers=20(issue=20#6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The agnosticism payoff of #6, proven by EXECUTION. emit-portable.ts adds
`emitPortableParser(grammar, target)`: one analysis → one plain-data IR → a parser
rendered in each target language through a `Target` interface. The same grammar
(examples/calc.ts) derives a TypeScript, a Go, and a Rust parser; the Go and Rust
sources are compiled (`go build` / `rustc`) and run, and every parser's CST is
compared node-for-node against the createParser interpreter.

This is a SEPARATE, minimal emitter from the optimized emit-parser.ts (no
incremental/recovery/arena — each target supplies its own runtime, as the issue
frames it). It is the real Target seam: adding a language is implementing one
`render(ir)`; buildIR is untouched.

Scope = the verifiable core: char-class tokens, recursive descent with backtracking
alternation and `*`, and a Pratt expression engine with operator precedence /
associativity, prefix unary, and parenthesised grouping. The portable lexer is a
dependency-free char scanner (no regex), so the emitted Go/Rust compile offline —
sidestepping both the full-TS lexer's lookahead (which Go's RE2 and Rust's regex
crate reject) and any crate fetch. buildIR THROWS on a construct it does not model
rather than emit a wrong parser; mixfix/postfix LEDs, sep/opt, and lexer lookahead
are the documented next increment.

Gate: test/portable-targets.ts (group emit-parity) — typescript + go + rust each
21/21 accept ≡ oracle and 7/7 reject ≡ oracle over an adversarial corpus
(precedence both directions, left-associativity, prefix chains, nested grouping,
multi-statement programs, the empty program, malformed input). Go/Rust toolchains
are optional — a missing `go`/`rustc` is skipped (the TS rendering needs only node).
Full suite 42/42.
---
 examples/calc.ts         |  57 +++++++++++
 src/emit-portable.ts     | 173 +++++++++++++++++++++++++++++++++
 src/target-go.ts         | 203 +++++++++++++++++++++++++++++++++++++++
 src/target-rust.ts       | 188 ++++++++++++++++++++++++++++++++++++
 src/target-ts.ts         | 163 +++++++++++++++++++++++++++++++
 test/check.ts            |   1 +
 test/portable-targets.ts | 116 ++++++++++++++++++++++
 7 files changed, 901 insertions(+)
 create mode 100644 examples/calc.ts
 create mode 100644 src/emit-portable.ts
 create mode 100644 src/target-go.ts
 create mode 100644 src/target-rust.ts
 create mode 100644 src/target-ts.ts
 create mode 100644 test/portable-targets.ts

diff --git a/examples/calc.ts b/examples/calc.ts
new file mode 100644
index 0000000..2bfcfff
--- /dev/null
+++ b/examples/calc.ts
@@ -0,0 +1,57 @@
+// A small Pratt grammar — the cross-language target proof for issue #6.
+//
+// Deliberately minimal but it exercises the constructs that make parsing-as-
+// derivation non-trivial: token kinds (Ident/Number), literal keywords, sequences,
+// backtracking alternation, quantifiers (opt/many/sep), recursion (grouping), and —
+// the crux — a Pratt expression engine with operator PRECEDENCE and associativity
+// (`1 + 2 * 3` must group as `1 + (2 * 3)`), prefix unary, and a left-associative
+// call/postfix continuation. emitPortableParser derives a TS, Go, and Rust parser
+// from THIS one definition; the cross-language gate proves all three produce the
+// byte-identical CST the interpreter (createParser) does.
+//
+// No lexer lookahead (the full TS grammar's number tokens use `(?!…)`, which Go's
+// RE2 and Rust's regex crate reject) — the portable lexer is a dependency-free
+// char-class scanner, so the emitted Go/Rust compile offline with no regex engine.
+import {
+  token, rule, defineGrammar, left, right, op, prefix,
+  seq, oneOf, range, star, many,
+} from '../src/api.ts';
+
+const digit = range('0', '9');
+const identStart = oneOf(range('a', 'z'), range('A', 'Z'), '_');
+const identPart = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_');
+
+const Ident = token(seq(identStart, star(identPart)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(digit, star(digit)), { scope: 'constant.numeric' });
+
+// Precedence ladder (earlier = looser): `+` `-` loosest, then `*` `/`, then prefix
+// `-` tightest — so `1 + 2 * 3` is `1 + (2 * 3)` and `-a * b` is `(-a) * b`.
+const calcPrec = [
+  left('+', '-'),
+  left('*', '/'),
+  right(prefix('-')),
+];
+
+const Expr = rule(($) => [
+  Number_,
+  Ident,
+  ['(', $, ')'],            // grouping (recursion)
+  [prefix, $],              // prefix unary minus (operators from the ladder)
+  [$, op, $],               // binary infix, precedence from the ladder
+]);
+
+const Stmt = rule(($) => [
+  ['let', Ident, '=', Expr, ';'],
+  [Expr, ';'],
+]);
+
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'calc',
+  scopeName: 'source.calc',
+  tokens: { Ident, Number: Number_ },
+  prec: calcPrec,
+  // findEntryRule = the LAST rule, so Program is the entry point.
+  rules: { Expr, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
new file mode 100644
index 0000000..900beec
--- /dev/null
+++ b/src/emit-portable.ts
@@ -0,0 +1,173 @@
+// ── emit-portable ──
+//
+// The target-agnostic emitter (issue #6). `emitPortableParser(grammar, target)` derives
+// a COMPLETE, self-contained parser in the target's language from the same CstGrammar the
+// TS engine uses. It is the agnosticism proof: ONE analysis → ONE intermediate form (IR)
+// → N language renderings, all producing the byte-identical CST the interpreter does.
+//
+// SHARED + target-agnostic (here): the grammar ANALYSIS (reused from grammar-analysis.ts)
+// and `buildIR` — the parse plan as plain data (recursive-descent rules as alternative
+// step-lists, the Pratt rule as NUD-atom / prefix / binary tables, the char-class lexer
+// specs, the literal vocabulary, the entry rule). PER-TARGET (a Target): `render(ir)` —
+// the language's lexer + CST runtime + the rendering of each IR node. Adding a language is
+// implementing one Target; nothing here changes.
+//
+// SCOPE (the verifiable core): char-class tokens (`charClass` then `star(charClass)`), a
+// recursive-descent + backtracking-alternation + `*` body, and a Pratt expression engine
+// with operator PRECEDENCE/associativity + prefix unary + parenthesised grouping. The
+// portable lexer is a dependency-free char scanner (no regex), so the emitted Go/Rust
+// compile offline. Richer surface (mixfix/postfix LEDs, `sep`/`opt`, lexer lookahead,
+// left-recursion beyond Pratt) is the documented next increment; buildIR THROWS on a
+// construct it does not model rather than emit a wrong parser.
+import type { CstGrammar, RuleExpr, TokenDecl, TokenPattern } from './types.ts';
+import { analyzeGrammar, findEntryRule } from './grammar-analysis.ts';
+import { collectLiterals, isKeywordLiteral } from './grammar-utils.ts';
+
+// ── Intermediate representation (plain data; every Target renders THIS) ──
+
+export type CharRange = [number, number];   // inclusive char-code range
+export type TokenSpec = { name: string; first: CharRange[]; cont: CharRange[] };
+
+export type Step =
+  | { t: 'lit'; value: string; ttype: '$keyword' | '$punct' }   // match a literal by text
+  | { t: 'tok'; name: string }                                  // match a token kind
+  | { t: 'rule'; name: string }                                 // call a rule, append its node
+  | { t: 'star'; step: Step };                                  // repeat the inner step 0+ times
+export type Alt = Step[];
+
+export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
+export type PrattRule = {
+  kind: 'pratt';
+  name: string;
+  atomToks: string[];                                  // NUD: a bare token (Number/Ident) wrapped in a node
+  group: { open: string; close: string } | null;      // NUD: '(' Expr ')'
+  prefix: Array<{ op: string; rbp: number }>;          // NUD: prefix op then operand parsed at rbp
+  binary: Array<{ op: string; lbp: number; rbp: number }>;  // LED: infix op, bind iff lbp > minBp, rhs at rbp
+};
+export type RuleIR = RdRule | PrattRule;
+
+export type ParserIR = {
+  grammarName: string;
+  entry: string;
+  tokens: TokenSpec[];   // named tokens, for the char scanner (tried in declaration order)
+  puncts: string[];      // punctuation literals, sorted longest-first (maximal munch)
+  rules: RuleIR[];
+};
+
+export interface Target {
+  name: string;
+  ext: string;                       // emitted file extension (no dot)
+  render(ir: ParserIR): string;      // the complete, compilable source
+}
+
+export function emitPortableParser(grammar: CstGrammar, target: Target): string {
+  return target.render(buildIR(grammar));
+}
+
+// ── buildIR: grammar + analysis → the target-agnostic parse plan ──
+
+function buildIR(grammar: CstGrammar): ParserIR {
+  const a = analyzeGrammar(grammar);
+  const tokenNames = a.tokenNames;
+
+  // Lexer token specs: each token must be `charClass` then `star(charClass)` (the portable
+  // scanner's shape). Anything else is out of the verifiable core → throw, don't mis-lex.
+  const tokens: TokenSpec[] = grammar.tokens.map((t) => {
+    const { first, cont } = charClassFirstCont(t);
+    return { name: t.name, first, cont };
+  });
+
+  // Literal vocabulary, split keyword (alpha — lexed as an identifier, matched by text) vs
+  // punctuation (lexed as its own token). Puncts longest-first for maximal munch.
+  const lits = new Set<string>();
+  for (const r of grammar.rules) for (const l of collectLiterals(r.body)) lits.add(l);
+  for (const lv of grammar.precs) for (const o of lv.operators) lits.add(o.value);
+  const puncts = [...lits].filter((l) => !isKeywordLiteral(l)).sort((x, y) => y.length - x.length);
+
+  const litTtype = (v: string): '$keyword' | '$punct' => (isKeywordLiteral(v) ? '$keyword' : '$punct');
+
+  const rules: RuleIR[] = grammar.rules.map((r) => {
+    if (a.prattRules.has(r.name)) return buildPratt(r.name, r.body, a);
+    return { kind: 'rd', name: r.name, alts: buildRdAlts(r.body) };
+  });
+
+  function buildRdAlts(body: RuleExpr): Alt[] {
+    if (body.type === 'alt') return body.items.map(altSteps);
+    return [altSteps(body)];
+  }
+  function altSteps(e: RuleExpr): Step[] {
+    if (e.type === 'seq') return e.items.flatMap(stepOf);
+    return stepOf(e);
+  }
+  function stepOf(e: RuleExpr): Step[] {
+    switch (e.type) {
+      case 'literal': return [{ t: 'lit', value: e.value, ttype: litTtype(e.value) }];
+      case 'ref': return [tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name }];
+      case 'quantifier': {
+        if (e.kind !== '*') throw new Error(`portable: quantifier '${e.kind}' not in the verifiable core (only '*')`);
+        const inner = stepOf(e.body);
+        if (inner.length !== 1) throw new Error('portable: `*` body must be a single step (a rule/token ref)');
+        return [{ t: 'star', step: inner[0] }];
+      }
+      case 'group': return altSteps(e.body);
+      default: throw new Error(`portable: rd construct '${e.type}' not in the verifiable core`);
+    }
+  }
+
+  return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules };
+}
+
+// A Pratt rule's alternatives, classified into NUD atoms / grouping / prefix and LED binary.
+// The binding powers come from the analysis (opTable/prefixOps), so precedence is single-
+// sourced with the interpreter.
+function buildPratt(name: string, body: RuleExpr, a: ReturnType<typeof analyzeGrammar>): PrattRule {
+  const alts = body.type === 'alt' ? body.items : [body];
+  const atomToks: string[] = [];
+  let group: { open: string; close: string } | null = null;
+  let sawPrefix = false;
+  let sawBinary = false;
+  for (const alt of alts) {
+    const items = alt.type === 'seq' ? alt.items : [alt];
+    if (items.length === 1 && items[0].type === 'ref' && a.tokenNames.has(items[0].name)) {
+      atomToks.push(items[0].name);                                  // [Token]
+    } else if (items.length === 3 && items[0].type === 'literal' && items[2].type === 'literal'
+               && items[1].type === 'ref' && items[1].name === name) {
+      group = { open: items[0].value, close: items[2].value };       // [ '(' $ ')' ]
+    } else if (items.length === 2 && items[0].type === 'prefix' && items[1].type === 'ref' && items[1].name === name) {
+      sawPrefix = true;                                              // [ prefix $ ]
+    } else if (items.length === 3 && items[0].type === 'ref' && items[0].name === name
+               && items[1].type === 'op' && items[2].type === 'ref' && items[2].name === name) {
+      sawBinary = true;                                              // [ $ op $ ]
+    } else {
+      throw new Error(`portable: Pratt alt shape not in the verifiable core (rule ${name})`);
+    }
+  }
+  const prefix = sawPrefix
+    ? [...a.prefixOps.entries()].map(([op, info]) => ({ op, rbp: info.rbp }))
+    : [];
+  const binary = sawBinary
+    ? [...a.opTable.entries()]
+        .filter(([, info]) => info.position === 'infix')
+        .map(([op, info]) => ({ op, lbp: info.lbp, rbp: info.rbp }))
+    : [];
+  return { kind: 'pratt', name, atomToks, group, prefix, binary };
+}
+
+// Extract a token's (first-char, continue-char) code ranges from a `charClass` then
+// `star(charClass)` pattern. Throws for any other shape (out of the verifiable core).
+function charClassFirstCont(t: TokenDecl): { first: CharRange[]; cont: CharRange[] } {
+  const p = t.pattern;
+  if (typeof p === 'string' || p.type !== 'seq' || p.items.length !== 2) throw new Error(`portable: token ${t.name} not [charClass, star(charClass)]`);
+  const head = p.items[0];
+  const tail = p.items[1];
+  if (typeof tail === 'string' || tail.type !== 'repeat' || tail.min !== 0) throw new Error(`portable: token ${t.name} tail is not star(charClass)`);
+  return { first: classRanges(head, t.name), cont: classRanges(tail.body, t.name) };
+}
+function classRanges(p: TokenPattern, tok: string): CharRange[] {
+  if (typeof p === 'string' || p.type !== 'charClass' || p.negate) throw new Error(`portable: token ${tok} uses a non-positive char class`);
+  return p.items.map((it): CharRange => {
+    if (it.type === 'char') return [it.value.charCodeAt(0), it.value.charCodeAt(0)];
+    if (it.type === 'range') return [it.from.charCodeAt(0), it.to.charCodeAt(0)];
+    throw new Error(`portable: token ${tok} char-class item '${(it as { type: string }).type}' unsupported`);
+  });
+}
diff --git a/src/target-go.ts b/src/target-go.ts
new file mode 100644
index 0000000..bc81629
--- /dev/null
+++ b/src/target-go.ts
@@ -0,0 +1,203 @@
+// The Go Target for emit-portable. Renders the same language-agnostic ParserIR as tsTarget
+// into a self-contained Go program (Go stdlib only — the char-class lexer is regex-free, so
+// it compiles with no module dependencies). Its CST JSON is checked byte-for-byte against
+// the interpreter, so `emitPortableParser(grammar, goTarget)` is a real, verified Go parser
+// derived from the same grammar definition.
+import type { ParserIR, RdRule, PrattRule, Step, CharRange, Target } from './emit-portable.ts';
+
+const J = (v: unknown) => JSON.stringify(v);
+const goStr = (s: string) => J(s);   // Go and JSON string literals coincide for our ASCII vocab
+const rangeCond = (v: string, rs: CharRange[]) =>
+  rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ');
+
+function lexer(ir: ParserIR): string {
+  const cases = ir.tokens.map((t) => `\t\tif ${rangeCond('c', t.first)} {
+\t\t\te := pos + 1
+\t\t\tfor e < n { cc := int(src[e]); if !(${rangeCond('cc', t.cont)}) { break }; e++ }
+\t\t\ttoks = append(toks, Tok{${goStr(t.name)}, src[pos:e], pos, e}); pos = e; continue
+\t\t}`).join('\n');
+  const punctChecks = ir.puncts.map((p) =>
+    `\t\tif strings.HasPrefix(src[pos:], ${goStr(p)}) { toks = append(toks, Tok{"", ${goStr(p)}, pos, pos + ${p.length}}); pos += ${p.length}; continue }`).join('\n');
+  return `func lex(src string) []Tok {
+\ttoks := []Tok{}
+\tn := len(src)
+\tpos := 0
+\tfor pos < n {
+\t\tc := int(src[pos])
+\t\tif c == 32 || c == 9 || c == 10 || c == 13 { pos++; continue }
+${cases}
+${punctChecks}
+\t\tpanic(fmt.Sprintf("lex error at %d", pos))
+\t}
+\treturn toks
+}`;
+}
+
+function rdRule(r: RdRule): string {
+  const alt = (steps: Step[]) => {
+    const conds = steps.map(stepCond).join(' && ');
+    return `\t{ kids := []*Cst{}; if ${conds} { return branch(${goStr(r.name)}, kids, save) }; pos = save }`;
+  };
+  return `func parse${r.name}() *Cst {
+\tsave := pos
+${r.alts.map(alt).join('\n')}
+\treturn nil
+}`;
+}
+function stepCond(s: Step): string {
+  switch (s.t) {
+    case 'lit': return `matchLit(${goStr(s.value)}, ${goStr(s.ttype)}, &kids)`;
+    case 'tok': return `matchTok(${goStr(s.name)}, &kids)`;
+    case 'rule': return `callRule(parse${s.name}, &kids)`;
+    case 'star': return `star(func() bool { return ${stepCond(s.step)} }, &kids)`;
+  }
+}
+
+function prattRule(r: PrattRule): string {
+  const bin = r.binary.map((b) => `${goStr(b.op)}: {${b.lbp}, ${b.rbp}}`).join(', ');
+  const pre = r.prefix.map((p) => `${goStr(p.op)}: ${p.rbp}`).join(', ');
+  const atoms = r.atomToks.map((k) => `${goStr(k)}: true`).join(', ');
+  const g = r.group;
+  return `var ${r.name}BIN = map[string]bp{${bin}}
+var ${r.name}PRE = map[string]int{${pre}}
+var ${r.name}ATOM = map[string]bool{${atoms}}
+func parse${r.name}() *Cst { return ${r.name}bp(0) }
+func ${r.name}bp(minBp int) *Cst {
+\tleft := ${r.name}nud()
+\tif left == nil { return nil }
+\tfor {
+\t\tt := peek()
+\t\tif t == nil { break }
+\t\tinfo, ok := ${r.name}BIN[t.Text]
+\t\tif !ok || info.lbp <= minBp { break }
+\t\tledSave := pos
+\t\tpos++
+\t\topLeaf := &Cst{IsLeaf: true, TokenType: "$operator", Offset: t.Off, End: t.End}
+\t\trhs := ${r.name}bp(info.rbp)
+\t\tif rhs == nil { pos = ledSave; break }
+\t\tleft = &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{left, opLeaf, rhs}, Offset: left.Offset, End: rhs.End}
+\t}
+\treturn left
+}
+func ${r.name}nud() *Cst {
+\tt := peek()
+\tif t == nil { return nil }
+\tif ${r.name}ATOM[t.Kind] {
+\t\tpos++
+\t\treturn &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{{IsLeaf: true, TokenType: t.Kind, Offset: t.Off, End: t.End}}, Offset: t.Off, End: t.End}
+\t}
+${g ? `\tif t.Text == ${goStr(g.open)} {
+\t\tsave := pos; pos++
+\t\tinner := ${r.name}bp(0)
+\t\tc := peek()
+\t\tif inner == nil || c == nil || c.Text != ${goStr(g.close)} { pos = save; return nil }
+\t\tpos++
+\t\treturn &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{{IsLeaf: true, TokenType: "$punct", Offset: t.Off, End: t.End}, inner, {IsLeaf: true, TokenType: "$punct", Offset: c.Off, End: c.End}}, Offset: t.Off, End: c.End}
+\t}` : ''}
+\tif pbp, ok := ${r.name}PRE[t.Text]; ok {
+\t\tsave := pos; pos++
+\t\topLeaf := &Cst{IsLeaf: true, TokenType: "$operator", Offset: t.Off, End: t.End}
+\t\toperand := ${r.name}bp(pbp)
+\t\tif operand == nil { pos = save; return nil }
+\t\treturn &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{opLeaf, operand}, Offset: t.Off, End: operand.End}
+\t}
+\treturn nil
+}`;
+}
+
+export const goTarget: Target = {
+  name: 'go',
+  ext: 'go',
+  render(ir: ParserIR): string {
+    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r) : rdRule(r))).join('\n\n');
+    return `// GENERATED by emit-portable.ts (goTarget) — parser for grammar "${ir.grammarName}".
+package main
+
+import (
+\t"fmt"
+\t"io"
+\t"os"
+\t"strings"
+)
+
+type Tok struct {
+\tKind, Text string
+\tOff, End   int
+}
+type Cst struct {
+\tRule      string
+\tChildren  []*Cst
+\tIsLeaf    bool
+\tTokenType string
+\tOffset    int
+\tEnd       int
+}
+type bp struct{ lbp, rbp int }
+
+${lexer(ir)}
+
+var toks []Tok
+var pos int
+
+func peek() *Tok {
+\tif pos < len(toks) { return &toks[pos] }
+\treturn nil
+}
+func branch(rule string, kids []*Cst, save int) *Cst {
+\toffset := 0
+\tif len(kids) > 0 { offset = kids[0].Offset } else if save < len(toks) { offset = toks[save].Off } else if len(toks) > 0 { offset = toks[len(toks)-1].End }
+\tend := offset
+\tif len(kids) > 0 { end = kids[len(kids)-1].End }
+\treturn &Cst{Rule: rule, Children: kids, Offset: offset, End: end}
+}
+func matchLit(value, ttype string, kids *[]*Cst) bool {
+\tt := peek()
+\tif t == nil || t.Text != value { return false }
+\t*kids = append(*kids, &Cst{IsLeaf: true, TokenType: ttype, Offset: t.Off, End: t.End}); pos++; return true
+}
+func matchTok(name string, kids *[]*Cst) bool {
+\tt := peek()
+\tif t == nil || t.Kind != name { return false }
+\t*kids = append(*kids, &Cst{IsLeaf: true, TokenType: name, Offset: t.Off, End: t.End}); pos++; return true
+}
+func callRule(fn func() *Cst, kids *[]*Cst) bool {
+\tn := fn()
+\tif n == nil { return false }
+\t*kids = append(*kids, n); return true
+}
+func star(once func() bool, kids *[]*Cst) bool {
+\tfor { sp := pos; before := len(*kids); if !once() { pos = sp; *kids = (*kids)[:before]; break } }
+\treturn true
+}
+
+${ruleFns}
+
+func writeJSON(c *Cst, b *strings.Builder) {
+\tif c.IsLeaf {
+\t\tfmt.Fprintf(b, "{\\"tokenType\\":%q,\\"offset\\":%d,\\"end\\":%d}", c.TokenType, c.Offset, c.End)
+\t\treturn
+\t}
+\tfmt.Fprintf(b, "{\\"rule\\":%q,\\"children\\":[", c.Rule)
+\tfor i, k := range c.Children {
+\t\tif i > 0 { b.WriteByte(',') }
+\t\twriteJSON(k, b)
+\t}
+\tfmt.Fprintf(b, "],\\"offset\\":%d,\\"end\\":%d}", c.Offset, c.End)
+}
+
+func main() {
+\tdata, _ := io.ReadAll(os.Stdin)
+\ttoks = lex(string(data))
+\tpos = 0
+\troot := parse${ir.entry}()
+\tif root == nil || pos != len(toks) {
+\t\tfmt.Fprintf(os.Stderr, "parse error (pos %d/%d)\\n", pos, len(toks))
+\t\tos.Exit(1)
+\t}
+\tvar b strings.Builder
+\twriteJSON(root, &b)
+\tos.Stdout.WriteString(b.String())
+}
+`;
+  },
+};
diff --git a/src/target-rust.ts b/src/target-rust.ts
new file mode 100644
index 0000000..726ff1a
--- /dev/null
+++ b/src/target-rust.ts
@@ -0,0 +1,188 @@
+// The Rust Target for emit-portable. Renders the same language-agnostic ParserIR as
+// tsTarget/goTarget into a self-contained Rust program (no external crates — the char-class
+// lexer is regex-free, so it compiles with rustc alone, no Cargo/network). Its CST JSON is
+// checked byte-for-byte against the interpreter, so `emitPortableParser(grammar, rustTarget)`
+// is a real, verified Rust parser derived from the same grammar definition.
+import type { ParserIR, RdRule, PrattRule, Step, CharRange, Target } from './emit-portable.ts';
+
+const J = (v: unknown) => JSON.stringify(v);
+const rsStr = (s: string) => J(s);   // Rust and JSON string literals coincide for our ASCII vocab
+const rangeCond = (v: string, rs: CharRange[]) =>
+  rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `(${lo}..=${hi}).contains(&${v})`)).join(' || ');
+
+function lexer(ir: ParserIR): string {
+  const cases = ir.tokens.map((t) => `        if ${rangeCond('c', t.first)} {
+            let mut e = pos + 1;
+            while e < n { let cc = b[e] as u32; if !(${rangeCond('cc', t.cont)}) { break } e += 1; }
+            toks.push(Tok { kind: ${rsStr(t.name)}.to_string(), text: src[pos..e].to_string(), off: pos, end: e }); pos = e; continue;
+        }`).join('\n');
+  const punctChecks = ir.puncts.map((p) =>
+    `        if src[pos..].starts_with(${rsStr(p)}) { toks.push(Tok { kind: String::new(), text: ${rsStr(p)}.to_string(), off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
+  return `fn lex(src: &str) -> Vec<Tok> {
+    let b = src.as_bytes();
+    let n = b.len();
+    let mut toks: Vec<Tok> = Vec::new();
+    let mut pos = 0usize;
+    while pos < n {
+        let c = b[pos] as u32;
+        if c == 32 || c == 9 || c == 10 || c == 13 { pos += 1; continue; }
+${cases}
+${punctChecks}
+        panic!("lex error at {}", pos);
+    }
+    toks
+}`;
+}
+
+function rdRule(r: RdRule): string {
+  const alt = (steps: Step[]) => {
+    const conds = steps.map(stepCond).join(' && ');
+    return `        { let mut kids: Vec<Cst> = Vec::new(); if ${conds} { return Some(self.branch(${rsStr(r.name)}, kids, save)); } self.pos = save; }`;
+  };
+  return `    fn parse_${r.name}(&mut self) -> Option<Cst> {
+        let save = self.pos;
+${r.alts.map(alt).join('\n')}
+        None
+    }`;
+}
+function stepCond(s: Step): string {
+  switch (s.t) {
+    case 'lit': return `self.match_lit(${rsStr(s.value)}, ${rsStr(s.ttype)}, &mut kids)`;
+    case 'tok': return `self.match_tok(${rsStr(s.name)}, &mut kids)`;
+    case 'rule': return `self.call_rule(Parser::parse_${s.name}, &mut kids)`;
+    case 'star': return `self.star(|p, k| ${starInner(s.step)}, &mut kids)`;
+  }
+}
+function starInner(s: Step): string {
+  switch (s.t) {
+    case 'lit': return `p.match_lit(${rsStr(s.value)}, ${rsStr(s.ttype)}, k)`;
+    case 'tok': return `p.match_tok(${rsStr(s.name)}, k)`;
+    case 'rule': return `p.call_rule(Parser::parse_${s.name}, k)`;
+    case 'star': throw new Error('portable: nested star unsupported');
+  }
+}
+
+function prattRule(r: PrattRule): string {
+  const binArms = r.binary.map((b) => `${rsStr(b.op)} => Some((${b.lbp}, ${b.rbp}))`).join(', ');
+  const preArms = r.prefix.map((p) => `${rsStr(p.op)} => Some(${p.rbp})`).join(', ');
+  const atomArm = r.atomToks.map(rsStr).join(' | ');
+  const g = r.group;
+  return `    fn parse_${r.name}(&mut self) -> Option<Cst> { self.${r.name}_bp(0) }
+    fn ${r.name}_bin(op: &str) -> Option<(i64, i64)> { match op { ${binArms}${binArms ? ', ' : ''}_ => None } }
+    fn ${r.name}_pre(op: &str) -> Option<i64> { match op { ${preArms}${preArms ? ', ' : ''}_ => None } }
+    fn ${r.name}_atom(kind: &str) -> bool { matches!(kind, ${atomArm || '""'}) }
+    fn ${r.name}_bp(&mut self, min_bp: i64) -> Option<Cst> {
+        let mut left = self.${r.name}_nud()?;
+        loop {
+            let t = match self.peek() { Some(t) => t, None => break };
+            let (lbp, rbp) = match Parser::${r.name}_bin(&t.text) { Some(x) => x, None => break };
+            if lbp <= min_bp { break; }
+            let led_save = self.pos;
+            self.pos += 1;
+            let op_leaf = Cst::leaf("$operator", t.off, t.end);
+            let rhs = match self.${r.name}_bp(rbp) { Some(r) => r, None => { self.pos = led_save; break; } };
+            let (off, end) = (left.offset, rhs.end);
+            left = Cst::node(${rsStr(r.name)}, vec![left, op_leaf, rhs], off, end);
+        }
+        Some(left)
+    }
+    fn ${r.name}_nud(&mut self) -> Option<Cst> {
+        let t = self.peek()?;
+        if Parser::${r.name}_atom(&t.kind) {
+            self.pos += 1;
+            return Some(Cst::node(${rsStr(r.name)}, vec![Cst::leaf(&t.kind, t.off, t.end)], t.off, t.end));
+        }
+${g ? `        if t.text == ${rsStr(g.open)} {
+            let save = self.pos; self.pos += 1;
+            let inner = self.${r.name}_bp(0);
+            let c = self.peek();
+            match (inner, c) {
+                (Some(inner), Some(c)) if c.text == ${rsStr(g.close)} => {
+                    self.pos += 1;
+                    let (off, end) = (t.off, c.end);
+                    return Some(Cst::node(${rsStr(r.name)}, vec![Cst::leaf("$punct", t.off, t.end), inner, Cst::leaf("$punct", c.off, c.end)], off, end));
+                }
+                _ => { self.pos = save; return None; }
+            }
+        }` : ''}
+        if let Some(pbp) = Parser::${r.name}_pre(&t.text) {
+            let save = self.pos; self.pos += 1;
+            let op_leaf = Cst::leaf("$operator", t.off, t.end);
+            match self.${r.name}_bp(pbp) {
+                Some(operand) => { let (off, end) = (t.off, operand.end); return Some(Cst::node(${rsStr(r.name)}, vec![op_leaf, operand], off, end)); }
+                None => { self.pos = save; return None; }
+            }
+        }
+        None
+    }`;
+}
+
+export const rustTarget: Target = {
+  name: 'rust',
+  ext: 'rs',
+  render(ir: ParserIR): string {
+    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r) : rdRule(r))).join('\n\n');
+    return `// GENERATED by emit-portable.ts (rustTarget) — parser for grammar "${ir.grammarName}".
+#![allow(non_snake_case)]
+use std::io::Read;
+
+#[derive(Clone)]
+struct Tok { kind: String, text: String, off: usize, end: usize }
+
+struct Cst { rule: String, children: Vec<Cst>, is_leaf: bool, token_type: String, offset: usize, end: usize }
+impl Cst {
+    fn leaf(tt: &str, off: usize, end: usize) -> Cst { Cst { rule: String::new(), children: Vec::new(), is_leaf: true, token_type: tt.to_string(), offset: off, end } }
+    fn node(rule: &str, children: Vec<Cst>, offset: usize, end: usize) -> Cst { Cst { rule: rule.to_string(), children, is_leaf: false, token_type: String::new(), offset, end } }
+}
+
+${lexer(ir)}
+
+struct Parser { toks: Vec<Tok>, pos: usize }
+impl Parser {
+    fn peek(&self) -> Option<Tok> { if self.pos < self.toks.len() { Some(self.toks[self.pos].clone()) } else { None } }
+    fn branch(&self, rule: &str, kids: Vec<Cst>, save: usize) -> Cst {
+        let offset = if !kids.is_empty() { kids[0].offset } else if save < self.toks.len() { self.toks[save].off } else if !self.toks.is_empty() { self.toks[self.toks.len() - 1].end } else { 0 };
+        let end = if !kids.is_empty() { kids[kids.len() - 1].end } else { offset };
+        Cst::node(rule, kids, offset, end)
+    }
+    fn match_lit(&mut self, value: &str, ttype: &str, kids: &mut Vec<Cst>) -> bool {
+        match self.peek() { Some(t) if t.text == value => { kids.push(Cst::leaf(ttype, t.off, t.end)); self.pos += 1; true } _ => false }
+    }
+    fn match_tok(&mut self, name: &str, kids: &mut Vec<Cst>) -> bool {
+        match self.peek() { Some(t) if t.kind == name => { kids.push(Cst::leaf(name, t.off, t.end)); self.pos += 1; true } _ => false }
+    }
+    fn call_rule(&mut self, f: fn(&mut Parser) -> Option<Cst>, kids: &mut Vec<Cst>) -> bool {
+        match f(self) { Some(n) => { kids.push(n); true } None => false }
+    }
+    fn star(&mut self, once: fn(&mut Parser, &mut Vec<Cst>) -> bool, kids: &mut Vec<Cst>) -> bool {
+        loop { let sp = self.pos; let before = kids.len(); if !once(self, kids) { self.pos = sp; kids.truncate(before); break; } }
+        true
+    }
+
+${ruleFns}
+}
+
+fn write_json(c: &Cst, out: &mut String) {
+    if c.is_leaf {
+        out.push_str(&format!("{{\\"tokenType\\":\\"{}\\",\\"offset\\":{},\\"end\\":{}}}", c.token_type, c.offset, c.end));
+        return;
+    }
+    out.push_str(&format!("{{\\"rule\\":\\"{}\\",\\"children\\":[", c.rule));
+    for (i, k) in c.children.iter().enumerate() { if i > 0 { out.push(','); } write_json(k, out); }
+    out.push_str(&format!("],\\"offset\\":{},\\"end\\":{}}}", c.offset, c.end));
+}
+
+fn main() {
+    let mut src = String::new();
+    std::io::stdin().read_to_string(&mut src).unwrap();
+    let toks = lex(&src);
+    let n = toks.len();
+    let mut p = Parser { toks, pos: 0 };
+    match p.parse_${ir.entry}() {
+        Some(root) if p.pos == n => { let mut out = String::new(); write_json(&root, &mut out); print!("{}", out); }
+        _ => { eprintln!("parse error (pos {}/{})", p.pos, n); std::process::exit(1); }
+    }
+}
+`;
+  },
+};
diff --git a/src/target-ts.ts b/src/target-ts.ts
new file mode 100644
index 0000000..ab37220
--- /dev/null
+++ b/src/target-ts.ts
@@ -0,0 +1,163 @@
+// The TypeScript Target for emit-portable. Renders the language-agnostic ParserIR into a
+// self-contained TS parser: a char-class lexer, a backtracking recursive-descent core, a
+// Pratt expression engine, and a CST→JSON printer over stdin. It is the reference rendering
+// — its CST is checked byte-for-byte against the interpreter (createParser), so a divergence
+// in the portable logic shows up here before Go/Rust are even compiled.
+import type { ParserIR, RdRule, PrattRule, Step, CharRange, Target } from './emit-portable.ts';
+
+const J = (v: unknown) => JSON.stringify(v);
+const rangeCond = (v: string, rs: CharRange[]) =>
+  rs.map(([lo, hi]) => (lo === hi ? `${v} === ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ');
+
+function lexer(ir: ParserIR): string {
+  const cases = ir.tokens.map((t) => `    if (${rangeCond('c', t.first)}) {
+      let e = pos + 1;
+      while (e < n) { const cc = src.charCodeAt(e); if (!(${rangeCond('cc', t.cont)})) break; e++; }
+      toks.push({ kind: ${J(t.name)}, text: src.slice(pos, e), off: pos, end: e }); pos = e; continue;
+    }`).join('\n');
+  const punctChecks = ir.puncts.map((p) =>
+    `    if (src.startsWith(${J(p)}, pos)) { toks.push({ kind: '', text: ${J(p)}, off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
+  return `function lex(src: string): Tok[] {
+  const toks: Tok[] = [];
+  const n = src.length;
+  let pos = 0;
+  while (pos < n) {
+    const c = src.charCodeAt(pos);
+    if (c === 32 || c === 9 || c === 10 || c === 13) { pos++; continue; }
+${cases}
+${punctChecks}
+    throw new Error('lex error at ' + pos + ': ' + JSON.stringify(src[pos]));
+  }
+  return toks;
+}`;
+}
+
+function rdRule(r: RdRule): string {
+  const alt = (steps: Step[]) => {
+    const conds = steps.map(stepCond).join(' && ');
+    return `  { const kids: Cst[] = []; if (${conds}) return branch(${J(r.name)}, kids, save); pos = save; }`;
+  };
+  return `function parse${r.name}(): Node | null {
+  const save = pos;
+${r.alts.map(alt).join('\n')}
+  return null;
+}`;
+}
+function stepCond(s: Step): string {
+  switch (s.t) {
+    case 'lit': return `matchLit(${J(s.value)}, ${J(s.ttype)}, kids)`;
+    case 'tok': return `matchTok(${J(s.name)}, kids)`;
+    case 'rule': return `callRule(parse${s.name}, kids)`;
+    case 'star': return `star(() => ${stepCond(s.step)}, kids)`;
+  }
+}
+
+function prattRule(r: PrattRule): string {
+  const BIN = `{ ${r.binary.map((b) => `${J(b.op)}: { lbp: ${b.lbp}, rbp: ${b.rbp} }`).join(', ')} }`;
+  const PRE = `{ ${r.prefix.map((p) => `${J(p.op)}: ${p.rbp}`).join(', ')} }`;
+  const atomSet = `new Set([${r.atomToks.map(J).join(', ')}])`;
+  const group = r.group;
+  return `const ${r.name}_BIN: Record<string, { lbp: number; rbp: number }> = ${BIN};
+const ${r.name}_PRE: Record<string, number> = ${PRE};
+const ${r.name}_ATOM = ${atomSet};
+function parse${r.name}(): Node | null { return ${r.name}_bp(0); }
+function ${r.name}_bp(minBp: number): Node | null {
+  let left = ${r.name}_nud();
+  if (left === null) return null;
+  for (;;) {
+    const t = peek();
+    if (t === null) break;
+    const info = ${r.name}_BIN[t.text];
+    if (info === undefined || info.lbp <= minBp) break;
+    const ledSave = pos;
+    pos++;
+    const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end };
+    const rhs = ${r.name}_bp(info.rbp);
+    if (rhs === null) { pos = ledSave; break; }
+    left = { rule: ${J(r.name)}, children: [left, opLeaf, rhs], offset: left.offset, end: rhs.end };
+  }
+  return left;
+}
+function ${r.name}_nud(): Node | null {
+  const t = peek();
+  if (t === null) return null;
+  if (${r.name}_ATOM.has(t.kind)) { pos++; return { rule: ${J(r.name)}, children: [{ tokenType: t.kind, offset: t.off, end: t.end }], offset: t.off, end: t.end }; }
+${group ? `  if (t.text === ${J(group.open)}) {
+    const save = pos; pos++;
+    const inner = ${r.name}_bp(0);
+    const c = peek();
+    if (inner === null || c === null || c.text !== ${J(group.close)}) { pos = save; return null; }
+    pos++;
+    return { rule: ${J(r.name)}, children: [{ tokenType: '$punct', offset: t.off, end: t.end }, inner, { tokenType: '$punct', offset: c.off, end: c.end }], offset: t.off, end: c.end };
+  }` : ''}
+  const pbp = ${r.name}_PRE[t.text];
+  if (pbp !== undefined) {
+    const save = pos; pos++;
+    const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end };
+    const operand = ${r.name}_bp(pbp);
+    if (operand === null) { pos = save; return null; }
+    return { rule: ${J(r.name)}, children: [opLeaf, operand], offset: t.off, end: operand.end };
+  }
+  return null;
+}`;
+}
+
+export const tsTarget: Target = {
+  name: 'typescript',
+  ext: 'ts',
+  render(ir: ParserIR): string {
+    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r) : rdRule(r))).join('\n\n');
+    return `// GENERATED by emit-portable.ts (tsTarget) — parser for grammar "${ir.grammarName}".
+import { readFileSync } from 'node:fs';
+
+type Tok = { kind: string; text: string; off: number; end: number };
+type Leaf = { tokenType: string; offset: number; end: number };
+type Node = { rule: string; children: Cst[]; offset: number; end: number };
+type Cst = Node | Leaf;
+
+${lexer(ir)}
+
+let toks: Tok[] = [];
+let pos = 0;
+function peek(): Tok | null { return pos < toks.length ? toks[pos] : null; }
+function curOff(): number { return pos < toks.length ? toks[pos].off : (toks.length > 0 ? toks[toks.length - 1].end : 0); }
+function branch(rule: string, kids: Cst[], save: number): Node {
+  const offset = kids.length > 0 ? kids[0].offset : (save < toks.length ? toks[save].off : curOff());
+  const end = kids.length > 0 ? kids[kids.length - 1].end : offset;
+  return { rule, children: kids, offset, end };
+}
+function matchLit(value: string, ttype: string, kids: Cst[]): boolean {
+  const t = peek();
+  if (t === null || t.text !== value) return false;
+  kids.push({ tokenType: ttype, offset: t.off, end: t.end }); pos++; return true;
+}
+function matchTok(name: string, kids: Cst[]): boolean {
+  const t = peek();
+  if (t === null || t.kind !== name) return false;
+  kids.push({ tokenType: name, offset: t.off, end: t.end }); pos++; return true;
+}
+function callRule(fn: () => Node | null, kids: Cst[]): boolean {
+  const n = fn();
+  if (n === null) return false;
+  kids.push(n); return true;
+}
+function star(once: () => boolean, kids: Cst[]): boolean {
+  for (;;) { const sp = pos; const before = kids.length; if (!once()) { pos = sp; kids.length = before; break; } }
+  return true;
+}
+
+${ruleFns}
+
+function offsetEnd(n: Cst): number { return n.end; }
+const src = readFileSync(0, 'utf8');
+toks = lex(src);
+pos = 0;
+const root = parse${ir.entry}();
+if (root === null || pos !== toks.length) {
+  process.stderr.write('parse error (pos ' + pos + '/' + toks.length + ')\\n');
+  process.exit(1);
+}
+process.stdout.write(JSON.stringify(root));
+`;
+  },
+};
diff --git a/test/check.ts b/test/check.ts
index cf61a77..97b5a27 100644
--- a/test/check.ts
+++ b/test/check.ts
@@ -27,6 +27,7 @@ const GATES: Gate[] = [
   { group: 'emit-parity', name: 'emit-reject-messages', args: ['test/emit-reject-messages.ts'] },
   { group: 'emit-parity', name: 'emit-lexer-verify', args: ['test/emit-lexer-verify.ts'] },
   { group: 'emit-parity', name: 'emit-tsc-gate', args: ['test/emit-tsc-gate.ts'] },
+  { group: 'emit-parity', name: 'portable-targets', args: ['test/portable-targets.ts'] },
   { group: 'core', name: 'multi-doc', args: ['test/multi-doc.ts'] },
   { group: 'core', name: 'recovery', args: ['test/recovery.ts'] },
   { group: 'core', name: 'incremental-grammars', args: ['test/incremental-grammars.ts'] },
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
new file mode 100644
index 0000000..5d4e3b0
--- /dev/null
+++ b/test/portable-targets.ts
@@ -0,0 +1,116 @@
+// Gate: the TARGET-AGNOSTIC emitter (issue #6) — `emitPortableParser(grammar, target)`
+// derives a parser in EACH target language that produces the byte-identical CST the
+// interpreter (createParser) does. This is the agnosticism proof by EXECUTION: the same
+// examples/calc.ts grammar is rendered to TypeScript, Go, and Rust; the Go and Rust
+// sources are COMPILED and RUN, and every parser's CST output is compared, node-for-node,
+// against the createParser oracle over an adversarial corpus (operator precedence /
+// associativity, prefix chains, nested grouping, multi-statement programs, and the empty
+// program), plus reject-parity on malformed input.
+//
+// Go/Rust toolchains are optional: a missing `go` or `rustc` is logged and skipped (the
+// TS rendering, which needs only node, always runs) — the same graceful-degrade pattern
+// the external-corpus gates use, so this stays green on a machine without them.
+import { execFileSync } from 'node:child_process';
+import { mkdirSync, rmSync, writeFileSync } from 'node:fs';
+import { createParser } from '../src/gen-parser.ts';
+import { emitPortableParser } from '../src/emit-portable.ts';
+import { tsTarget } from '../src/target-ts.ts';
+import { goTarget } from '../src/target-go.ts';
+import { rustTarget } from '../src/target-rust.ts';
+
+const grammar = (await import('../examples/calc.ts')).default;
+const oracle = createParser(grammar);
+
+// Accepted inputs — each must parse to the SAME CST in every language.
+const ACCEPT = [
+  '1;', 'a;', '',                               // atoms + the empty program
+  '1 + 2 * 3;', '1 * 2 + 3;',                   // precedence both directions
+  '1 - 2 - 3;', 'a / b / c;', '1 + 2 + 3 + 4;', // left-associativity
+  '-a;', '-(-a);', '- - a;',                    // prefix + prefix chains
+  '-a * b;', '-a + b * c;', '-(a + b) * c;',    // prefix vs infix vs grouping
+  '(1);', '((a));', '(1 + 2) * (3 - 4);',       // nested grouping
+  'a * b + c * d - e / f;',                     // mixed precedence ladder
+  'let x = 1; let y = x + 2 * x; (y);',         // multi-statement program
+  'let z = -(a * b) / (c - -d);', 'foo; bar; baz;',
+];
+// Malformed inputs — every parser must REJECT (the oracle throws; the emitted parsers exit 1).
+const REJECT = ['1 +;', '(1;', '1 2;', 'let = 1;', ') ;', '* a;', 'let x 1;'];
+
+type Json = unknown;
+const sortKeys = (o: Json): Json =>
+  Array.isArray(o) ? o.map(sortKeys)
+  : (o && typeof o === 'object') ? Object.fromEntries(Object.keys(o as object).sort().map((k) => [k, sortKeys((o as Record<string, Json>)[k])]))
+  : o;
+const canon = (o: Json) => JSON.stringify(sortKeys(o));
+
+function oracleOutcome(src: string): { ok: true; cst: string } | { ok: false } {
+  try { return { ok: true, cst: canon(oracle.parse(src)) }; }
+  catch { return { ok: false }; }
+}
+
+const TMP = '/tmp/portable-targets';
+rmSync(TMP, { recursive: true, force: true });
+mkdirSync(TMP, { recursive: true });
+
+function have(cmd: string, args: string[]): boolean {
+  try { execFileSync(cmd, args, { stdio: 'pipe' }); return true; } catch { return false; }
+}
+
+// A runnable target: writes its source, (optionally) compiles, and returns a `run(src)->{ok,cst?}`.
+type Runner = { label: string; run: (src: string) => { ok: true; cst: string } | { ok: false } };
+
+function tsRunner(): Runner {
+  const f = `${TMP}/calc.ts`;
+  writeFileSync(f, emitPortableParser(grammar, tsTarget));
+  return { label: 'typescript', run: (src) => runProc('node', [f], src) };
+}
+function goRunner(): Runner | null {
+  if (!have('go', ['version'])) { console.log('  go: (toolchain absent — skipped)'); return null; }
+  const dir = `${TMP}/go`; mkdirSync(dir, { recursive: true });
+  writeFileSync(`${dir}/main.go`, emitPortableParser(grammar, goTarget));
+  writeFileSync(`${dir}/go.mod`, 'module calc\n\ngo 1.21\n');
+  execFileSync('go', ['build', '-o', `${dir}/calc`, '.'], { cwd: dir, stdio: 'pipe' });
+  return { label: 'go', run: (src) => runProc(`${dir}/calc`, [], src) };
+}
+function rustRunner(): Runner | null {
+  if (!have('rustc', ['--version'])) { console.log('  rust: (toolchain absent — skipped)'); return null; }
+  const dir = `${TMP}/rust`; mkdirSync(dir, { recursive: true });
+  const f = `${dir}/main.rs`;
+  writeFileSync(f, emitPortableParser(grammar, rustTarget));
+  execFileSync('rustc', ['-O', f, '-o', `${dir}/calc`], { stdio: 'pipe' });
+  return { label: 'rust', run: (src) => runProc(`${dir}/calc`, [], src) };
+}
+function runProc(cmd: string, args: string[], src: string): { ok: true; cst: string } | { ok: false } {
+  try { return { ok: true, cst: canon(JSON.parse(execFileSync(cmd, args, { input: src, stdio: ['pipe', 'pipe', 'pipe'] }).toString())) }; }
+  catch { return { ok: false }; }
+}
+
+const runners: Runner[] = [tsRunner(), goRunner(), rustRunner()].filter((r): r is Runner => r !== null);
+
+let failures = 0;
+for (const r of runners) {
+  let acc = 0, rej = 0;
+  for (const src of ACCEPT) {
+    const want = oracleOutcome(src);
+    const got = r.run(src);
+    if (want.ok && got.ok && want.cst === got.cst) { acc++; continue; }
+    failures++;
+    console.log(`  ${r.label}: ACCEPT mismatch on ${JSON.stringify(src)}`);
+    if (want.ok && got.ok) { console.log(`      want ${want.cst.slice(0, 140)}`); console.log(`      got  ${got.cst.slice(0, 140)}`); }
+    else console.log(`      want.ok=${want.ok} got.ok=${got.ok}`);
+  }
+  for (const src of REJECT) {
+    const want = oracleOutcome(src);
+    const got = r.run(src);
+    if (!want.ok && !got.ok) { rej++; continue; }
+    failures++;
+    console.log(`  ${r.label}: REJECT mismatch on ${JSON.stringify(src)} (oracle ok=${want.ok}, ${r.label} ok=${got.ok})`);
+  }
+  console.log(`  ${r.label}: ${acc}/${ACCEPT.length} accept ≡ oracle · ${rej}/${REJECT.length} reject ≡ oracle`);
+}
+
+if (failures > 0) {
+  console.error(`\n✗ portable targets diverge from the interpreter (${failures} case(s))`);
+  process.exit(1);
+}
+console.log(`\n✓ ${runners.map((r) => r.label).join(' + ')} parsers derived from one grammar ≡ interpreter CST (compiled & run)`);

From 3059804a49879d2f799e02ff4ce773a46c958574 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Sun, 21 Jun 2026 18:47:46 +0800
Subject: [PATCH 04/27] Remove gen-ast-types: the typed-CST generator had no
 load-bearing consumer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`gen-ast-types.ts` emitted `<grammar>.cst-types.ts` (discriminated-union typing of
the CST). Those artifacts are gitignored build outputs, and nothing depended on
them: the only consumer was a non-gated smoke test, and gen-cst-match's `importFrom`
parameter (the cst-types path) was never used in its body — so the gated cst-match
subsystem is fully independent of cst-types.

Removed the generator and its smoke test, dropped the cst-types emit + the dead
`importFrom` parameter from the gen pipeline, and cleaned the references
(.gitignore/.gitattributes/CI comment/README diagram/emit-corpus filter).

Verified: `npm run gen` emits no cst-types and the committed artifacts stay in
sync; src type-checks; cst-match-totality 31356/0 and the full suite 42/42.
---
 .gitattributes           |   4 +-
 .github/workflows/ci.yml |   6 +-
 .gitignore               |   3 +-
 README.md                |   3 +-
 src/cli.ts               |   8 +-
 src/gen-ast-types.ts     | 277 ---------------------------------------
 src/gen-cst-match.ts     |   5 +-
 test/ast-types-smoke.ts  | 184 --------------------------
 test/emit-corpus.ts      |   8 +-
 9 files changed, 15 insertions(+), 483 deletions(-)
 delete mode 100644 src/gen-ast-types.ts
 delete mode 100644 test/ast-types-smoke.ts

diff --git a/.gitattributes b/.gitattributes
index eb61982..e141d46 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,8 +1,8 @@
 # Generated artifacts (npm run gen) — committed for consumers, CI-gated for
 # staleness, collapsed in GitHub diffs. The grammar sources (*.ts at the repo
 # root) are the hand-written truth; everything below is derived from them.
-# (*.cst-types.ts / *.cst-match.ts are generated too but NOT committed — see
-# .gitignore; they regenerate locally and in CI before typecheck/gates.)
+# (*.cst-match.ts is generated too but NOT committed — see .gitignore;
+# it regenerates locally and in CI before typecheck/gates.)
 *.tmLanguage.json linguist-generated=true
 *.language-configuration.json linguist-generated=true
 *.monarch.json linguist-generated=true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4816031..c710619 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,9 +29,9 @@ jobs:
 
       - run: npm ci
 
-      # Regenerate every grammar's artifacts FIRST: the uncommitted ones
-      # (*.cst-types.ts / *.cst-match.ts, gitignored) must exist before Typecheck
-      # and the gates, which import them. Then fail if any COMMITTED artifact
+      # Regenerate every grammar's artifacts FIRST: the uncommitted one
+      # (*.cst-match.ts, gitignored) must exist before Typecheck
+      # and the gates, which import it. Then fail if any COMMITTED artifact
       # drifts from the regenerated output (someone edited a grammar but forgot
       # to regenerate). Covers all grammars (sources at the repo root) + the
       # tree-sitter packages.
diff --git a/.gitignore b/.gitignore
index bb05bd4..72189b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,7 +18,6 @@ tree-sitter/*/src/node-types.json
 tree-sitter/*/src/tree_sitter/
 tree-sitter/*/*.wasm
 
-# Generated CST consumer artifacts (npm run gen) — derived from the grammar, not
+# Generated CST consumer artifact (npm run gen) — derived from the grammar, not
 # committed: generate locally / in CI before typecheck and gates.
-*.cst-types.ts
 *.cst-match.ts
diff --git a/README.md b/README.md
index ea56dd7..a8f69cf 100644
--- a/README.md
+++ b/README.md
@@ -375,8 +375,7 @@ typescript.ts                one grammar (TypeScript combinator API)
         ├─ src/gen-tm.ts ───────────▶ typescript.tmLanguage.json            (TextMate highlighter)
         ├─ src/gen-vscode-config.ts ▶ typescript.language-configuration.json (editor behavior)
         ├─ src/gen-treesitter.ts ───▶ tree-sitter/  (grammar.js + highlights.scm + scanner.c)
-        ├─ src/gen-monarch.ts ──────▶ typescript.monarch.json
-        └─ src/gen-ast-types.ts ────▶ typescript.cst-types.ts
+        └─ src/gen-monarch.ts ──────▶ typescript.monarch.json
 
 shared  src/grammar-utils.ts          structural helpers used across stages
         src/api.ts, types.ts          the grammar's combinator + type surface
diff --git a/src/cli.ts b/src/cli.ts
index 9752e16..6a567df 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -4,7 +4,6 @@ import { generateTmLanguage, generateMarkupInjection, generateAliasGrammar, gene
 import { generateLanguageConfig } from './gen-vscode-config.ts';
 import { generateTreeSitter } from './gen-treesitter.ts';
 import { generateMonarch } from './gen-monarch.ts';
-import { generateAstTypes } from './gen-ast-types.ts';
 import { generateCstMatch } from './gen-cst-match.ts';
 import type { CstGrammar, RuleExpr } from './types.ts';
 import { tokenPatternSource } from './token-pattern.ts';
@@ -115,11 +114,8 @@ emit(`tree-sitter/${langName}/package.json`,
 // Monaco Monarch tokenizer (markup-aware: emits a tag/text/raw-text state machine).
 emit(`${langName}.monarch.json`, JSON.stringify(generateMonarch(grammar), null, 2));
 
-// CST node types (TypeScript) — generic over rules, fine for markup too.
-emit(`${langName}.cst-types.ts`, generateAstTypes(grammar));
-
-// Per-arm CST destructurers (value-level sibling of the types above).
-emit(`${langName}.cst-match.ts`, generateCstMatch(grammar, `./${langName}.cst-types.ts`));
+// Per-arm CST destructurers.
+emit(`${langName}.cst-match.ts`, generateCstMatch(grammar));
 
 function formatExpr(expr: RuleExpr): string {
   switch (expr.type) {
diff --git a/src/gen-ast-types.ts b/src/gen-ast-types.ts
deleted file mode 100644
index d76d124..0000000
--- a/src/gen-ast-types.ts
+++ /dev/null
@@ -1,277 +0,0 @@
-// Generate a TypeScript `.d.ts`-style source describing the *typed* CST that
-// `createParser(grammar).parse()` (gen-parser.ts) produces for THIS grammar.
-//
-// The runtime CST is generic — `CstNode.rule` and `CstLeaf.tokenType` are both
-// `string`. This generator specialises those `string`s into the actual set of
-// rule names / token types the grammar can yield, so a consumer gets:
-//   * a discriminated union `Node` keyed on the `rule` literal (exhaustive switch)
-//   * a `TokenType` union for `CstLeaf.tokenType`
-//   * per-rule structural typing of `children` (which child kinds can appear)
-//
-// Everything is DERIVED from the grammar (CstGrammar) — nothing TypeScript- or
-// language-specific is hardcoded, matching the engine's language-agnostic rule.
-// Field NAMES are deliberately absent: the grammar has no labels on elements
-// (e.g. `[$, '.', Ident]`), so children are typed positionally-by-kind, not as
-// named accessors. See the note emitted into the output + the report.
-
-import type { CstGrammar, RuleExpr } from './types.ts';
-import { isKeywordLiteral } from './grammar-utils.ts';
-
-// The synthetic leaf/node `tokenType`s the lexer + parser emit in addition to
-// the grammar's declared token names. Kept in sync with gen-lexer.ts / gen-parser.ts
-// (grep the literal `$...` strings there). `$template` is a *node* rule the parser
-// builds for interpolated templates, but it surfaces in `CstChild` positions and
-// as a `tokenType` is harmless to include; we also emit a `$template` Node below.
-const SYNTHETIC_LEAF_TYPES = [
-  '$keyword',        // matchLiteral: keyword-shaped literal
-  '$punct',          // matchLiteral: punctuation literal
-  '$operator',       // Pratt: infix/prefix/postfix operator leaf
-  '$templateHead',   // lexer: template up to first `${`
-  '$templateMiddle', // lexer: `}` … `${`
-  '$templateTail',   // lexer: `}` … closing backtick
-] as const;
-
-// `$template` is the synthetic *node* the parser emits for an interpolated
-// template literal (gen-parser.ts parseTemplateExpr).
-const SYNTHETIC_TEMPLATE_NODE = '$template';
-
-/** A child element a node can contain: either a Node (by rule) or a Leaf (by token type). */
-type ChildKind =
-  | { kind: 'node'; rule: string }
-  | { kind: 'leaf'; tokenType: string };
-
-/**
- * Generate the typed-CST TypeScript source for `grammar`.
- * Returns a self-contained module string (no imports) suitable for writing to a
- * `.ts`/`.d.ts` file and `import`ing or type-checking.
- */
-export function generateAstTypes(grammar: CstGrammar): string {
-  // The grammar's template token (if any): a ref to it can yield either a plain
-  // leaf of that token type OR a `$template` node (parseTemplateExpr) / a
-  // `$templateHead` leaf — mirror gen-parser's matchExpr 'ref' branch.
-  const templateTokenNames = new Set(grammar.tokens.filter(t => t.template).map(t => t.name));
-
-  // ── 1. Token-type union ──
-  // Declared token names + the synthetic leaf types the engine injects.
-  const leafTokenTypes = [
-    ...grammar.tokens.map(t => t.name).sort(),
-    ...SYNTHETIC_LEAF_TYPES,
-  ];
-
-  // ── 2/3. Per-rule child kinds ──
-  // For each rule, the set of child element kinds a node of that rule can hold,
-  // collapsed across the rule body's structure. The parser flattens quantifiers,
-  // `sep`, `alt`, and `group` straight into the parent's `children` array, so a
-  // node's children are a *sequence drawn from* this set (not a fixed tuple).
-  const childKindsByRule = new Map<string, ChildKind[]>();
-  for (const rule of grammar.rules) {
-    childKindsByRule.set(rule.name, deriveChildKinds(rule.name, grammar));
-  }
-
-  // ── Emit ──
-  const lines: string[] = [];
-  lines.push('// AUTO-GENERATED by src/gen-ast-types.ts — do not edit by hand.');
-  lines.push('// Typed CST for the parser output of this grammar.');
-  lines.push('//');
-  lines.push('// LIMITATION — no named fields: the grammar carries no labels on rule');
-  lines.push('// elements (e.g. `[$, \'.\', Ident]` has no field names), so children are');
-  lines.push('// typed by *kind* (which Node rules / leaf token types can appear), not as');
-  lines.push('// named accessors like `node.name`/`node.value`. Named-field accessors would');
-  lines.push('// require adding field labels to the grammar DSL (a future enrichment).');
-  lines.push('');
-
-  // Position info (mirrors CstNode/CstLeaf in gen-parser.ts).
-  lines.push('export interface CstPos {');
-  lines.push('  offset: number;');
-  lines.push('  end: number;');
-  lines.push('}');
-  lines.push('');
-
-  // Token-type union.
-  lines.push('/** Every `tokenType` a CstLeaf in this grammar can carry. */');
-  lines.push(`export type TokenType =\n${unionBody(leafTokenTypes.map(quote))};`);
-  lines.push('');
-
-  // Generic leaf — narrowable on `tokenType`.
-  lines.push('/** A terminal: one lexer token (or synthetic keyword/punct/operator leaf). */');
-  lines.push('export interface CstLeaf extends CstPos {');
-  lines.push('  tokenType: TokenType;');
-  lines.push('}');
-  lines.push('');
-
-  // The `$template` synthetic node, if the grammar has a template token.
-  const hasTemplate = templateTokenNames.size > 0;
-  if (hasTemplate) {
-    lines.push('/** Synthetic node the parser builds for an interpolated template literal. */');
-    lines.push(`export interface ${nodeIfaceName(SYNTHETIC_TEMPLATE_NODE)} extends CstPos {`);
-      lines.push(`  rule: ${quote(SYNTHETIC_TEMPLATE_NODE)};`);
-    // A $template node holds template leaves plus interpolated expression nodes.
-    lines.push('  children: CstChild[];');
-    lines.push('}');
-    lines.push('');
-  }
-
-  // Per-rule interfaces.
-  for (const rule of grammar.rules) {
-    const kinds = childKindsByRule.get(rule.name)!;
-    lines.push(`/** \`${rule.name}\` node. Children (flattened, in source order) are drawn from: */`);
-    lines.push(`export interface ${nodeIfaceName(rule.name)} extends CstPos {`);
-      lines.push(`  rule: ${quote(rule.name)};`);
-    lines.push(`  children: ${childArrayType(kinds)};`);
-    lines.push('}');
-    lines.push('');
-  }
-
-  // Discriminated union of all node interfaces (keyed on `rule`).
-  const nodeMembers = [
-    ...(hasTemplate ? [nodeIfaceName(SYNTHETIC_TEMPLATE_NODE)] : []),
-    ...grammar.rules.map(r => nodeIfaceName(r.name)),
-  ];
-  lines.push('/** Discriminated union of every node kind. Switch on `node.rule` for exhaustiveness. */');
-  lines.push(`export type CstNode =\n${unionBody(nodeMembers)};`);
-  lines.push('');
-
-  // The `rule` discriminant as a standalone union (handy for callers).
-  const ruleLiterals = [
-    ...(hasTemplate ? [SYNTHETIC_TEMPLATE_NODE] : []),
-    ...grammar.rules.map(r => r.name),
-  ];
-  lines.push('/** Every `rule` discriminant value (the keys of the CstNode union). */');
-  lines.push(`export type RuleName =\n${unionBody(ruleLiterals.map(quote))};`);
-  lines.push('');
-
-  // CstChild.
-  lines.push('/** Any CST element: a node or a leaf. */');
-  lines.push('export type CstChild = CstNode | CstLeaf;');
-  lines.push('');
-
-  // A by-rule lookup type, so callers can write `NodeOf<\'Expr\'>`.
-  lines.push('/** Narrow the CstNode union to the node for a given rule name. */');
-  lines.push('export type NodeOf<R extends RuleName> = Extract<CstNode, { rule: R }>;');
-  lines.push('');
-
-  return lines.join('\n');
-}
-
-// ── Child-kind derivation ──
-
-/**
- * The set of child element kinds a node of `ruleName` can directly contain.
- *
- * The parser (gen-parser.ts) flattens `seq`/`alt`/`group`/`quantifier`/`sep`
- * straight into the parent node's `children` array, so we walk the whole body
- * and union every terminal/ref it can reach as a direct child:
- *   - literal           → `$keyword` (keyword-shaped) or `$punct` (punctuation) leaf
- *   - ref → token       → a leaf of that token name (+ `$template`/`$templateHead`
- *                          if it is the template token)
- *   - ref → rule        → that rule's Node
- *   - op/prefix/postfix → handled below for Pratt rules (operator leaves + self-ref)
- *
- * Pratt / left-recursive rules additionally build children the body doesn't show
- * literally — `[lhs, opLeaf, rhs]`, `[opLeaf, rhs]`, `[lhs, opLeaf]` — where `lhs`
- * and `rhs` are nodes of the SAME rule and `opLeaf` is a `$operator` leaf. We add
- * a self Node-ref and `$operator` whenever the body contains an op/prefix/postfix
- * marker, so the type matches what the parser actually emits.
- */
-function deriveChildKinds(ruleName: string, grammar: CstGrammar): ChildKind[] {
-  const tokenNames = new Set(grammar.tokens.map(t => t.name));
-  const ruleNames = new Set(grammar.rules.map(r => r.name));
-  const templateTokenNames = new Set(grammar.tokens.filter(t => t.template).map(t => t.name));
-  const rule = grammar.rules.find(r => r.name === ruleName)!;
-
-  // De-dup by a stable key.
-  const seen = new Map<string, ChildKind>();
-  const add = (c: ChildKind) => {
-    const key = c.kind === 'node' ? `n:${c.rule}` : `l:${c.tokenType}`;
-    if (!seen.has(key)) seen.set(key, c);
-  };
-
-  let sawMarker = false;
-
-  function walk(expr: RuleExpr): void {
-    switch (expr.type) {
-      case 'literal':
-        add({ kind: 'leaf', tokenType: isKeywordLiteral(expr.value) ? '$keyword' : '$punct' });
-        return;
-      case 'ref':
-        if (tokenNames.has(expr.name)) {
-          add({ kind: 'leaf', tokenType: expr.name });
-          // A ref to the template token can instead yield a `$template` node or a
-          // `$templateHead` leaf (parser's matchExpr 'ref' → parseTemplateExpr).
-          if (templateTokenNames.has(expr.name)) {
-            add({ kind: 'node', rule: SYNTHETIC_TEMPLATE_NODE });
-            add({ kind: 'leaf', tokenType: '$templateHead' });
-          }
-        } else if (ruleNames.has(expr.name)) {
-          add({ kind: 'node', rule: expr.name });
-        }
-        return;
-      case 'seq':
-      case 'alt':
-        for (const item of expr.items) walk(item);
-        return;
-      case 'quantifier':
-      case 'group':
-        walk(expr.body);
-        return;
-      case 'sep':
-        // `sep(el, ',')` → repeated `el` interleaved with the `,` delimiter leaf.
-        walk(expr.element);
-        add({ kind: 'leaf', tokenType: isKeywordLiteral(expr.delimiter) ? '$keyword' : '$punct' });
-        return;
-      case 'op':
-      case 'prefix':
-      case 'postfix':
-        sawMarker = true;
-        return;
-    }
-  }
-
-  walk(rule.body);
-
-  // Pratt synthesis: operator leaves + self node-refs the parser injects.
-  if (sawMarker) {
-    add({ kind: 'leaf', tokenType: '$operator' });
-    add({ kind: 'node', rule: ruleName });
-  }
-
-  return [...seen.values()];
-}
-
-// ── Emit helpers ──
-
-function nodeIfaceName(ruleName: string): string {
-  // `$template` → `$templateNode`; `Expr` → `ExprNode`. The `$` is a legal TS
-  // identifier char, so `$templateNode` is a valid interface name.
-  return `${ruleName}Node`;
-}
-
-/** A single-quoted string literal type, with internal quotes/backslashes escaped. */
-function quote(s: string): string {
-  return `'${s.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;
-}
-
-/** Render a list of member type strings as an indented `| a | b | c` union body. */
-function unionBody(members: string[]): string {
-  if (members.length === 0) return '  never';
-  return members.map(m => `  | ${m}`).join('\n');
-}
-
-/** The `children` array type for a set of child kinds. */
-function childArrayType(kinds: ChildKind[]): string {
-  if (kinds.length === 0) {
-    // No derivable children (e.g. an empty/marker-only body) — still an array.
-    return 'CstChild[]';
-  }
-  const members = kinds.map(k =>
-    k.kind === 'node' ? nodeIfaceName(k.rule) : leafOf(k.tokenType),
-  );
-  // Sort for stable output; nodes and leaves intermixed is fine.
-  members.sort();
-  return `Array<\n${members.map(m => `    | ${m}`).join('\n')}\n  >`;
-}
-
-/** A `CstLeaf` narrowed to a specific tokenType. */
-function leafOf(tokenType: string): string {
-  return `(CstLeaf & { tokenType: ${quote(tokenType)} })`;
-}
diff --git a/src/gen-cst-match.ts b/src/gen-cst-match.ts
index a2dca89..c0b3148 100644
--- a/src/gen-cst-match.ts
+++ b/src/gen-cst-match.ts
@@ -1,5 +1,4 @@
-// Generate per-rule, per-ARM destructurers for a grammar's CST — the VALUE-level
-// sibling of gen-ast-types.ts. For every rule it emits
+// Generate per-rule, per-ARM destructurers for a grammar's CST. For every rule it emits
 //
 //   export type <Rule>Match = { arm: 'if', expr: NodeEntry<'Expr'>, … } | …
 //   export function match<Rule>(t: TreeAccess, n: NodeEntry<'Rule'>, src: string): <Rule>Match
@@ -74,7 +73,7 @@ function sanitizeIdent(s: string): string {
 
 const J = (v: unknown) => JSON.stringify(v);
 
-export function generateCstMatch(grammar: CstGrammar, importFrom: string): string {
+export function generateCstMatch(grammar: CstGrammar): string {
   // Same [Await]/[Yield] fork the parsers apply, so the rule-id space (ruleIdOf)
   // agrees with the tree. Matchers/types are emitted for BASE rules only (a fork
   // collapses to its base via RULE_CANON); no-op without ctx markers.
diff --git a/test/ast-types-smoke.ts b/test/ast-types-smoke.ts
deleted file mode 100644
index 1b5714c..0000000
--- a/test/ast-types-smoke.ts
+++ /dev/null
@@ -1,184 +0,0 @@
-// Smoke test for src/gen-ast-types.ts.
-//
-//  1. Generate the typed-CST source from the real TypeScript grammar.
-//  2. Write it to a temp `.ts` file.
-//  3. Write a consumer module that (a) imports the generated types, (b) does an
-//     exhaustive `switch (node.rule)` proving the discriminated union narrows
-//     and is complete (a `never` assertion in `default`), and (c) narrows a
-//     leaf on `tokenType`.
-//  4. Type-check BOTH with `tsc --noEmit --strict`. A non-empty diagnostic =
-//     the generated types are wrong (or not exhaustive) → fail.
-//  5. Also assert a few structural facts about the generated string directly.
-//
-// Run: `node test/ast-types-smoke.ts`. (This file lives under test/, which the
-// project tsconfig excludes, so it does not affect `npx tsc --noEmit` for src.)
-
-import { generateAstTypes } from '../src/gen-ast-types.ts';
-import { execFileSync } from 'node:child_process';
-import { mkdtempSync, writeFileSync, rmSync, existsSync } from 'node:fs';
-import { tmpdir } from 'node:os';
-import { dirname, join, parse as parsePath } from 'node:path';
-
-const grammar = (await import('../typescript.ts')).default;
-
-// Resolve the workspace `tsc` so the temp dir uses the same compiler. Walk up
-// from the cwd — under a git worktree, node_modules lives in the parent repo.
-function resolveTsc(): string {
-  let dir = process.cwd();
-  while (true) {
-    const candidate = join(dir, 'node_modules', '.bin', 'tsc');
-    if (existsSync(candidate)) return candidate;
-    const parent = dirname(dir);
-    if (parent === dir || dir === parsePath(dir).root) break;
-    dir = parent;
-  }
-  return 'tsc'; // fall back to PATH
-}
-const tscBin = resolveTsc();
-
-let fail = 0;
-const check = (label: string, cond: boolean) => {
-  if (cond) console.log('  ok  ', label);
-  else { fail++; console.log('  FAIL', label); }
-};
-
-// ── 1. Generate ──
-const src = generateAstTypes(grammar);
-
-// ── 5. Direct structural assertions on the generated text ──
-check('emits a CstNode discriminated union', /export type CstNode =/.test(src));
-check('emits a TokenType union', /export type TokenType =/.test(src));
-check('emits a RuleName union', /export type RuleName =/.test(src));
-check('emits NodeOf<R> helper', /export type NodeOf<R extends RuleName>/.test(src));
-
-// Every declared rule gets an interface with a literal `rule` discriminant.
-const missingRule = grammar.rules.find(
-  r => !src.includes(`export interface ${r.name}Node `) || !src.includes(`rule: '${r.name}'`),
-);
-check('every grammar rule has a <Rule>Node interface + literal rule', missingRule === undefined);
-
-// Synthetic leaf token types are present in the TokenType union.
-for (const t of ['$keyword', '$punct', '$operator', '$templateHead', '$templateMiddle', '$templateTail']) {
-  check(`TokenType includes ${t}`, src.includes(`'${t}'`));
-}
-// Declared token names are present too.
-check('TokenType includes a declared token (Ident)', src.includes("'Ident'"));
-
-// The grammar has a template token → a `$template` node interface should exist.
-check('emits $templateNode (grammar has a template token)', src.includes("rule: '$template'"));
-
-// ── 2/3/4. Type-check the generated types + a consumer ──
-const dir = mkdtempSync(join(tmpdir(), 'monogram-ast-types-'));
-const typesPath = join(dir, 'cst-types.ts');
-const consumerPath = join(dir, 'consumer.ts');
-const tsconfigPath = join(dir, 'tsconfig.json');
-
-writeFileSync(typesPath, src);
-
-// Pick a few real rule names from the grammar to exercise narrowing.
-const ruleSample = grammar.rules.slice(0, 3).map(r => r.name);
-
-// Consumer: exhaustive switch over EVERY rule (built from the grammar so it
-// stays complete as the grammar grows), plus explicit narrowing on a couple of
-// sampled rules and a leaf. If the union is missing a member, the per-case
-// access fails; if it has an EXTRA member we don't handle, the `default`
-// `never` assignment fails — both prove the union is exactly right.
-const allRuleNames = [
-  '$template',
-  ...grammar.rules.map(r => r.name),
-];
-const cases = allRuleNames.map(name =>
-  `    case '${name}': { const _c: CstNode = node; void _c; return node.children.length; }`,
-).join('\n');
-
-const consumer = `import type { CstNode, CstLeaf, NodeOf, RuleName, TokenType } from './cst-types.ts';
-
-// (a) Exhaustive switch on the \`rule\` discriminant: narrows, and \`default\`
-// proves completeness via a \`never\` assignment.
-export function childCount(node: CstNode): number {
-  switch (node.rule) {
-${cases}
-    default: {
-      const _exhaustive: never = node;
-      return _exhaustive;
-    }
-  }
-}
-
-// (b) NodeOf<R> narrows the union to one rule's node.
-function sampleNarrowing(n: CstNode) {
-  ${ruleSample.map((r, i) => `if (n.rule === '${r}') { const x${i}: NodeOf<'${r}'> = n; void x${i}; }`).join('\n  ')}
-}
-void sampleNarrowing;
-
-// (c) A RuleName value is assignable from a literal in the union.
-const someRule: RuleName = '${ruleSample[0]}';
-void someRule;
-
-// (d) Leaf narrowing on tokenType.
-function leafText(leaf: CstLeaf): string {
-  if (leaf.tokenType === '$keyword') return leaf.text;
-  const t: TokenType = leaf.tokenType;
-  void t;
-  return leaf.text;
-}
-void leafText;
-`;
-writeFileSync(consumerPath, consumer);
-
-writeFileSync(tsconfigPath, JSON.stringify({
-  compilerOptions: {
-    target: 'ES2022',
-    module: 'Node16',
-    moduleResolution: 'Node16',
-    allowImportingTsExtensions: true,
-    noEmit: true,
-    strict: true,
-    skipLibCheck: true,
-  },
-  include: ['cst-types.ts', 'consumer.ts'],
-}, null, 2));
-
-let tscOut = '';
-let tscOk = true;
-try {
-  execFileSync(tscBin, ['--noEmit', '-p', tsconfigPath], { stdio: 'pipe' });
-} catch (e: any) {
-  tscOk = false;
-  tscOut = `${e.stdout?.toString() ?? ''}${e.stderr?.toString() ?? ''}`;
-}
-check('generated types + exhaustive-switch consumer type-check under tsc --strict', tscOk);
-if (!tscOk) {
-  console.log('\n--- tsc diagnostics ---\n' + tscOut + '\n--- generated source ---\n' + src + '\n--- consumer ---\n' + consumer);
-}
-
-// Negative control: a bogus rule literal must NOT be assignable to RuleName,
-// confirming RuleName is a closed union (not widened to `string`).
-const badConsumerPath = join(dir, 'bad.ts');
-writeFileSync(badConsumerPath, `import type { RuleName } from './cst-types.ts';
-const bad: RuleName = '___definitely_not_a_rule___';
-void bad;
-`);
-writeFileSync(join(dir, 'tsconfig.bad.json'), JSON.stringify({
-  compilerOptions: {
-    target: 'ES2022', module: 'Node16', moduleResolution: 'Node16',
-    allowImportingTsExtensions: true, noEmit: true, strict: true, skipLibCheck: true,
-  },
-  include: ['cst-types.ts', 'bad.ts'],
-}, null, 2));
-let bogusRejected = false;
-try {
-  execFileSync(tscBin, ['--noEmit', '-p', join(dir, 'tsconfig.bad.json')], { stdio: 'pipe' });
-} catch {
-  bogusRejected = true; // tsc errored → the bogus literal was correctly rejected
-}
-check('RuleName is a closed union (rejects an unknown rule literal)', bogusRejected);
-
-rmSync(dir, { recursive: true, force: true });
-
-console.log(
-  fail === 0
-    ? `\n${grammar.rules.length} rules typed; all AST-type smoke checks pass`
-    : `\n${fail} FAILED`,
-);
-process.exit(fail === 0 ? 0 : 1);
diff --git a/test/emit-corpus.ts b/test/emit-corpus.ts
index 6fca455..95d48c4 100644
--- a/test/emit-corpus.ts
+++ b/test/emit-corpus.ts
@@ -136,11 +136,11 @@ export const CURATED_TS_INVALID: string[] = [
 ];
 
 // ── 2) The repo's own hand-written .ts sources ──────────────────────────────────────────
-// Excludes generated artifacts (*.cst-match.ts / *.cst-types.ts) and caps file size so the
-// gate stays fast (the byte-identical CST compare is O(tree size); a 250 KB cap keeps the
-// rich, deeply-nested sources like emit-parser.ts while dropping the multi-hundred-KB ones).
+// Excludes generated artifacts (*.cst-match.ts) and caps file size so the gate stays fast
+// (the byte-identical CST compare is O(tree size); a 250 KB cap keeps the rich, deeply-
+// nested sources like emit-parser.ts while dropping the multi-hundred-KB ones).
 const SIZE_CAP = 250 * 1024;
-const isGenerated = (f: string) => f.endsWith('.cst-match.ts') || f.endsWith('.cst-types.ts') || f.endsWith('.d.ts');
+const isGenerated = (f: string) => f.endsWith('.cst-match.ts') || f.endsWith('.d.ts');
 
 export function repoTsFiles(): string[] {
   const out: string[] = [];

From 070b965a726ce843c091834888c15c878bfdb140 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Sun, 21 Jun 2026 20:26:26 +0800
Subject: [PATCH 05/27] emit-portable: grow to a real JS subset; derived Rust
 matches oxc throughput
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends the target-agnostic emitter from the calc proof to examples/minijs.ts — a
real JavaScript subset (string/comment lexer, the full operator-precedence ladder,
call/member/index mixfix chains, arrays, and the common statement forms) — so the
emitted Go/Rust parsers can be benchmarked against oxc on the same bytes.

What grew:
- Lexer: driven by token-pattern.ts's structural recognizers (char runs, quote
  strings, line/block comments) — still a regex-free char scanner, so Go/Rust
  compile offline.
- Parser IR: opt/sep/inline-literal-alternation, Pratt bracket NUDs (grouping,
  array), and mixfix LEDs (call/member/index) tried before operators.
- Rust target: zero-allocation tokens (`&str` slices, Copy) and `&'static str` CST
  labels — no per-token/per-node String. This is decisive: the first naive version
  (String everywhere, a clone per peek) ran at 9 MB/s, slower than Go; the fix took
  it to 39 MB/s.

Verified: test/portable-targets.ts now covers calc + minijs; ts/go/rust each ≡ the
createParser CST (minijs 29/29 accept + 7/7 reject) and byte-identical on a 2.92 MB
corpus. Full suite 42/42.

Benchmark (oxc-parser 0.137, 2.92 MB JS-subset both engines accept, self-timed
lex+parse with black_box): derived-Rust 39 MB/s (0.97x oxc — parity), derived-Go
19 MB/s (2x), oxc 38 MB/s. A grammar-DERIVED, un-hand-tuned Rust parser matches the
fastest hand-tuned native JS parser, while building a full CST. minijs is a subset
(oxc parses full JS), but both parse the same corpus, so it is a fair throughput
comparison on that work; the bench harness is not committed (it needs the external
oxc-parser package).
---
 examples/minijs.ts       |  77 +++++++++++++
 src/emit-portable.ts     | 227 ++++++++++++++++++++++-----------------
 src/target-go.ts         | 148 ++++++++++++++++---------
 src/target-rust.ts       | 208 ++++++++++++++++++++++-------------
 src/target-ts.ts         | 119 +++++++++++++-------
 test/portable-targets.ts | 177 ++++++++++++++++--------------
 6 files changed, 613 insertions(+), 343 deletions(-)
 create mode 100644 examples/minijs.ts

diff --git a/examples/minijs.ts b/examples/minijs.ts
new file mode 100644
index 0000000..6de468a
--- /dev/null
+++ b/examples/minijs.ts
@@ -0,0 +1,77 @@
+// A real JavaScript SUBSET — the grammar that makes the portable Go/Rust targets
+// "comparable with oxc": rich enough that parsing a corpus is realistic work
+// (strings, comments, the full operator-precedence ladder, call/member/index
+// chains, arrays, and the common statement forms), so the emitted Rust parser can
+// be benchmarked against oxc on the same bytes.
+//
+// Derived from ONE definition by emitPortableParser into TypeScript, Go, and Rust;
+// the cross-language gate proves all three produce the byte-identical CST that the
+// interpreter (createParser) does. The portable lexer is regex-free (char scanner
+// driven by token-pattern.ts's structural recognizers), so the Go/Rust output
+// compiles offline.
+//
+// Deliberately omitted (ambiguity / scope, not capability): object literals (the
+// `{`-block-vs-object split), ternary, template literals, regex literals, keyword
+// operators (typeof/void/...), and `for`. The subset stays unambiguous and real.
+import {
+  token, rule, defineGrammar, left, right, op, prefix, alt,
+  seq, oneOf, range, star, sep, opt, many, altPattern, noneOf, anyChar,
+} from '../src/api.ts';
+
+const digit = range('0', '9');
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(digit, star(digit)), { scope: 'constant.numeric' });
+const Str = token(seq('"', star(altPattern(noneOf('"', '\\'), seq('\\', anyChar()))), '"'), { scope: 'string.quoted.double' });
+const LineComment = token(seq('//', star(noneOf('\n'))), { skip: true, scope: 'comment.line' });
+const BlockComment = token(seq('/*', star(altPattern(noneOf('*'), seq('*', noneOf('/')))), '*/'), { skip: true, scope: 'comment.block' });
+
+// Operator-precedence ladder (earlier = looser), mirroring JavaScript.
+const jsPrec = [
+  right('='),
+  left('||'), left('&&'),
+  left('|'), left('^'), left('&'),
+  left('==', '!=', '===', '!=='),
+  left('<', '>', '<=', '>='),
+  left('<<', '>>'),
+  left('+', '-'),
+  left('*', '/', '%'),
+  right(prefix('!', '-', '+', '~')),
+];
+
+const Expr = rule(($) => [
+  Number_,
+  Str,
+  Ident,
+  ['(', $, ')'],                        // grouping
+  ['[', opt(sep($, ',')), ']'],         // array literal
+  [prefix, $],                          // prefix unary
+  [$, op, $],                           // binary infix (precedence from the ladder)
+  [$, '(', opt(sep($, ',')), ')'],      // call
+  [$, '.', Ident],                      // member access
+  [$, '[', $, ']'],                     // computed index
+]);
+
+const Block = rule(($) => [['{', many(Stmt), '}']]);
+
+const Stmt = rule(($) => [
+  Block,
+  [alt('var', 'let', 'const'), Ident, opt('=', Expr), ';'],
+  ['if', '(', Expr, ')', Stmt, opt('else', Stmt)],
+  ['while', '(', Expr, ')', Stmt],
+  ['return', opt(Expr), ';'],
+  ['function', Ident, '(', opt(sep(Ident, ',')), ')', Block],
+  [Expr, ';'],
+]);
+
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'minijs',
+  scopeName: 'source.minijs',
+  tokens: { Ident, Number: Number_, Str, LineComment, BlockComment },
+  prec: jsPrec,
+  rules: { Expr, Block, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 900beec..e445339 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -5,52 +5,67 @@
 // TS engine uses. It is the agnosticism proof: ONE analysis → ONE intermediate form (IR)
 // → N language renderings, all producing the byte-identical CST the interpreter does.
 //
-// SHARED + target-agnostic (here): the grammar ANALYSIS (reused from grammar-analysis.ts)
-// and `buildIR` — the parse plan as plain data (recursive-descent rules as alternative
-// step-lists, the Pratt rule as NUD-atom / prefix / binary tables, the char-class lexer
-// specs, the literal vocabulary, the entry rule). PER-TARGET (a Target): `render(ir)` —
-// the language's lexer + CST runtime + the rendering of each IR node. Adding a language is
-// implementing one Target; nothing here changes.
+// SHARED + target-agnostic (here): the grammar ANALYSIS (reused from grammar-analysis.ts),
+// the LEXER specs (derived from token-pattern.ts's structural recognizers — char runs,
+// quote-delimited strings, line/block comments — so NO regex engine is needed and the
+// emitted Go/Rust compile offline), and `buildIR` — the parse plan as plain data
+// (recursive-descent rules as alternative step-lists; the Pratt rule as NUD atoms/brackets/
+// prefix + binary tables + mixfix LEDs). PER-TARGET (a Target): `render(ir)` — the
+// language's lexer + CST runtime + the rendering of each IR node. Adding a language is
+// implementing one Target.
 //
-// SCOPE (the verifiable core): char-class tokens (`charClass` then `star(charClass)`), a
-// recursive-descent + backtracking-alternation + `*` body, and a Pratt expression engine
-// with operator PRECEDENCE/associativity + prefix unary + parenthesised grouping. The
-// portable lexer is a dependency-free char scanner (no regex), so the emitted Go/Rust
-// compile offline. Richer surface (mixfix/postfix LEDs, `sep`/`opt`, lexer lookahead,
-// left-recursion beyond Pratt) is the documented next increment; buildIR THROWS on a
-// construct it does not model rather than emit a wrong parser.
-import type { CstGrammar, RuleExpr, TokenDecl, TokenPattern } from './types.ts';
+// SCOPE: char-run / quote-string / line+block-comment tokens; recursive descent with
+// backtracking alternation, `*`/`?` quantifiers, `sep`, and inline literal-alternation;
+// and a Pratt expression engine with operator precedence/associativity, prefix unary,
+// bracket NUDs (grouping, array), and mixfix LEDs (call / member / index) tried before
+// operators. buildIR THROWS on a construct outside this set rather than emit a wrong
+// parser. This is enough to derive a real JavaScript-subset parser (examples/minijs.ts).
+import type { CstGrammar, RuleExpr, TokenDecl } from './types.ts';
 import { analyzeGrammar, findEntryRule } from './grammar-analysis.ts';
 import { collectLiterals, isKeywordLiteral } from './grammar-utils.ts';
+import {
+  tokenPatternCharLoop, tokenPatternQuoteDelimAndEscape,
+  tokenPatternBlockDelimiters, tokenPatternLiteralPrefix,
+} from './token-pattern.ts';
 
 // ── Intermediate representation (plain data; every Target renders THIS) ──
 
 export type CharRange = [number, number];   // inclusive char-code range
-export type TokenSpec = { name: string; first: CharRange[]; cont: CharRange[] };
+export type LexTok =
+  | { kind: 'run'; name: string; first: CharRange[]; cont: CharRange[]; skip: boolean }   // ident/number char run
+  | { kind: 'string'; name: string; delim: string; skip: boolean }                        // delim..delim, `\` escapes next
+  | { kind: 'line'; name: string; prefix: string; skip: boolean }                         // prefix..end-of-line
+  | { kind: 'block'; name: string; open: string; close: string; skip: boolean };          // open..close
 
+export type Lit = { value: string; ttype: '$keyword' | '$punct' };
 export type Step =
   | { t: 'lit'; value: string; ttype: '$keyword' | '$punct' }   // match a literal by text
   | { t: 'tok'; name: string }                                  // match a token kind
   | { t: 'rule'; name: string }                                 // call a rule, append its node
-  | { t: 'star'; step: Step };                                  // repeat the inner step 0+ times
+  | { t: 'star'; step: Step }                                   // repeat inner 0+
+  | { t: 'opt'; steps: Step[] }                                 // optional sub-sequence
+  | { t: 'sep'; elem: Step; delim: string }                     // elem (delim elem)*
+  | { t: 'altlit'; opts: Lit[] };                               // inline alternation of literals
 export type Alt = Step[];
 
 export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
+export type Bracket = { first: string; steps: Step[] };          // a literal-led sequence (grouping/array; LED call/index)
 export type PrattRule = {
   kind: 'pratt';
   name: string;
-  atomToks: string[];                                  // NUD: a bare token (Number/Ident) wrapped in a node
-  group: { open: string; close: string } | null;      // NUD: '(' Expr ')'
-  prefix: Array<{ op: string; rbp: number }>;          // NUD: prefix op then operand parsed at rbp
+  nudToks: string[];                                  // NUD: a bare token wrapped in a node
+  nudBrackets: Bracket[];                             // NUD: '(' … ')' / '[' … ']'
+  prefix: Array<{ op: string; rbp: number }>;         // NUD: prefix op then operand at rbp
   binary: Array<{ op: string; lbp: number; rbp: number }>;  // LED: infix op, bind iff lbp > minBp, rhs at rbp
+  leds: Bracket[];                                    // LED: mixfix continuation (call/member/index), tried before operators
 };
 export type RuleIR = RdRule | PrattRule;
 
 export type ParserIR = {
   grammarName: string;
   entry: string;
-  tokens: TokenSpec[];   // named tokens, for the char scanner (tried in declaration order)
-  puncts: string[];      // punctuation literals, sorted longest-first (maximal munch)
+  tokens: LexTok[];      // for the char scanner, tried in declaration order
+  puncts: string[];      // punctuation literals, longest-first (maximal munch)
   rules: RuleIR[];
 };
 
@@ -70,15 +85,7 @@ function buildIR(grammar: CstGrammar): ParserIR {
   const a = analyzeGrammar(grammar);
   const tokenNames = a.tokenNames;
 
-  // Lexer token specs: each token must be `charClass` then `star(charClass)` (the portable
-  // scanner's shape). Anything else is out of the verifiable core → throw, don't mis-lex.
-  const tokens: TokenSpec[] = grammar.tokens.map((t) => {
-    const { first, cont } = charClassFirstCont(t);
-    return { name: t.name, first, cont };
-  });
-
-  // Literal vocabulary, split keyword (alpha — lexed as an identifier, matched by text) vs
-  // punctuation (lexed as its own token). Puncts longest-first for maximal munch.
+  const tokens: LexTok[] = grammar.tokens.map((t) => lexTok(t));
   const lits = new Set<string>();
   for (const r of grammar.rules) for (const l of collectLiterals(r.body)) lits.add(l);
   for (const lv of grammar.precs) for (const o of lv.operators) lits.add(o.value);
@@ -86,88 +93,110 @@ function buildIR(grammar: CstGrammar): ParserIR {
 
   const litTtype = (v: string): '$keyword' | '$punct' => (isKeywordLiteral(v) ? '$keyword' : '$punct');
 
-  const rules: RuleIR[] = grammar.rules.map((r) => {
-    if (a.prattRules.has(r.name)) return buildPratt(r.name, r.body, a);
-    return { kind: 'rd', name: r.name, alts: buildRdAlts(r.body) };
-  });
-
-  function buildRdAlts(body: RuleExpr): Alt[] {
-    if (body.type === 'alt') return body.items.map(altSteps);
-    return [altSteps(body)];
-  }
-  function altSteps(e: RuleExpr): Step[] {
-    if (e.type === 'seq') return e.items.flatMap(stepOf);
-    return stepOf(e);
-  }
-  function stepOf(e: RuleExpr): Step[] {
+  // RuleExpr → Step. `selfName` (when set) maps a self-ref to a fresh rule call.
+  function stepOf(e: RuleExpr): Step {
     switch (e.type) {
-      case 'literal': return [{ t: 'lit', value: e.value, ttype: litTtype(e.value) }];
-      case 'ref': return [tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name }];
-      case 'quantifier': {
-        if (e.kind !== '*') throw new Error(`portable: quantifier '${e.kind}' not in the verifiable core (only '*')`);
-        const inner = stepOf(e.body);
-        if (inner.length !== 1) throw new Error('portable: `*` body must be a single step (a rule/token ref)');
-        return [{ t: 'star', step: inner[0] }];
+      case 'literal': return { t: 'lit', value: e.value, ttype: litTtype(e.value) };
+      case 'ref': return tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name };
+      case 'group': { const ss = altSteps(e.body); if (ss.length !== 1) throw new Error('portable: group must reduce to a single step'); return ss[0]; }
+      case 'sep': return { t: 'sep', elem: stepOf(e.element), delim: e.delimiter };
+      case 'quantifier':
+        if (e.kind === '*') return { t: 'star', step: stepOf(e.body) };
+        if (e.kind === '?') return { t: 'opt', steps: altSteps(e.body) };
+        if (e.kind === '+') throw new Error("portable: '+' not yet modeled (use '*')");
+        break;
+      case 'alt': {
+        const opts: Lit[] = [];
+        for (const it of e.items) {
+          if (it.type !== 'literal') throw new Error('portable: inline alt must be all literals');
+          opts.push({ value: it.value, ttype: litTtype(it.value) });
+        }
+        return { t: 'altlit', opts };
       }
-      case 'group': return altSteps(e.body);
-      default: throw new Error(`portable: rd construct '${e.type}' not in the verifiable core`);
     }
+    throw new Error(`portable: rd construct '${e.type}' not in scope`);
+  }
+  function altSteps(e: RuleExpr): Step[] {
+    if (e.type === 'seq') return e.items.map(stepOf);
+    return [stepOf(e)];
   }
 
+  const rules: RuleIR[] = grammar.rules.map((r) => {
+    if (a.prattRules.has(r.name)) return buildPratt(r.name, r.body, a, stepOf, altSteps, litTtype);
+    return { kind: 'rd', name: r.name, alts: r.body.type === 'alt' ? r.body.items.map(altSteps) : [altSteps(r.body)] };
+  });
+
   return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules };
 }
 
-// A Pratt rule's alternatives, classified into NUD atoms / grouping / prefix and LED binary.
-// The binding powers come from the analysis (opTable/prefixOps), so precedence is single-
-// sourced with the interpreter.
-function buildPratt(name: string, body: RuleExpr, a: ReturnType<typeof analyzeGrammar>): PrattRule {
+// Classify a token into a portable scanner spec via the structural recognizers.
+function lexTok(t: TokenDecl): LexTok {
+  const skip = t.flags.includes('skip');
+  const qs = tokenPatternQuoteDelimAndEscape(t);
+  if (qs) return { kind: 'string', name: t.name, delim: qs.delim, skip };
+  const bd = tokenPatternBlockDelimiters(t);
+  if (bd) return { kind: 'block', name: t.name, open: bd[0], close: bd[1], skip };
+  const loop = tokenPatternCharLoop(t);
+  if (loop) {
+    if (loop.bail.length > 0 || loop.bailNonAscii) throw new Error(`portable: token ${t.name} has a complex continuation (bail) — out of scope`);
+    return { kind: 'run', name: t.name, first: codesToRanges(loop.first), cont: codesToRanges(loop.cont), skip };
+  }
+  const prefix = tokenPatternLiteralPrefix(t);
+  if (prefix) return { kind: 'line', name: t.name, prefix, skip };   // prefix with no distinct suffix → to end-of-line
+  throw new Error(`portable: token ${t.name} shape not recognized by the portable lexer`);
+}
+
+function codesToRanges(codes: number[]): CharRange[] {
+  const s = [...new Set(codes)].sort((x, y) => x - y);
+  const out: CharRange[] = [];
+  for (const c of s) {
+    const last = out[out.length - 1];
+    if (last && c === last[1] + 1) last[1] = c;
+    else out.push([c, c]);
+  }
+  return out;
+}
+
+// A Pratt rule's alternatives → NUD atoms/brackets/prefix + binary + mixfix LEDs.
+// Binding powers come from the analysis (opTable/prefixOps), single-sourced with the interpreter.
+function buildPratt(
+  name: string, body: RuleExpr, a: ReturnType<typeof analyzeGrammar>,
+  stepOf: (e: RuleExpr) => Step, altSteps: (e: RuleExpr) => Step[],
+  litTtype: (v: string) => '$keyword' | '$punct',
+): PrattRule {
   const alts = body.type === 'alt' ? body.items : [body];
-  const atomToks: string[] = [];
-  let group: { open: string; close: string } | null = null;
-  let sawPrefix = false;
-  let sawBinary = false;
+  const nudToks: string[] = [];
+  const nudBrackets: Bracket[] = [];
+  let sawPrefix = false, sawBinary = false;
+  const leds: Bracket[] = [];
   for (const alt of alts) {
     const items = alt.type === 'seq' ? alt.items : [alt];
-    if (items.length === 1 && items[0].type === 'ref' && a.tokenNames.has(items[0].name)) {
-      atomToks.push(items[0].name);                                  // [Token]
-    } else if (items.length === 3 && items[0].type === 'literal' && items[2].type === 'literal'
-               && items[1].type === 'ref' && items[1].name === name) {
-      group = { open: items[0].value, close: items[2].value };       // [ '(' $ ')' ]
-    } else if (items.length === 2 && items[0].type === 'prefix' && items[1].type === 'ref' && items[1].name === name) {
-      sawPrefix = true;                                              // [ prefix $ ]
-    } else if (items.length === 3 && items[0].type === 'ref' && items[0].name === name
-               && items[1].type === 'op' && items[2].type === 'ref' && items[2].name === name) {
-      sawBinary = true;                                              // [ $ op $ ]
-    } else {
-      throw new Error(`portable: Pratt alt shape not in the verifiable core (rule ${name})`);
+    const startsSelf = items[0].type === 'ref' && items[0].name === name;
+    if (!startsSelf) {
+      // NUD
+      if (items.length === 1 && items[0].type === 'ref' && a.tokenNames.has(items[0].name)) { nudToks.push(items[0].name); continue; }
+      if (items[0].type === 'prefix') { sawPrefix = true; continue; }
+      if (items[0].type === 'literal') { nudBrackets.push({ first: items[0].value, steps: items.map((it) => stepOfPratt(it)) }); continue; }
+      throw new Error(`portable: Pratt NUD shape not in scope (rule ${name})`);
     }
+    // LED (starts with self): `$ op $` (binary, op slot + trailing self) or `$ <lit> …` (mixfix)
+    const rest = items.slice(1);
+    if (rest[0].type === 'op') { sawBinary = true; continue; }
+    if (rest[0].type === 'literal') { leds.push({ first: rest[0].value, steps: rest.map((it) => stepOfPratt(it)) }); continue; }
+    throw new Error(`portable: Pratt LED shape not in scope (rule ${name})`);
   }
-  const prefix = sawPrefix
-    ? [...a.prefixOps.entries()].map(([op, info]) => ({ op, rbp: info.rbp }))
-    : [];
+  // a self-ref inside a NUD/LED sub-sequence is a fresh parse of this rule
+  function stepOfPratt(e: RuleExpr): Step {
+    if (e.type === 'ref' && e.name === name) return { t: 'rule', name };
+    if (e.type === 'sep') return { t: 'sep', elem: stepOfPratt(e.element), delim: e.delimiter };
+    if (e.type === 'quantifier' && e.kind === '?') return { t: 'opt', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
+    if (e.type === 'quantifier' && e.kind === '*') return { t: 'star', step: stepOfPratt(e.body) };
+    if (e.type === 'literal') return { t: 'lit', value: e.value, ttype: litTtype(e.value) };
+    return stepOf(e);
+  }
+  const prefix = sawPrefix ? [...a.prefixOps.entries()].map(([op, info]) => ({ op, rbp: info.rbp })) : [];
   const binary = sawBinary
-    ? [...a.opTable.entries()]
-        .filter(([, info]) => info.position === 'infix')
-        .map(([op, info]) => ({ op, lbp: info.lbp, rbp: info.rbp }))
+    ? [...a.opTable.entries()].filter(([, info]) => info.position === 'infix').map(([op, info]) => ({ op, lbp: info.lbp, rbp: info.rbp }))
     : [];
-  return { kind: 'pratt', name, atomToks, group, prefix, binary };
-}
-
-// Extract a token's (first-char, continue-char) code ranges from a `charClass` then
-// `star(charClass)` pattern. Throws for any other shape (out of the verifiable core).
-function charClassFirstCont(t: TokenDecl): { first: CharRange[]; cont: CharRange[] } {
-  const p = t.pattern;
-  if (typeof p === 'string' || p.type !== 'seq' || p.items.length !== 2) throw new Error(`portable: token ${t.name} not [charClass, star(charClass)]`);
-  const head = p.items[0];
-  const tail = p.items[1];
-  if (typeof tail === 'string' || tail.type !== 'repeat' || tail.min !== 0) throw new Error(`portable: token ${t.name} tail is not star(charClass)`);
-  return { first: classRanges(head, t.name), cont: classRanges(tail.body, t.name) };
-}
-function classRanges(p: TokenPattern, tok: string): CharRange[] {
-  if (typeof p === 'string' || p.type !== 'charClass' || p.negate) throw new Error(`portable: token ${tok} uses a non-positive char class`);
-  return p.items.map((it): CharRange => {
-    if (it.type === 'char') return [it.value.charCodeAt(0), it.value.charCodeAt(0)];
-    if (it.type === 'range') return [it.from.charCodeAt(0), it.to.charCodeAt(0)];
-    throw new Error(`portable: token ${tok} char-class item '${(it as { type: string }).type}' unsupported`);
-  });
+  return { kind: 'pratt', name, nudToks, nudBrackets, prefix, binary, leds };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index bc81629..02af630 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -1,23 +1,43 @@
 // The Go Target for emit-portable. Renders the same language-agnostic ParserIR as tsTarget
-// into a self-contained Go program (Go stdlib only — the char-class lexer is regex-free, so
-// it compiles with no module dependencies). Its CST JSON is checked byte-for-byte against
-// the interpreter, so `emitPortableParser(grammar, goTarget)` is a real, verified Go parser
-// derived from the same grammar definition.
-import type { ParserIR, RdRule, PrattRule, Step, CharRange, Target } from './emit-portable.ts';
+// into a self-contained Go program (Go stdlib only — the lexer is regex-free, so it compiles
+// with no module dependencies). Its CST JSON is checked byte-for-byte against the interpreter,
+// so `emitPortableParser(grammar, goTarget)` is a real, verified Go parser derived from the
+// same grammar definition.
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
-const goStr = (s: string) => J(s);   // Go and JSON string literals coincide for our ASCII vocab
 const rangeCond = (v: string, rs: CharRange[]) =>
-  rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ');
+  '(' + rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ') + ')';
 
-function lexer(ir: ParserIR): string {
-  const cases = ir.tokens.map((t) => `\t\tif ${rangeCond('c', t.first)} {
+function scanTok(t: LexTok): string {
+  const push = t.skip ? '' : `toks = append(toks, Tok{${J((t as { name: string }).name)}, src[pos:e], pos, e}); `;
+  if (t.kind === 'run') return `\t\tif ${rangeCond('c', t.first)} {
+\t\t\te := pos + 1
+\t\t\tfor e < n { cc := int(src[e]); if !${rangeCond('cc', t.cont)} { break }; e++ }
+\t\t\t${push}pos = e; continue
+\t\t}`;
+  if (t.kind === 'string') return `\t\tif c == ${t.delim.charCodeAt(0)} {
 \t\t\te := pos + 1
-\t\t\tfor e < n { cc := int(src[e]); if !(${rangeCond('cc', t.cont)}) { break }; e++ }
-\t\t\ttoks = append(toks, Tok{${goStr(t.name)}, src[pos:e], pos, e}); pos = e; continue
-\t\t}`).join('\n');
-  const punctChecks = ir.puncts.map((p) =>
-    `\t\tif strings.HasPrefix(src[pos:], ${goStr(p)}) { toks = append(toks, Tok{"", ${goStr(p)}, pos, pos + ${p.length}}); pos += ${p.length}; continue }`).join('\n');
+\t\t\tfor e < n { ch := int(src[e]); if ch == 92 { e += 2; continue }; if ch == ${t.delim.charCodeAt(0)} { e++; break }; e++ }
+\t\t\t${push}pos = e; continue
+\t\t}`;
+  if (t.kind === 'line') return `\t\tif strings.HasPrefix(src[pos:], ${J(t.prefix)}) {
+\t\t\te := pos + ${t.prefix.length}
+\t\t\tfor e < n && src[e] != 10 { e++ }
+\t\t\t${push}pos = e; continue
+\t\t}`;
+  return `\t\tif strings.HasPrefix(src[pos:], ${J(t.open)}) {
+\t\t\te := pos + ${t.open.length}
+\t\t\tfor e < n && !strings.HasPrefix(src[e:], ${J(t.close)}) { e++ }
+\t\t\tif e < n { e += ${t.close.length} }
+\t\t\t${push}pos = e; continue
+\t\t}`;
+}
+
+function lexer(ir: ParserIR): string {
+  const toks = ir.tokens.map(scanTok).join('\n');
+  const puncts = ir.puncts.map((p) =>
+    `\t\tif strings.HasPrefix(src[pos:], ${J(p)}) { toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}}); pos += ${p.length}; continue }`).join('\n');
   return `func lex(src string) []Tok {
 \ttoks := []Tok{}
 \tn := len(src)
@@ -25,39 +45,50 @@ function lexer(ir: ParserIR): string {
 \tfor pos < n {
 \t\tc := int(src[pos])
 \t\tif c == 32 || c == 9 || c == 10 || c == 13 { pos++; continue }
-${cases}
-${punctChecks}
+${toks}
+${puncts}
 \t\tpanic(fmt.Sprintf("lex error at %d", pos))
 \t}
 \treturn toks
 }`;
 }
 
+function stepCond(s: Step): string {
+  switch (s.t) {
+    case 'lit': return `matchLit(${J(s.value)}, ${J(s.ttype)}, &kids)`;
+    case 'tok': return `matchTok(${J(s.name)}, &kids)`;
+    case 'rule': return `callRule(parse${s.name}, &kids)`;
+    case 'star': return `star(func() bool { return ${stepCond(s.step)} }, &kids)`;
+    case 'opt': return `opt(func() bool { return ${s.steps.map(stepCond).join(' && ')} }, &kids)`;
+    case 'sep': return `sepBy(func() bool { return ${stepCond(s.elem)} }, ${J(s.delim)}, &kids)`;
+    case 'altlit': return `altLit([][2]string{${s.opts.map((o) => `{${J(o.value)}, ${J(o.ttype)}}`).join(', ')}}, &kids)`;
+  }
+}
+
 function rdRule(r: RdRule): string {
-  const alt = (steps: Step[]) => {
-    const conds = steps.map(stepCond).join(' && ');
-    return `\t{ kids := []*Cst{}; if ${conds} { return branch(${goStr(r.name)}, kids, save) }; pos = save }`;
-  };
+  const alt = (steps: Step[]) =>
+    `\t{ kids := []*Cst{}; if ${steps.map(stepCond).join(' && ')} { return branch(${J(r.name)}, kids, save) }; pos = save }`;
   return `func parse${r.name}() *Cst {
 \tsave := pos
 ${r.alts.map(alt).join('\n')}
 \treturn nil
 }`;
 }
-function stepCond(s: Step): string {
-  switch (s.t) {
-    case 'lit': return `matchLit(${goStr(s.value)}, ${goStr(s.ttype)}, &kids)`;
-    case 'tok': return `matchTok(${goStr(s.name)}, &kids)`;
-    case 'rule': return `callRule(parse${s.name}, &kids)`;
-    case 'star': return `star(func() bool { return ${stepCond(s.step)} }, &kids)`;
-  }
-}
 
 function prattRule(r: PrattRule): string {
-  const bin = r.binary.map((b) => `${goStr(b.op)}: {${b.lbp}, ${b.rbp}}`).join(', ');
-  const pre = r.prefix.map((p) => `${goStr(p.op)}: ${p.rbp}`).join(', ');
-  const atoms = r.atomToks.map((k) => `${goStr(k)}: true`).join(', ');
-  const g = r.group;
+  const bin = r.binary.map((b) => `${J(b.op)}: {${b.lbp}, ${b.rbp}}`).join(', ');
+  const pre = r.prefix.map((p) => `${J(p.op)}: ${p.rbp}`).join(', ');
+  const atoms = r.nudToks.map((k) => `${J(k)}: true`).join(', ');
+  const bracketNud = (b: Bracket) => `\tif t.Text == ${J(b.first)} {
+\t\tsave := pos; kids := []*Cst{}
+\t\tif ${b.steps.map(stepCond).join(' && ')} { return node(${J(r.name)}, kids) }
+\t\tpos = save; return nil
+\t}`;
+  const ledArm = (b: Bracket) => `\t\tif t.Text == ${J(b.first)} {
+\t\t\tledSave := pos; kids := []*Cst{left}
+\t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = node(${J(r.name)}, kids); continue }
+\t\t\tpos = ledSave; break
+\t\t}`;
   return `var ${r.name}BIN = map[string]bp{${bin}}
 var ${r.name}PRE = map[string]int{${pre}}
 var ${r.name}ATOM = map[string]bool{${atoms}}
@@ -68,6 +99,7 @@ func ${r.name}bp(minBp int) *Cst {
 \tfor {
 \t\tt := peek()
 \t\tif t == nil { break }
+${r.leds.map(ledArm).join('\n')}
 \t\tinfo, ok := ${r.name}BIN[t.Text]
 \t\tif !ok || info.lbp <= minBp { break }
 \t\tledSave := pos
@@ -75,7 +107,7 @@ func ${r.name}bp(minBp int) *Cst {
 \t\topLeaf := &Cst{IsLeaf: true, TokenType: "$operator", Offset: t.Off, End: t.End}
 \t\trhs := ${r.name}bp(info.rbp)
 \t\tif rhs == nil { pos = ledSave; break }
-\t\tleft = &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{left, opLeaf, rhs}, Offset: left.Offset, End: rhs.End}
+\t\tleft = &Cst{Rule: ${J(r.name)}, Children: []*Cst{left, opLeaf, rhs}, Offset: left.Offset, End: rhs.End}
 \t}
 \treturn left
 }
@@ -84,22 +116,15 @@ func ${r.name}nud() *Cst {
 \tif t == nil { return nil }
 \tif ${r.name}ATOM[t.Kind] {
 \t\tpos++
-\t\treturn &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{{IsLeaf: true, TokenType: t.Kind, Offset: t.Off, End: t.End}}, Offset: t.Off, End: t.End}
+\t\treturn &Cst{Rule: ${J(r.name)}, Children: []*Cst{{IsLeaf: true, TokenType: t.Kind, Offset: t.Off, End: t.End}}, Offset: t.Off, End: t.End}
 \t}
-${g ? `\tif t.Text == ${goStr(g.open)} {
-\t\tsave := pos; pos++
-\t\tinner := ${r.name}bp(0)
-\t\tc := peek()
-\t\tif inner == nil || c == nil || c.Text != ${goStr(g.close)} { pos = save; return nil }
-\t\tpos++
-\t\treturn &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{{IsLeaf: true, TokenType: "$punct", Offset: t.Off, End: t.End}, inner, {IsLeaf: true, TokenType: "$punct", Offset: c.Off, End: c.End}}, Offset: t.Off, End: c.End}
-\t}` : ''}
+${r.nudBrackets.map(bracketNud).join('\n')}
 \tif pbp, ok := ${r.name}PRE[t.Text]; ok {
 \t\tsave := pos; pos++
 \t\topLeaf := &Cst{IsLeaf: true, TokenType: "$operator", Offset: t.Off, End: t.End}
 \t\toperand := ${r.name}bp(pbp)
 \t\tif operand == nil { pos = save; return nil }
-\t\treturn &Cst{Rule: ${goStr(r.name)}, Children: []*Cst{opLeaf, operand}, Offset: t.Off, End: operand.End}
+\t\treturn &Cst{Rule: ${J(r.name)}, Children: []*Cst{opLeaf, operand}, Offset: t.Off, End: operand.End}
 \t}
 \treturn nil
 }`;
@@ -117,7 +142,9 @@ import (
 \t"fmt"
 \t"io"
 \t"os"
+\t"strconv"
 \t"strings"
+\t"time"
 )
 
 type Tok struct {
@@ -145,11 +172,14 @@ func peek() *Tok {
 }
 func branch(rule string, kids []*Cst, save int) *Cst {
 \toffset := 0
-\tif len(kids) > 0 { offset = kids[0].Offset } else if save < len(toks) { offset = toks[save].Off } else if len(toks) > 0 { offset = toks[len(toks)-1].End }
+\tif len(kids) > 0 { offset = kids[0].Offset } else if save < len(toks) { offset = toks[save].Off }
 \tend := offset
 \tif len(kids) > 0 { end = kids[len(kids)-1].End }
 \treturn &Cst{Rule: rule, Children: kids, Offset: offset, End: end}
 }
+func node(rule string, kids []*Cst) *Cst {
+\treturn &Cst{Rule: rule, Children: kids, Offset: kids[0].Offset, End: kids[len(kids)-1].End}
+}
 func matchLit(value, ttype string, kids *[]*Cst) bool {
 \tt := peek()
 \tif t == nil || t.Text != value { return false }
@@ -169,6 +199,18 @@ func star(once func() bool, kids *[]*Cst) bool {
 \tfor { sp := pos; before := len(*kids); if !once() { pos = sp; *kids = (*kids)[:before]; break } }
 \treturn true
 }
+func opt(body func() bool, kids *[]*Cst) bool {
+\tsp := pos; before := len(*kids); if !body() { pos = sp; *kids = (*kids)[:before] }; return true
+}
+func sepBy(elem func() bool, delim string, kids *[]*Cst) bool {
+\tif !elem() { return false }
+\tfor { sp := pos; before := len(*kids); if matchLit(delim, "$punct", kids) && elem() { continue }; pos = sp; *kids = (*kids)[:before]; break }
+\treturn true
+}
+func altLit(opts [][2]string, kids *[]*Cst) bool {
+\tfor _, o := range opts { if matchLit(o[0], o[1], kids) { return true } }
+\treturn false
+}
 
 ${ruleFns}
 
@@ -178,16 +220,24 @@ func writeJSON(c *Cst, b *strings.Builder) {
 \t\treturn
 \t}
 \tfmt.Fprintf(b, "{\\"rule\\":%q,\\"children\\":[", c.Rule)
-\tfor i, k := range c.Children {
-\t\tif i > 0 { b.WriteByte(',') }
-\t\twriteJSON(k, b)
-\t}
+\tfor i, k := range c.Children { if i > 0 { b.WriteByte(',') }; writeJSON(k, b) }
 \tfmt.Fprintf(b, "],\\"offset\\":%d,\\"end\\":%d}", c.Offset, c.End)
 }
 
 func main() {
 \tdata, _ := io.ReadAll(os.Stdin)
-\ttoks = lex(string(data))
+\tsrc := string(data)
+\t// Self-bench: a numeric arg N times the lex+parse loop and prints ms/iteration.
+\tif len(os.Args) > 1 {
+\t\tif iters, err := strconv.Atoi(os.Args[1]); err == nil && iters > 0 {
+\t\t\tfor i := 0; i < 3; i++ { toks = lex(src); pos = 0; parse${ir.entry}() }
+\t\t\tt0 := time.Now()
+\t\t\tfor i := 0; i < iters; i++ { toks = lex(src); pos = 0; parse${ir.entry}() }
+\t\t\tfmt.Printf("%.4f\\n", float64(time.Since(t0).Nanoseconds())/1e6/float64(iters))
+\t\t\treturn
+\t\t}
+\t}
+\ttoks = lex(src)
 \tpos = 0
 \troot := parse${ir.entry}()
 \tif root == nil || pos != len(toks) {
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 726ff1a..fb0c641 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -1,24 +1,52 @@
 // The Rust Target for emit-portable. Renders the same language-agnostic ParserIR as
-// tsTarget/goTarget into a self-contained Rust program (no external crates — the char-class
-// lexer is regex-free, so it compiles with rustc alone, no Cargo/network). Its CST JSON is
-// checked byte-for-byte against the interpreter, so `emitPortableParser(grammar, rustTarget)`
-// is a real, verified Rust parser derived from the same grammar definition.
-import type { ParserIR, RdRule, PrattRule, Step, CharRange, Target } from './emit-portable.ts';
+// tsTarget/goTarget into a self-contained Rust program (no external crates — the lexer is
+// regex-free, so it compiles with rustc alone, no Cargo/network). Its CST JSON is checked
+// byte-for-byte against the interpreter, so `emitPortableParser(grammar, rustTarget)` is a
+// real, verified Rust parser derived from the same grammar definition.
+//
+// Rust ownership note: a CST node is OWNED (moved), unlike the TS/Go pointer trees. In the
+// Pratt LED loop `left` can only be moved into a child vec once the continuation is known to
+// match — so a mixfix LED matches its steps into a SEPARATE kids vec first, then (on success)
+// moves `left` to the front and reassigns; on failure `left` is untouched and the loop
+// returns it. Sub-sequence combinators (star/opt/sep) take non-capturing fn pointers
+// `fn(&mut Parser, &mut Vec<Cst>) -> bool`, threading the parser + kids as params (so nothing
+// is captured, sidestepping the borrow checker).
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
-const rsStr = (s: string) => J(s);   // Rust and JSON string literals coincide for our ASCII vocab
 const rangeCond = (v: string, rs: CharRange[]) =>
-  rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `(${lo}..=${hi}).contains(&${v})`)).join(' || ');
+  '(' + rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `(${lo}..=${hi}).contains(&${v})`)).join(' || ') + ')';
 
-function lexer(ir: ParserIR): string {
-  const cases = ir.tokens.map((t) => `        if ${rangeCond('c', t.first)} {
+function scanTok(t: LexTok): string {
+  const push = t.skip ? '' : `toks.push(Tok { kind: ${J((t as { name: string }).name)}, text: &src[pos..e], off: pos, end: e }); `;
+  if (t.kind === 'run') return `        if ${rangeCond('c', t.first)} {
+            let mut e = pos + 1;
+            while e < n { let cc = b[e] as u32; if !${rangeCond('cc', t.cont)} { break } e += 1; }
+            ${push}pos = e; continue;
+        }`;
+  if (t.kind === 'string') return `        if c == ${t.delim.charCodeAt(0)} {
             let mut e = pos + 1;
-            while e < n { let cc = b[e] as u32; if !(${rangeCond('cc', t.cont)}) { break } e += 1; }
-            toks.push(Tok { kind: ${rsStr(t.name)}.to_string(), text: src[pos..e].to_string(), off: pos, end: e }); pos = e; continue;
-        }`).join('\n');
-  const punctChecks = ir.puncts.map((p) =>
-    `        if src[pos..].starts_with(${rsStr(p)}) { toks.push(Tok { kind: String::new(), text: ${rsStr(p)}.to_string(), off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
-  return `fn lex(src: &str) -> Vec<Tok> {
+            while e < n { let ch = b[e] as u32; if ch == 92 { e += 2; continue } if ch == ${t.delim.charCodeAt(0)} { e += 1; break } e += 1; }
+            ${push}pos = e; continue;
+        }`;
+  if (t.kind === 'line') return `        if src[pos..].starts_with(${J(t.prefix)}) {
+            let mut e = pos + ${t.prefix.length};
+            while e < n && b[e] != 10 { e += 1; }
+            ${push}pos = e; continue;
+        }`;
+  return `        if src[pos..].starts_with(${J(t.open)}) {
+            let mut e = pos + ${t.open.length};
+            while e < n && !src[e..].starts_with(${J(t.close)}) { e += 1; }
+            if e < n { e += ${t.close.length}; }
+            ${push}pos = e; continue;
+        }`;
+}
+
+function lexer(ir: ParserIR): string {
+  const toks = ir.tokens.map(scanTok).join('\n');
+  const puncts = ir.puncts.map((p) =>
+    `        if src[pos..].starts_with(${J(p)}) { toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
+  return `fn lex<'a>(src: &'a str) -> Vec<Tok<'a>> {
     let b = src.as_bytes();
     let n = b.len();
     let mut toks: Vec<Tok> = Vec::new();
@@ -26,47 +54,66 @@ function lexer(ir: ParserIR): string {
     while pos < n {
         let c = b[pos] as u32;
         if c == 32 || c == 9 || c == 10 || c == 13 { pos += 1; continue; }
-${cases}
-${punctChecks}
+${toks}
+${puncts}
         panic!("lex error at {}", pos);
     }
     toks
 }`;
 }
 
-function rdRule(r: RdRule): string {
-  const alt = (steps: Step[]) => {
-    const conds = steps.map(stepCond).join(' && ');
-    return `        { let mut kids: Vec<Cst> = Vec::new(); if ${conds} { return Some(self.branch(${rsStr(r.name)}, kids, save)); } self.pos = save; }`;
-  };
-  return `    fn parse_${r.name}(&mut self) -> Option<Cst> {
-        let save = self.pos;
-${r.alts.map(alt).join('\n')}
-        None
-    }`;
-}
+// Top-level step: uses `self` and `&mut kids`.
 function stepCond(s: Step): string {
   switch (s.t) {
-    case 'lit': return `self.match_lit(${rsStr(s.value)}, ${rsStr(s.ttype)}, &mut kids)`;
-    case 'tok': return `self.match_tok(${rsStr(s.name)}, &mut kids)`;
+    case 'lit': return `self.match_lit(${J(s.value)}, ${J(s.ttype)}, &mut kids)`;
+    case 'tok': return `self.match_tok(${J(s.name)}, &mut kids)`;
     case 'rule': return `self.call_rule(Parser::parse_${s.name}, &mut kids)`;
-    case 'star': return `self.star(|p, k| ${starInner(s.step)}, &mut kids)`;
+    case 'star': return `self.star(|p, k| ${stepCondP(s.step)}, &mut kids)`;
+    case 'opt': return `self.opt(|p, k| ${s.steps.map(stepCondP).join(' && ')}, &mut kids)`;
+    case 'sep': return `self.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, &mut kids)`;
+    case 'altlit': return `self.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], &mut kids)`;
   }
 }
-function starInner(s: Step): string {
+// Inside a closure: uses `p` and `k`.
+function stepCondP(s: Step): string {
   switch (s.t) {
-    case 'lit': return `p.match_lit(${rsStr(s.value)}, ${rsStr(s.ttype)}, k)`;
-    case 'tok': return `p.match_tok(${rsStr(s.name)}, k)`;
+    case 'lit': return `p.match_lit(${J(s.value)}, ${J(s.ttype)}, k)`;
+    case 'tok': return `p.match_tok(${J(s.name)}, k)`;
     case 'rule': return `p.call_rule(Parser::parse_${s.name}, k)`;
-    case 'star': throw new Error('portable: nested star unsupported');
+    case 'star': return `p.star(|p, k| ${stepCondP(s.step)}, k)`;
+    case 'opt': return `p.opt(|p, k| ${s.steps.map(stepCondP).join(' && ')}, k)`;
+    case 'sep': return `p.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, k)`;
+    case 'altlit': return `p.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], k)`;
   }
 }
 
+function rdRule(r: RdRule): string {
+  const alt = (steps: Step[]) =>
+    `        { let mut kids: Vec<Cst> = Vec::new(); if ${steps.map(stepCond).join(' && ')} { return Some(self.branch(${J(r.name)}, kids, save)); } self.pos = save; }`;
+  return `    fn parse_${r.name}(&mut self) -> Option<Cst> {
+        let save = self.pos;
+${r.alts.map(alt).join('\n')}
+        None
+    }`;
+}
+
 function prattRule(r: PrattRule): string {
-  const binArms = r.binary.map((b) => `${rsStr(b.op)} => Some((${b.lbp}, ${b.rbp}))`).join(', ');
-  const preArms = r.prefix.map((p) => `${rsStr(p.op)} => Some(${p.rbp})`).join(', ');
-  const atomArm = r.atomToks.map(rsStr).join(' | ');
-  const g = r.group;
+  const binArms = r.binary.map((b) => `${J(b.op)} => Some((${b.lbp}, ${b.rbp}))`).join(', ');
+  const preArms = r.prefix.map((p) => `${J(p.op)} => Some(${p.rbp})`).join(', ');
+  const atomArm = r.nudToks.map(J).join(' | ');
+  const bracketNud = (b: Bracket) => `        if t.text == ${J(b.first)} {
+            let save = self.pos; let mut kids: Vec<Cst> = Vec::new();
+            if ${b.steps.map(stepCond).join(' && ')} { return Some(node(${J(r.name)}, kids)); }
+            self.pos = save; return None;
+        }`;
+  const ledArm = (b: Bracket) => `            if t.text == ${J(b.first)} {
+                let led_save = self.pos; let mut kids: Vec<Cst> = Vec::new();
+                if ${b.steps.map(stepCond).join(' && ')} {
+                    let mut full = vec![left]; full.append(&mut kids);
+                    left = node(${J(r.name)}, full); continue;
+                }
+                self.pos = led_save; break;
+            }`;
   return `    fn parse_${r.name}(&mut self) -> Option<Cst> { self.${r.name}_bp(0) }
     fn ${r.name}_bin(op: &str) -> Option<(i64, i64)> { match op { ${binArms}${binArms ? ', ' : ''}_ => None } }
     fn ${r.name}_pre(op: &str) -> Option<i64> { match op { ${preArms}${preArms ? ', ' : ''}_ => None } }
@@ -75,41 +122,29 @@ function prattRule(r: PrattRule): string {
         let mut left = self.${r.name}_nud()?;
         loop {
             let t = match self.peek() { Some(t) => t, None => break };
-            let (lbp, rbp) = match Parser::${r.name}_bin(&t.text) { Some(x) => x, None => break };
+${r.leds.map(ledArm).join('\n')}
+            let (lbp, rbp) = match Parser::${r.name}_bin(t.text) { Some(x) => x, None => break };
             if lbp <= min_bp { break; }
             let led_save = self.pos;
             self.pos += 1;
             let op_leaf = Cst::leaf("$operator", t.off, t.end);
             let rhs = match self.${r.name}_bp(rbp) { Some(r) => r, None => { self.pos = led_save; break; } };
-            let (off, end) = (left.offset, rhs.end);
-            left = Cst::node(${rsStr(r.name)}, vec![left, op_leaf, rhs], off, end);
+            left = node(${J(r.name)}, vec![left, op_leaf, rhs]);
         }
         Some(left)
     }
     fn ${r.name}_nud(&mut self) -> Option<Cst> {
         let t = self.peek()?;
-        if Parser::${r.name}_atom(&t.kind) {
+        if Parser::${r.name}_atom(t.kind) {
             self.pos += 1;
-            return Some(Cst::node(${rsStr(r.name)}, vec![Cst::leaf(&t.kind, t.off, t.end)], t.off, t.end));
+            return Some(Cst::node(${J(r.name)}, vec![Cst::leaf(t.kind, t.off, t.end)], t.off, t.end));
         }
-${g ? `        if t.text == ${rsStr(g.open)} {
-            let save = self.pos; self.pos += 1;
-            let inner = self.${r.name}_bp(0);
-            let c = self.peek();
-            match (inner, c) {
-                (Some(inner), Some(c)) if c.text == ${rsStr(g.close)} => {
-                    self.pos += 1;
-                    let (off, end) = (t.off, c.end);
-                    return Some(Cst::node(${rsStr(r.name)}, vec![Cst::leaf("$punct", t.off, t.end), inner, Cst::leaf("$punct", c.off, c.end)], off, end));
-                }
-                _ => { self.pos = save; return None; }
-            }
-        }` : ''}
-        if let Some(pbp) = Parser::${r.name}_pre(&t.text) {
+${r.nudBrackets.map(bracketNud).join('\n')}
+        if let Some(pbp) = Parser::${r.name}_pre(t.text) {
             let save = self.pos; self.pos += 1;
             let op_leaf = Cst::leaf("$operator", t.off, t.end);
             match self.${r.name}_bp(pbp) {
-                Some(operand) => { let (off, end) = (t.off, operand.end); return Some(Cst::node(${rsStr(r.name)}, vec![op_leaf, operand], off, end)); }
+                Some(operand) => { let (o, e) = (t.off, operand.end); return Some(Cst::node(${J(r.name)}, vec![op_leaf, operand], o, e)); }
                 None => { self.pos = save; return None; }
             }
         }
@@ -126,38 +161,56 @@ export const rustTarget: Target = {
 #![allow(non_snake_case)]
 use std::io::Read;
 
-#[derive(Clone)]
-struct Tok { kind: String, text: String, off: usize, end: usize }
+// Zero-alloc tokens: kind is a known grammar name (&'static str), text is a slice of the
+// source. Tok is Copy, so peek() copies pointers — no per-peek heap work.
+#[derive(Clone, Copy)]
+struct Tok<'a> { kind: &'static str, text: &'a str, off: usize, end: usize }
 
-struct Cst { rule: String, children: Vec<Cst>, is_leaf: bool, token_type: String, offset: usize, end: usize }
+// CST nodes hold only &'static str labels (rule names / token-type tags are all literals)
+// + usize spans — no per-node String allocation.
+struct Cst { rule: &'static str, children: Vec<Cst>, is_leaf: bool, token_type: &'static str, offset: usize, end: usize }
 impl Cst {
-    fn leaf(tt: &str, off: usize, end: usize) -> Cst { Cst { rule: String::new(), children: Vec::new(), is_leaf: true, token_type: tt.to_string(), offset: off, end } }
-    fn node(rule: &str, children: Vec<Cst>, offset: usize, end: usize) -> Cst { Cst { rule: rule.to_string(), children, is_leaf: false, token_type: String::new(), offset, end } }
+    fn leaf(tt: &'static str, off: usize, end: usize) -> Cst { Cst { rule: "", children: Vec::new(), is_leaf: true, token_type: tt, offset: off, end } }
+    fn node(rule: &'static str, children: Vec<Cst>, offset: usize, end: usize) -> Cst { Cst { rule, children, is_leaf: false, token_type: "", offset, end } }
 }
+// offset/end inferred from first/last child (children non-empty).
+fn node(rule: &'static str, kids: Vec<Cst>) -> Cst { let o = kids[0].offset; let e = kids[kids.len() - 1].end; Cst::node(rule, kids, o, e) }
 
 ${lexer(ir)}
 
-struct Parser { toks: Vec<Tok>, pos: usize }
-impl Parser {
-    fn peek(&self) -> Option<Tok> { if self.pos < self.toks.len() { Some(self.toks[self.pos].clone()) } else { None } }
-    fn branch(&self, rule: &str, kids: Vec<Cst>, save: usize) -> Cst {
-        let offset = if !kids.is_empty() { kids[0].offset } else if save < self.toks.len() { self.toks[save].off } else if !self.toks.is_empty() { self.toks[self.toks.len() - 1].end } else { 0 };
+struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize }
+impl<'a> Parser<'a> {
+    fn peek(&self) -> Option<Tok<'a>> { if self.pos < self.toks.len() { Some(self.toks[self.pos]) } else { None } }
+    fn branch(&self, rule: &'static str, kids: Vec<Cst>, save: usize) -> Cst {
+        let offset = if !kids.is_empty() { kids[0].offset } else if save < self.toks.len() { self.toks[save].off } else { 0 };
         let end = if !kids.is_empty() { kids[kids.len() - 1].end } else { offset };
         Cst::node(rule, kids, offset, end)
     }
-    fn match_lit(&mut self, value: &str, ttype: &str, kids: &mut Vec<Cst>) -> bool {
+    fn match_lit(&mut self, value: &str, ttype: &'static str, kids: &mut Vec<Cst>) -> bool {
         match self.peek() { Some(t) if t.text == value => { kids.push(Cst::leaf(ttype, t.off, t.end)); self.pos += 1; true } _ => false }
     }
-    fn match_tok(&mut self, name: &str, kids: &mut Vec<Cst>) -> bool {
+    fn match_tok(&mut self, name: &'static str, kids: &mut Vec<Cst>) -> bool {
         match self.peek() { Some(t) if t.kind == name => { kids.push(Cst::leaf(name, t.off, t.end)); self.pos += 1; true } _ => false }
     }
-    fn call_rule(&mut self, f: fn(&mut Parser) -> Option<Cst>, kids: &mut Vec<Cst>) -> bool {
+    fn call_rule(&mut self, f: fn(&mut Parser<'a>) -> Option<Cst>, kids: &mut Vec<Cst>) -> bool {
         match f(self) { Some(n) => { kids.push(n); true } None => false }
     }
-    fn star(&mut self, once: fn(&mut Parser, &mut Vec<Cst>) -> bool, kids: &mut Vec<Cst>) -> bool {
+    fn star(&mut self, once: fn(&mut Parser<'a>, &mut Vec<Cst>) -> bool, kids: &mut Vec<Cst>) -> bool {
         loop { let sp = self.pos; let before = kids.len(); if !once(self, kids) { self.pos = sp; kids.truncate(before); break; } }
         true
     }
+    fn opt(&mut self, body: fn(&mut Parser<'a>, &mut Vec<Cst>) -> bool, kids: &mut Vec<Cst>) -> bool {
+        let sp = self.pos; let before = kids.len(); if !body(self, kids) { self.pos = sp; kids.truncate(before); } true
+    }
+    fn sep_by(&mut self, elem: fn(&mut Parser<'a>, &mut Vec<Cst>) -> bool, delim: &str, kids: &mut Vec<Cst>) -> bool {
+        if !elem(self, kids) { return false; }
+        loop { let sp = self.pos; let before = kids.len(); if self.match_lit(delim, "$punct", kids) && elem(self, kids) { continue; } self.pos = sp; kids.truncate(before); break; }
+        true
+    }
+    fn alt_lit(&mut self, opts: &[(&str, &'static str)], kids: &mut Vec<Cst>) -> bool {
+        for (v, tt) in opts { if self.match_lit(v, tt, kids) { return true; } }
+        false
+    }
 
 ${ruleFns}
 }
@@ -175,6 +228,15 @@ fn write_json(c: &Cst, out: &mut String) {
 fn main() {
     let mut src = String::new();
     std::io::stdin().read_to_string(&mut src).unwrap();
+    // Self-bench: a numeric arg N times the lex+parse loop and prints ms/iteration.
+    if let Some(iters) = std::env::args().nth(1).and_then(|a| a.parse::<u64>().ok()) {
+        // black_box on the input + result so the optimizer can't elide the lex/parse.
+        for _ in 0..3 { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0 }; std::hint::black_box(p.parse_${ir.entry}()); }
+        let t = std::time::Instant::now();
+        for _ in 0..iters { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0 }; std::hint::black_box(p.parse_${ir.entry}()); }
+        println!("{:.4}", t.elapsed().as_secs_f64() * 1000.0 / iters as f64);
+        return;
+    }
     let toks = lex(&src);
     let n = toks.len();
     let mut p = Parser { toks, pos: 0 };
diff --git a/src/target-ts.ts b/src/target-ts.ts
index ab37220..304eef5 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -1,21 +1,43 @@
 // The TypeScript Target for emit-portable. Renders the language-agnostic ParserIR into a
-// self-contained TS parser: a char-class lexer, a backtracking recursive-descent core, a
-// Pratt expression engine, and a CST→JSON printer over stdin. It is the reference rendering
-// — its CST is checked byte-for-byte against the interpreter (createParser), so a divergence
-// in the portable logic shows up here before Go/Rust are even compiled.
-import type { ParserIR, RdRule, PrattRule, Step, CharRange, Target } from './emit-portable.ts';
+// self-contained TS parser: a char-class/string/comment lexer, a backtracking recursive-
+// descent core, a Pratt expression engine (prefix + binary precedence + mixfix call/member/
+// index LEDs), and a CST→JSON printer over stdin. It is the reference rendering — its CST
+// is checked byte-for-byte against the interpreter (createParser), so a divergence in the
+// portable logic surfaces here before Go/Rust are compiled.
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
-  rs.map(([lo, hi]) => (lo === hi ? `${v} === ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ');
+  '(' + rs.map(([lo, hi]) => (lo === hi ? `${v} === ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ') + ')';
 
-function lexer(ir: ParserIR): string {
-  const cases = ir.tokens.map((t) => `    if (${rangeCond('c', t.first)}) {
+function scanTok(t: LexTok): string {
+  const push = t.skip ? '' : `toks.push({ kind: ${J((t as { name: string }).name)}, text: src.slice(pos, e), off: pos, end: e }); `;
+  if (t.kind === 'run') return `    if (${rangeCond('c', t.first)}) {
+      let e = pos + 1;
+      while (e < n) { const cc = src.charCodeAt(e); if (!${rangeCond('cc', t.cont)}) break; e++; }
+      ${push}pos = e; continue;
+    }`;
+  if (t.kind === 'string') return `    if (c === ${t.delim.charCodeAt(0)}) {
       let e = pos + 1;
-      while (e < n) { const cc = src.charCodeAt(e); if (!(${rangeCond('cc', t.cont)})) break; e++; }
-      toks.push({ kind: ${J(t.name)}, text: src.slice(pos, e), off: pos, end: e }); pos = e; continue;
-    }`).join('\n');
-  const punctChecks = ir.puncts.map((p) =>
+      while (e < n) { const ch = src.charCodeAt(e); if (ch === 92) { e += 2; continue; } if (ch === ${t.delim.charCodeAt(0)}) { e++; break; } e++; }
+      ${push}pos = e; continue;
+    }`;
+  if (t.kind === 'line') return `    if (src.startsWith(${J(t.prefix)}, pos)) {
+      let e = pos + ${t.prefix.length};
+      while (e < n && src.charCodeAt(e) !== 10) e++;
+      ${push}pos = e; continue;
+    }`;
+  return `    if (src.startsWith(${J(t.open)}, pos)) {
+      let e = pos + ${t.open.length};
+      while (e < n && !src.startsWith(${J(t.close)}, e)) e++;
+      if (e < n) e += ${t.close.length};
+      ${push}pos = e; continue;
+    }`;
+}
+
+function lexer(ir: ParserIR): string {
+  const toks = ir.tokens.map(scanTok).join('\n');
+  const puncts = ir.puncts.map((p) =>
     `    if (src.startsWith(${J(p)}, pos)) { toks.push({ kind: '', text: ${J(p)}, off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
   return `function lex(src: string): Tok[] {
   const toks: Tok[] = [];
@@ -24,42 +46,54 @@ function lexer(ir: ParserIR): string {
   while (pos < n) {
     const c = src.charCodeAt(pos);
     if (c === 32 || c === 9 || c === 10 || c === 13) { pos++; continue; }
-${cases}
-${punctChecks}
+${toks}
+${puncts}
     throw new Error('lex error at ' + pos + ': ' + JSON.stringify(src[pos]));
   }
   return toks;
 }`;
 }
 
-function rdRule(r: RdRule): string {
-  const alt = (steps: Step[]) => {
-    const conds = steps.map(stepCond).join(' && ');
-    return `  { const kids: Cst[] = []; if (${conds}) return branch(${J(r.name)}, kids, save); pos = save; }`;
-  };
-  return `function parse${r.name}(): Node | null {
-  const save = pos;
-${r.alts.map(alt).join('\n')}
-  return null;
-}`;
-}
+// A Step as a boolean expression (appends to the in-scope `kids`).
 function stepCond(s: Step): string {
   switch (s.t) {
     case 'lit': return `matchLit(${J(s.value)}, ${J(s.ttype)}, kids)`;
     case 'tok': return `matchTok(${J(s.name)}, kids)`;
     case 'rule': return `callRule(parse${s.name}, kids)`;
     case 'star': return `star(() => ${stepCond(s.step)}, kids)`;
+    case 'opt': return `opt(() => ${s.steps.map(stepCond).join(' && ')}, kids)`;
+    case 'sep': return `sepBy(() => ${stepCond(s.elem)}, ${J(s.delim)}, kids)`;
+    case 'altlit': return `altLit([${s.opts.map((o) => `[${J(o.value)}, ${J(o.ttype)}]`).join(', ')}], kids)`;
   }
 }
 
+function rdRule(r: RdRule): string {
+  const alt = (steps: Step[]) =>
+    `  { const kids: Cst[] = []; if (${steps.map(stepCond).join(' && ')}) return branch(${J(r.name)}, kids, save); pos = save; }`;
+  return `function parse${r.name}(): Node | null {
+  const save = pos;
+${r.alts.map(alt).join('\n')}
+  return null;
+}`;
+}
+
 function prattRule(r: PrattRule): string {
   const BIN = `{ ${r.binary.map((b) => `${J(b.op)}: { lbp: ${b.lbp}, rbp: ${b.rbp} }`).join(', ')} }`;
   const PRE = `{ ${r.prefix.map((p) => `${J(p.op)}: ${p.rbp}`).join(', ')} }`;
-  const atomSet = `new Set([${r.atomToks.map(J).join(', ')}])`;
-  const group = r.group;
+  const atom = `new Set([${r.nudToks.map(J).join(', ')}])`;
+  const bracketNud = (b: Bracket) => `    if (t.text === ${J(b.first)}) {
+      const save = pos; const kids: Cst[] = [];
+      if (${b.steps.map(stepCond).join(' && ')}) return node(${J(r.name)}, kids);
+      pos = save; return null;
+    }`;
+  const ledArm = (b: Bracket) => `    if (t.text === ${J(b.first)}) {
+      const ledSave = pos; const kids: Cst[] = [left];
+      if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.name)}, kids); continue; }
+      pos = ledSave; break;
+    }`;
   return `const ${r.name}_BIN: Record<string, { lbp: number; rbp: number }> = ${BIN};
 const ${r.name}_PRE: Record<string, number> = ${PRE};
-const ${r.name}_ATOM = ${atomSet};
+const ${r.name}_ATOM = ${atom};
 function parse${r.name}(): Node | null { return ${r.name}_bp(0); }
 function ${r.name}_bp(minBp: number): Node | null {
   let left = ${r.name}_nud();
@@ -67,6 +101,7 @@ function ${r.name}_bp(minBp: number): Node | null {
   for (;;) {
     const t = peek();
     if (t === null) break;
+${r.leds.map(ledArm).join('\n')}
     const info = ${r.name}_BIN[t.text];
     if (info === undefined || info.lbp <= minBp) break;
     const ledSave = pos;
@@ -82,14 +117,7 @@ function ${r.name}_nud(): Node | null {
   const t = peek();
   if (t === null) return null;
   if (${r.name}_ATOM.has(t.kind)) { pos++; return { rule: ${J(r.name)}, children: [{ tokenType: t.kind, offset: t.off, end: t.end }], offset: t.off, end: t.end }; }
-${group ? `  if (t.text === ${J(group.open)}) {
-    const save = pos; pos++;
-    const inner = ${r.name}_bp(0);
-    const c = peek();
-    if (inner === null || c === null || c.text !== ${J(group.close)}) { pos = save; return null; }
-    pos++;
-    return { rule: ${J(r.name)}, children: [{ tokenType: '$punct', offset: t.off, end: t.end }, inner, { tokenType: '$punct', offset: c.off, end: c.end }], offset: t.off, end: c.end };
-  }` : ''}
+${r.nudBrackets.map(bracketNud).join('\n')}
   const pbp = ${r.name}_PRE[t.text];
   if (pbp !== undefined) {
     const save = pos; pos++;
@@ -120,12 +148,14 @@ ${lexer(ir)}
 let toks: Tok[] = [];
 let pos = 0;
 function peek(): Tok | null { return pos < toks.length ? toks[pos] : null; }
-function curOff(): number { return pos < toks.length ? toks[pos].off : (toks.length > 0 ? toks[toks.length - 1].end : 0); }
 function branch(rule: string, kids: Cst[], save: number): Node {
-  const offset = kids.length > 0 ? kids[0].offset : (save < toks.length ? toks[save].off : curOff());
+  const offset = kids.length > 0 ? kids[0].offset : (save < toks.length ? toks[save].off : 0);
   const end = kids.length > 0 ? kids[kids.length - 1].end : offset;
   return { rule, children: kids, offset, end };
 }
+function node(rule: string, kids: Cst[]): Node {
+  return { rule, children: kids, offset: kids[0].offset, end: kids[kids.length - 1].end };
+}
 function matchLit(value: string, ttype: string, kids: Cst[]): boolean {
   const t = peek();
   if (t === null || t.text !== value) return false;
@@ -145,10 +175,21 @@ function star(once: () => boolean, kids: Cst[]): boolean {
   for (;;) { const sp = pos; const before = kids.length; if (!once()) { pos = sp; kids.length = before; break; } }
   return true;
 }
+function opt(body: () => boolean, kids: Cst[]): boolean {
+  const sp = pos; const before = kids.length; if (!body()) { pos = sp; kids.length = before; } return true;
+}
+function sepBy(elem: () => boolean, delim: string, kids: Cst[]): boolean {
+  if (!elem()) return false;
+  for (;;) { const sp = pos; const before = kids.length; if (matchLit(delim, '$punct', kids) && elem()) continue; pos = sp; kids.length = before; break; }
+  return true;
+}
+function altLit(opts: [string, string][], kids: Cst[]): boolean {
+  for (const [v, tt] of opts) if (matchLit(v, tt, kids)) return true;
+  return false;
+}
 
 ${ruleFns}
 
-function offsetEnd(n: Cst): number { return n.end; }
 const src = readFileSync(0, 'utf8');
 toks = lex(src);
 pos = 0;
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 5d4e3b0..8c5384e 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -1,15 +1,17 @@
 // Gate: the TARGET-AGNOSTIC emitter (issue #6) — `emitPortableParser(grammar, target)`
 // derives a parser in EACH target language that produces the byte-identical CST the
-// interpreter (createParser) does. This is the agnosticism proof by EXECUTION: the same
-// examples/calc.ts grammar is rendered to TypeScript, Go, and Rust; the Go and Rust
-// sources are COMPILED and RUN, and every parser's CST output is compared, node-for-node,
-// against the createParser oracle over an adversarial corpus (operator precedence /
-// associativity, prefix chains, nested grouping, multi-statement programs, and the empty
-// program), plus reject-parity on malformed input.
+// interpreter (createParser) does. The agnosticism proof by EXECUTION: every grammar is
+// rendered to TypeScript, Go, and Rust; the Go/Rust sources are COMPILED and RUN, and each
+// parser's CST output is compared, node-for-node, against the createParser oracle over an
+// adversarial corpus, plus reject-parity on malformed input.
 //
-// Go/Rust toolchains are optional: a missing `go` or `rustc` is logged and skipped (the
-// TS rendering, which needs only node, always runs) — the same graceful-degrade pattern
-// the external-corpus gates use, so this stays green on a machine without them.
+//   - calc:   operator precedence/associativity, prefix unary, nested grouping.
+//   - minijs: a real JavaScript SUBSET — a string/comment lexer, the full operator ladder,
+//             call/member/index chains, arrays, and statement forms (the grammar the Go/Rust
+//             output is benchmarked against oxc with).
+//
+// Go/Rust toolchains are optional: a missing `go`/`rustc` is logged and skipped (the TS
+// rendering, which needs only node, always runs).
 import { execFileSync } from 'node:child_process';
 import { mkdirSync, rmSync, writeFileSync } from 'node:fs';
 import { createParser } from '../src/gen-parser.ts';
@@ -17,100 +19,109 @@ import { emitPortableParser } from '../src/emit-portable.ts';
 import { tsTarget } from '../src/target-ts.ts';
 import { goTarget } from '../src/target-go.ts';
 import { rustTarget } from '../src/target-rust.ts';
+import type { CstGrammar } from '../src/types.ts';
 
-const grammar = (await import('../examples/calc.ts')).default;
-const oracle = createParser(grammar);
-
-// Accepted inputs — each must parse to the SAME CST in every language.
-const ACCEPT = [
-  '1;', 'a;', '',                               // atoms + the empty program
-  '1 + 2 * 3;', '1 * 2 + 3;',                   // precedence both directions
-  '1 - 2 - 3;', 'a / b / c;', '1 + 2 + 3 + 4;', // left-associativity
-  '-a;', '-(-a);', '- - a;',                    // prefix + prefix chains
-  '-a * b;', '-a + b * c;', '-(a + b) * c;',    // prefix vs infix vs grouping
-  '(1);', '((a));', '(1 + 2) * (3 - 4);',       // nested grouping
-  'a * b + c * d - e / f;',                     // mixed precedence ladder
-  'let x = 1; let y = x + 2 * x; (y);',         // multi-statement program
-  'let z = -(a * b) / (c - -d);', 'foo; bar; baz;',
+type Case = { grammar: string; path: string; accept: string[]; reject: string[] };
+const CASES: Case[] = [
+  {
+    grammar: 'calc', path: '../examples/calc.ts',
+    accept: [
+      '1;', 'a;', '', '1 + 2 * 3;', '1 * 2 + 3;', '1 - 2 - 3;', 'a / b / c;', '1 + 2 + 3 + 4;',
+      '-a;', '-(-a);', '- - a;', '-a * b;', '-a + b * c;', '-(a + b) * c;',
+      '(1);', '((a));', '(1 + 2) * (3 - 4);', 'a * b + c * d - e / f;',
+      'let x = 1; let y = x + 2 * x; (y);', 'let z = -(a * b) / (c - -d);', 'foo; bar; baz;',
+    ],
+    reject: ['1 +;', '(1;', '1 2;', 'let = 1;', ') ;', '* a;', 'let x 1;'],
+  },
+  {
+    grammar: 'minijs', path: '../examples/minijs.ts',
+    accept: [
+      '1;', 'a;', '', 'x = 1 + 2 * 3;', '-a * b + 1;', '(1 + 2) * 3;',
+      'foo(a, b);', 'a.b.c;', 'a[0][1];', 'f()()();', 'a.b(c).d[e];',
+      'let x = 1; let y = x + 2;', '[1, 2, 3];', '[];', '[a, [b, c]];',
+      'if (x < 10) { x = x + 1; } else { y(); }', 'while (i) { i = i - 1; }',
+      'function f(a, b) { return a + b; }', 'var s = "hi\\"x"; // c\n s.length;',
+      '/* block */ a;', 'a === b !== c;', 'a && b || c;', '!a && -b;',
+      'return;', 'return a + b;', 'const PI = 3;', '{ a; b; }',
+      'f(g(h(x)), [1, 2], y.z);', 'while (a < b) { if (c) { d(); } e = e + 1; }',
+    ],
+    // (note: `let = 1;` is VALID minijs — no reserved-word guard, so `let` is an
+    // identifier and it's an assignment expression; the oracle accepts it too.)
+    reject: ['1 +;', '(1;', 'if x {}', 'foo(a,;', 'a.;', '[1,', 'function (){}'],
+  },
 ];
-// Malformed inputs — every parser must REJECT (the oracle throws; the emitted parsers exit 1).
-const REJECT = ['1 +;', '(1;', '1 2;', 'let = 1;', ') ;', '* a;', 'let x 1;'];
 
-type Json = unknown;
-const sortKeys = (o: Json): Json =>
+const sortKeys = (o: unknown): unknown =>
   Array.isArray(o) ? o.map(sortKeys)
-  : (o && typeof o === 'object') ? Object.fromEntries(Object.keys(o as object).sort().map((k) => [k, sortKeys((o as Record<string, Json>)[k])]))
+  : (o && typeof o === 'object') ? Object.fromEntries(Object.keys(o as object).sort().map((k) => [k, sortKeys((o as Record<string, unknown>)[k])]))
   : o;
-const canon = (o: Json) => JSON.stringify(sortKeys(o));
-
-function oracleOutcome(src: string): { ok: true; cst: string } | { ok: false } {
-  try { return { ok: true, cst: canon(oracle.parse(src)) }; }
-  catch { return { ok: false }; }
-}
+const canon = (o: unknown) => JSON.stringify(sortKeys(o));
 
 const TMP = '/tmp/portable-targets';
 rmSync(TMP, { recursive: true, force: true });
 mkdirSync(TMP, { recursive: true });
+const have = (cmd: string, args: string[]) => { try { execFileSync(cmd, args, { stdio: 'pipe' }); return true; } catch { return false; } };
+const HAS_GO = have('go', ['version']);
+const HAS_RUST = have('rustc', ['--version']);
+if (!HAS_GO) console.log('  go: (toolchain absent — skipped)');
+if (!HAS_RUST) console.log('  rust: (toolchain absent — skipped)');
 
-function have(cmd: string, args: string[]): boolean {
-  try { execFileSync(cmd, args, { stdio: 'pipe' }); return true; } catch { return false; }
-}
-
-// A runnable target: writes its source, (optionally) compiles, and returns a `run(src)->{ok,cst?}`.
-type Runner = { label: string; run: (src: string) => { ok: true; cst: string } | { ok: false } };
-
-function tsRunner(): Runner {
-  const f = `${TMP}/calc.ts`;
-  writeFileSync(f, emitPortableParser(grammar, tsTarget));
-  return { label: 'typescript', run: (src) => runProc('node', [f], src) };
-}
-function goRunner(): Runner | null {
-  if (!have('go', ['version'])) { console.log('  go: (toolchain absent — skipped)'); return null; }
-  const dir = `${TMP}/go`; mkdirSync(dir, { recursive: true });
-  writeFileSync(`${dir}/main.go`, emitPortableParser(grammar, goTarget));
-  writeFileSync(`${dir}/go.mod`, 'module calc\n\ngo 1.21\n');
-  execFileSync('go', ['build', '-o', `${dir}/calc`, '.'], { cwd: dir, stdio: 'pipe' });
-  return { label: 'go', run: (src) => runProc(`${dir}/calc`, [], src) };
-}
-function rustRunner(): Runner | null {
-  if (!have('rustc', ['--version'])) { console.log('  rust: (toolchain absent — skipped)'); return null; }
-  const dir = `${TMP}/rust`; mkdirSync(dir, { recursive: true });
-  const f = `${dir}/main.rs`;
-  writeFileSync(f, emitPortableParser(grammar, rustTarget));
-  execFileSync('rustc', ['-O', f, '-o', `${dir}/calc`], { stdio: 'pipe' });
-  return { label: 'rust', run: (src) => runProc(`${dir}/calc`, [], src) };
-}
-function runProc(cmd: string, args: string[], src: string): { ok: true; cst: string } | { ok: false } {
+type Outcome = { ok: true; cst: string } | { ok: false };
+function runProc(cmd: string, args: string[], src: string): Outcome {
   try { return { ok: true, cst: canon(JSON.parse(execFileSync(cmd, args, { input: src, stdio: ['pipe', 'pipe', 'pipe'] }).toString())) }; }
   catch { return { ok: false }; }
 }
 
-const runners: Runner[] = [tsRunner(), goRunner(), rustRunner()].filter((r): r is Runner => r !== null);
-
 let failures = 0;
-for (const r of runners) {
-  let acc = 0, rej = 0;
-  for (const src of ACCEPT) {
-    const want = oracleOutcome(src);
-    const got = r.run(src);
-    if (want.ok && got.ok && want.cst === got.cst) { acc++; continue; }
-    failures++;
-    console.log(`  ${r.label}: ACCEPT mismatch on ${JSON.stringify(src)}`);
-    if (want.ok && got.ok) { console.log(`      want ${want.cst.slice(0, 140)}`); console.log(`      got  ${got.cst.slice(0, 140)}`); }
-    else console.log(`      want.ok=${want.ok} got.ok=${got.ok}`);
+for (const c of CASES) {
+  const grammar: CstGrammar = (await import(c.path)).default;
+  const oracle = createParser(grammar);
+  const oracleOut = (src: string): Outcome => { try { return { ok: true, cst: canon(oracle.parse(src)) }; } catch { return { ok: false }; } };
+
+  const dir = `${TMP}/${c.grammar}`;
+  mkdirSync(dir, { recursive: true });
+  const runners: Array<{ label: string; run: (src: string) => Outcome }> = [];
+
+  const tsFile = `${dir}/p.ts`;
+  writeFileSync(tsFile, emitPortableParser(grammar, tsTarget));
+  runners.push({ label: 'typescript', run: (src) => runProc('node', [tsFile], src) });
+
+  if (HAS_GO) {
+    const gdir = `${dir}/go`; mkdirSync(gdir, { recursive: true });
+    writeFileSync(`${gdir}/main.go`, emitPortableParser(grammar, goTarget));
+    writeFileSync(`${gdir}/go.mod`, 'module p\n\ngo 1.21\n');
+    execFileSync('go', ['build', '-o', `${gdir}/p`, '.'], { cwd: gdir, stdio: 'pipe' });
+    runners.push({ label: 'go', run: (src) => runProc(`${gdir}/p`, [], src) });
   }
-  for (const src of REJECT) {
-    const want = oracleOutcome(src);
-    const got = r.run(src);
-    if (!want.ok && !got.ok) { rej++; continue; }
-    failures++;
-    console.log(`  ${r.label}: REJECT mismatch on ${JSON.stringify(src)} (oracle ok=${want.ok}, ${r.label} ok=${got.ok})`);
+  if (HAS_RUST) {
+    const rfile = `${dir}/main.rs`;
+    writeFileSync(rfile, emitPortableParser(grammar, rustTarget));
+    execFileSync('rustc', ['-O', '-A', 'warnings', rfile, '-o', `${dir}/pr`], { stdio: 'pipe' });
+    runners.push({ label: 'rust', run: (src) => runProc(`${dir}/pr`, [], src) });
+  }
+
+  for (const r of runners) {
+    let acc = 0, rej = 0;
+    for (const src of c.accept) {
+      const want = oracleOut(src), got = r.run(src);
+      if (want.ok && got.ok && want.cst === got.cst) { acc++; continue; }
+      failures++;
+      console.log(`  ${c.grammar}/${r.label}: ACCEPT mismatch on ${JSON.stringify(src)}`);
+      if (want.ok && got.ok) { console.log(`      want ${want.cst.slice(0, 140)}`); console.log(`      got  ${got.cst.slice(0, 140)}`); }
+      else console.log(`      want.ok=${want.ok} got.ok=${got.ok}`);
+    }
+    for (const src of c.reject) {
+      const want = oracleOut(src), got = r.run(src);
+      if (!want.ok && !got.ok) { rej++; continue; }
+      failures++;
+      console.log(`  ${c.grammar}/${r.label}: REJECT mismatch on ${JSON.stringify(src)} (oracle ok=${want.ok}, ${r.label} ok=${got.ok})`);
+    }
+    console.log(`  ${c.grammar}/${r.label}: ${acc}/${c.accept.length} accept ≡ oracle · ${rej}/${c.reject.length} reject ≡ oracle`);
   }
-  console.log(`  ${r.label}: ${acc}/${ACCEPT.length} accept ≡ oracle · ${rej}/${REJECT.length} reject ≡ oracle`);
 }
 
 if (failures > 0) {
   console.error(`\n✗ portable targets diverge from the interpreter (${failures} case(s))`);
   process.exit(1);
 }
-console.log(`\n✓ ${runners.map((r) => r.label).join(' + ')} parsers derived from one grammar ≡ interpreter CST (compiled & run)`);
+console.log('\n✓ portable parsers (ts/go/rust) derived from each grammar ≡ interpreter CST (compiled & run)');

From d1308d3c8f6482227d41e4bbe33d7b097f8b9030 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 00:32:01 +0800
Subject: [PATCH 06/27] emit-portable: arena-allocate the Go target (3.5x
 faster, vs tsgo)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Go target now allocates its CST from a flat arena instead of a heap *Cst per
node: nodes live in `nodes []Node` (a node is an int32 index), children in a flat
`kids []int32`, and in-progress children accumulate on a `scratch` stack.
Backtracking truncates the three slices to saved lengths; the slices keep their
capacity across parses, so a warmed parser allocates ~nothing. Indices (unlike the
previous pointers) survive slice reallocation, which is what makes the arena work.

This is the Go counterpart of the Rust target's zero-allocation change, and the
same allocation lever the optimized emit-parser.ts pays for in JS: it took the
derived Go parser from 19 MB/s to 67 MB/s (3.5x) on the 2.92 MB JS-subset corpus.

Verified: CST byte-identical to the interpreter on the corpus + the portable gate
(calc + minijs, ts/go/rust, 21/21+29/29 accept, 7/7 reject); the truncate-on-
backtrack reclamation is exercised by the reject cases. Full suite 42/42.

Benchmark vs tsgo (microsoft/typescript-go's native-Go parser, ParseSourceFile
only, both parse the corpus clean): derived-Go 67 MB/s, tsgo 33 MB/s. The 3.5x
arena win is the apples-to-apples result; the headline 2x-over-tsgo is partly
because minijs is a subset of TypeScript (tsgo builds a richer AST — trivia, full
node kinds — so it does more per node), not purely better codegen. Takeaway: a
grammar-derived parser with arena allocation is in the same league as a hand-tuned
native one; naive per-node allocation is what costs the 3.5x.
---
 src/target-go.ts | 187 ++++++++++++++++++++++++++---------------------
 1 file changed, 104 insertions(+), 83 deletions(-)

diff --git a/src/target-go.ts b/src/target-go.ts
index 02af630..85f9f30 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -3,6 +3,12 @@
 // with no module dependencies). Its CST JSON is checked byte-for-byte against the interpreter,
 // so `emitPortableParser(grammar, goTarget)` is a real, verified Go parser derived from the
 // same grammar definition.
+//
+// ARENA allocation (to minimise GC pressure, as tsgo does): nodes live in a flat `nodes []Node`,
+// their children in a flat `kids []int32`, and in-progress children accumulate on a `scratch`
+// stack. A node is an int32 index, never a heap pointer. Backtracking truncates the three
+// slices to saved lengths; the slices keep their capacity across parses (reset to len 0), so a
+// warmed parser allocates ~nothing per parse.
 import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
@@ -39,7 +45,7 @@ function lexer(ir: ParserIR): string {
   const puncts = ir.puncts.map((p) =>
     `\t\tif strings.HasPrefix(src[pos:], ${J(p)}) { toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}}); pos += ${p.length}; continue }`).join('\n');
   return `func lex(src string) []Tok {
-\ttoks := []Tok{}
+\ttoks := toks[:0]
 \tn := len(src)
 \tpos := 0
 \tfor pos < n {
@@ -55,23 +61,24 @@ ${puncts}
 
 function stepCond(s: Step): string {
   switch (s.t) {
-    case 'lit': return `matchLit(${J(s.value)}, ${J(s.ttype)}, &kids)`;
-    case 'tok': return `matchTok(${J(s.name)}, &kids)`;
-    case 'rule': return `callRule(parse${s.name}, &kids)`;
-    case 'star': return `star(func() bool { return ${stepCond(s.step)} }, &kids)`;
-    case 'opt': return `opt(func() bool { return ${s.steps.map(stepCond).join(' && ')} }, &kids)`;
-    case 'sep': return `sepBy(func() bool { return ${stepCond(s.elem)} }, ${J(s.delim)}, &kids)`;
-    case 'altlit': return `altLit([][2]string{${s.opts.map((o) => `{${J(o.value)}, ${J(o.ttype)}}`).join(', ')}}, &kids)`;
+    case 'lit': return `matchLit(${J(s.value)}, ${J(s.ttype)})`;
+    case 'tok': return `matchTok(${J(s.name)})`;
+    case 'rule': return `callRule(parse${s.name})`;
+    case 'star': return `star(func() bool { return ${stepCond(s.step)} })`;
+    case 'opt': return `opt(func() bool { return ${s.steps.map(stepCond).join(' && ')} })`;
+    case 'sep': return `sepBy(func() bool { return ${stepCond(s.elem)} }, ${J(s.delim)})`;
+    case 'altlit': return `altLit([][2]string{${s.opts.map((o) => `{${J(o.value)}, ${J(o.ttype)}}`).join(', ')}})`;
   }
 }
 
 function rdRule(r: RdRule): string {
   const alt = (steps: Step[]) =>
-    `\t{ kids := []*Cst{}; if ${steps.map(stepCond).join(' && ')} { return branch(${J(r.name)}, kids, save) }; pos = save }`;
-  return `func parse${r.name}() *Cst {
-\tsave := pos
+    `\tif ${steps.map(stepCond).join(' && ')} { return finish(${J(r.name)}, sb, offAt(save)) }
+\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]`;
+  return `func parse${r.name}() int32 {
+\tsave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 ${r.alts.map(alt).join('\n')}
-\treturn nil
+\treturn -1
 }`;
 }
 
@@ -80,53 +87,56 @@ function prattRule(r: PrattRule): string {
   const pre = r.prefix.map((p) => `${J(p.op)}: ${p.rbp}`).join(', ');
   const atoms = r.nudToks.map((k) => `${J(k)}: true`).join(', ');
   const bracketNud = (b: Bracket) => `\tif t.Text == ${J(b.first)} {
-\t\tsave := pos; kids := []*Cst{}
-\t\tif ${b.steps.map(stepCond).join(' && ')} { return node(${J(r.name)}, kids) }
-\t\tpos = save; return nil
+\t\tsave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
+\t\tif ${b.steps.map(stepCond).join(' && ')} { return finish(${J(r.name)}, sb, t.Off) }
+\t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1
 \t}`;
   const ledArm = (b: Bracket) => `\t\tif t.Text == ${J(b.first)} {
-\t\t\tledSave := pos; kids := []*Cst{left}
-\t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = node(${J(r.name)}, kids); continue }
-\t\t\tpos = ledSave; break
+\t\t\tledSave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
+\t\t\tscratch = append(scratch, left)
+\t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
+\t\t\tpos = ledSave; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break
 \t\t}`;
   return `var ${r.name}BIN = map[string]bp{${bin}}
 var ${r.name}PRE = map[string]int{${pre}}
 var ${r.name}ATOM = map[string]bool{${atoms}}
-func parse${r.name}() *Cst { return ${r.name}bp(0) }
-func ${r.name}bp(minBp int) *Cst {
+func parse${r.name}() int32 { return ${r.name}bp(0) }
+func ${r.name}bp(minBp int) int32 {
 \tleft := ${r.name}nud()
-\tif left == nil { return nil }
+\tif left < 0 { return -1 }
 \tfor {
 \t\tt := peek()
 \t\tif t == nil { break }
 ${r.leds.map(ledArm).join('\n')}
 \t\tinfo, ok := ${r.name}BIN[t.Text]
 \t\tif !ok || info.lbp <= minBp { break }
-\t\tledSave := pos
+\t\tledSave := pos; sb := len(scratch)
+\t\tscratch = append(scratch, left, mkLeaf("$operator", t.Off, t.End))
 \t\tpos++
-\t\topLeaf := &Cst{IsLeaf: true, TokenType: "$operator", Offset: t.Off, End: t.End}
 \t\trhs := ${r.name}bp(info.rbp)
-\t\tif rhs == nil { pos = ledSave; break }
-\t\tleft = &Cst{Rule: ${J(r.name)}, Children: []*Cst{left, opLeaf, rhs}, Offset: left.Offset, End: rhs.End}
+\t\tif rhs < 0 { pos = ledSave; scratch = scratch[:sb]; break }
+\t\tscratch = append(scratch, rhs)
+\t\tleft = finish(${J(r.name)}, sb, nodes[left].Offset)
 \t}
 \treturn left
 }
-func ${r.name}nud() *Cst {
+func ${r.name}nud() int32 {
 \tt := peek()
-\tif t == nil { return nil }
+\tif t == nil { return -1 }
 \tif ${r.name}ATOM[t.Kind] {
-\t\tpos++
-\t\treturn &Cst{Rule: ${J(r.name)}, Children: []*Cst{{IsLeaf: true, TokenType: t.Kind, Offset: t.Off, End: t.End}}, Offset: t.Off, End: t.End}
+\t\tsb := len(scratch); scratch = append(scratch, mkLeaf(t.Kind, t.Off, t.End)); pos++
+\t\treturn finish(${J(r.name)}, sb, t.Off)
 \t}
 ${r.nudBrackets.map(bracketNud).join('\n')}
 \tif pbp, ok := ${r.name}PRE[t.Text]; ok {
-\t\tsave := pos; pos++
-\t\topLeaf := &Cst{IsLeaf: true, TokenType: "$operator", Offset: t.Off, End: t.End}
+\t\tsave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
+\t\tscratch = append(scratch, mkLeaf("$operator", t.Off, t.End)); pos++
 \t\toperand := ${r.name}bp(pbp)
-\t\tif operand == nil { pos = save; return nil }
-\t\treturn &Cst{Rule: ${J(r.name)}, Children: []*Cst{opLeaf, operand}, Offset: t.Off, End: operand.End}
+\t\tif operand < 0 { pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1 }
+\t\tscratch = append(scratch, operand)
+\t\treturn finish(${J(r.name)}, sb, t.Off)
 \t}
-\treturn nil
+\treturn -1
 }`;
 }
 
@@ -151,77 +161,90 @@ type Tok struct {
 \tKind, Text string
 \tOff, End   int
 }
-type Cst struct {
-\tRule      string
-\tChildren  []*Cst
-\tIsLeaf    bool
-\tTokenType string
-\tOffset    int
-\tEnd       int
+// Arena node: an int32 index into nodes; children are a flat range in kids.
+type Node struct {
+\tRule, TokenType string
+\tIsLeaf          bool
+\tKidStart, KidCount, Offset, End int
 }
 type bp struct{ lbp, rbp int }
 
-${lexer(ir)}
-
 var toks []Tok
 var pos int
+var nodes []Node
+var kids []int32
+var scratch []int32
+
+${lexer(ir)}
 
 func peek() *Tok {
 \tif pos < len(toks) { return &toks[pos] }
 \treturn nil
 }
-func branch(rule string, kids []*Cst, save int) *Cst {
-\toffset := 0
-\tif len(kids) > 0 { offset = kids[0].Offset } else if save < len(toks) { offset = toks[save].Off }
-\tend := offset
-\tif len(kids) > 0 { end = kids[len(kids)-1].End }
-\treturn &Cst{Rule: rule, Children: kids, Offset: offset, End: end}
+func offAt(i int) int { if i < len(toks) { return toks[i].Off }; return 0 }
+func mkLeaf(ttype string, off, end int) int32 {
+\tnodes = append(nodes, Node{TokenType: ttype, IsLeaf: true, Offset: off, End: end})
+\treturn int32(len(nodes) - 1)
 }
-func node(rule string, kids []*Cst) *Cst {
-\treturn &Cst{Rule: rule, Children: kids, Offset: kids[0].Offset, End: kids[len(kids)-1].End}
+// Wrap the scratch entries [sb:] as one node's children (flattened into kids); truncate scratch.
+func finish(rule string, sb, fallbackOff int) int32 {
+\tnn := len(scratch)
+\tkidStart := len(kids)
+\toff, end := fallbackOff, fallbackOff
+\tif nn > sb { off = nodes[scratch[sb]].Offset; end = nodes[scratch[nn-1]].End }
+\tkids = append(kids, scratch[sb:nn]...)
+\tscratch = scratch[:sb]
+\tnodes = append(nodes, Node{Rule: rule, KidStart: kidStart, KidCount: nn - sb, Offset: off, End: end})
+\treturn int32(len(nodes) - 1)
 }
-func matchLit(value, ttype string, kids *[]*Cst) bool {
-\tt := peek()
-\tif t == nil || t.Text != value { return false }
-\t*kids = append(*kids, &Cst{IsLeaf: true, TokenType: ttype, Offset: t.Off, End: t.End}); pos++; return true
+func matchLit(value, ttype string) bool {
+\tif pos < len(toks) && toks[pos].Text == value { scratch = append(scratch, mkLeaf(ttype, toks[pos].Off, toks[pos].End)); pos++; return true }
+\treturn false
 }
-func matchTok(name string, kids *[]*Cst) bool {
-\tt := peek()
-\tif t == nil || t.Kind != name { return false }
-\t*kids = append(*kids, &Cst{IsLeaf: true, TokenType: name, Offset: t.Off, End: t.End}); pos++; return true
+func matchTok(name string) bool {
+\tif pos < len(toks) && toks[pos].Kind == name { scratch = append(scratch, mkLeaf(name, toks[pos].Off, toks[pos].End)); pos++; return true }
+\treturn false
 }
-func callRule(fn func() *Cst, kids *[]*Cst) bool {
-\tn := fn()
-\tif n == nil { return false }
-\t*kids = append(*kids, n); return true
+func callRule(fn func() int32) bool {
+\tid := fn()
+\tif id < 0 { return false }
+\tscratch = append(scratch, id); return true
 }
-func star(once func() bool, kids *[]*Cst) bool {
-\tfor { sp := pos; before := len(*kids); if !once() { pos = sp; *kids = (*kids)[:before]; break } }
+func star(once func() bool) bool {
+\tfor { sp := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if !once() { pos = sp; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break } }
 \treturn true
 }
-func opt(body func() bool, kids *[]*Cst) bool {
-\tsp := pos; before := len(*kids); if !body() { pos = sp; *kids = (*kids)[:before] }; return true
+func opt(body func() bool) bool {
+\tsp := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if !body() { pos = sp; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }; return true
 }
-func sepBy(elem func() bool, delim string, kids *[]*Cst) bool {
+func sepBy(elem func() bool, delim string) bool {
 \tif !elem() { return false }
-\tfor { sp := pos; before := len(*kids); if matchLit(delim, "$punct", kids) && elem() { continue }; pos = sp; *kids = (*kids)[:before]; break }
+\tfor { sp := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if matchLit(delim, "$punct") && elem() { continue }; pos = sp; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break }
 \treturn true
 }
-func altLit(opts [][2]string, kids *[]*Cst) bool {
-\tfor _, o := range opts { if matchLit(o[0], o[1], kids) { return true } }
+func altLit(opts [][2]string) bool {
+\tfor _, o := range opts { if matchLit(o[0], o[1]) { return true } }
 \treturn false
 }
 
 ${ruleFns}
 
-func writeJSON(c *Cst, b *strings.Builder) {
-\tif c.IsLeaf {
-\t\tfmt.Fprintf(b, "{\\"tokenType\\":%q,\\"offset\\":%d,\\"end\\":%d}", c.TokenType, c.Offset, c.End)
+func writeJSON(id int32, b *strings.Builder) {
+\tnd := &nodes[id]
+\tif nd.IsLeaf {
+\t\tfmt.Fprintf(b, "{\\"tokenType\\":%q,\\"offset\\":%d,\\"end\\":%d}", nd.TokenType, nd.Offset, nd.End)
 \t\treturn
 \t}
-\tfmt.Fprintf(b, "{\\"rule\\":%q,\\"children\\":[", c.Rule)
-\tfor i, k := range c.Children { if i > 0 { b.WriteByte(',') }; writeJSON(k, b) }
-\tfmt.Fprintf(b, "],\\"offset\\":%d,\\"end\\":%d}", c.Offset, c.End)
+\tfmt.Fprintf(b, "{\\"rule\\":%q,\\"children\\":[", nd.Rule)
+\tfor i := 0; i < nd.KidCount; i++ { if i > 0 { b.WriteByte(',') }; writeJSON(kids[nd.KidStart+i], b) }
+\tfmt.Fprintf(b, "],\\"offset\\":%d,\\"end\\":%d}", nd.Offset, nd.End)
+}
+
+func parseOnce(src string) int32 {
+\ttoks = lex(src)
+\tpos = 0
+\tnodes = nodes[:0]; kids = kids[:0]; scratch = scratch[:0]
+\treturn parse${ir.entry}()
 }
 
 func main() {
@@ -230,17 +253,15 @@ func main() {
 \t// Self-bench: a numeric arg N times the lex+parse loop and prints ms/iteration.
 \tif len(os.Args) > 1 {
 \t\tif iters, err := strconv.Atoi(os.Args[1]); err == nil && iters > 0 {
-\t\t\tfor i := 0; i < 3; i++ { toks = lex(src); pos = 0; parse${ir.entry}() }
+\t\t\tfor i := 0; i < 3; i++ { parseOnce(src) }
 \t\t\tt0 := time.Now()
-\t\t\tfor i := 0; i < iters; i++ { toks = lex(src); pos = 0; parse${ir.entry}() }
+\t\t\tfor i := 0; i < iters; i++ { parseOnce(src) }
 \t\t\tfmt.Printf("%.4f\\n", float64(time.Since(t0).Nanoseconds())/1e6/float64(iters))
 \t\t\treturn
 \t\t}
 \t}
-\ttoks = lex(src)
-\tpos = 0
-\troot := parse${ir.entry}()
-\tif root == nil || pos != len(toks) {
+\troot := parseOnce(src)
+\tif root < 0 || pos != len(toks) {
 \t\tfmt.Fprintf(os.Stderr, "parse error (pos %d/%d)\\n", pos, len(toks))
 \t\tos.Exit(1)
 \t}

From 7314ddebccdc5ecad8c5e350a4e1c6f4b073b324 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 01:28:53 +0800
Subject: [PATCH 07/27] emit-portable: general token-pattern matcher
 (real-grammar lexer, stage 1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Toward supporting the real grammar files, the portable lexer gains a GENERAL
matcher: a token whose shape the four fast paths (run/string/line/block) don't
cleanly recognise is now compiled, from its raw token-pattern AST, to a
backtracking-free matcher (literal / charClass / seq / ordered-alt / greedy-repeat
/ zero-width lookahead+anchor) — no regex engine, so it stays portable. This
replaces the previous over-eager `literalPrefix` heuristic that mis-classified
numbers/strings/decorators as line comments.

This handles the STATELESS real-JS token tier the fast paths could not: `\u`-escaped
identifiers, the decimal/hex number family with a `(?!IdentChar)` boundary, and
both-quote strings with escapes. examples/richtokens.ts exercises exactly these,
and the emitted lexer is verified ≡ createLexer (the gate's richtokens case:
14/14 accept, 5/5 reject — including the Hex-vs-Number boundary disambiguation).

Implemented in the TS target so far; Go/Rust throw a clear message on a `pattern`
token (their matcher port is the next stage), so calc/minijs stay green in all
three. Full suite 42/42.

Remaining for the real grammar files (each a further stage): port the matcher to
Go/Rust; the STATEFUL lexer (regex-vs-division context, template interpolation)
that javascript/typescript need; the markup/indent lexers (html/yaml); and the
full parser algebra (not/sameLine/exclude/ctxMode/tsRelax/+/…).
---
 examples/richtokens.ts   | 40 +++++++++++++++++++++++++++++
 src/emit-portable.ts     | 33 ++++++++++++++++++------
 src/target-go.ts         |  3 ++-
 src/target-rust.ts       |  3 ++-
 src/target-ts.ts         | 55 ++++++++++++++++++++++++++++++++--------
 test/portable-targets.ts | 18 ++++++++++---
 6 files changed, 129 insertions(+), 23 deletions(-)
 create mode 100644 examples/richtokens.ts

diff --git a/examples/richtokens.ts b/examples/richtokens.ts
new file mode 100644
index 0000000..ed10aec
--- /dev/null
+++ b/examples/richtokens.ts
@@ -0,0 +1,40 @@
+// A token-stress grammar for the portable lexer's GENERAL matcher (stage 1 of real-grammar
+// support). It uses the STATELESS real-JS token shapes the 4-shape fast paths can't handle —
+// `\u`-escaped identifiers, the decimal/hex number family with a `(?!IdentChar)` boundary,
+// both-quote strings with escapes, and comments — so the portable lexer must compile the raw
+// token-pattern AST to a backtracking-free matcher. A trivial parser (a stream of value
+// tokens) makes the emitted CST essentially the token stream, so checking it against
+// createParser verifies the LEXER. (Stateful tokens — regex, templates — are NOT here; they
+// need cross-token lexer state, the next stage.)
+import {
+  token, rule, defineGrammar,
+  seq, oneOf, range, star, plus, repeat, optPattern, altPattern, noneOf, anyChar, notFollowedBy, many,
+} from '../src/api.ts';
+
+const digit = range('0', '9');
+const hexDigit = oneOf(digit, range('a', 'f'), range('A', 'F'));
+const idChar = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const uEsc = altPattern(seq('\\u', repeat(hexDigit, 4, 4)), seq('\\u{', plus(hexDigit), '}'));
+const boundary = notFollowedBy(idChar);   // a number can't be glued to an identifier char
+
+const Hex = token(seq('0', oneOf('x', 'X'), plus(hexDigit), boundary), { scope: 'constant.numeric.hex' });
+const Number_ = token(seq(plus(digit), star(seq('_', plus(digit))), optPattern(seq('.', plus(digit))), boundary), { scope: 'constant.numeric' });
+const Ident = token(seq(altPattern(oneOf(range('a', 'z'), range('A', 'Z'), '_', '$'), uEsc), star(altPattern(idChar, uEsc))), { identifier: true });
+const Str = token(altPattern(
+  seq('"', star(altPattern(noneOf('"', '\\'), seq('\\', anyChar()))), '"'),
+  seq("'", star(altPattern(noneOf("'", '\\'), seq('\\', anyChar()))), "'"),
+), { scope: 'string.quoted' });
+const LineComment = token(seq('//', star(noneOf('\n'))), { skip: true, scope: 'comment.line' });
+const BlockComment = token(seq('/*', star(altPattern(noneOf('*'), seq('*', noneOf('/')))), '*/'), { skip: true, scope: 'comment.block' });
+
+// Value = one value token; Program = a stream of them. (Lexer-level disambiguation — Hex vs
+// Number — comes from token DECLARATION ORDER, which both engines follow.)
+const Value = rule(($) => [Hex, Number_, Ident, Str]);
+const Program = rule(($) => [many(Value)]);
+
+export default defineGrammar({
+  name: 'richtokens',
+  scopeName: 'source.richtokens',
+  tokens: { Hex, Number: Number_, Ident, Str, LineComment, BlockComment },
+  rules: { Value, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index e445339..8e8c4c3 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -20,7 +20,7 @@
 // bracket NUDs (grouping, array), and mixfix LEDs (call / member / index) tried before
 // operators. buildIR THROWS on a construct outside this set rather than emit a wrong
 // parser. This is enough to derive a real JavaScript-subset parser (examples/minijs.ts).
-import type { CstGrammar, RuleExpr, TokenDecl } from './types.ts';
+import type { CstGrammar, RuleExpr, TokenDecl, TokenPattern } from './types.ts';
 import { analyzeGrammar, findEntryRule } from './grammar-analysis.ts';
 import { collectLiterals, isKeywordLiteral } from './grammar-utils.ts';
 import {
@@ -35,7 +35,11 @@ export type LexTok =
   | { kind: 'run'; name: string; first: CharRange[]; cont: CharRange[]; skip: boolean }   // ident/number char run
   | { kind: 'string'; name: string; delim: string; skip: boolean }                        // delim..delim, `\` escapes next
   | { kind: 'line'; name: string; prefix: string; skip: boolean }                         // prefix..end-of-line
-  | { kind: 'block'; name: string; open: string; close: string; skip: boolean };          // open..close
+  | { kind: 'block'; name: string; open: string; close: string; skip: boolean }           // open..close
+  // The general case: the raw token-pattern AST, compiled to a backtracking-free matcher
+  // by the target (no regex engine). Subsumes the fast paths above; used for the token
+  // shapes they don't cleanly recognise (escaped identifiers, the number family, …).
+  | { kind: 'pattern'; name: string; pattern: TokenPattern; skip: boolean };
 
 export type Lit = { value: string; ttype: '$keyword' | '$punct' };
 export type Step =
@@ -129,7 +133,9 @@ function buildIR(grammar: CstGrammar): ParserIR {
   return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules };
 }
 
-// Classify a token into a portable scanner spec via the structural recognizers.
+// Classify a token: a fast-path shape (run/string/line/block) when one cleanly matches,
+// otherwise the general `pattern` matcher. The fast paths keep the common simple tokens
+// (and the calc/minijs grammars) on tight, readable scan code in every target.
 function lexTok(t: TokenDecl): LexTok {
   const skip = t.flags.includes('skip');
   const qs = tokenPatternQuoteDelimAndEscape(t);
@@ -137,13 +143,24 @@ function lexTok(t: TokenDecl): LexTok {
   const bd = tokenPatternBlockDelimiters(t);
   if (bd) return { kind: 'block', name: t.name, open: bd[0], close: bd[1], skip };
   const loop = tokenPatternCharLoop(t);
-  if (loop) {
-    if (loop.bail.length > 0 || loop.bailNonAscii) throw new Error(`portable: token ${t.name} has a complex continuation (bail) — out of scope`);
+  if (loop && loop.bail.length === 0 && !loop.bailNonAscii) {
     return { kind: 'run', name: t.name, first: codesToRanges(loop.first), cont: codesToRanges(loop.cont), skip };
   }
-  const prefix = tokenPatternLiteralPrefix(t);
-  if (prefix) return { kind: 'line', name: t.name, prefix, skip };   // prefix with no distinct suffix → to end-of-line
-  throw new Error(`portable: token ${t.name} shape not recognized by the portable lexer`);
+  const line = lineCommentShape(t.pattern);   // PRECISE: prefix-literal then star(non-newline)
+  if (line) return { kind: 'line', name: t.name, prefix: line, skip };
+  return { kind: 'pattern', name: t.name, pattern: t.pattern, skip };
+}
+
+// A token is a line comment iff its pattern is `seq(<literal>, star(charClass excluding \n))`.
+function lineCommentShape(p: TokenPattern): string | null {
+  if (typeof p === 'string' || p.type !== 'seq' || p.items.length !== 2) return null;
+  const [head, tail] = p.items;
+  if (typeof head !== 'string') return null;
+  if (typeof tail === 'string' || tail.type !== 'repeat' || tail.min !== 0) return null;
+  const body = tail.body;
+  if (typeof body === 'string' || body.type !== 'charClass' || !body.negate) return null;
+  const excludesNl = body.items.some((it): boolean => it.type === 'char' && it.value === '\n');
+  return excludesNl ? head : null;
 }
 
 function codesToRanges(codes: number[]): CharRange[] {
diff --git a/src/target-go.ts b/src/target-go.ts
index 85f9f30..9c809e1 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -32,12 +32,13 @@ function scanTok(t: LexTok): string {
 \t\t\tfor e < n && src[e] != 10 { e++ }
 \t\t\t${push}pos = e; continue
 \t\t}`;
-  return `\t\tif strings.HasPrefix(src[pos:], ${J(t.open)}) {
+  if (t.kind === 'block') return `\t\tif strings.HasPrefix(src[pos:], ${J(t.open)}) {
 \t\t\te := pos + ${t.open.length}
 \t\t\tfor e < n && !strings.HasPrefix(src[e:], ${J(t.close)}) { e++ }
 \t\t\tif e < n { e += ${t.close.length} }
 \t\t\t${push}pos = e; continue
 \t\t}`;
+  throw new Error(`portable Go lexer: general 'pattern' tokens not yet supported (token ${t.name}) — the stateless-token matcher is implemented in the TS target only so far`);
 }
 
 function lexer(ir: ParserIR): string {
diff --git a/src/target-rust.ts b/src/target-rust.ts
index fb0c641..2fd6a7f 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -34,12 +34,13 @@ function scanTok(t: LexTok): string {
             while e < n && b[e] != 10 { e += 1; }
             ${push}pos = e; continue;
         }`;
-  return `        if src[pos..].starts_with(${J(t.open)}) {
+  if (t.kind === 'block') return `        if src[pos..].starts_with(${J(t.open)}) {
             let mut e = pos + ${t.open.length};
             while e < n && !src[e..].starts_with(${J(t.close)}) { e += 1; }
             if e < n { e += ${t.close.length}; }
             ${push}pos = e; continue;
         }`;
+  throw new Error(`portable Rust lexer: general 'pattern' tokens not yet supported (token ${t.name}) — the stateless-token matcher is implemented in the TS target only so far`);
 }
 
 function lexer(ir: ParserIR): string {
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 304eef5..d39821b 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -10,40 +10,75 @@ const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
   '(' + rs.map(([lo, hi]) => (lo === hi ? `${v} === ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ') + ')';
 
-function scanTok(t: LexTok): string {
-  const push = t.skip ? '' : `toks.push({ kind: ${J((t as { name: string }).name)}, text: src.slice(pos, e), off: pos, end: e }); `;
+import type { TokenPattern } from './types.ts';
+
+// Compile a token-pattern AST to backtracking-free matcher functions `_mN(p): number`
+// (returns the new position, or -1 on no match). Greedy `repeat`, ordered `alt`,
+// zero-width `lookahead`/`anchor` — the regex-free token-matcher tier.
+function ccCond(p: Extract<TokenPattern, { type: 'charClass' }>): string {
+  const parts = p.items.map((it) =>
+    it.type === 'char' ? `cc === ${it.value.charCodeAt(0)}` : `cc >= ${it.from.charCodeAt(0)} && cc <= ${it.to.charCodeAt(0)}`);
+  const inSet = parts.length === 1 ? parts[0] : '(' + parts.join(' || ') + ')';
+  return p.negate ? `!${inSet}` : inSet;
+}
+function compilePat(p: TokenPattern, defs: string[]): string {
+  const name = `_m${defs.length}`;
+  defs.push('');   // reserve the slot (keeps numbering stable across recursion)
+  let body: string;
+  if (typeof p === 'string') {
+    body = `=> _s.startsWith(${J(p)}, p) ? p + ${p.length} : -1`;
+  } else switch (p.type) {
+    case 'anyChar': body = `=> p < _s.length ? p + 1 : -1`; break;
+    case 'charClass': body = `=> { if (p >= _s.length) return -1; const cc = _s.charCodeAt(p); return ${ccCond(p)} ? p + 1 : -1; }`; break;
+    case 'seq': { const ms = p.items.map((x) => compilePat(x, defs)); body = `=> { ${ms.map((m) => `p = ${m}(p); if (p < 0) return -1;`).join(' ')} return p; }`; break; }
+    case 'alt': { const ms = p.items.map((x) => compilePat(x, defs)); body = `=> { ${ms.map((m) => `{ const r = ${m}(p); if (r >= 0) return r; }`).join(' ')} return -1; }`; break; }
+    case 'repeat': { const m = compilePat(p.body, defs); const mx = p.max !== undefined ? `if (c >= ${p.max}) break;` : ''; body = `=> { let q = p, c = 0; for (;;) { const r = ${m}(q); if (r < 0 || r === q) break; q = r; c++; ${mx} } return c >= ${p.min} ? q : -1; }`; break; }
+    case 'lookahead': { const m = compilePat(p.body, defs); body = `=> { const r = ${m}(p); return ${p.negate ? 'r < 0' : 'r >= 0'} ? p : -1; }`; break; }
+    case 'anchor': body = p.kind === 'start' ? `=> p === 0 ? p : -1` : `=> p === _s.length ? p : -1`; break;
+    default: throw new Error(`portable TS lexer: pattern '${(p as { type: string }).type}' unsupported`);
+  }
+  defs[Number(name.slice(2))] = `const ${name} = (p: number): number ${body};`;
+  return name;
+}
+
+function scanTok(t: LexTok, defs: string[]): string {
+  const name = (t as { name: string }).name;
+  const push = (endExpr: string) => (t.skip ? '' : `toks.push({ kind: ${J(name)}, text: src.slice(pos, ${endExpr}), off: pos, end: ${endExpr} }); `);
   if (t.kind === 'run') return `    if (${rangeCond('c', t.first)}) {
       let e = pos + 1;
       while (e < n) { const cc = src.charCodeAt(e); if (!${rangeCond('cc', t.cont)}) break; e++; }
-      ${push}pos = e; continue;
+      ${push('e')}pos = e; continue;
     }`;
   if (t.kind === 'string') return `    if (c === ${t.delim.charCodeAt(0)}) {
       let e = pos + 1;
       while (e < n) { const ch = src.charCodeAt(e); if (ch === 92) { e += 2; continue; } if (ch === ${t.delim.charCodeAt(0)}) { e++; break; } e++; }
-      ${push}pos = e; continue;
+      ${push('e')}pos = e; continue;
     }`;
   if (t.kind === 'line') return `    if (src.startsWith(${J(t.prefix)}, pos)) {
       let e = pos + ${t.prefix.length};
       while (e < n && src.charCodeAt(e) !== 10) e++;
-      ${push}pos = e; continue;
+      ${push('e')}pos = e; continue;
     }`;
-  return `    if (src.startsWith(${J(t.open)}, pos)) {
+  if (t.kind === 'block') return `    if (src.startsWith(${J(t.open)}, pos)) {
       let e = pos + ${t.open.length};
       while (e < n && !src.startsWith(${J(t.close)}, e)) e++;
       if (e < n) e += ${t.close.length};
-      ${push}pos = e; continue;
+      ${push('e')}pos = e; continue;
     }`;
+  const m = compilePat(t.pattern, defs);
+  return `    { const e = ${m}(pos); if (e > pos) { ${push('e')}pos = e; continue; } }`;
 }
 
 function lexer(ir: ParserIR): string {
-  const toks = ir.tokens.map(scanTok).join('\n');
+  const defs: string[] = [];
+  const toks = ir.tokens.map((t) => scanTok(t, defs)).join('\n');
   const puncts = ir.puncts.map((p) =>
     `    if (src.startsWith(${J(p)}, pos)) { toks.push({ kind: '', text: ${J(p)}, off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
-  return `function lex(src: string): Tok[] {
+  return `${defs.length ? 'let _s = "";\n' + defs.join('\n') + '\n' : ''}function lex(src: string): Tok[] {
   const toks: Tok[] = [];
   const n = src.length;
   let pos = 0;
-  while (pos < n) {
+${defs.length ? '  _s = src;\n' : ''}  while (pos < n) {
     const c = src.charCodeAt(pos);
     if (c === 32 || c === 9 || c === 10 || c === 13) { pos++; continue; }
 ${toks}
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 8c5384e..afd1821 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -21,7 +21,7 @@ import { goTarget } from '../src/target-go.ts';
 import { rustTarget } from '../src/target-rust.ts';
 import type { CstGrammar } from '../src/types.ts';
 
-type Case = { grammar: string; path: string; accept: string[]; reject: string[] };
+type Case = { grammar: string; path: string; accept: string[]; reject: string[]; tsOnly?: boolean };
 const CASES: Case[] = [
   {
     grammar: 'calc', path: '../examples/calc.ts',
@@ -49,6 +49,18 @@ const CASES: Case[] = [
     // identifier and it's an assignment expression; the oracle accepts it too.)
     reject: ['1 +;', '(1;', 'if x {}', 'foo(a,;', 'a.;', '[1,', 'function (){}'],
   },
+  {
+    // The general token-pattern matcher (stateless real-JS token tier): \u-escaped
+    // identifiers, the decimal/hex number family with a boundary, both-quote strings.
+    // TS-only for now — the Go/Rust port of the pattern matcher is the next stage.
+    grammar: 'richtokens', path: '../examples/richtokens.ts', tsOnly: true,
+    accept: [
+      '123', '0xFF', '1_000_000', '3.14', 'foo', 'bar_$x9', '"hi"', "'single'",
+      '"esc\\"q\\n"', '123 0xa foo "s" 3.14', '0xDEADbeef 42 _id $x cafe // line\n 7',
+      '/* block */ 99 x', 'caf\\u00e9 \\u0041bc', '1_2_3 0X1F 10.5 a1 b2',
+    ],
+    reject: ['12abc', '0x', '"unterminated', '3.', '#'],   // ($ is a valid identifier start, not a reject)
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>
@@ -86,14 +98,14 @@ for (const c of CASES) {
   writeFileSync(tsFile, emitPortableParser(grammar, tsTarget));
   runners.push({ label: 'typescript', run: (src) => runProc('node', [tsFile], src) });
 
-  if (HAS_GO) {
+  if (HAS_GO && !c.tsOnly) {
     const gdir = `${dir}/go`; mkdirSync(gdir, { recursive: true });
     writeFileSync(`${gdir}/main.go`, emitPortableParser(grammar, goTarget));
     writeFileSync(`${gdir}/go.mod`, 'module p\n\ngo 1.21\n');
     execFileSync('go', ['build', '-o', `${gdir}/p`, '.'], { cwd: gdir, stdio: 'pipe' });
     runners.push({ label: 'go', run: (src) => runProc(`${gdir}/p`, [], src) });
   }
-  if (HAS_RUST) {
+  if (HAS_RUST && !c.tsOnly) {
     const rfile = `${dir}/main.rs`;
     writeFileSync(rfile, emitPortableParser(grammar, rustTarget));
     execFileSync('rustc', ['-O', '-A', 'warnings', rfile, '-o', `${dir}/pr`], { stdio: 'pipe' });

From 747b03950f138cc2b28bb65968dd1d87fa2f2aba Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 01:41:54 +0800
Subject: [PATCH 08/27] emit-portable: port the general token matcher to Go +
 Rust (lexer convergence)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The target-agnostic lexer is now uniform across all three targets: the general
token-pattern matcher (stage 1, TS only) is ported to Go and Rust, so a `pattern`
token compiles to a backtracking-free matcher in every language — Go as
package-level `_mN(p int) int` funcs over a module-level source, Rust as named
`_mN(s, p) -> i64` funcs (closures can't recurse) threading the source as a param.

This is the lexer half of the issue-#6 target parameter: ONE target-agnostic lexer,
rendered per language. The optimized emit-lexer.ts stays a separate, JS-perf path —
it fills the arena parser's struct-of-arrays integer columns, a different token
contract than the portable Tok list, so merging would deoptimize it; the two
already share what should be shared (the token-pattern.ts algebra + recognizers).

Verified: examples/richtokens.ts (escaped idents, the number family with a boundary,
both-quote strings) now runs in ts/go/rust, each CST byte-identical to createParser
(gate: 14/14 accept + 5/5 reject per target). Full suite 42/42.
---
 src/target-go.ts         | 52 ++++++++++++++++++++++++++++++++--------
 src/target-rust.ts       | 51 ++++++++++++++++++++++++++++++++-------
 test/portable-targets.ts |  6 ++---
 3 files changed, 87 insertions(+), 22 deletions(-)

diff --git a/src/target-go.ts b/src/target-go.ts
index 9c809e1..b5f1926 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -10,46 +10,78 @@
 // slices to saved lengths; the slices keep their capacity across parses (reset to len 0), so a
 // warmed parser allocates ~nothing per parse.
 import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
+import type { TokenPattern } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
   '(' + rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `${v} >= ${lo} && ${v} <= ${hi}`)).join(' || ') + ')';
 
-function scanTok(t: LexTok): string {
-  const push = t.skip ? '' : `toks = append(toks, Tok{${J((t as { name: string }).name)}, src[pos:e], pos, e}); `;
+// Compile a token-pattern AST to backtracking-free package-level matcher funcs
+// `_mN(p int) int` (new position, or -1) over the module-level source `_s`.
+function ccCondGo(p: Extract<TokenPattern, { type: 'charClass' }>): string {
+  const parts = p.items.map((it) =>
+    it.type === 'char' ? `cc == ${it.value.charCodeAt(0)}` : `cc >= ${it.from.charCodeAt(0)} && cc <= ${it.to.charCodeAt(0)}`);
+  const inSet = parts.length === 1 ? parts[0] : '(' + parts.join(' || ') + ')';
+  return p.negate ? `!${inSet}` : inSet;
+}
+function compilePat(p: TokenPattern, defs: string[]): string {
+  const name = `_m${defs.length}`;
+  defs.push('');
+  let body: string;
+  if (typeof p === 'string') {
+    body = `{ if p <= len(_s) && strings.HasPrefix(_s[p:], ${J(p)}) { return p + ${p.length} }; return -1 }`;
+  } else switch (p.type) {
+    case 'anyChar': body = `{ if p < len(_s) { return p + 1 }; return -1 }`; break;
+    case 'charClass': body = `{ if p >= len(_s) { return -1 }; cc := int(_s[p]); if ${ccCondGo(p)} { return p + 1 }; return -1 }`; break;
+    case 'seq': { const ms = p.items.map((x) => compilePat(x, defs)); body = `{ ${ms.map((m) => `p = ${m}(p); if p < 0 { return -1 }`).join('; ')}; return p }`; break; }
+    case 'alt': { const ms = p.items.map((x) => compilePat(x, defs)); body = `{ ${ms.map((m) => `if r := ${m}(p); r >= 0 { return r }`).join('; ')}; return -1 }`; break; }
+    case 'repeat': { const m = compilePat(p.body, defs); const mx = p.max !== undefined ? `; if c >= ${p.max} { break }` : ''; body = `{ q, c := p, 0; for { r := ${m}(q); if r < 0 || r == q { break }; q = r; c++${mx} }; if c >= ${p.min} { return q }; return -1 }`; break; }
+    case 'lookahead': { const m = compilePat(p.body, defs); body = `{ r := ${m}(p); if ${p.negate ? 'r < 0' : 'r >= 0'} { return p }; return -1 }`; break; }
+    case 'anchor': body = p.kind === 'start' ? `{ if p == 0 { return p }; return -1 }` : `{ if p == len(_s) { return p }; return -1 }`; break;
+    default: throw new Error(`portable Go lexer: pattern '${(p as { type: string }).type}' unsupported`);
+  }
+  defs[Number(name.slice(2))] = `func ${name}(p int) int ${body}`;
+  return name;
+}
+
+function scanTok(t: LexTok, defs: string[]): string {
+  const name = (t as { name: string }).name;
+  const push = (endE: string) => (t.skip ? '' : `toks = append(toks, Tok{${J(name)}, src[pos:${endE}], pos, ${endE}}); `);
   if (t.kind === 'run') return `\t\tif ${rangeCond('c', t.first)} {
 \t\t\te := pos + 1
 \t\t\tfor e < n { cc := int(src[e]); if !${rangeCond('cc', t.cont)} { break }; e++ }
-\t\t\t${push}pos = e; continue
+\t\t\t${push('e')}pos = e; continue
 \t\t}`;
   if (t.kind === 'string') return `\t\tif c == ${t.delim.charCodeAt(0)} {
 \t\t\te := pos + 1
 \t\t\tfor e < n { ch := int(src[e]); if ch == 92 { e += 2; continue }; if ch == ${t.delim.charCodeAt(0)} { e++; break }; e++ }
-\t\t\t${push}pos = e; continue
+\t\t\t${push('e')}pos = e; continue
 \t\t}`;
   if (t.kind === 'line') return `\t\tif strings.HasPrefix(src[pos:], ${J(t.prefix)}) {
 \t\t\te := pos + ${t.prefix.length}
 \t\t\tfor e < n && src[e] != 10 { e++ }
-\t\t\t${push}pos = e; continue
+\t\t\t${push('e')}pos = e; continue
 \t\t}`;
   if (t.kind === 'block') return `\t\tif strings.HasPrefix(src[pos:], ${J(t.open)}) {
 \t\t\te := pos + ${t.open.length}
 \t\t\tfor e < n && !strings.HasPrefix(src[e:], ${J(t.close)}) { e++ }
 \t\t\tif e < n { e += ${t.close.length} }
-\t\t\t${push}pos = e; continue
+\t\t\t${push('e')}pos = e; continue
 \t\t}`;
-  throw new Error(`portable Go lexer: general 'pattern' tokens not yet supported (token ${t.name}) — the stateless-token matcher is implemented in the TS target only so far`);
+  const m = compilePat(t.pattern, defs);
+  return `\t\tif e := ${m}(pos); e > pos { ${push('e')}pos = e; continue }`;
 }
 
 function lexer(ir: ParserIR): string {
-  const toks = ir.tokens.map(scanTok).join('\n');
+  const defs: string[] = [];
+  const toks = ir.tokens.map((t) => scanTok(t, defs)).join('\n');
   const puncts = ir.puncts.map((p) =>
     `\t\tif strings.HasPrefix(src[pos:], ${J(p)}) { toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}}); pos += ${p.length}; continue }`).join('\n');
-  return `func lex(src string) []Tok {
+  return `${defs.length ? 'var _s string\n' + defs.join('\n') + '\n' : ''}func lex(src string) []Tok {
 \ttoks := toks[:0]
 \tn := len(src)
 \tpos := 0
-\tfor pos < n {
+${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
 \t\tc := int(src[pos])
 \t\tif c == 32 || c == 9 || c == 10 || c == 13 { pos++; continue }
 ${toks}
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 2fd6a7f..7ad6382 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -12,42 +12,75 @@
 // `fn(&mut Parser, &mut Vec<Cst>) -> bool`, threading the parser + kids as params (so nothing
 // is captured, sidestepping the borrow checker).
 import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
+import type { TokenPattern } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
   '(' + rs.map(([lo, hi]) => (lo === hi ? `${v} == ${lo}` : `(${lo}..=${hi}).contains(&${v})`)).join(' || ') + ')';
 
-function scanTok(t: LexTok): string {
-  const push = t.skip ? '' : `toks.push(Tok { kind: ${J((t as { name: string }).name)}, text: &src[pos..e], off: pos, end: e }); `;
+// Compile a token-pattern AST to backtracking-free matcher fns `_mN(s, p) -> i64`
+// (new position, or -1). Named functions (Rust closures can't recurse); the source is
+// threaded as a param (Rust has no convenient module-level mutable string).
+function ccCondRs(p: Extract<TokenPattern, { type: 'charClass' }>): string {
+  const parts = p.items.map((it) =>
+    it.type === 'char' ? `cc == ${it.value.charCodeAt(0)}` : `(${it.from.charCodeAt(0)}..=${it.to.charCodeAt(0)}).contains(&cc)`);
+  const inSet = parts.length === 1 ? parts[0] : '(' + parts.join(' || ') + ')';
+  return p.negate ? `!${inSet}` : inSet;
+}
+function compilePat(p: TokenPattern, defs: string[]): string {
+  const name = `_m${defs.length}`;
+  defs.push('');
+  let body: string;
+  if (typeof p === 'string') {
+    body = `if (p as usize) <= s.len() && s[p as usize..].starts_with(${J(p)}) { p + ${p.length} } else { -1 }`;
+  } else switch (p.type) {
+    case 'anyChar': body = `if (p as usize) < s.len() { p + 1 } else { -1 }`; break;
+    case 'charClass': body = `let u = p as usize; if u >= s.len() { return -1; } let cc = s.as_bytes()[u] as u32; if ${ccCondRs(p)} { p + 1 } else { -1 }`; break;
+    case 'seq': { const ms = p.items.map((x) => compilePat(x, defs)); body = `let mut p = p; ${ms.map((m) => `p = ${m}(s, p); if p < 0 { return -1; }`).join(' ')} p`; break; }
+    case 'alt': { const ms = p.items.map((x) => compilePat(x, defs)); body = `${ms.map((m) => `{ let r = ${m}(s, p); if r >= 0 { return r; } }`).join(' ')} -1`; break; }
+    case 'repeat': { const m = compilePat(p.body, defs); const mx = p.max !== undefined ? ` if c >= ${p.max} { break; }` : ''; body = `let mut q = p; let mut c = 0i64; loop { let r = ${m}(s, q); if r < 0 || r == q { break; } q = r; c += 1;${mx} } if c >= ${p.min} { q } else { -1 }`; break; }
+    case 'lookahead': { const m = compilePat(p.body, defs); body = `let r = ${m}(s, p); if ${p.negate ? 'r < 0' : 'r >= 0'} { p } else { -1 }`; break; }
+    case 'anchor': body = p.kind === 'start' ? `if p == 0 { p } else { -1 }` : `if p as usize == s.len() { p } else { -1 }`; break;
+    default: throw new Error(`portable Rust lexer: pattern '${(p as { type: string }).type}' unsupported`);
+  }
+  defs[Number(name.slice(2))] = `fn ${name}(s: &str, p: i64) -> i64 { ${body} }`;
+  return name;
+}
+
+function scanTok(t: LexTok, defs: string[]): string {
+  const name = (t as { name: string }).name;
+  const push = (endE: string) => (t.skip ? '' : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE} }); `);
   if (t.kind === 'run') return `        if ${rangeCond('c', t.first)} {
             let mut e = pos + 1;
             while e < n { let cc = b[e] as u32; if !${rangeCond('cc', t.cont)} { break } e += 1; }
-            ${push}pos = e; continue;
+            ${push('e')}pos = e; continue;
         }`;
   if (t.kind === 'string') return `        if c == ${t.delim.charCodeAt(0)} {
             let mut e = pos + 1;
             while e < n { let ch = b[e] as u32; if ch == 92 { e += 2; continue } if ch == ${t.delim.charCodeAt(0)} { e += 1; break } e += 1; }
-            ${push}pos = e; continue;
+            ${push('e')}pos = e; continue;
         }`;
   if (t.kind === 'line') return `        if src[pos..].starts_with(${J(t.prefix)}) {
             let mut e = pos + ${t.prefix.length};
             while e < n && b[e] != 10 { e += 1; }
-            ${push}pos = e; continue;
+            ${push('e')}pos = e; continue;
         }`;
   if (t.kind === 'block') return `        if src[pos..].starts_with(${J(t.open)}) {
             let mut e = pos + ${t.open.length};
             while e < n && !src[e..].starts_with(${J(t.close)}) { e += 1; }
             if e < n { e += ${t.close.length}; }
-            ${push}pos = e; continue;
+            ${push('e')}pos = e; continue;
         }`;
-  throw new Error(`portable Rust lexer: general 'pattern' tokens not yet supported (token ${t.name}) — the stateless-token matcher is implemented in the TS target only so far`);
+  const m = compilePat(t.pattern, defs);
+  return `        { let e = ${m}(src, pos as i64); if e > pos as i64 { let e = e as usize; ${push('e')}pos = e; continue; } }`;
 }
 
 function lexer(ir: ParserIR): string {
-  const toks = ir.tokens.map(scanTok).join('\n');
+  const defs: string[] = [];
+  const toks = ir.tokens.map((t) => scanTok(t, defs)).join('\n');
   const puncts = ir.puncts.map((p) =>
     `        if src[pos..].starts_with(${J(p)}) { toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
-  return `fn lex<'a>(src: &'a str) -> Vec<Tok<'a>> {
+  return `${defs.length ? defs.join('\n') + '\n' : ''}fn lex<'a>(src: &'a str) -> Vec<Tok<'a>> {
     let b = src.as_bytes();
     let n = b.len();
     let mut toks: Vec<Tok> = Vec::new();
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index afd1821..4522023 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -51,9 +51,9 @@ const CASES: Case[] = [
   },
   {
     // The general token-pattern matcher (stateless real-JS token tier): \u-escaped
-    // identifiers, the decimal/hex number family with a boundary, both-quote strings.
-    // TS-only for now — the Go/Rust port of the pattern matcher is the next stage.
-    grammar: 'richtokens', path: '../examples/richtokens.ts', tsOnly: true,
+    // identifiers, the decimal/hex number family with a boundary, both-quote strings —
+    // compiled to a backtracking-free matcher in all three targets.
+    grammar: 'richtokens', path: '../examples/richtokens.ts',
     accept: [
       '123', '0xFF', '1_000_000', '3.14', 'foo', 'bar_$x9', '"hi"', "'single'",
       '"esc\\"q\\n"', '123 0xa foo "s" 3.14', '0xDEADbeef 42 _id $x cafe // line\n 7',

From b10cfddd9bb44c6828c3c99a158ac1c46fa80ba8 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 02:04:12 +0800
Subject: [PATCH 09/27] emit-portable: stateful regex-vs-division lexer in all
 three targets (stage 3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The portable lexer gains its first STATEFUL capability — the JS `/` problem. A `/`
starts a regex literal in expression context but is division after a value; the
lexer now threads the previous token plus a control-head paren stack to decide,
gating the regex token on the same prevIsValue predicate gen-lexer.ts uses. The
regexContext sets (division-after type/text, expression-start keywords, control-head
keywords, member accessors, ambiguous postfix ops) are baked from the grammar into
an IR.regexCtx and rendered per target: TS/Go via closures over the lex state, Rust
via a LexState struct (two closures can't co-capture the same mutable state).

examples/regexjs.ts (minijs + regex literals) verifies it: `a / b` is division,
`/re/` after `=`/keyword is a regex, `if (x) /re/` is a regex (control head),
`obj.for(x) / y` is division (member name, not a head), `[1,2] / 3` is division —
all ts/go/rust CSTs byte-identical to createParser (gate: 15/15 accept, 5/5 reject
per target). Full suite 42/42.

Also fixes a single-item negated char-class losing its parens (`!cc == 10` instead
of `!(cc == 10)`) in all three matchers — surfaced by the Go compiler, and by adding
regex-escape cases the earlier corpus had missed (an aggregate that passed for the
wrong reason). Remaining for the real grammar files: template interpolation, the
markup/indent lexers, and the full parser algebra.
---
 examples/regexjs.ts      | 77 ++++++++++++++++++++++++++++++++++++++++
 src/emit-portable.ts     | 35 +++++++++++++++++-
 src/target-go.ts         | 58 ++++++++++++++++++++++++------
 src/target-rust.ts       | 59 +++++++++++++++++++++++-------
 src/target-ts.ts         | 49 +++++++++++++++++++------
 test/portable-targets.ts | 14 ++++++++
 6 files changed, 256 insertions(+), 36 deletions(-)
 create mode 100644 examples/regexjs.ts

diff --git a/examples/regexjs.ts b/examples/regexjs.ts
new file mode 100644
index 0000000..b9ad82d
--- /dev/null
+++ b/examples/regexjs.ts
@@ -0,0 +1,77 @@
+// minijs + REGEX literals — exercises the portable lexer's STATEFUL regex-vs-division
+// disambiguation (stage 3). A `/` is a regex in expression context but division after a
+// value; `if (x) /re/` is a regex (control-head paren), `obj.for(x) / y` is division
+// (member name, not a head). The regexContext config + paren-head/bang state are ported
+// from createLexer; the gate checks the emitted CST is byte-identical on inputs that mix
+// regex literals and division.
+import {
+  token, rule, defineGrammar, left, right, op, prefix, alt,
+  seq, oneOf, range, star, sep, opt, many, altPattern, noneOf, anyChar,
+} from '../src/api.ts';
+
+const digit = range('0', '9');
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(digit, star(digit)), { scope: 'constant.numeric' });
+const Str = token(seq('"', star(altPattern(noneOf('"', '\\'), seq('\\', anyChar()))), '"'), { scope: 'string.quoted.double' });
+const LineComment = token(seq('//', star(noneOf('\n'))), { skip: true, scope: 'comment.line' });
+const BlockComment = token(seq('/*', star(altPattern(noneOf('*'), seq('*', noneOf('/')))), '*/'), { skip: true, scope: 'comment.block' });
+
+// Regex literal: `/ body / flags`, body is non-(/\[)newline chars, escapes, or `[...]` classes.
+const rxClass = seq('[', star(altPattern(noneOf(']', '\\', '\n'), seq('\\', noneOf('\n')))), ']');
+const rxChar = altPattern(noneOf('/', '\\', '[', '\n'), seq('\\', noneOf('\n')), rxClass);
+const rxFirst = altPattern(noneOf('/', '\\', '[', '*', '\n'), seq('\\', noneOf('\n')), rxClass);
+const Regex = token(seq('/', rxFirst, star(rxChar), '/', star(idCont)), {
+  regex: true, scope: 'string.regexp',
+  regexContext: {
+    divisionAfterTypes: ['Ident', 'Number', 'Str'],
+    divisionAfterTexts: [')', ']', 'this', 'true', 'false', 'null'],
+    regexAfterTexts: ['return', 'typeof', 'delete', 'void', 'in', 'instanceof', 'new', 'do', 'else'],
+    regexAfterParenKeywords: ['if', 'while', 'for'],
+    memberAccessTexts: ['.'],
+    postfixAfterValueTexts: [],
+  },
+});
+
+const jsPrec = [
+  right('='),
+  left('||'), left('&&'),
+  left('==', '!=', '===', '!=='),
+  left('<', '>', '<=', '>='),
+  left('+', '-'),
+  left('*', '/', '%'),
+  right(prefix('!', '-', '+', '~')),
+];
+
+const Expr = rule(($) => [
+  Number_, Str, Ident, Regex,
+  ['(', $, ')'],
+  ['[', opt(sep($, ',')), ']'],
+  [prefix, $],
+  [$, op, $],
+  [$, '(', opt(sep($, ',')), ')'],
+  [$, '.', Ident],
+  [$, '[', $, ']'],
+]);
+
+const Block = rule(($) => [['{', many(Stmt), '}']]);
+const Stmt = rule(($) => [
+  Block,
+  [alt('var', 'let', 'const'), Ident, opt('=', Expr), ';'],
+  ['if', '(', Expr, ')', Stmt, opt('else', Stmt)],
+  ['while', '(', Expr, ')', Stmt],
+  ['return', opt(Expr), ';'],
+  ['function', Ident, '(', opt(sep(Ident, ',')), ')', Block],
+  [Expr, ';'],
+]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'regexjs',
+  scopeName: 'source.regexjs',
+  tokens: { LineComment, BlockComment, Number: Number_, Str, Regex, Ident },
+  prec: jsPrec,
+  rules: { Expr, Block, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 8e8c4c3..30881ca 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -65,12 +65,28 @@ export type PrattRule = {
 };
 export type RuleIR = RdRule | PrattRule;
 
+// Stateful regex-vs-division disambiguation (the JS `/` problem): a `/` starts a regex
+// literal in expression context but is division after a value. The lexer threads the
+// previous token + a control-head paren stack to decide; the predicate sets are baked
+// from the grammar's `regexContext`. Mirrors gen-lexer.ts's prevIsValue exactly.
+export type RegexCtx = {
+  regexToken: string;          // the token flagged `regex`, gated on expression context
+  identToken: string;          // identifier token kind (for the keyword-vs-value test)
+  divisionTypes: string[];     // prev TOKEN KINDS after which `/` is division
+  divisionTexts: string[];     // prev TEXTS after which `/` is division
+  regexTexts: string[];        // expression-start keywords (a `/` after them is a regex)
+  parenHeadKw: string[];       // keywords whose `(` is a control head (regex after its `)`)
+  memberAccess: string[];      // accessors that make a following keyword a member name, not a head
+  postfixAfterValue: string[]; // ambiguous postfix/prefix ops (e.g. `!`): value only in postfix
+};
+
 export type ParserIR = {
   grammarName: string;
   entry: string;
   tokens: LexTok[];      // for the char scanner, tried in declaration order
   puncts: string[];      // punctuation literals, longest-first (maximal munch)
   rules: RuleIR[];
+  regexCtx: RegexCtx | null;   // null unless the grammar has a regex token with context
 };
 
 export interface Target {
@@ -130,7 +146,24 @@ function buildIR(grammar: CstGrammar): ParserIR {
     return { kind: 'rd', name: r.name, alts: r.body.type === 'alt' ? r.body.items.map(altSteps) : [altSteps(r.body)] };
   });
 
-  return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules };
+  // Regex-vs-division context (only if the grammar declares a regex token + config).
+  let regexCtx: RegexCtx | null = null;
+  const rxTok = grammar.tokens.find((t) => t.flags.includes('regex'));
+  const rxCfg = grammar.tokens.find((t) => t.regexContext)?.regexContext;
+  if (rxTok && rxCfg) {
+    regexCtx = {
+      regexToken: rxTok.name,
+      identToken: grammar.tokens.find((t) => t.identifier)?.name ?? '',
+      divisionTypes: [...(rxCfg.divisionAfterTypes ?? [])],
+      divisionTexts: [...(rxCfg.divisionAfterTexts ?? [])],
+      regexTexts: [...(rxCfg.regexAfterTexts ?? [])],
+      parenHeadKw: [...(rxCfg.regexAfterParenKeywords ?? [])],
+      memberAccess: [...(rxCfg.memberAccessTexts ?? [])],
+      postfixAfterValue: [...(rxCfg.postfixAfterValueTexts ?? [])],
+    };
+  }
+
+  return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules, regexCtx };
 }
 
 // Classify a token: a fast-path shape (run/string/line/block) when one cleanly matches,
diff --git a/src/target-go.ts b/src/target-go.ts
index b5f1926..b39a811 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -21,7 +21,7 @@ const rangeCond = (v: string, rs: CharRange[]) =>
 function ccCondGo(p: Extract<TokenPattern, { type: 'charClass' }>): string {
   const parts = p.items.map((it) =>
     it.type === 'char' ? `cc == ${it.value.charCodeAt(0)}` : `cc >= ${it.from.charCodeAt(0)} && cc <= ${it.to.charCodeAt(0)}`);
-  const inSet = parts.length === 1 ? parts[0] : '(' + parts.join(' || ') + ')';
+  const inSet = '(' + parts.join(' || ') + ')';
   return p.negate ? `!${inSet}` : inSet;
 }
 function compilePat(p: TokenPattern, defs: string[]): string {
@@ -44,44 +44,80 @@ function compilePat(p: TokenPattern, defs: string[]): string {
   return name;
 }
 
-function scanTok(t: LexTok, defs: string[]): string {
+function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
   const name = (t as { name: string }).name;
-  const push = (endE: string) => (t.skip ? '' : `toks = append(toks, Tok{${J(name)}, src[pos:${endE}], pos, ${endE}}); `);
-  if (t.kind === 'run') return `\t\tif ${rangeCond('c', t.first)} {
+  const stateful = rxTok !== undefined;
+  const push = (endE: string) => (t.skip ? '' : stateful ? `emit(${J(name)}, src[pos:${endE}], pos, ${endE}); ` : `toks = append(toks, Tok{${J(name)}, src[pos:${endE}], pos, ${endE}}); `);
+  const gate = stateful && name === rxTok ? '!prevIsValue() && ' : '';
+  if (t.kind === 'run') return `\t\tif ${gate}${rangeCond('c', t.first)} {
 \t\t\te := pos + 1
 \t\t\tfor e < n { cc := int(src[e]); if !${rangeCond('cc', t.cont)} { break }; e++ }
 \t\t\t${push('e')}pos = e; continue
 \t\t}`;
-  if (t.kind === 'string') return `\t\tif c == ${t.delim.charCodeAt(0)} {
+  if (t.kind === 'string') return `\t\tif ${gate}c == ${t.delim.charCodeAt(0)} {
 \t\t\te := pos + 1
 \t\t\tfor e < n { ch := int(src[e]); if ch == 92 { e += 2; continue }; if ch == ${t.delim.charCodeAt(0)} { e++; break }; e++ }
 \t\t\t${push('e')}pos = e; continue
 \t\t}`;
-  if (t.kind === 'line') return `\t\tif strings.HasPrefix(src[pos:], ${J(t.prefix)}) {
+  if (t.kind === 'line') return `\t\tif ${gate}strings.HasPrefix(src[pos:], ${J(t.prefix)}) {
 \t\t\te := pos + ${t.prefix.length}
 \t\t\tfor e < n && src[e] != 10 { e++ }
 \t\t\t${push('e')}pos = e; continue
 \t\t}`;
-  if (t.kind === 'block') return `\t\tif strings.HasPrefix(src[pos:], ${J(t.open)}) {
+  if (t.kind === 'block') return `\t\tif ${gate}strings.HasPrefix(src[pos:], ${J(t.open)}) {
 \t\t\te := pos + ${t.open.length}
 \t\t\tfor e < n && !strings.HasPrefix(src[e:], ${J(t.close)}) { e++ }
 \t\t\tif e < n { e += ${t.close.length} }
 \t\t\t${push('e')}pos = e; continue
 \t\t}`;
   const m = compilePat(t.pattern, defs);
-  return `\t\tif e := ${m}(pos); e > pos { ${push('e')}pos = e; continue }`;
+  return `\t\tif ${gate ? gate + 'true' : 'true'} { if e := ${m}(pos); e > pos { ${push('e')}pos = e; continue } }`;
 }
 
 function lexer(ir: ParserIR): string {
   const defs: string[] = [];
-  const toks = ir.tokens.map((t) => scanTok(t, defs)).join('\n');
+  const rx = ir.regexCtx;
+  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken)).join('\n');
+  const pushPunct = rx ? (p: string) => `emit("", ${J(p)}, pos, pos + ${p.length})` : (p: string) => `toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}})`;
   const puncts = ir.puncts.map((p) =>
-    `\t\tif strings.HasPrefix(src[pos:], ${J(p)}) { toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}}); pos += ${p.length}; continue }`).join('\n');
+    `\t\tif strings.HasPrefix(src[pos:], ${J(p)}) { ${pushPunct(p)}; pos += ${p.length}; continue }`).join('\n');
+  const goMap = (a: string[]) => `map[string]bool{${a.map((x) => `${J(x)}: true`).join(', ')}}`;
+  const stateBlock = rx ? `\tprevText, prevKind, bpText := "", "", ""
+\thasPrev, hasPrev2 := false, false
+\tparenHead := []bool{}
+\tlastClose, lastBang := false, false
+\t_divT := ${goMap(rx.divisionTexts)}
+\t_divK := ${goMap(rx.divisionTypes)}
+\t_rxT := ${goMap(rx.regexTexts)}
+\t_phK := ${goMap(rx.parenHeadKw)}
+\t_mem := ${goMap(rx.memberAccess)}
+\t_pav := ${goMap(rx.postfixAfterValue)}
+\tconst IDENT = ${J(rx.identToken)}
+\tprevIsValue := func() bool {
+\t\tif !hasPrev { return false }
+\t\tif _pav[prevText] { return lastBang }
+\t\tisExprKw := prevKind == IDENT && _rxT[prevText]
+\t\tisParenHead := prevText == ")" && lastClose
+\t\treturn !isExprKw && !isParenHead && (_divK[prevKind] || _divT[prevText])
+\t}
+\temit := func(kind, text string, off, end int) {
+\t\tif text == "(" {
+\t\t\tisMember := hasPrev2 && _mem[bpText]
+\t\t\tparenHead = append(parenHead, !isMember && prevKind == IDENT && _phK[prevText])
+\t\t} else if text == ")" {
+\t\t\tif len(parenHead) > 0 { lastClose = parenHead[len(parenHead)-1]; parenHead = parenHead[:len(parenHead)-1] } else { lastClose = false }
+\t\t}
+\t\tif _pav[text] { lastBang = prevIsValue() }
+\t\ttoks = append(toks, Tok{kind, text, off, end})
+\t\tbpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true
+\t}
+\t_ = bpText; _ = hasPrev2; _ = lastBang; _ = prevIsValue
+` : '';
   return `${defs.length ? 'var _s string\n' + defs.join('\n') + '\n' : ''}func lex(src string) []Tok {
 \ttoks := toks[:0]
 \tn := len(src)
 \tpos := 0
-${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
+${stateBlock}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
 \t\tc := int(src[pos])
 \t\tif c == 32 || c == 9 || c == 10 || c == 13 { pos++; continue }
 ${toks}
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 7ad6382..ba78f6f 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -24,7 +24,7 @@ const rangeCond = (v: string, rs: CharRange[]) =>
 function ccCondRs(p: Extract<TokenPattern, { type: 'charClass' }>): string {
   const parts = p.items.map((it) =>
     it.type === 'char' ? `cc == ${it.value.charCodeAt(0)}` : `(${it.from.charCodeAt(0)}..=${it.to.charCodeAt(0)}).contains(&cc)`);
-  const inSet = parts.length === 1 ? parts[0] : '(' + parts.join(' || ') + ')';
+  const inSet = '(' + parts.join(' || ') + ')';
   return p.negate ? `!${inSet}` : inSet;
 }
 function compilePat(p: TokenPattern, defs: string[]): string {
@@ -47,43 +47,76 @@ function compilePat(p: TokenPattern, defs: string[]): string {
   return name;
 }
 
-function scanTok(t: LexTok, defs: string[]): string {
+function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
   const name = (t as { name: string }).name;
-  const push = (endE: string) => (t.skip ? '' : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE} }); `);
-  if (t.kind === 'run') return `        if ${rangeCond('c', t.first)} {
+  const stateful = rxTok !== undefined;
+  const push = (endE: string) => (t.skip ? '' : stateful ? `st.emit(${J(name)}, &src[pos..${endE}], pos, ${endE}); ` : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE} }); `);
+  const gate = stateful && name === rxTok ? '!st.prev_is_value() && ' : '';
+  if (t.kind === 'run') return `        if ${gate}${rangeCond('c', t.first)} {
             let mut e = pos + 1;
             while e < n { let cc = b[e] as u32; if !${rangeCond('cc', t.cont)} { break } e += 1; }
             ${push('e')}pos = e; continue;
         }`;
-  if (t.kind === 'string') return `        if c == ${t.delim.charCodeAt(0)} {
+  if (t.kind === 'string') return `        if ${gate}c == ${t.delim.charCodeAt(0)} {
             let mut e = pos + 1;
             while e < n { let ch = b[e] as u32; if ch == 92 { e += 2; continue } if ch == ${t.delim.charCodeAt(0)} { e += 1; break } e += 1; }
             ${push('e')}pos = e; continue;
         }`;
-  if (t.kind === 'line') return `        if src[pos..].starts_with(${J(t.prefix)}) {
+  if (t.kind === 'line') return `        if ${gate}src[pos..].starts_with(${J(t.prefix)}) {
             let mut e = pos + ${t.prefix.length};
             while e < n && b[e] != 10 { e += 1; }
             ${push('e')}pos = e; continue;
         }`;
-  if (t.kind === 'block') return `        if src[pos..].starts_with(${J(t.open)}) {
+  if (t.kind === 'block') return `        if ${gate}src[pos..].starts_with(${J(t.open)}) {
             let mut e = pos + ${t.open.length};
             while e < n && !src[e..].starts_with(${J(t.close)}) { e += 1; }
             if e < n { e += ${t.close.length}; }
             ${push('e')}pos = e; continue;
         }`;
   const m = compilePat(t.pattern, defs);
-  return `        { let e = ${m}(src, pos as i64); if e > pos as i64 { let e = e as usize; ${push('e')}pos = e; continue; } }`;
+  return `        if ${gate}true { let e = ${m}(src, pos as i64); if e > pos as i64 { let e = e as usize; ${push('e')}pos = e; continue; } }`;
 }
 
 function lexer(ir: ParserIR): string {
   const defs: string[] = [];
-  const toks = ir.tokens.map((t) => scanTok(t, defs)).join('\n');
+  const rx = ir.regexCtx;
+  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken)).join('\n');
   const puncts = ir.puncts.map((p) =>
-    `        if src[pos..].starts_with(${J(p)}) { toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
-  return `${defs.length ? defs.join('\n') + '\n' : ''}fn lex<'a>(src: &'a str) -> Vec<Tok<'a>> {
+    `        if src[pos..].starts_with(${J(p)}) { ${rx ? `st.emit("", &src[pos..pos + ${p.length}], pos, pos + ${p.length});` : `toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length} });`} pos += ${p.length}; continue; }`).join('\n');
+  const rsArr = (a: string[]) => `&[${a.map(J).join(', ')}]`;
+  const rxPreamble = rx ? `const _DIVT: &[&str] = ${rsArr(rx.divisionTexts)};
+const _DIVK: &[&str] = ${rsArr(rx.divisionTypes)};
+const _RXT: &[&str] = ${rsArr(rx.regexTexts)};
+const _PHK: &[&str] = ${rsArr(rx.parenHeadKw)};
+const _MEM: &[&str] = ${rsArr(rx.memberAccess)};
+const _PAV: &[&str] = ${rsArr(rx.postfixAfterValue)};
+const _IDENT: &str = ${J(rx.identToken)};
+fn _in(set: &[&str], x: &str) -> bool { set.iter().any(|s| *s == x) }
+struct LexState<'a> { toks: Vec<Tok<'a>>, prev_text: &'a str, prev_kind: &'static str, bp_text: &'a str, has_prev: bool, has_prev2: bool, paren_head: Vec<bool>, last_close: bool, last_bang: bool }
+impl<'a> LexState<'a> {
+    fn prev_is_value(&self) -> bool {
+        if !self.has_prev { return false; }
+        if _in(_PAV, self.prev_text) { return self.last_bang; }
+        let is_expr_kw = self.prev_kind == _IDENT && _in(_RXT, self.prev_text);
+        let is_paren_head = self.prev_text == ")" && self.last_close;
+        !is_expr_kw && !is_paren_head && (_in(_DIVK, self.prev_kind) || _in(_DIVT, self.prev_text))
+    }
+    fn emit(&mut self, kind: &'static str, text: &'a str, off: usize, end: usize) {
+        if text == "(" { let is_member = self.has_prev2 && _in(_MEM, self.bp_text); self.paren_head.push(!is_member && self.prev_kind == _IDENT && _in(_PHK, self.prev_text)); }
+        else if text == ")" { self.last_close = self.paren_head.pop().unwrap_or(false); }
+        if _in(_PAV, text) { self.last_bang = self.prev_is_value(); }
+        self.toks.push(Tok { kind, text, off, end });
+        self.bp_text = self.prev_text; self.has_prev2 = self.has_prev; self.prev_kind = kind; self.prev_text = text; self.has_prev = true;
+    }
+}
+` : '';
+  const open = rx
+    ? `    let mut st = LexState { toks: Vec::new(), prev_text: "", prev_kind: "", bp_text: "", has_prev: false, has_prev2: false, paren_head: Vec::new(), last_close: false, last_bang: false };`
+    : `    let mut toks: Vec<Tok> = Vec::new();`;
+  return `${defs.length ? defs.join('\n') + '\n' : ''}${rxPreamble}fn lex<'a>(src: &'a str) -> Vec<Tok<'a>> {
     let b = src.as_bytes();
     let n = b.len();
-    let mut toks: Vec<Tok> = Vec::new();
+${open}
     let mut pos = 0usize;
     while pos < n {
         let c = b[pos] as u32;
@@ -92,7 +125,7 @@ ${toks}
 ${puncts}
         panic!("lex error at {}", pos);
     }
-    toks
+    ${rx ? 'st.toks' : 'toks'}
 }`;
 }
 
diff --git a/src/target-ts.ts b/src/target-ts.ts
index d39821b..08acf52 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -18,7 +18,7 @@ import type { TokenPattern } from './types.ts';
 function ccCond(p: Extract<TokenPattern, { type: 'charClass' }>): string {
   const parts = p.items.map((it) =>
     it.type === 'char' ? `cc === ${it.value.charCodeAt(0)}` : `cc >= ${it.from.charCodeAt(0)} && cc <= ${it.to.charCodeAt(0)}`);
-  const inSet = parts.length === 1 ? parts[0] : '(' + parts.join(' || ') + ')';
+  const inSet = '(' + parts.join(' || ') + ')';
   return p.negate ? `!${inSet}` : inSet;
 }
 function compilePat(p: TokenPattern, defs: string[]): string {
@@ -41,44 +41,71 @@ function compilePat(p: TokenPattern, defs: string[]): string {
   return name;
 }
 
-function scanTok(t: LexTok, defs: string[]): string {
+function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
   const name = (t as { name: string }).name;
-  const push = (endExpr: string) => (t.skip ? '' : `toks.push({ kind: ${J(name)}, text: src.slice(pos, ${endExpr}), off: pos, end: ${endExpr} }); `);
-  if (t.kind === 'run') return `    if (${rangeCond('c', t.first)}) {
+  const stateful = rxTok !== undefined;
+  // `emit(...)` threads the regex-context state in stateful mode; a plain push otherwise.
+  const push = (endExpr: string) => (t.skip ? '' : `${stateful ? 'emit' : 'push'}(${J(name)}, src.slice(pos, ${endExpr}), pos, ${endExpr}); `);
+  const gate = stateful && name === rxTok ? '!prevIsValue() && ' : '';
+  if (t.kind === 'run') return `    if (${gate}${rangeCond('c', t.first)}) {
       let e = pos + 1;
       while (e < n) { const cc = src.charCodeAt(e); if (!${rangeCond('cc', t.cont)}) break; e++; }
       ${push('e')}pos = e; continue;
     }`;
-  if (t.kind === 'string') return `    if (c === ${t.delim.charCodeAt(0)}) {
+  if (t.kind === 'string') return `    if (${gate}c === ${t.delim.charCodeAt(0)}) {
       let e = pos + 1;
       while (e < n) { const ch = src.charCodeAt(e); if (ch === 92) { e += 2; continue; } if (ch === ${t.delim.charCodeAt(0)}) { e++; break; } e++; }
       ${push('e')}pos = e; continue;
     }`;
-  if (t.kind === 'line') return `    if (src.startsWith(${J(t.prefix)}, pos)) {
+  if (t.kind === 'line') return `    if (${gate}src.startsWith(${J(t.prefix)}, pos)) {
       let e = pos + ${t.prefix.length};
       while (e < n && src.charCodeAt(e) !== 10) e++;
       ${push('e')}pos = e; continue;
     }`;
-  if (t.kind === 'block') return `    if (src.startsWith(${J(t.open)}, pos)) {
+  if (t.kind === 'block') return `    if (${gate}src.startsWith(${J(t.open)}, pos)) {
       let e = pos + ${t.open.length};
       while (e < n && !src.startsWith(${J(t.close)}, e)) e++;
       if (e < n) e += ${t.close.length};
       ${push('e')}pos = e; continue;
     }`;
   const m = compilePat(t.pattern, defs);
-  return `    { const e = ${m}(pos); if (e > pos) { ${push('e')}pos = e; continue; } }`;
+  return `    if (${gate}true) { const e = ${m}(pos); if (e > pos) { ${push('e')}pos = e; continue; } }`;
 }
 
 function lexer(ir: ParserIR): string {
   const defs: string[] = [];
-  const toks = ir.tokens.map((t) => scanTok(t, defs)).join('\n');
+  const rx = ir.regexCtx;
+  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken)).join('\n');
+  const pushFn = rx ? 'emit' : 'push';
   const puncts = ir.puncts.map((p) =>
-    `    if (src.startsWith(${J(p)}, pos)) { toks.push({ kind: '', text: ${J(p)}, off: pos, end: pos + ${p.length} }); pos += ${p.length}; continue; }`).join('\n');
+    `    if (src.startsWith(${J(p)}, pos)) { ${pushFn}('', ${J(p)}, pos, pos + ${p.length}); pos += ${p.length}; continue; }`).join('\n');
+  const set = (a: string[]) => `new Set([${a.map(J).join(', ')}])`;
+  const stateBlock = rx ? `  let prevText = '', prevKind = '', bpText = '', hasPrev = false, hasPrev2 = false;
+  const parenHead: boolean[] = [];
+  let lastClose = false, lastBang = false;
+  const _divT = ${set(rx.divisionTexts)}, _divK = ${set(rx.divisionTypes)}, _rxT = ${set(rx.regexTexts)};
+  const _phK = ${set(rx.parenHeadKw)}, _mem = ${set(rx.memberAccess)}, _pav = ${set(rx.postfixAfterValue)};
+  const IDENT = ${J(rx.identToken)};
+  function prevIsValue(): boolean {
+    if (!hasPrev) return false;
+    if (_pav.has(prevText)) return lastBang;
+    const isExprKw = prevKind === IDENT && _rxT.has(prevText);
+    const isParenHead = prevText === ')' && lastClose;
+    return !isExprKw && !isParenHead && (_divK.has(prevKind) || _divT.has(prevText));
+  }
+  function emit(kind: string, text: string, off: number, end: number): void {
+    if (text === '(') { const isMember = hasPrev2 && _mem.has(bpText); parenHead.push(!isMember && prevKind === IDENT && _phK.has(prevText)); }
+    else if (text === ')') { lastClose = parenHead.pop() ?? false; }
+    if (_pav.has(text)) lastBang = prevIsValue();
+    toks.push({ kind, text, off, end });
+    bpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true;
+  }
+` : '';
   return `${defs.length ? 'let _s = "";\n' + defs.join('\n') + '\n' : ''}function lex(src: string): Tok[] {
   const toks: Tok[] = [];
   const n = src.length;
   let pos = 0;
-${defs.length ? '  _s = src;\n' : ''}  while (pos < n) {
+${defs.length ? '  _s = src;\n' : ''}${stateBlock}${rx ? '' : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end }); };\n'}  while (pos < n) {
     const c = src.charCodeAt(pos);
     if (c === 32 || c === 9 || c === 10 || c === 13) { pos++; continue; }
 ${toks}
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 4522023..bf0e6ea 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -61,6 +61,20 @@ const CASES: Case[] = [
     ],
     reject: ['12abc', '0x', '"unterminated', '3.', '#'],   // ($ is a valid identifier start, not a reject)
   },
+  {
+    // The STATEFUL regex-vs-division lexer: `/` is a regex in expression context, division
+    // after a value. Exercises every branch of prevIsValue — after `=`/keyword/`(`-head
+    // (regex) vs after value/`)`/`]`/member/call (division), plus regex escapes & classes.
+    grammar: 'regexjs', path: '../examples/regexjs.ts',
+    accept: [
+      'a / b;', 'var r = /abc/g;', 'return /re/;', 'if (x) /re/;', '(a + b) / c;',
+      'a.b / c;', 'foo(x) / y;', '[1, 2] / 3;', 'var x = a / b / c;',
+      'var re = /[a-z]+/i; x / y;', 'f(/re/, a / b);', 'var z = /a\\/b/;',
+      'var d = /\\d+\\w/g;', 'var k = /[\\]]/;', 'if (a) /x/; else b / c;',
+    ],
+    // (`var ;` is VALID — `var` is an identifier, so it's the expression statement `var;`.)
+    reject: ['a / ;', 'if (x /re/;', '/re/', '* a;', 'a = = b;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From c0d84d004a4295f6495042130ab5254e5be4e51f Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 02:30:41 +0800
Subject: [PATCH 10/27] emit-portable: template-literal interpolation in all
 three targets (stage 4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The portable lexer's second stateful feature: `${…}` interpolation. A `` ` `` opens a
span scanned to the next `${` (emit $templateHead) or closing `` ` `` (the whole token,
no substitution); a `}` that closes a hole resumes the span ($templateMiddle / Tail).
A templateStack of brace-depths decides which `}` closes the hole versus a nested
`{…}` (object/block) or nested template inside it. The parser's Pratt nud sees a
$templateHead and assembles head·expr·(middle·expr)*·tail into a synthetic $template
node, parsing each hole with the Pratt expression rule.

The lexer state machine generalises cleanly with the regex one — a grammar can have
regex, templates, or both share one emit() / LexState (Rust: a struct that now also
carries the template_stack). examples/templatejs.ts (minijs + templates + a shorthand
object so a hole can hold `{…}`) verifies it: no-substitution, adjacent/multiple holes,
expressions in holes, NESTED templates, and an object inside a hole (the brace-depth
counter) — all ts/go/rust CSTs byte-identical to createParser (gate: 11/11 accept,
4/4 reject per target). Full suite 42/42.

Tagged templates (`` tag`…` `` — a postfix-token Pratt LED) are out of scope here;
that's a parser-algebra gap, the remaining work alongside the markup/indent lexers.
---
 examples/templatejs.ts   |  61 +++++++++++++++++++++
 src/emit-portable.ts     |  34 +++++++++++-
 src/target-go.ts         |  96 ++++++++++++++++++++++++++-------
 src/target-rust.ts       | 111 ++++++++++++++++++++++++++++++---------
 src/target-ts.ts         |  95 ++++++++++++++++++++++++++-------
 test/portable-targets.ts |  13 +++++
 6 files changed, 346 insertions(+), 64 deletions(-)
 create mode 100644 examples/templatejs.ts

diff --git a/examples/templatejs.ts b/examples/templatejs.ts
new file mode 100644
index 0000000..e2f2367
--- /dev/null
+++ b/examples/templatejs.ts
@@ -0,0 +1,61 @@
+// minijs + TEMPLATE LITERALS — exercises the portable lexer's second STATEFUL feature
+// (stage 4): `${…}` interpolation. The lexer splits `` `a${x}b${y}c` `` into
+// $templateHead·$templateMiddle·$templateTail around the holes, tracking a brace-depth
+// stack so a nested `{…}` (or a nested template) inside a hole doesn't close it; the
+// parser assembles the pieces and interpolated expressions into a `$template` node.
+import {
+  token, rule, defineGrammar, left, right, op, prefix, alt,
+  seq, oneOf, range, star, sep, opt, many, altPattern, noneOf, notFollowedBy,
+} from '../src/api.ts';
+
+const digit = range('0', '9');
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(digit, star(digit)), { scope: 'constant.numeric' });
+const Str = token(seq('"', star(altPattern(noneOf('"', '\\'), seq('\\', noneOf('\n')))), '"'), { scope: 'string.quoted.double' });
+const LineComment = token(seq('//', star(noneOf('\n'))), { skip: true, scope: 'comment.line' });
+
+// NoSubstitution template: backtick body excludes a real `${` (a `$` not followed by `{`
+// stays literal); the `template` config drives the interpolated split in the lexer.
+const Template = token(
+  seq('`', star(altPattern(noneOf('`', '\\', '$'), seq('\\', noneOf('\n')), seq('$', notFollowedBy('{')))), '`'),
+  { scope: 'string.template', template: { open: '`', interpOpen: '${', interpClose: '}' } },
+);
+
+const jsPrec = [
+  right('='),
+  left('||'), left('&&'),
+  left('+', '-'),
+  left('*', '/', '%'),
+  right(prefix('!', '-', '+')),
+];
+
+const Expr = rule(($) => [
+  Number_, Str, Template, Ident,
+  ['(', $, ')'],
+  ['{', opt(sep(Ident, ',')), '}'],     // shorthand object — gives a hole a nested `{ … }`
+  [prefix, $],
+  [$, op, $],
+  [$, '(', opt(sep($, ',')), ')'],
+  [$, '.', Ident],
+]);
+
+const Block = rule(($) => [['{', many(Stmt), '}']]);
+const Stmt = rule(($) => [
+  Block,
+  [alt('var', 'let', 'const'), Ident, opt('=', Expr), ';'],
+  ['if', '(', Expr, ')', Stmt, opt('else', Stmt)],
+  ['return', opt(Expr), ';'],
+  [Expr, ';'],
+]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'templatejs',
+  scopeName: 'source.templatejs',
+  tokens: { Ident, Number: Number_, Str, Template, LineComment },
+  prec: jsPrec,
+  rules: { Expr, Block, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 30881ca..bfdaf0b 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -80,6 +80,20 @@ export type RegexCtx = {
   postfixAfterValue: string[]; // ambiguous postfix/prefix ops (e.g. `!`): value only in postfix
 };
 
+// Template literals with `${…}` interpolation: a STATEFUL lexer split. A `` ` `` opens a
+// span scanned to the next `${` (→ $templateHead) or closing `` ` `` (→ the whole token,
+// no substitution); a `}` that closes a hole resumes the span (→ $templateMiddle / Tail).
+// A `templateStack` of brace-depths tracks which `}` closes the hole vs. a nested `{…}`.
+// The parser assembles head·expr·(middle·expr)*·tail into a synthetic `$template` node.
+export type TplCfg = {
+  token: string;        // the token flagged `template`; its NoSubstitution form is a plain leaf
+  open: string;         // `` ` ``
+  interpOpen: string;   // `${`
+  interpClose: string;  // `}`
+  braceOpen: string;    // `{` — a nested one deepens the hole, so its `}` is not the closer
+  interpRule: string;   // the rule that parses each `${…}` hole (the Pratt expression rule)
+};
+
 export type ParserIR = {
   grammarName: string;
   entry: string;
@@ -87,6 +101,7 @@ export type ParserIR = {
   puncts: string[];      // punctuation literals, longest-first (maximal munch)
   rules: RuleIR[];
   regexCtx: RegexCtx | null;   // null unless the grammar has a regex token with context
+  tpl: TplCfg | null;          // null unless the grammar has a template token
 };
 
 export interface Target {
@@ -163,7 +178,24 @@ function buildIR(grammar: CstGrammar): ParserIR {
     };
   }
 
-  return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules, regexCtx };
+  // Template literals (only if the grammar declares a template token). The interpolation
+  // holes are parsed by the Pratt expression rule — the rule that carries operator leds.
+  let tpl: TplCfg | null = null;
+  const tplTok = grammar.tokens.find((t) => t.template);
+  if (tplTok && tplTok.template) {
+    const prattName = rules.find((r) => r.kind === 'pratt')?.name;
+    if (!prattName) throw new Error('portable: a template token needs a Pratt expression rule to parse its interpolations');
+    tpl = {
+      token: tplTok.name,
+      open: tplTok.template.open,
+      interpOpen: tplTok.template.interpOpen,
+      interpClose: tplTok.template.interpClose,
+      braceOpen: tplTok.template.interpOpen.slice(-1),
+      interpRule: prattName,
+    };
+  }
+
+  return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules, regexCtx, tpl };
 }
 
 // Classify a token: a fast-path shape (run/string/line/block) when one cleanly matches,
diff --git a/src/target-go.ts b/src/target-go.ts
index b39a811..9d74c46 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -9,7 +9,7 @@
 // stack. A node is an int32 index, never a heap pointer. Backtracking truncates the three
 // slices to saved lengths; the slices keep their capacity across parses (reset to len 0), so a
 // warmed parser allocates ~nothing per parse.
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
 import type { TokenPattern } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
@@ -44,11 +44,12 @@ function compilePat(p: TokenPattern, defs: string[]): string {
   return name;
 }
 
-function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
+function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): string {
   const name = (t as { name: string }).name;
-  const stateful = rxTok !== undefined;
+  const stateful = rxTok !== undefined || tplTok !== undefined;
+  if (tplTok !== undefined && name === tplTok) return '';   // template token scanned by the state machine
   const push = (endE: string) => (t.skip ? '' : stateful ? `emit(${J(name)}, src[pos:${endE}], pos, ${endE}); ` : `toks = append(toks, Tok{${J(name)}, src[pos:${endE}], pos, ${endE}}); `);
-  const gate = stateful && name === rxTok ? '!prevIsValue() && ' : '';
+  const gate = rxTok !== undefined && name === rxTok ? '!prevIsValue() && ' : '';
   if (t.kind === 'run') return `\t\tif ${gate}${rangeCond('c', t.first)} {
 \t\t\te := pos + 1
 \t\t\tfor e < n { cc := int(src[e]); if !${rangeCond('cc', t.cont)} { break }; e++ }
@@ -77,12 +78,14 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
 function lexer(ir: ParserIR): string {
   const defs: string[] = [];
   const rx = ir.regexCtx;
-  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken)).join('\n');
-  const pushPunct = rx ? (p: string) => `emit("", ${J(p)}, pos, pos + ${p.length})` : (p: string) => `toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}})`;
+  const tpl = ir.tpl;
+  const stateful = !!(rx || tpl);
+  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken, tpl?.token)).join('\n');
+  const pushPunct = stateful ? (p: string) => `emit("", ${J(p)}, pos, pos + ${p.length})` : (p: string) => `toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}})`;
   const puncts = ir.puncts.map((p) =>
     `\t\tif strings.HasPrefix(src[pos:], ${J(p)}) { ${pushPunct(p)}; pos += ${p.length}; continue }`).join('\n');
   const goMap = (a: string[]) => `map[string]bool{${a.map((x) => `${J(x)}: true`).join(', ')}}`;
-  const stateBlock = rx ? `\tprevText, prevKind, bpText := "", "", ""
+  const rxState = rx ? `\tprevText, prevKind, bpText := "", "", ""
 \thasPrev, hasPrev2 := false, false
 \tparenHead := []bool{}
 \tlastClose, lastBang := false, false
@@ -100,27 +103,56 @@ function lexer(ir: ParserIR): string {
 \t\tisParenHead := prevText == ")" && lastClose
 \t\treturn !isExprKw && !isParenHead && (_divK[prevKind] || _divT[prevText])
 \t}
-\temit := func(kind, text string, off, end int) {
-\t\tif text == "(" {
+` : '';
+  const tplState = tpl ? `\ttemplateStack := []int{}
+\tscanTplSpan := func(p int) (bool, int) {
+\t\tfor p < n {
+\t\t\tif strings.HasPrefix(src[p:], ${J(tpl.interpOpen)}) { return true, p + ${tpl.interpOpen.length} }
+\t\t\tif src[p] == 92 { p += 2; continue }
+\t\t\tif strings.HasPrefix(src[p:], ${J(tpl.open)}) { return false, p + ${tpl.open.length} }
+\t\t\tp++
+\t\t}
+\t\treturn false, p
+\t}
+\t_ = scanTplSpan
+` : '';
+  const emitHooks = [
+    rx ? `\t\tif text == "(" {
 \t\t\tisMember := hasPrev2 && _mem[bpText]
 \t\t\tparenHead = append(parenHead, !isMember && prevKind == IDENT && _phK[prevText])
 \t\t} else if text == ")" {
 \t\t\tif len(parenHead) > 0 { lastClose = parenHead[len(parenHead)-1]; parenHead = parenHead[:len(parenHead)-1] } else { lastClose = false }
 \t\t}
-\t\tif _pav[text] { lastBang = prevIsValue() }
-\t\ttoks = append(toks, Tok{kind, text, off, end})
-\t\tbpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true
+\t\tif _pav[text] { lastBang = prevIsValue() }` : '',
+    tpl ? `\t\tif len(templateStack) > 0 { if text == ${J(tpl.braceOpen)} { templateStack[len(templateStack)-1]++ } else if text == ${J(tpl.interpClose)} { templateStack[len(templateStack)-1]-- } }` : '',
+  ].filter(Boolean).join('\n');
+  const emitTail = rx ? `\n\t\tbpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true` : '';
+  const emitFn = stateful ? `\temit := func(kind, text string, off, end int) {
+${emitHooks}
+\t\ttoks = append(toks, Tok{kind, text, off, end})${emitTail}
 \t}
-\t_ = bpText; _ = hasPrev2; _ = lastBang; _ = prevIsValue
+\t_ = emit
+` : '';
+  const tplDispatch = tpl ? `\t\tif len(templateStack) > 0 && strings.HasPrefix(src[pos:], ${J(tpl.interpClose)}) && templateStack[len(templateStack)-1] == 0 {
+\t\t\ttemplateStack = templateStack[:len(templateStack)-1]
+\t\t\tinterp, e := scanTplSpan(pos + ${tpl.interpClose.length})
+\t\t\tif interp { emit("$templateMiddle", src[pos:e], pos, e); templateStack = append(templateStack, 0) } else { emit("$templateTail", src[pos:e], pos, e) }
+\t\t\tpos = e; continue
+\t\t}
+\t\tif strings.HasPrefix(src[pos:], ${J(tpl.open)}) {
+\t\t\tinterp, e := scanTplSpan(pos + ${tpl.open.length})
+\t\t\tif interp { emit("$templateHead", src[pos:e], pos, e); templateStack = append(templateStack, 0) } else { emit(${J(tpl.token)}, src[pos:e], pos, e) }
+\t\t\tpos = e; continue
+\t\t}
 ` : '';
   return `${defs.length ? 'var _s string\n' + defs.join('\n') + '\n' : ''}func lex(src string) []Tok {
 \ttoks := toks[:0]
 \tn := len(src)
 \tpos := 0
-${stateBlock}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
+${rxState}${tplState}${emitFn}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
 \t\tc := int(src[pos])
 \t\tif c == 32 || c == 9 || c == 10 || c == 13 { pos++; continue }
-${toks}
+${tplDispatch}${toks}
 ${puncts}
 \t\tpanic(fmt.Sprintf("lex error at %d", pos))
 \t}
@@ -151,7 +183,15 @@ ${r.alts.map(alt).join('\n')}
 }`;
 }
 
-function prattRule(r: PrattRule): string {
+function prattRule(r: PrattRule, tpl: TplCfg | null): string {
+  const tplNud = tpl && r.nudToks.includes(tpl.token)
+    ? `\tif t.Kind == "$templateHead" {
+\t\tnode := matchTemplate()
+\t\tif node < 0 { return -1 }
+\t\tsb := len(scratch); scratch = append(scratch, node)
+\t\treturn finish(${J(r.name)}, sb, nodes[node].Offset)
+\t}\n`
+    : '';
   const bin = r.binary.map((b) => `${J(b.op)}: {${b.lbp}, ${b.rbp}}`).join(', ');
   const pre = r.prefix.map((p) => `${J(p.op)}: ${p.rbp}`).join(', ');
   const atoms = r.nudToks.map((k) => `${J(k)}: true`).join(', ');
@@ -192,7 +232,7 @@ ${r.leds.map(ledArm).join('\n')}
 func ${r.name}nud() int32 {
 \tt := peek()
 \tif t == nil { return -1 }
-\tif ${r.name}ATOM[t.Kind] {
+${tplNud}\tif ${r.name}ATOM[t.Kind] {
 \t\tsb := len(scratch); scratch = append(scratch, mkLeaf(t.Kind, t.Off, t.End)); pos++
 \t\treturn finish(${J(r.name)}, sb, t.Off)
 \t}
@@ -213,7 +253,25 @@ export const goTarget: Target = {
   name: 'go',
   ext: 'go',
   render(ir: ParserIR): string {
-    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r) : rdRule(r))).join('\n\n');
+    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
+    const matchTemplate = ir.tpl ? `func matchTemplate() int32 {
+\tt := peek()
+\tif t == nil || t.Kind != "$templateHead" { return -1 }
+\tsb := len(scratch); nb := len(nodes); kb := len(kids); save := pos
+\tscratch = append(scratch, mkLeaf("$templateHead", t.Off, t.End)); pos++
+\tfor {
+\t\texpr := parse${ir.tpl.interpRule}()
+\t\tif expr < 0 { pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1 }
+\t\tscratch = append(scratch, expr)
+\t\tnext := peek()
+\t\tif next == nil { pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1 }
+\t\tif next.Kind == "$templateMiddle" { scratch = append(scratch, mkLeaf("$templateMiddle", next.Off, next.End)); pos++; continue }
+\t\tif next.Kind == "$templateTail" { scratch = append(scratch, mkLeaf("$templateTail", next.Off, next.End)); pos++; break }
+\t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1
+\t}
+\treturn finish("$template", sb, t.Off)
+}
+` : '';
     return `// GENERATED by emit-portable.ts (goTarget) — parser for grammar "${ir.grammarName}".
 package main
 
@@ -296,7 +354,7 @@ func altLit(opts [][2]string) bool {
 \treturn false
 }
 
-${ruleFns}
+${matchTemplate}${ruleFns}
 
 func writeJSON(id int32, b *strings.Builder) {
 \tnd := &nodes[id]
diff --git a/src/target-rust.ts b/src/target-rust.ts
index ba78f6f..51aebf5 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -11,7 +11,7 @@
 // returns it. Sub-sequence combinators (star/opt/sep) take non-capturing fn pointers
 // `fn(&mut Parser, &mut Vec<Cst>) -> bool`, threading the parser + kids as params (so nothing
 // is captured, sidestepping the borrow checker).
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
 import type { TokenPattern } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
@@ -47,11 +47,12 @@ function compilePat(p: TokenPattern, defs: string[]): string {
   return name;
 }
 
-function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
+function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): string {
   const name = (t as { name: string }).name;
-  const stateful = rxTok !== undefined;
+  const stateful = rxTok !== undefined || tplTok !== undefined;
+  if (tplTok !== undefined && name === tplTok) return '';   // template token scanned by the state machine
   const push = (endE: string) => (t.skip ? '' : stateful ? `st.emit(${J(name)}, &src[pos..${endE}], pos, ${endE}); ` : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE} }); `);
-  const gate = stateful && name === rxTok ? '!st.prev_is_value() && ' : '';
+  const gate = rxTok !== undefined && name === rxTok ? '!st.prev_is_value() && ' : '';
   if (t.kind === 'run') return `        if ${gate}${rangeCond('c', t.first)} {
             let mut e = pos + 1;
             while e < n { let cc = b[e] as u32; if !${rangeCond('cc', t.cont)} { break } e += 1; }
@@ -80,11 +81,15 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
 function lexer(ir: ParserIR): string {
   const defs: string[] = [];
   const rx = ir.regexCtx;
-  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken)).join('\n');
+  const tpl = ir.tpl;
+  const stateful = !!(rx || tpl);
+  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken, tpl?.token)).join('\n');
   const puncts = ir.puncts.map((p) =>
-    `        if src[pos..].starts_with(${J(p)}) { ${rx ? `st.emit("", &src[pos..pos + ${p.length}], pos, pos + ${p.length});` : `toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length} });`} pos += ${p.length}; continue; }`).join('\n');
+    `        if src[pos..].starts_with(${J(p)}) { ${stateful ? `st.emit("", &src[pos..pos + ${p.length}], pos, pos + ${p.length});` : `toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length} });`} pos += ${p.length}; continue; }`).join('\n');
   const rsArr = (a: string[]) => `&[${a.map(J).join(', ')}]`;
-  const rxPreamble = rx ? `const _DIVT: &[&str] = ${rsArr(rx.divisionTexts)};
+  // Struct fields / emit hooks / init are assembled per-feature so a grammar can have regex,
+  // templates, or both share one LexState.
+  const rxConsts = rx ? `const _DIVT: &[&str] = ${rsArr(rx.divisionTexts)};
 const _DIVK: &[&str] = ${rsArr(rx.divisionTypes)};
 const _RXT: &[&str] = ${rsArr(rx.regexTexts)};
 const _PHK: &[&str] = ${rsArr(rx.parenHeadKw)};
@@ -92,28 +97,62 @@ const _MEM: &[&str] = ${rsArr(rx.memberAccess)};
 const _PAV: &[&str] = ${rsArr(rx.postfixAfterValue)};
 const _IDENT: &str = ${J(rx.identToken)};
 fn _in(set: &[&str], x: &str) -> bool { set.iter().any(|s| *s == x) }
-struct LexState<'a> { toks: Vec<Tok<'a>>, prev_text: &'a str, prev_kind: &'static str, bp_text: &'a str, has_prev: bool, has_prev2: bool, paren_head: Vec<bool>, last_close: bool, last_bang: bool }
-impl<'a> LexState<'a> {
-    fn prev_is_value(&self) -> bool {
+` : '';
+  const tplFn = tpl ? `fn _scan_tpl_span(s: &str, mut p: usize) -> (bool, usize) {
+    let n = s.len();
+    while p < n {
+        if s[p..].starts_with(${J(tpl.interpOpen)}) { return (true, p + ${tpl.interpOpen.length}); }
+        if s.as_bytes()[p] == 92 { p += 2; continue; }
+        if s[p..].starts_with(${J(tpl.open)}) { return (false, p + ${tpl.open.length}); }
+        p += 1;
+    }
+    (false, p)
+}
+` : '';
+  const fields = ['toks: Vec<Tok<\'a>>',
+    rx ? 'prev_text: &\'a str, prev_kind: &\'static str, bp_text: &\'a str, has_prev: bool, has_prev2: bool, paren_head: Vec<bool>, last_close: bool, last_bang: bool' : '',
+    tpl ? 'template_stack: Vec<i64>' : ''].filter(Boolean).join(', ');
+  const prevIsValue = rx ? `    fn prev_is_value(&self) -> bool {
         if !self.has_prev { return false; }
         if _in(_PAV, self.prev_text) { return self.last_bang; }
         let is_expr_kw = self.prev_kind == _IDENT && _in(_RXT, self.prev_text);
         let is_paren_head = self.prev_text == ")" && self.last_close;
         !is_expr_kw && !is_paren_head && (_in(_DIVK, self.prev_kind) || _in(_DIVT, self.prev_text))
     }
-    fn emit(&mut self, kind: &'static str, text: &'a str, off: usize, end: usize) {
-        if text == "(" { let is_member = self.has_prev2 && _in(_MEM, self.bp_text); self.paren_head.push(!is_member && self.prev_kind == _IDENT && _in(_PHK, self.prev_text)); }
+` : '';
+  const emitHooks = [
+    rx ? `        if text == "(" { let is_member = self.has_prev2 && _in(_MEM, self.bp_text); self.paren_head.push(!is_member && self.prev_kind == _IDENT && _in(_PHK, self.prev_text)); }
         else if text == ")" { self.last_close = self.paren_head.pop().unwrap_or(false); }
-        if _in(_PAV, text) { self.last_bang = self.prev_is_value(); }
-        self.toks.push(Tok { kind, text, off, end });
-        self.bp_text = self.prev_text; self.has_prev2 = self.has_prev; self.prev_kind = kind; self.prev_text = text; self.has_prev = true;
+        if _in(_PAV, text) { self.last_bang = self.prev_is_value(); }` : '',
+    tpl ? `        if !self.template_stack.is_empty() { if text == ${J(tpl.braceOpen)} { *self.template_stack.last_mut().unwrap() += 1; } else if text == ${J(tpl.interpClose)} { *self.template_stack.last_mut().unwrap() -= 1; } }` : '',
+  ].filter(Boolean).join('\n');
+  const emitTail = rx ? `
+        self.bp_text = self.prev_text; self.has_prev2 = self.has_prev; self.prev_kind = kind; self.prev_text = text; self.has_prev = true;` : '';
+  const stateImpl = stateful ? `struct LexState<'a> { ${fields} }
+impl<'a> LexState<'a> {
+${prevIsValue}    fn emit(&mut self, kind: &'static str, text: &'a str, off: usize, end: usize) {
+${emitHooks}
+        self.toks.push(Tok { kind, text, off, end });${emitTail}
     }
 }
 ` : '';
-  const open = rx
-    ? `    let mut st = LexState { toks: Vec::new(), prev_text: "", prev_kind: "", bp_text: "", has_prev: false, has_prev2: false, paren_head: Vec::new(), last_close: false, last_bang: false };`
-    : `    let mut toks: Vec<Tok> = Vec::new();`;
-  return `${defs.length ? defs.join('\n') + '\n' : ''}${rxPreamble}fn lex<'a>(src: &'a str) -> Vec<Tok<'a>> {
+  const initFields = ['toks: Vec::new()',
+    rx ? 'prev_text: "", prev_kind: "", bp_text: "", has_prev: false, has_prev2: false, paren_head: Vec::new(), last_close: false, last_bang: false' : '',
+    tpl ? 'template_stack: Vec::new()' : ''].filter(Boolean).join(', ');
+  const open = stateful ? `    let mut st = LexState { ${initFields} };` : `    let mut toks: Vec<Tok> = Vec::new();`;
+  const tplDispatch = tpl ? `        if !st.template_stack.is_empty() && src[pos..].starts_with(${J(tpl.interpClose)}) && *st.template_stack.last().unwrap() == 0 {
+            st.template_stack.pop();
+            let (interp, e) = _scan_tpl_span(src, pos + ${tpl.interpClose.length});
+            if interp { st.emit("$templateMiddle", &src[pos..e], pos, e); st.template_stack.push(0); } else { st.emit("$templateTail", &src[pos..e], pos, e); }
+            pos = e; continue;
+        }
+        if src[pos..].starts_with(${J(tpl.open)}) {
+            let (interp, e) = _scan_tpl_span(src, pos + ${tpl.open.length});
+            if interp { st.emit("$templateHead", &src[pos..e], pos, e); st.template_stack.push(0); } else { st.emit(${J(tpl.token)}, &src[pos..e], pos, e); }
+            pos = e; continue;
+        }
+` : '';
+  return `${defs.length ? defs.join('\n') + '\n' : ''}${rxConsts}${tplFn}${stateImpl}fn lex<'a>(src: &'a str) -> Vec<Tok<'a>> {
     let b = src.as_bytes();
     let n = b.len();
 ${open}
@@ -121,11 +160,11 @@ ${open}
     while pos < n {
         let c = b[pos] as u32;
         if c == 32 || c == 9 || c == 10 || c == 13 { pos += 1; continue; }
-${toks}
+${tplDispatch}${toks}
 ${puncts}
         panic!("lex error at {}", pos);
     }
-    ${rx ? 'st.toks' : 'toks'}
+    ${stateful ? 'st.toks' : 'toks'}
 }`;
 }
 
@@ -164,7 +203,12 @@ ${r.alts.map(alt).join('\n')}
     }`;
 }
 
-function prattRule(r: PrattRule): string {
+function prattRule(r: PrattRule, tpl: TplCfg | null): string {
+  const tplNud = tpl && r.nudToks.includes(tpl.token)
+    ? `        if t.kind == "$templateHead" {
+            return self.match_template().map(|n| { let (o, e) = (n.offset, n.end); Cst::node(${J(r.name)}, vec![n], o, e) });
+        }\n`
+    : '';
   const binArms = r.binary.map((b) => `${J(b.op)} => Some((${b.lbp}, ${b.rbp}))`).join(', ');
   const preArms = r.prefix.map((p) => `${J(p.op)} => Some(${p.rbp})`).join(', ');
   const atomArm = r.nudToks.map(J).join(' | ');
@@ -202,7 +246,7 @@ ${r.leds.map(ledArm).join('\n')}
     }
     fn ${r.name}_nud(&mut self) -> Option<Cst> {
         let t = self.peek()?;
-        if Parser::${r.name}_atom(t.kind) {
+${tplNud}        if Parser::${r.name}_atom(t.kind) {
             self.pos += 1;
             return Some(Cst::node(${J(r.name)}, vec![Cst::leaf(t.kind, t.off, t.end)], t.off, t.end));
         }
@@ -223,7 +267,24 @@ export const rustTarget: Target = {
   name: 'rust',
   ext: 'rs',
   render(ir: ParserIR): string {
-    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r) : rdRule(r))).join('\n\n');
+    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
+    const matchTemplate = ir.tpl ? `    fn match_template(&mut self) -> Option<Cst> {
+        let t = self.peek()?;
+        if t.kind != "$templateHead" { return None; }
+        let save = self.pos; self.pos += 1;
+        let mut children: Vec<Cst> = vec![Cst::leaf("$templateHead", t.off, t.end)];
+        loop {
+            let expr = match self.parse_${ir.tpl.interpRule}() { Some(e) => e, None => { self.pos = save; return None; } };
+            children.push(expr);
+            let next = match self.peek() { Some(x) => x, None => { self.pos = save; return None; } };
+            if next.kind == "$templateMiddle" { children.push(Cst::leaf("$templateMiddle", next.off, next.end)); self.pos += 1; continue; }
+            if next.kind == "$templateTail" { children.push(Cst::leaf("$templateTail", next.off, next.end)); self.pos += 1; break; }
+            self.pos = save; return None;
+        }
+        let o = children[0].offset; let e = children[children.len() - 1].end;
+        Some(Cst::node("$template", children, o, e))
+    }
+` : '';
     return `// GENERATED by emit-portable.ts (rustTarget) — parser for grammar "${ir.grammarName}".
 #![allow(non_snake_case)]
 use std::io::Read;
@@ -279,7 +340,7 @@ impl<'a> Parser<'a> {
         false
     }
 
-${ruleFns}
+${matchTemplate}${ruleFns}
 }
 
 fn write_json(c: &Cst, out: &mut String) {
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 08acf52..d9014d8 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -4,7 +4,7 @@
 // index LEDs), and a CST→JSON printer over stdin. It is the reference rendering — its CST
 // is checked byte-for-byte against the interpreter (createParser), so a divergence in the
 // portable logic surfaces here before Go/Rust are compiled.
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target } from './emit-portable.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
@@ -41,12 +41,13 @@ function compilePat(p: TokenPattern, defs: string[]): string {
   return name;
 }
 
-function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
+function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): string {
   const name = (t as { name: string }).name;
-  const stateful = rxTok !== undefined;
-  // `emit(...)` threads the regex-context state in stateful mode; a plain push otherwise.
+  const stateful = rxTok !== undefined || tplTok !== undefined;
+  if (tplTok !== undefined && name === tplTok) return '';   // template token is scanned by the state machine
+  // `emit(...)` threads the lexer state in stateful mode; a plain push otherwise.
   const push = (endExpr: string) => (t.skip ? '' : `${stateful ? 'emit' : 'push'}(${J(name)}, src.slice(pos, ${endExpr}), pos, ${endExpr}); `);
-  const gate = stateful && name === rxTok ? '!prevIsValue() && ' : '';
+  const gate = rxTok !== undefined && name === rxTok ? '!prevIsValue() && ' : '';
   if (t.kind === 'run') return `    if (${gate}${rangeCond('c', t.first)}) {
       let e = pos + 1;
       while (e < n) { const cc = src.charCodeAt(e); if (!${rangeCond('cc', t.cont)}) break; e++; }
@@ -75,12 +76,15 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string): string {
 function lexer(ir: ParserIR): string {
   const defs: string[] = [];
   const rx = ir.regexCtx;
-  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken)).join('\n');
-  const pushFn = rx ? 'emit' : 'push';
+  const tpl = ir.tpl;
+  const stateful = !!(rx || tpl);
+  const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken, tpl?.token)).join('\n');
+  const pushFn = stateful ? 'emit' : 'push';
   const puncts = ir.puncts.map((p) =>
     `    if (src.startsWith(${J(p)}, pos)) { ${pushFn}('', ${J(p)}, pos, pos + ${p.length}); pos += ${p.length}; continue; }`).join('\n');
   const set = (a: string[]) => `new Set([${a.map(J).join(', ')}])`;
-  const stateBlock = rx ? `  let prevText = '', prevKind = '', bpText = '', hasPrev = false, hasPrev2 = false;
+  // Per-feature pieces of the shared `emit`, so a grammar can have regex, templates, or both.
+  const rxState = rx ? `  let prevText = '', prevKind = '', bpText = '', hasPrev = false, hasPrev2 = false;
   const parenHead: boolean[] = [];
   let lastClose = false, lastBang = false;
   const _divT = ${set(rx.divisionTexts)}, _divK = ${set(rx.divisionTypes)}, _rxT = ${set(rx.regexTexts)};
@@ -93,22 +97,53 @@ function lexer(ir: ParserIR): string {
     const isParenHead = prevText === ')' && lastClose;
     return !isExprKw && !isParenHead && (_divK.has(prevKind) || _divT.has(prevText));
   }
-  function emit(kind: string, text: string, off: number, end: number): void {
-    if (text === '(') { const isMember = hasPrev2 && _mem.has(bpText); parenHead.push(!isMember && prevKind === IDENT && _phK.has(prevText)); }
+` : '';
+  const tplState = tpl ? `  const templateStack: number[] = [];
+  function scanTplSpan(p: number): { interp: boolean; end: number } {
+    while (p < n) {
+      if (src.startsWith(${J(tpl.interpOpen)}, p)) return { interp: true, end: p + ${tpl.interpOpen.length} };
+      if (src.charCodeAt(p) === 92) { p += 2; continue; }
+      if (src.startsWith(${J(tpl.open)}, p)) return { interp: false, end: p + ${tpl.open.length} };
+      p++;
+    }
+    return { interp: false, end: p };
+  }
+` : '';
+  const emitHooks = [
+    rx ? `    if (text === '(') { const isMember = hasPrev2 && _mem.has(bpText); parenHead.push(!isMember && prevKind === IDENT && _phK.has(prevText)); }
     else if (text === ')') { lastClose = parenHead.pop() ?? false; }
-    if (_pav.has(text)) lastBang = prevIsValue();
-    toks.push({ kind, text, off, end });
-    bpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true;
+    if (_pav.has(text)) lastBang = prevIsValue();` : '',
+    tpl ? `    if (templateStack.length > 0) { if (text === ${J(tpl.braceOpen)}) templateStack[templateStack.length - 1]++; else if (text === ${J(tpl.interpClose)}) templateStack[templateStack.length - 1]--; }` : '',
+  ].filter(Boolean).join('\n');
+  const emitTail = rx ? `\n    bpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true;` : '';
+  const emitFn = stateful ? `  function emit(kind: string, text: string, off: number, end: number): void {
+${emitHooks}
+    toks.push({ kind, text, off, end });${emitTail}
   }
+` : '';
+  // Template dispatch runs at the top of the loop, before token/punct scanning.
+  const tplDispatch = tpl ? `    if (templateStack.length > 0 && src.startsWith(${J(tpl.interpClose)}, pos) && templateStack[templateStack.length - 1] === 0) {
+      templateStack.pop();
+      const sp = scanTplSpan(pos + ${tpl.interpClose.length});
+      if (sp.interp) { emit('$templateMiddle', src.slice(pos, sp.end), pos, sp.end); templateStack.push(0); }
+      else emit('$templateTail', src.slice(pos, sp.end), pos, sp.end);
+      pos = sp.end; continue;
+    }
+    if (src.startsWith(${J(tpl.open)}, pos)) {
+      const sp = scanTplSpan(pos + ${tpl.open.length});
+      if (sp.interp) { emit('$templateHead', src.slice(pos, sp.end), pos, sp.end); templateStack.push(0); }
+      else emit(${J(tpl.token)}, src.slice(pos, sp.end), pos, sp.end);
+      pos = sp.end; continue;
+    }
 ` : '';
   return `${defs.length ? 'let _s = "";\n' + defs.join('\n') + '\n' : ''}function lex(src: string): Tok[] {
   const toks: Tok[] = [];
   const n = src.length;
   let pos = 0;
-${defs.length ? '  _s = src;\n' : ''}${stateBlock}${rx ? '' : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end }); };\n'}  while (pos < n) {
+${defs.length ? '  _s = src;\n' : ''}${rxState}${tplState}${stateful ? emitFn : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end }); };\n'}  while (pos < n) {
     const c = src.charCodeAt(pos);
     if (c === 32 || c === 9 || c === 10 || c === 13) { pos++; continue; }
-${toks}
+${tplDispatch}${toks}
 ${puncts}
     throw new Error('lex error at ' + pos + ': ' + JSON.stringify(src[pos]));
   }
@@ -139,7 +174,10 @@ ${r.alts.map(alt).join('\n')}
 }`;
 }
 
-function prattRule(r: PrattRule): string {
+function prattRule(r: PrattRule, tpl: TplCfg | null): string {
+  const tplNud = tpl && r.nudToks.includes(tpl.token)
+    ? `  if (t.kind === '$templateHead') { const node = matchTemplate(); return node === null ? null : { rule: ${J(r.name)}, children: [node], offset: node.offset, end: node.end }; }\n`
+    : '';
   const BIN = `{ ${r.binary.map((b) => `${J(b.op)}: { lbp: ${b.lbp}, rbp: ${b.rbp} }`).join(', ')} }`;
   const PRE = `{ ${r.prefix.map((p) => `${J(p.op)}: ${p.rbp}`).join(', ')} }`;
   const atom = `new Set([${r.nudToks.map(J).join(', ')}])`;
@@ -178,7 +216,7 @@ ${r.leds.map(ledArm).join('\n')}
 function ${r.name}_nud(): Node | null {
   const t = peek();
   if (t === null) return null;
-  if (${r.name}_ATOM.has(t.kind)) { pos++; return { rule: ${J(r.name)}, children: [{ tokenType: t.kind, offset: t.off, end: t.end }], offset: t.off, end: t.end }; }
+${tplNud}  if (${r.name}_ATOM.has(t.kind)) { pos++; return { rule: ${J(r.name)}, children: [{ tokenType: t.kind, offset: t.off, end: t.end }], offset: t.off, end: t.end }; }
 ${r.nudBrackets.map(bracketNud).join('\n')}
   const pbp = ${r.name}_PRE[t.text];
   if (pbp !== undefined) {
@@ -196,7 +234,26 @@ export const tsTarget: Target = {
   name: 'typescript',
   ext: 'ts',
   render(ir: ParserIR): string {
-    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r) : rdRule(r))).join('\n\n');
+    const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
+    const matchTemplate = ir.tpl ? `function matchTemplate(): Cst | null {
+  const t = peek();
+  if (t === null || t.kind !== '$templateHead') return null;
+  const children: Cst[] = [];
+  const save = pos; pos++;
+  children.push({ tokenType: '$templateHead', offset: t.off, end: t.end });
+  for (;;) {
+    const expr = parse${ir.tpl.interpRule}();
+    if (expr === null) { pos = save; return null; }
+    children.push(expr);
+    const next = peek();
+    if (next === null) { pos = save; return null; }
+    if (next.kind === '$templateMiddle') { pos++; children.push({ tokenType: '$templateMiddle', offset: next.off, end: next.end }); continue; }
+    if (next.kind === '$templateTail') { pos++; children.push({ tokenType: '$templateTail', offset: next.off, end: next.end }); break; }
+    pos = save; return null;
+  }
+  return { rule: '$template', children, offset: children[0].offset, end: children[children.length - 1].end };
+}
+` : '';
     return `// GENERATED by emit-portable.ts (tsTarget) — parser for grammar "${ir.grammarName}".
 import { readFileSync } from 'node:fs';
 
@@ -250,7 +307,7 @@ function altLit(opts: [string, string][], kids: Cst[]): boolean {
   return false;
 }
 
-${ruleFns}
+${matchTemplate}${ruleFns}
 
 const src = readFileSync(0, 'utf8');
 toks = lex(src);
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index bf0e6ea..8de7bb6 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -75,6 +75,19 @@ const CASES: Case[] = [
     // (`var ;` is VALID — `var` is an identifier, so it's the expression statement `var;`.)
     reject: ['a / ;', 'if (x /re/;', '/re/', '* a;', 'a = = b;'],
   },
+  {
+    // STATEFUL template literals: the `${…}` interpolation split (head/middle/tail) with a
+    // brace-depth stack — adjacent/multiple holes, exprs in holes, nested templates, and a
+    // nested `{…}` object inside a hole (which must NOT close the hole).
+    grammar: 'templatejs', path: '../examples/templatejs.ts',
+    accept: [
+      'var a = `hello`;', 'var b = `hi ${name}!`;', 'var c = `${x}${y}`;',
+      'var d = `a${ x + 1 }b${ y * 2 }c`;', 'var e = `outer ${ `inner ${z}` } end`;',
+      'var f = `${ {a} }`;', 'var f2 = `${ {a, b} } and ${ c }`;', 'var g = `no holes $ here`;',
+      'f(`${a}`, `${b}`);', 'var h = `${a}${b}${c}`;', 'return `${ {x, y} }`;',
+    ],
+    reject: ['var x = `${ }`;', 'var y = `${a`;', '`${a} ${}`;', 'tag`${a}`;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From c99a67d717c2f8e066b985f6e00e55e2a64cb485 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 02:40:52 +0800
Subject: [PATCH 11/27] =?UTF-8?q?emit-portable:=20postfix-token=20Pratt=20?=
 =?UTF-8?q?LED=20=E2=80=94=20tagged=20templates=20(stage=206=20begins)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The first parser-algebra construct toward the real grammar files: a LED whose
continuation is a single token, `$ X` (e.g. a tagged template `` tag`…` ``). buildPratt
classified LEDs only as binary (`$ op $`) or mixfix-literal (`$ lit …`) and threw on
this shape; it now collects such tokens into PrattRule.postfixToks, and each target
renders an LED arm that wraps `left X` into a node — tried like a mixfix led (binds
tight, no min-bp gate). When the postfix token is the template token the arm also
accepts a `$templateHead` and runs matchTemplate, so a tagged template can itself be
interpolated.

examples/templatejs.ts restores `[$, Template]`; the gate now covers `` tag`…` ``,
`` String.raw`a${b}c`.length ``, `` x.tag`${y}` `` (tagged after a member) across
ts/go/rust (15/15 accept, 3/3 reject per target). Full suite 42/42.
---
 examples/templatejs.ts   |  1 +
 src/emit-portable.ts     |  5 ++++-
 src/target-go.ts         | 12 ++++++++++++
 src/target-rust.ts       |  6 ++++++
 src/target-ts.ts         |  7 +++++++
 test/portable-targets.ts |  3 ++-
 6 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/examples/templatejs.ts b/examples/templatejs.ts
index e2f2367..8cda83d 100644
--- a/examples/templatejs.ts
+++ b/examples/templatejs.ts
@@ -40,6 +40,7 @@ const Expr = rule(($) => [
   [$, op, $],
   [$, '(', opt(sep($, ',')), ')'],
   [$, '.', Ident],
+  [$, Template],                        // tagged template — a postfix-token LED
 ]);
 
 const Block = rule(($) => [['{', many(Stmt), '}']]);
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index bfdaf0b..cd166a1 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -62,6 +62,7 @@ export type PrattRule = {
   prefix: Array<{ op: string; rbp: number }>;         // NUD: prefix op then operand at rbp
   binary: Array<{ op: string; lbp: number; rbp: number }>;  // LED: infix op, bind iff lbp > minBp, rhs at rbp
   leds: Bracket[];                                    // LED: mixfix continuation (call/member/index), tried before operators
+  postfixToks: string[];                              // LED: a postfix token `$ X` (e.g. a tagged template), tried like a mixfix led
 };
 export type RuleIR = RdRule | PrattRule;
 
@@ -251,6 +252,7 @@ function buildPratt(
   const nudBrackets: Bracket[] = [];
   let sawPrefix = false, sawBinary = false;
   const leds: Bracket[] = [];
+  const postfixToks: string[] = [];
   for (const alt of alts) {
     const items = alt.type === 'seq' ? alt.items : [alt];
     const startsSelf = items[0].type === 'ref' && items[0].name === name;
@@ -265,6 +267,7 @@ function buildPratt(
     const rest = items.slice(1);
     if (rest[0].type === 'op') { sawBinary = true; continue; }
     if (rest[0].type === 'literal') { leds.push({ first: rest[0].value, steps: rest.map((it) => stepOfPratt(it)) }); continue; }
+    if (rest.length === 1 && rest[0].type === 'ref' && a.tokenNames.has(rest[0].name)) { postfixToks.push(rest[0].name); continue; }  // postfix token (tagged template)
     throw new Error(`portable: Pratt LED shape not in scope (rule ${name})`);
   }
   // a self-ref inside a NUD/LED sub-sequence is a fresh parse of this rule
@@ -280,5 +283,5 @@ function buildPratt(
   const binary = sawBinary
     ? [...a.opTable.entries()].filter(([, info]) => info.position === 'infix').map(([op, info]) => ({ op, lbp: info.lbp, rbp: info.rbp }))
     : [];
-  return { kind: 'pratt', name, nudToks, nudBrackets, prefix, binary, leds };
+  return { kind: 'pratt', name, nudToks, nudBrackets, prefix, binary, leds, postfixToks };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index 9d74c46..c60fd41 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -206,6 +206,17 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
 \t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
 \t\t\tpos = ledSave; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break
 \t\t}`;
+  const postfixArm = (tok: string) => {
+    const tplPart = tpl && tok === tpl.token ? `
+\t\tif t.Kind == "$templateHead" {
+\t\t\tnode := matchTemplate()
+\t\t\tif node >= 0 { sb := len(scratch); scratch = append(scratch, left, node); left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
+\t\t}` : '';
+    return `\t\tif t.Kind == ${J(tok)} {
+\t\t\tsb := len(scratch); scratch = append(scratch, left, mkLeaf(t.Kind, t.Off, t.End)); pos++
+\t\t\tleft = finish(${J(r.name)}, sb, nodes[left].Offset); continue
+\t\t}${tplPart}`;
+  };
   return `var ${r.name}BIN = map[string]bp{${bin}}
 var ${r.name}PRE = map[string]int{${pre}}
 var ${r.name}ATOM = map[string]bool{${atoms}}
@@ -217,6 +228,7 @@ func ${r.name}bp(minBp int) int32 {
 \t\tt := peek()
 \t\tif t == nil { break }
 ${r.leds.map(ledArm).join('\n')}
+${r.postfixToks.map(postfixArm).join('\n')}
 \t\tinfo, ok := ${r.name}BIN[t.Text]
 \t\tif !ok || info.lbp <= minBp { break }
 \t\tledSave := pos; sb := len(scratch)
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 51aebf5..0ca28b1 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -225,6 +225,11 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
                 }
                 self.pos = led_save; break;
             }`;
+  const postfixArm = (tok: string) => {
+    const tplPart = tpl && tok === tpl.token ? `
+            if t.kind == "$templateHead" { if let Some(n) = self.match_template() { left = node(${J(r.name)}, vec![left, n]); continue; } }` : '';
+    return `            if t.kind == ${J(tok)} { self.pos += 1; let leaf = Cst::leaf(t.kind, t.off, t.end); left = node(${J(r.name)}, vec![left, leaf]); continue; }${tplPart}`;
+  };
   return `    fn parse_${r.name}(&mut self) -> Option<Cst> { self.${r.name}_bp(0) }
     fn ${r.name}_bin(op: &str) -> Option<(i64, i64)> { match op { ${binArms}${binArms ? ', ' : ''}_ => None } }
     fn ${r.name}_pre(op: &str) -> Option<i64> { match op { ${preArms}${preArms ? ', ' : ''}_ => None } }
@@ -234,6 +239,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
         loop {
             let t = match self.peek() { Some(t) => t, None => break };
 ${r.leds.map(ledArm).join('\n')}
+${r.postfixToks.map(postfixArm).join('\n')}
             let (lbp, rbp) = match Parser::${r.name}_bin(t.text) { Some(x) => x, None => break };
             if lbp <= min_bp { break; }
             let led_save = self.pos;
diff --git a/src/target-ts.ts b/src/target-ts.ts
index d9014d8..7d9418e 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -191,6 +191,12 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
       if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.name)}, kids); continue; }
       pos = ledSave; break;
     }`;
+  // A postfix token (e.g. a tagged template) binds like a mixfix led: `left X` → node(left, X).
+  const postfixArm = (tok: string) => {
+    const tplPart = tpl && tok === tpl.token ? `
+    if (t.kind === '$templateHead') { const node = matchTemplate(); if (node !== null) { left = { rule: ${J(r.name)}, children: [left, node], offset: left.offset, end: node.end }; continue; } }` : '';
+    return `    if (t.kind === ${J(tok)}) { const leaf: Leaf = { tokenType: t.kind, offset: t.off, end: t.end }; pos++; left = { rule: ${J(r.name)}, children: [left, leaf], offset: left.offset, end: leaf.end }; continue; }${tplPart}`;
+  };
   return `const ${r.name}_BIN: Record<string, { lbp: number; rbp: number }> = ${BIN};
 const ${r.name}_PRE: Record<string, number> = ${PRE};
 const ${r.name}_ATOM = ${atom};
@@ -202,6 +208,7 @@ function ${r.name}_bp(minBp: number): Node | null {
     const t = peek();
     if (t === null) break;
 ${r.leds.map(ledArm).join('\n')}
+${r.postfixToks.map(postfixArm).join('\n')}
     const info = ${r.name}_BIN[t.text];
     if (info === undefined || info.lbp <= minBp) break;
     const ledSave = pos;
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 8de7bb6..2712214 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -85,8 +85,9 @@ const CASES: Case[] = [
       'var d = `a${ x + 1 }b${ y * 2 }c`;', 'var e = `outer ${ `inner ${z}` } end`;',
       'var f = `${ {a} }`;', 'var f2 = `${ {a, b} } and ${ c }`;', 'var g = `no holes $ here`;',
       'f(`${a}`, `${b}`);', 'var h = `${a}${b}${c}`;', 'return `${ {x, y} }`;',
+      'tag`hello`;', 'tag`${a}${b}`;', 'String.raw`a${b}c`.length;', 'x.tag`${y}`;',  // tagged (postfix-token LED)
     ],
-    reject: ['var x = `${ }`;', 'var y = `${a`;', '`${a} ${}`;', 'tag`${a}`;'],
+    reject: ['var x = `${ }`;', 'var y = `${a`;', '`${a} ${}`;'],
   },
 ];
 

From 65498edbd4258ebd6a292aec9b3803cbf5966c10 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 02:49:39 +0800
Subject: [PATCH 12/27] emit-portable: general (non-literal) inline alt in all
 three targets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

buildIR only accepted an inline `alt(...)` whose every branch was a literal (the
altlit fast path) and threw otherwise — the first parser-algebra construct
javascript.ts hits. It now compiles a non-literal alternation into an `alt` step
whose branches are full sub-sequences, rendered as a backtracking try-each: each
branch saves the position (and the arena lengths) and restores them on failure
before the next branch. Rendered as an immediately-applied closure in every target
(Go needs `;` between the consecutive block statements; Rust reuses the closure body
in both the top-level and in-closure step contexts).

examples/altjs.ts (object keys are `alt(Ident | Str | Number)`) verifies it across
ts/go/rust — 9/9 accept, 4/4 reject per target, byte-identical to createParser.
Full suite 42/42. With this, javascript.ts clears the inline-alt wall and advances
to the next parser construct (a Pratt NUD shape).
---
 examples/altjs.ts        | 37 +++++++++++++++++++++++++++++++++++++
 src/emit-portable.ts     | 11 +++++------
 src/target-go.ts         |  1 +
 src/target-rust.ts       |  7 +++++++
 src/target-ts.ts         |  1 +
 test/portable-targets.ts | 10 ++++++++++
 6 files changed, 61 insertions(+), 6 deletions(-)
 create mode 100644 examples/altjs.ts

diff --git a/examples/altjs.ts b/examples/altjs.ts
new file mode 100644
index 0000000..d1f117d
--- /dev/null
+++ b/examples/altjs.ts
@@ -0,0 +1,37 @@
+// Exercises the portable parser's general inline `alt(...)` of NON-literals (the first
+// parser-algebra construct javascript.ts needs that buildIR previously rejected). Object
+// keys are `alt(Ident, Str, Number)` — a backtracking alternation of token references
+// inside a rule sequence, not the all-literal fast path.
+import {
+  token, rule, defineGrammar, left, op,
+  seq, oneOf, range, star, sep, opt, many, alt, noneOf,
+} from '../src/api.ts';
+
+const digit = range('0', '9');
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(digit, star(digit)), { scope: 'constant.numeric' });
+const Str = token(seq('"', star(noneOf('"', '\n')), '"'), { scope: 'string.quoted.double' });
+
+const jsPrec = [left('+', '-'), left('*', '/')];
+
+// key = a NON-literal inline alternation (Ident | Str | Number).
+const KeyVal = rule(($) => [[alt(Ident, Str, Number_), ':', Expr]]);
+const Expr = rule(($) => [
+  Number_, Str, Ident,
+  ['(', $, ')'],
+  ['{', opt(sep(KeyVal, ',')), '}'],   // object literal
+  [$, op, $],
+]);
+const Stmt = rule(($) => [[Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'altjs',
+  scopeName: 'source.altjs',
+  tokens: { Ident, Number: Number_, Str },
+  prec: jsPrec,
+  rules: { KeyVal, Expr, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index cd166a1..b773b9f 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -49,7 +49,8 @@ export type Step =
   | { t: 'star'; step: Step }                                   // repeat inner 0+
   | { t: 'opt'; steps: Step[] }                                 // optional sub-sequence
   | { t: 'sep'; elem: Step; delim: string }                     // elem (delim elem)*
-  | { t: 'altlit'; opts: Lit[] };                               // inline alternation of literals
+  | { t: 'altlit'; opts: Lit[] }                                // inline alternation of literals (fast path)
+  | { t: 'alt'; branches: Step[][] };                           // inline alternation of sub-sequences (backtracking)
 export type Alt = Step[];
 
 export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
@@ -142,12 +143,10 @@ function buildIR(grammar: CstGrammar): ParserIR {
         if (e.kind === '+') throw new Error("portable: '+' not yet modeled (use '*')");
         break;
       case 'alt': {
-        const opts: Lit[] = [];
-        for (const it of e.items) {
-          if (it.type !== 'literal') throw new Error('portable: inline alt must be all literals');
-          opts.push({ value: it.value, ttype: litTtype(it.value) });
+        if (e.items.every((it) => it.type === 'literal')) {   // fast path: all-literal alternation
+          return { t: 'altlit', opts: e.items.map((it) => ({ value: (it as { value: string }).value, ttype: litTtype((it as { value: string }).value) })) };
         }
-        return { t: 'altlit', opts };
+        return { t: 'alt', branches: e.items.map(altSteps) };   // general: backtracking over sub-sequences
       }
     }
     throw new Error(`portable: rd construct '${e.type}' not in scope`);
diff --git a/src/target-go.ts b/src/target-go.ts
index c60fd41..e9bfb03 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -169,6 +169,7 @@ function stepCond(s: Step): string {
     case 'opt': return `opt(func() bool { return ${s.steps.map(stepCond).join(' && ')} })`;
     case 'sep': return `sepBy(func() bool { return ${stepCond(s.elem)} }, ${J(s.delim)})`;
     case 'altlit': return `altLit([][2]string{${s.opts.map((o) => `{${J(o.value)}, ${J(o.ttype)}}`).join(', ')}})`;
+    case 'alt': return `func() bool { ${s.branches.map((br) => `{ save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${br.length ? br.map(stepCond).join(' && ') : 'true'} { return true }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('; ')}; return false }()`;
   }
 }
 
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 0ca28b1..381a1e3 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -178,8 +178,14 @@ function stepCond(s: Step): string {
     case 'opt': return `self.opt(|p, k| ${s.steps.map(stepCondP).join(' && ')}, &mut kids)`;
     case 'sep': return `self.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, &mut kids)`;
     case 'altlit': return `self.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], &mut kids)`;
+    case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(self, &mut kids)`;
   }
 }
+// A backtracking inline alternation rendered as an immediately-applied closure over (p, k),
+// so it composes identically whether it sits at top level or already inside a closure.
+function altBody(branches: Step[][]): string {
+  return `${branches.map((br) => `{ let sp = p.pos; let bk = k.len(); if ${br.length ? br.map(stepCondP).join(' && ') : 'true'} { return true; } p.pos = sp; k.truncate(bk); }`).join(' ')} false`;
+}
 // Inside a closure: uses `p` and `k`.
 function stepCondP(s: Step): string {
   switch (s.t) {
@@ -190,6 +196,7 @@ function stepCondP(s: Step): string {
     case 'opt': return `p.opt(|p, k| ${s.steps.map(stepCondP).join(' && ')}, k)`;
     case 'sep': return `p.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, k)`;
     case 'altlit': return `p.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], k)`;
+    case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(p, k)`;
   }
 }
 
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 7d9418e..8a4cc50 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -161,6 +161,7 @@ function stepCond(s: Step): string {
     case 'opt': return `opt(() => ${s.steps.map(stepCond).join(' && ')}, kids)`;
     case 'sep': return `sepBy(() => ${stepCond(s.elem)}, ${J(s.delim)}, kids)`;
     case 'altlit': return `altLit([${s.opts.map((o) => `[${J(o.value)}, ${J(o.ttype)}]`).join(', ')}], kids)`;
+    case 'alt': return `(() => { ${s.branches.map((br) => `{ const sp = pos; const bk = kids.length; if (${br.length ? br.map(stepCond).join(' && ') : 'true'}) return true; pos = sp; kids.length = bk; }`).join(' ')} return false; })()`;
   }
 }
 
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 2712214..cf3d881 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -89,6 +89,16 @@ const CASES: Case[] = [
     ],
     reject: ['var x = `${ }`;', 'var y = `${a`;', '`${a} ${}`;'],
   },
+  {
+    // General (non-literal) inline alt: object keys are alt(Ident | Str | Number) — a
+    // backtracking alternation of token refs inside a rule sequence.
+    grammar: 'altjs', path: '../examples/altjs.ts',
+    accept: [
+      '{a: 1};', '{"k": 2};', '{1: x};', '{a: 1, "b": 2, 3: c};', '{x: 1 + 2 * 3};',
+      '({nested: {inner: 1}});', '{};', 'a + b;', '{k: (1 + 2)};',
+    ],
+    reject: ['{a:};', '{: 1};', '{a 1};', '{a: 1,, b: 2};'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From 8fc593a8c06ca3a979dd7127ee3bc935629372d3 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 03:02:35 +0800
Subject: [PATCH 13/27] emit-portable: `not` step + general Pratt NUD sequences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two coupled parser-algebra constructs, the next javascript.ts wall after inline-alt:

- A `not` step — zero-width negative lookahead: try the inner steps, restore the
  position (and arena/kids) unconditionally, succeed iff they did NOT match. Rendered
  as an immediately-applied closure in every target (Rust shares one body across the
  two step contexts, like `alt`).

- General Pratt NUD sequences (PrattRule.nudSeqs) — a NUD that is neither a bare token,
  a prefix op, nor a literal-led bracket: a backtracking try-each sequence producing a
  node. Covers the reserved-word-guarded identifier (`not(kw)… Ident`) and the
  quantifier-first class expression (`Decorator? class Ident? … { … }`). A single
  transparent group unwraps to its body; a group carrying capBelow/ctxMode/suppress
  (arrow functions, await/yield context) is explicitly deferred with a clear message.

examples/nudjs.ts verifies both across ts/go/rust — 11/11 accept, 4/4 reject per
target, byte-identical to createParser. Full suite 42/42. javascript.ts now clears
the NUD wall and advances to the next construct (a Pratt LED shape).
---
 examples/nudjs.ts        | 41 ++++++++++++++++++++++++++++++++++++++++
 src/emit-portable.ts     | 22 ++++++++++++++++++---
 src/target-go.ts         |  2 ++
 src/target-rust.ts       |  7 +++++++
 src/target-ts.ts         |  2 ++
 test/portable-targets.ts | 11 +++++++++++
 6 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 examples/nudjs.ts

diff --git a/examples/nudjs.ts b/examples/nudjs.ts
new file mode 100644
index 0000000..a443b7b
--- /dev/null
+++ b/examples/nudjs.ts
@@ -0,0 +1,41 @@
+// Exercises two general Pratt NUD shapes javascript.ts needs (beyond bare-token / prefix /
+// bracket): a reserved-word-GUARDED identifier `[not(kw)… Ident]` (zero-width negative
+// lookahead before a token) and a quantifier-first NUD `[Decorator? "class" Ident? …]` (a
+// class expression). Both compile to a general backtracking NUD sequence; the `not` step
+// consumes nothing. (Arrow functions — group{capBelow,ctxMode} — are deferred.)
+import {
+  token, rule, defineGrammar, left, op,
+  seq, oneOf, range, star, sep, opt, many, alt, not, noneOf,
+} from '../src/api.ts';
+
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(range('0', '9'), star(range('0', '9'))), { scope: 'constant.numeric' });
+const Decorator = token(seq('@', idStart, star(idCont)), { scope: 'meta.decorator' });
+
+const reserved = alt('if', 'else', 'while', 'return', 'class', 'new', 'extends');
+
+const Expr = rule(($) => [
+  Number_,
+  [not(reserved), Ident],                                          // reserved-word-guarded identifier
+  [opt(Decorator), 'class', opt(Ident), opt('extends', $), '{', many(ClassMember), '}'],  // class expr (quantifier-first NUD)
+  ['new', $],                                                      // literal-led NUD (bracket)
+  ['(', $, ')'],
+  [$, op, $],
+  [$, '.', Ident],
+  [$, '(', opt(sep($, ',')), ')'],
+]);
+const ClassMember = rule(($) => [[opt(Decorator), Ident, '(', ')', '{', '}']]);
+
+const jsPrec = [left('+', '-'), left('*', '/')];
+const Stmt = rule(($) => [[Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'nudjs',
+  scopeName: 'source.nudjs',
+  tokens: { Decorator, Ident, Number: Number_ },
+  prec: jsPrec,
+  rules: { Expr, ClassMember, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index b773b9f..28a5196 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -50,7 +50,8 @@ export type Step =
   | { t: 'opt'; steps: Step[] }                                 // optional sub-sequence
   | { t: 'sep'; elem: Step; delim: string }                     // elem (delim elem)*
   | { t: 'altlit'; opts: Lit[] }                                // inline alternation of literals (fast path)
-  | { t: 'alt'; branches: Step[][] };                           // inline alternation of sub-sequences (backtracking)
+  | { t: 'alt'; branches: Step[][] }                            // inline alternation of sub-sequences (backtracking)
+  | { t: 'not'; steps: Step[] };                                // zero-width negative lookahead (consumes nothing)
 export type Alt = Step[];
 
 export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
@@ -60,6 +61,7 @@ export type PrattRule = {
   name: string;
   nudToks: string[];                                  // NUD: a bare token wrapped in a node
   nudBrackets: Bracket[];                             // NUD: '(' … ')' / '[' … ']'
+  nudSeqs: Step[][];                                  // NUD: a general sequence (guarded ident, class expr), tried with backtracking
   prefix: Array<{ op: string; rbp: number }>;         // NUD: prefix op then operand at rbp
   binary: Array<{ op: string; lbp: number; rbp: number }>;  // LED: infix op, bind iff lbp > minBp, rhs at rbp
   leds: Bracket[];                                    // LED: mixfix continuation (call/member/index), tried before operators
@@ -136,6 +138,7 @@ function buildIR(grammar: CstGrammar): ParserIR {
       case 'literal': return { t: 'lit', value: e.value, ttype: litTtype(e.value) };
       case 'ref': return tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name };
       case 'group': { const ss = altSteps(e.body); if (ss.length !== 1) throw new Error('portable: group must reduce to a single step'); return ss[0]; }
+      case 'not': return { t: 'not', steps: altSteps(e.body) };   // zero-width negative lookahead
       case 'sep': return { t: 'sep', elem: stepOf(e.element), delim: e.delimiter };
       case 'quantifier':
         if (e.kind === '*') return { t: 'star', step: stepOf(e.body) };
@@ -249,6 +252,7 @@ function buildPratt(
   const alts = body.type === 'alt' ? body.items : [body];
   const nudToks: string[] = [];
   const nudBrackets: Bracket[] = [];
+  const nudSeqs: Step[][] = [];
   let sawPrefix = false, sawBinary = false;
   const leds: Bracket[] = [];
   const postfixToks: string[] = [];
@@ -260,7 +264,17 @@ function buildPratt(
       if (items.length === 1 && items[0].type === 'ref' && a.tokenNames.has(items[0].name)) { nudToks.push(items[0].name); continue; }
       if (items[0].type === 'prefix') { sawPrefix = true; continue; }
       if (items[0].type === 'literal') { nudBrackets.push({ first: items[0].value, steps: items.map((it) => stepOfPratt(it)) }); continue; }
-      throw new Error(`portable: Pratt NUD shape not in scope (rule ${name})`);
+      // A single transparent group unwraps to its body (an explicit grouping of the NUD sequence).
+      let nudItems = items;
+      if (items.length === 1 && items[0].type === 'group' && !items[0].capBelow && !items[0].ctxMode && !items[0].suppress) {
+        nudItems = items[0].body.type === 'seq' ? items[0].body.items : [items[0].body];
+      }
+      // capBelow / ctxMode (arrow functions, await/yield context) are a deeper construct — defer.
+      if (nudItems.some((it) => it.type === 'group' && (it.capBelow || it.ctxMode || it.suppress))) {
+        throw new Error(`portable: Pratt NUD with capBelow/ctxMode/suppress not yet in scope (rule ${name}) — arrow functions etc.`);
+      }
+      nudSeqs.push(nudItems.map((it) => stepOfPratt(it)));   // general NUD sequence (guarded ident, class expr)
+      continue;
     }
     // LED (starts with self): `$ op $` (binary, op slot + trailing self) or `$ <lit> …` (mixfix)
     const rest = items.slice(1);
@@ -272,6 +286,8 @@ function buildPratt(
   // a self-ref inside a NUD/LED sub-sequence is a fresh parse of this rule
   function stepOfPratt(e: RuleExpr): Step {
     if (e.type === 'ref' && e.name === name) return { t: 'rule', name };
+    if (e.type === 'not') return { t: 'not', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
+    if (e.type === 'group' && !e.capBelow && !e.ctxMode && !e.suppress && e.body.type !== 'seq') return stepOfPratt(e.body);
     if (e.type === 'sep') return { t: 'sep', elem: stepOfPratt(e.element), delim: e.delimiter };
     if (e.type === 'quantifier' && e.kind === '?') return { t: 'opt', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
     if (e.type === 'quantifier' && e.kind === '*') return { t: 'star', step: stepOfPratt(e.body) };
@@ -282,5 +298,5 @@ function buildPratt(
   const binary = sawBinary
     ? [...a.opTable.entries()].filter(([, info]) => info.position === 'infix').map(([op, info]) => ({ op, lbp: info.lbp, rbp: info.rbp }))
     : [];
-  return { kind: 'pratt', name, nudToks, nudBrackets, prefix, binary, leds, postfixToks };
+  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, prefix, binary, leds, postfixToks };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index e9bfb03..73357fd 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -170,6 +170,7 @@ function stepCond(s: Step): string {
     case 'sep': return `sepBy(func() bool { return ${stepCond(s.elem)} }, ${J(s.delim)})`;
     case 'altlit': return `altLit([][2]string{${s.opts.map((o) => `{${J(o.value)}, ${J(o.ttype)}}`).join(', ')}})`;
     case 'alt': return `func() bool { ${s.branches.map((br) => `{ save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${br.length ? br.map(stepCond).join(' && ') : 'true'} { return true }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('; ')}; return false }()`;
+    case 'not': return `func() bool { save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); m := ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return !m }()`;
   }
 }
 
@@ -258,6 +259,7 @@ ${r.nudBrackets.map(bracketNud).join('\n')}
 \t\tscratch = append(scratch, operand)
 \t\treturn finish(${J(r.name)}, sb, t.Off)
 \t}
+${r.nudSeqs.map((seq) => `\t{ save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${seq.length ? seq.map(stepCond).join(' && ') : 'true'} { return finish(${J(r.name)}, sb, offAt(save)) }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('\n')}
 \treturn -1
 }`;
 }
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 381a1e3..9e253ff 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -179,6 +179,7 @@ function stepCond(s: Step): string {
     case 'sep': return `self.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, &mut kids)`;
     case 'altlit': return `self.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], &mut kids)`;
     case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(self, &mut kids)`;
+    case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(self, &mut kids)`;
   }
 }
 // A backtracking inline alternation rendered as an immediately-applied closure over (p, k),
@@ -186,6 +187,10 @@ function stepCond(s: Step): string {
 function altBody(branches: Step[][]): string {
   return `${branches.map((br) => `{ let sp = p.pos; let bk = k.len(); if ${br.length ? br.map(stepCondP).join(' && ') : 'true'} { return true; } p.pos = sp; k.truncate(bk); }`).join(' ')} false`;
 }
+// Zero-width negative lookahead: try the steps, restore, succeed iff they did NOT all match.
+function notBody(steps: Step[]): string {
+  return `let sp = p.pos; let bk = k.len(); let m = ${steps.length ? steps.map(stepCondP).join(' && ') : 'true'}; p.pos = sp; k.truncate(bk); !m`;
+}
 // Inside a closure: uses `p` and `k`.
 function stepCondP(s: Step): string {
   switch (s.t) {
@@ -197,6 +202,7 @@ function stepCondP(s: Step): string {
     case 'sep': return `p.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, k)`;
     case 'altlit': return `p.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], k)`;
     case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(p, k)`;
+    case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(p, k)`;
   }
 }
 
@@ -272,6 +278,7 @@ ${r.nudBrackets.map(bracketNud).join('\n')}
                 None => { self.pos = save; return None; }
             }
         }
+${r.nudSeqs.map((seq) => `        { let save = self.pos; let mut kids: Vec<Cst> = Vec::new(); if ${seq.length ? seq.map(stepCond).join(' && ') : 'true'} { return Some(self.branch(${J(r.name)}, kids, save)); } self.pos = save; }`).join('\n')}
         None
     }`;
 }
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 8a4cc50..f394f54 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -162,6 +162,7 @@ function stepCond(s: Step): string {
     case 'sep': return `sepBy(() => ${stepCond(s.elem)}, ${J(s.delim)}, kids)`;
     case 'altlit': return `altLit([${s.opts.map((o) => `[${J(o.value)}, ${J(o.ttype)}]`).join(', ')}], kids)`;
     case 'alt': return `(() => { ${s.branches.map((br) => `{ const sp = pos; const bk = kids.length; if (${br.length ? br.map(stepCond).join(' && ') : 'true'}) return true; pos = sp; kids.length = bk; }`).join(' ')} return false; })()`;
+    case 'not': return `(() => { const sp = pos; const bk = kids.length; const m = ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = sp; kids.length = bk; return !m; })()`;
   }
 }
 
@@ -234,6 +235,7 @@ ${r.nudBrackets.map(bracketNud).join('\n')}
     if (operand === null) { pos = save; return null; }
     return { rule: ${J(r.name)}, children: [opLeaf, operand], offset: t.off, end: operand.end };
   }
+${r.nudSeqs.map((seq) => `  { const save = pos; const kids: Cst[] = []; if (${seq.length ? seq.map(stepCond).join(' && ') : 'true'}) return branch(${J(r.name)}, kids, save); pos = save; }`).join('\n')}
   return null;
 }`;
 }
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index cf3d881..429bd53 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -99,6 +99,17 @@ const CASES: Case[] = [
     ],
     reject: ['{a:};', '{: 1};', '{a 1};', '{a: 1,, b: 2};'],
   },
+  {
+    // General Pratt NUD sequences: a reserved-word-guarded identifier (`not(kw)… Ident`,
+    // a zero-width negative lookahead) and a quantifier-first class expression.
+    grammar: 'nudjs', path: '../examples/nudjs.ts',
+    accept: [
+      'x;', 'foo + bar;', 'class C {};', 'class {};', 'class C extends B {};',
+      '@dec class C { m(){} };', 'new Foo;', 'new C();', 'a.b.c;',
+      'class C { @x m(){} n(){} };', 'x + class {} + y;',
+    ],
+    reject: ['if;', 'class;', 'new;', 'return + 1;'],   // reserved words can't be bare identifiers
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From 22cfc5e5b7a5c63df1605ac28924275973943be1 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 03:21:27 +0800
Subject: [PATCH 14/27] emit-portable: postfix-operator Pratt LED + access-tail
 closure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The next javascript.ts construct after the NUD cluster: a postfix operator LED
`[$, postfix]` (`x++`, `x--`) — consume the operator, no right operand, bind iff
lbp > minBp. With it comes the access-tail CLOSURE that makes it correct: once a
postfix binds, the operand is an update expression, so a further postfix or an
access tail (`.x`, `[i]`, `(…)`, a tagged template) can no longer attach. The led
loop now threads a `tailClosed` flag — set by a postfix, gating both further
postfixes and the access-tail leds. An access-tail led is detected structurally
(buildPratt): a led whose last step is not a fresh same-rule operand (closed, not
an open binary/ternary) and whose connector is a punctuator, not a word operator —
so `in`/`instanceof`/`?:` still bind after `a++`.

examples/postjs.ts verifies it across ts/go/rust: `a++--`, `a++.b`, `a++ ++` are
rejected, `(a++).b` and `x.y.z++` accepted — 11/11 accept, 4/4 reject per target,
byte-identical to createParser. Full suite 42/42. javascript.ts now clears the LED
wall and advances to the next construct (a nested `seq` rd step).
---
 examples/postjs.ts       | 38 ++++++++++++++++++++++++++++++++++++++
 src/emit-portable.ts     | 23 +++++++++++++++++++----
 src/target-go.ts         | 15 +++++++++++----
 src/target-rust.ts       | 12 ++++++++----
 src/target-ts.ts         | 16 +++++++++++-----
 test/portable-targets.ts | 11 +++++++++++
 6 files changed, 98 insertions(+), 17 deletions(-)
 create mode 100644 examples/postjs.ts

diff --git a/examples/postjs.ts b/examples/postjs.ts
new file mode 100644
index 0000000..239fdec
--- /dev/null
+++ b/examples/postjs.ts
@@ -0,0 +1,38 @@
+// Exercises the postfix-operator Pratt LED `[$, postfix]` (e.g. `x++`, `x--`) — a LED that
+// consumes the operator and no right operand, binding tight. `++`/`--` are BOTH prefix (NUD,
+// `++x`) and postfix (LED, `x++`); the engine resolves them by position.
+import {
+  token, rule, defineGrammar, left, right, op, prefix, postfix,
+  seq, oneOf, range, star, many,
+} from '../src/api.ts';
+
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(range('0', '9'), star(range('0', '9'))), { scope: 'constant.numeric' });
+
+const jsPrec = [
+  left('+', '-'),
+  left('*', '/'),
+  right(prefix('-', '!', '++', '--')),
+  left(postfix('++', '--')),
+];
+
+const Expr = rule(($) => [
+  Number_, Ident,
+  ['(', $, ')'],
+  [prefix, $],
+  [$, op, $],
+  [$, '.', Ident],
+  [$, postfix],          // postfix operator LED
+]);
+const Stmt = rule(($) => [[Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'postjs',
+  scopeName: 'source.postjs',
+  tokens: { Ident, Number: Number_ },
+  prec: jsPrec,
+  rules: { Expr, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 28a5196..1b642c8 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -65,7 +65,9 @@ export type PrattRule = {
   prefix: Array<{ op: string; rbp: number }>;         // NUD: prefix op then operand at rbp
   binary: Array<{ op: string; lbp: number; rbp: number }>;  // LED: infix op, bind iff lbp > minBp, rhs at rbp
   leds: Bracket[];                                    // LED: mixfix continuation (call/member/index), tried before operators
-  postfixToks: string[];                              // LED: a postfix token `$ X` (e.g. a tagged template), tried like a mixfix led
+  ledAccessTail: boolean[];                           // parallel to leds: a "closed punct-connector" tail (member/call/index) — disabled once a postfix binds
+  postfixToks: string[];                              // LED: a postfix token `$ X` (e.g. a tagged template), tried like a mixfix led (also an access tail)
+  postfix: Array<{ op: string; lbp: number }>;        // LED: a postfix operator `$ ++` — binds iff lbp > minBp + !tailClosed, no rhs, closes the tail
 };
 export type RuleIR = RdRule | PrattRule;
 
@@ -253,8 +255,9 @@ function buildPratt(
   const nudToks: string[] = [];
   const nudBrackets: Bracket[] = [];
   const nudSeqs: Step[][] = [];
-  let sawPrefix = false, sawBinary = false;
+  let sawPrefix = false, sawBinary = false, sawPostfix = false;
   const leds: Bracket[] = [];
+  const ledAccessTail: boolean[] = [];
   const postfixToks: string[] = [];
   for (const alt of alts) {
     const items = alt.type === 'seq' ? alt.items : [alt];
@@ -279,7 +282,16 @@ function buildPratt(
     // LED (starts with self): `$ op $` (binary, op slot + trailing self) or `$ <lit> …` (mixfix)
     const rest = items.slice(1);
     if (rest[0].type === 'op') { sawBinary = true; continue; }
-    if (rest[0].type === 'literal') { leds.push({ first: rest[0].value, steps: rest.map((it) => stepOfPratt(it)) }); continue; }
+    if (rest[0].type === 'postfix') { sawPostfix = true; continue; }   // postfix operator (`x++`)
+    if (rest[0].type === 'literal') {
+      const steps = rest.map((it) => stepOfPratt(it));
+      const last = steps[steps.length - 1];
+      const lastIsOperand = last !== undefined && last.t === 'rule' && last.name === name;   // open binary/ternary operand
+      const wordConnector = /^[A-Za-z]/.test(rest[0].value);                                  // `in`/`instanceof`/`as` — not a tail
+      leds.push({ first: rest[0].value, steps });
+      ledAccessTail.push(!lastIsOperand && !wordConnector);
+      continue;
+    }
     if (rest.length === 1 && rest[0].type === 'ref' && a.tokenNames.has(rest[0].name)) { postfixToks.push(rest[0].name); continue; }  // postfix token (tagged template)
     throw new Error(`portable: Pratt LED shape not in scope (rule ${name})`);
   }
@@ -298,5 +310,8 @@ function buildPratt(
   const binary = sawBinary
     ? [...a.opTable.entries()].filter(([, info]) => info.position === 'infix').map(([op, info]) => ({ op, lbp: info.lbp, rbp: info.rbp }))
     : [];
-  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, prefix, binary, leds, postfixToks };
+  const postfix = sawPostfix
+    ? [...a.opTable.entries()].filter(([, info]) => info.position === 'postfix').map(([op, info]) => ({ op, lbp: info.lbp }))
+    : [];
+  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, prefix, binary, leds, ledAccessTail, postfixToks, postfix };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index 73357fd..6c04e9e 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -202,7 +202,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
 \t\tif ${b.steps.map(stepCond).join(' && ')} { return finish(${J(r.name)}, sb, t.Off) }
 \t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1
 \t}`;
-  const ledArm = (b: Bracket) => `\t\tif t.Text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}t.Text == ${J(b.first)} {
 \t\t\tledSave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 \t\t\tscratch = append(scratch, left)
 \t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
@@ -210,27 +210,34 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
 \t\t}`;
   const postfixArm = (tok: string) => {
     const tplPart = tpl && tok === tpl.token ? `
-\t\tif t.Kind == "$templateHead" {
+\t\tif !tailClosed && t.Kind == "$templateHead" {
 \t\t\tnode := matchTemplate()
 \t\t\tif node >= 0 { sb := len(scratch); scratch = append(scratch, left, node); left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
 \t\t}` : '';
-    return `\t\tif t.Kind == ${J(tok)} {
+    return `\t\tif !tailClosed && t.Kind == ${J(tok)} {
 \t\t\tsb := len(scratch); scratch = append(scratch, left, mkLeaf(t.Kind, t.Off, t.End)); pos++
 \t\t\tleft = finish(${J(r.name)}, sb, nodes[left].Offset); continue
 \t\t}${tplPart}`;
   };
+  const post = r.postfix.map((p) => `${J(p.op)}: ${p.lbp}`).join(', ');
   return `var ${r.name}BIN = map[string]bp{${bin}}
 var ${r.name}PRE = map[string]int{${pre}}
+var ${r.name}POST = map[string]int{${post}}
 var ${r.name}ATOM = map[string]bool{${atoms}}
 func parse${r.name}() int32 { return ${r.name}bp(0) }
 func ${r.name}bp(minBp int) int32 {
 \tleft := ${r.name}nud()
 \tif left < 0 { return -1 }
+\ttailClosed := false
 \tfor {
 \t\tt := peek()
 \t\tif t == nil { break }
-${r.leds.map(ledArm).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
+\t\tif post, ok := ${r.name}POST[t.Text]; ok && !tailClosed && post > minBp {
+\t\t\tsb := len(scratch); scratch = append(scratch, left, mkLeaf("$operator", t.Off, t.End)); pos++; tailClosed = true
+\t\t\tleft = finish(${J(r.name)}, sb, nodes[left].Offset); continue
+\t\t}
 \t\tinfo, ok := ${r.name}BIN[t.Text]
 \t\tif !ok || info.lbp <= minBp { break }
 \t\tledSave := pos; sb := len(scratch)
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 9e253ff..9f14ab8 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -230,7 +230,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
             if ${b.steps.map(stepCond).join(' && ')} { return Some(node(${J(r.name)}, kids)); }
             self.pos = save; return None;
         }`;
-  const ledArm = (b: Bracket) => `            if t.text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean) => `            if ${accessTail ? '!tail_closed && ' : ''}t.text == ${J(b.first)} {
                 let led_save = self.pos; let mut kids: Vec<Cst> = Vec::new();
                 if ${b.steps.map(stepCond).join(' && ')} {
                     let mut full = vec![left]; full.append(&mut kids);
@@ -240,19 +240,23 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
             }`;
   const postfixArm = (tok: string) => {
     const tplPart = tpl && tok === tpl.token ? `
-            if t.kind == "$templateHead" { if let Some(n) = self.match_template() { left = node(${J(r.name)}, vec![left, n]); continue; } }` : '';
-    return `            if t.kind == ${J(tok)} { self.pos += 1; let leaf = Cst::leaf(t.kind, t.off, t.end); left = node(${J(r.name)}, vec![left, leaf]); continue; }${tplPart}`;
+            if !tail_closed && t.kind == "$templateHead" { if let Some(n) = self.match_template() { left = node(${J(r.name)}, vec![left, n]); continue; } }` : '';
+    return `            if !tail_closed && t.kind == ${J(tok)} { self.pos += 1; let leaf = Cst::leaf(t.kind, t.off, t.end); left = node(${J(r.name)}, vec![left, leaf]); continue; }${tplPart}`;
   };
+  const postArms = r.postfix.map((p) => `${J(p.op)} => Some(${p.lbp})`).join(', ');
   return `    fn parse_${r.name}(&mut self) -> Option<Cst> { self.${r.name}_bp(0) }
     fn ${r.name}_bin(op: &str) -> Option<(i64, i64)> { match op { ${binArms}${binArms ? ', ' : ''}_ => None } }
     fn ${r.name}_pre(op: &str) -> Option<i64> { match op { ${preArms}${preArms ? ', ' : ''}_ => None } }
+    fn ${r.name}_post(op: &str) -> Option<i64> { match op { ${postArms}${postArms ? ', ' : ''}_ => None } }
     fn ${r.name}_atom(kind: &str) -> bool { matches!(kind, ${atomArm || '""'}) }
     fn ${r.name}_bp(&mut self, min_bp: i64) -> Option<Cst> {
         let mut left = self.${r.name}_nud()?;
+        let mut tail_closed = false;
         loop {
             let t = match self.peek() { Some(t) => t, None => break };
-${r.leds.map(ledArm).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
+            if let Some(plbp) = Parser::${r.name}_post(t.text) { if !tail_closed && plbp > min_bp { self.pos += 1; let op_leaf = Cst::leaf("$operator", t.off, t.end); left = node(${J(r.name)}, vec![left, op_leaf]); tail_closed = true; continue; } }
             let (lbp, rbp) = match Parser::${r.name}_bin(t.text) { Some(x) => x, None => break };
             if lbp <= min_bp { break; }
             let led_save = self.pos;
diff --git a/src/target-ts.ts b/src/target-ts.ts
index f394f54..ffb12e2 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -188,29 +188,35 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
       if (${b.steps.map(stepCond).join(' && ')}) return node(${J(r.name)}, kids);
       pos = save; return null;
     }`;
-  const ledArm = (b: Bracket) => `    if (t.text === ${J(b.first)}) {
+  // Access-tail leds (member/call/index) are disabled once a postfix has closed the operand.
+  const ledArm = (b: Bracket, accessTail: boolean) => `    if (${accessTail ? '!tailClosed && ' : ''}t.text === ${J(b.first)}) {
       const ledSave = pos; const kids: Cst[] = [left];
       if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.name)}, kids); continue; }
       pos = ledSave; break;
     }`;
-  // A postfix token (e.g. a tagged template) binds like a mixfix led: `left X` → node(left, X).
+  // A postfix token (e.g. a tagged template) binds like a mixfix led: `left X` → node(left, X). Also an access tail.
   const postfixArm = (tok: string) => {
     const tplPart = tpl && tok === tpl.token ? `
-    if (t.kind === '$templateHead') { const node = matchTemplate(); if (node !== null) { left = { rule: ${J(r.name)}, children: [left, node], offset: left.offset, end: node.end }; continue; } }` : '';
-    return `    if (t.kind === ${J(tok)}) { const leaf: Leaf = { tokenType: t.kind, offset: t.off, end: t.end }; pos++; left = { rule: ${J(r.name)}, children: [left, leaf], offset: left.offset, end: leaf.end }; continue; }${tplPart}`;
+    if (!tailClosed && t.kind === '$templateHead') { const node = matchTemplate(); if (node !== null) { left = { rule: ${J(r.name)}, children: [left, node], offset: left.offset, end: node.end }; continue; } }` : '';
+    return `    if (!tailClosed && t.kind === ${J(tok)}) { const leaf: Leaf = { tokenType: t.kind, offset: t.off, end: t.end }; pos++; left = { rule: ${J(r.name)}, children: [left, leaf], offset: left.offset, end: leaf.end }; continue; }${tplPart}`;
   };
+  const POST = `{ ${r.postfix.map((p) => `${J(p.op)}: ${p.lbp}`).join(', ')} }`;
   return `const ${r.name}_BIN: Record<string, { lbp: number; rbp: number }> = ${BIN};
 const ${r.name}_PRE: Record<string, number> = ${PRE};
+const ${r.name}_POST: Record<string, number> = ${POST};
 const ${r.name}_ATOM = ${atom};
 function parse${r.name}(): Node | null { return ${r.name}_bp(0); }
 function ${r.name}_bp(minBp: number): Node | null {
   let left = ${r.name}_nud();
   if (left === null) return null;
+  let tailClosed = false;
   for (;;) {
     const t = peek();
     if (t === null) break;
-${r.leds.map(ledArm).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
+    const post = ${r.name}_POST[t.text];
+    if (!tailClosed && post !== undefined && post > minBp) { pos++; const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end }; left = { rule: ${J(r.name)}, children: [left, opLeaf], offset: left.offset, end: t.end }; tailClosed = true; continue; }
     const info = ${r.name}_BIN[t.text];
     if (info === undefined || info.lbp <= minBp) break;
     const ledSave = pos;
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 429bd53..c26f9a0 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -110,6 +110,17 @@ const CASES: Case[] = [
     ],
     reject: ['if;', 'class;', 'new;', 'return + 1;'],   // reserved words can't be bare identifiers
   },
+  {
+    // Postfix-operator LED (`x++`/`x--`) + the access-tail closure: once a postfix binds, the
+    // operand is an update expression, so a further postfix or an access tail (`.`/`[`/`(`)
+    // can't attach (`a++--`, `a++.b` are ill-formed; `(a++).b` is fine).
+    grammar: 'postjs', path: '../examples/postjs.ts',
+    accept: [
+      'x++;', 'x--;', 'a + b++;', '++x;', 'x++ + y;', 'a.b++;', '(x)++;', '--a.b;',
+      'x++ * 2;', '(a++).b;', 'x.y.z++;',
+    ],
+    reject: ['a++--;', 'a++.b;', 'a++ ++;', '++;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From ab022a775ca571c3b4905c3a664414770ba72025 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 03:31:14 +0800
Subject: [PATCH 15/27] emit-portable: grouped sub-sequence `seq` step
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The next javascript.ts construct: a `seq` reaching stepOf — a star/sep body that is
itself a sequence, e.g. a comma list written `star([',', $])` (`many(',', $)`), the
shape javascript.ts uses for array/argument/sequence lists. stepOf/stepOfPratt now
compile a sequence into a `seq` step, rendered as the conjunction of its sub-steps
(the enclosing star/opt/sep handles backtracking).

examples/seqjs.ts verifies it across ts/go/rust — 10/10 accept, 4/4 reject per
target, byte-identical to createParser. Full suite 42/42. javascript.ts now advances
to the deferred construct it has been heading toward: arrow functions
(group{capBelow, ctxMode} — assignment-level precedence + the await/yield context fork).
---
 examples/seqjs.ts        | 33 +++++++++++++++++++++++++++++++++
 src/emit-portable.ts     |  5 ++++-
 src/target-go.ts         |  1 +
 src/target-rust.ts       |  2 ++
 src/target-ts.ts         |  1 +
 test/portable-targets.ts | 10 ++++++++++
 6 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 examples/seqjs.ts

diff --git a/examples/seqjs.ts b/examples/seqjs.ts
new file mode 100644
index 0000000..d0e40fe
--- /dev/null
+++ b/examples/seqjs.ts
@@ -0,0 +1,33 @@
+// Exercises a grouped sub-sequence `seq` step: comma lists written as `star([',', $])` (a
+// star whose body is the two-element sequence `, Expr`) rather than `sep(...)`, the shape
+// javascript.ts uses for argument/array/sequence lists.
+import {
+  token, rule, defineGrammar, left, op,
+  seq, oneOf, range, star, opt, many,
+} from '../src/api.ts';
+// `many(',', $)` is the rule-level `(',' Expr)*` — a star whose body is the sequence
+// `, Expr`, exactly the shape javascript.ts uses for comma lists.
+
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(range('0', '9'), star(range('0', '9'))), { scope: 'constant.numeric' });
+
+const jsPrec = [left('+', '-'), left('*', '/')];
+const Expr = rule(($) => [
+  Number_, Ident,
+  ['(', $, ')'],
+  ['[', opt($, many(',', $)), ']'],               // array literal via star(seq)
+  [$, op, $],
+  [$, '(', opt($, many(',', $)), ')'],            // call args via star(seq)
+]);
+const Stmt = rule(($) => [[Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'seqjs',
+  scopeName: 'source.seqjs',
+  tokens: { Ident, Number: Number_ },
+  prec: jsPrec,
+  rules: { Expr, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 1b642c8..89bfb00 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -51,7 +51,8 @@ export type Step =
   | { t: 'sep'; elem: Step; delim: string }                     // elem (delim elem)*
   | { t: 'altlit'; opts: Lit[] }                                // inline alternation of literals (fast path)
   | { t: 'alt'; branches: Step[][] }                            // inline alternation of sub-sequences (backtracking)
-  | { t: 'not'; steps: Step[] };                                // zero-width negative lookahead (consumes nothing)
+  | { t: 'not'; steps: Step[] }                                 // zero-width negative lookahead (consumes nothing)
+  | { t: 'seq'; steps: Step[] };                                // a grouped sub-sequence (e.g. a star body `(',' Expr)`)
 export type Alt = Step[];
 
 export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
@@ -141,6 +142,7 @@ function buildIR(grammar: CstGrammar): ParserIR {
       case 'ref': return tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name };
       case 'group': { const ss = altSteps(e.body); if (ss.length !== 1) throw new Error('portable: group must reduce to a single step'); return ss[0]; }
       case 'not': return { t: 'not', steps: altSteps(e.body) };   // zero-width negative lookahead
+      case 'seq': return { t: 'seq', steps: e.items.map(stepOf) };  // grouped sub-sequence (star/sep body)
       case 'sep': return { t: 'sep', elem: stepOf(e.element), delim: e.delimiter };
       case 'quantifier':
         if (e.kind === '*') return { t: 'star', step: stepOf(e.body) };
@@ -298,6 +300,7 @@ function buildPratt(
   // a self-ref inside a NUD/LED sub-sequence is a fresh parse of this rule
   function stepOfPratt(e: RuleExpr): Step {
     if (e.type === 'ref' && e.name === name) return { t: 'rule', name };
+    if (e.type === 'seq') return { t: 'seq', steps: e.items.map(stepOfPratt) };
     if (e.type === 'not') return { t: 'not', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
     if (e.type === 'group' && !e.capBelow && !e.ctxMode && !e.suppress && e.body.type !== 'seq') return stepOfPratt(e.body);
     if (e.type === 'sep') return { t: 'sep', elem: stepOfPratt(e.element), delim: e.delimiter };
diff --git a/src/target-go.ts b/src/target-go.ts
index 6c04e9e..8a936d5 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -171,6 +171,7 @@ function stepCond(s: Step): string {
     case 'altlit': return `altLit([][2]string{${s.opts.map((o) => `{${J(o.value)}, ${J(o.ttype)}}`).join(', ')}})`;
     case 'alt': return `func() bool { ${s.branches.map((br) => `{ save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${br.length ? br.map(stepCond).join(' && ') : 'true'} { return true }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('; ')}; return false }()`;
     case 'not': return `func() bool { save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); m := ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return !m }()`;
+    case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
   }
 }
 
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 9f14ab8..20f540d 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -180,6 +180,7 @@ function stepCond(s: Step): string {
     case 'altlit': return `self.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], &mut kids)`;
     case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(self, &mut kids)`;
     case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(self, &mut kids)`;
+    case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
   }
 }
 // A backtracking inline alternation rendered as an immediately-applied closure over (p, k),
@@ -203,6 +204,7 @@ function stepCondP(s: Step): string {
     case 'altlit': return `p.alt_lit(&[${s.opts.map((o) => `(${J(o.value)}, ${J(o.ttype)})`).join(', ')}], k)`;
     case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(p, k)`;
     case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(p, k)`;
+    case 'seq': return `(${s.steps.length ? s.steps.map(stepCondP).join(' && ') : 'true'})`;
   }
 }
 
diff --git a/src/target-ts.ts b/src/target-ts.ts
index ffb12e2..41ad3f3 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -163,6 +163,7 @@ function stepCond(s: Step): string {
     case 'altlit': return `altLit([${s.opts.map((o) => `[${J(o.value)}, ${J(o.ttype)}]`).join(', ')}], kids)`;
     case 'alt': return `(() => { ${s.branches.map((br) => `{ const sp = pos; const bk = kids.length; if (${br.length ? br.map(stepCond).join(' && ') : 'true'}) return true; pos = sp; kids.length = bk; }`).join(' ')} return false; })()`;
     case 'not': return `(() => { const sp = pos; const bk = kids.length; const m = ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = sp; kids.length = bk; return !m; })()`;
+    case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
   }
 }
 
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index c26f9a0..a433a1d 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -121,6 +121,16 @@ const CASES: Case[] = [
     ],
     reject: ['a++--;', 'a++.b;', 'a++ ++;', '++;'],
   },
+  {
+    // A grouped sub-sequence `seq` step: comma lists as `star([',', $])` (e.g. `many(',', $)`),
+    // the array/argument-list shape javascript.ts uses.
+    grammar: 'seqjs', path: '../examples/seqjs.ts',
+    accept: [
+      '[1, 2, 3];', '[];', '[1];', 'f(1, 2);', 'f();', '[a + b, c];',
+      'f(g(1, 2), 3);', '(x);', 'f(a)(b, c);', '[[1,2],[3,4]];',
+    ],
+    reject: ['[1 2];', 'f(1,);', '[, 1];', 'f(1 2);'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From 9624d4fb06ccbfc16560ac420ee9176263600582 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 03:44:01 +0800
Subject: [PATCH 16/27] emit-portable: `sameLine` zero-width assertion + lexer
 newline tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

typescript.ts's first parser-algebra blocker (and a piece of async arrows): the
`sameLine` restricted-production assertion — matches, consuming nothing, iff the
next token has no preceding line terminator. The lexer now tracks newline-before
per token (a `nl` flag on Tok), set when the skipped whitespace contains a newline
OR a skipped comment spans one, so a block comment across a newline counts. In the
stateful lexer the flag lives on LexState; otherwise a local threaded through the
plain push.

examples/sljs.ts (a `return` that takes a value only on the same line) verifies it
across ts/go/rust: `return 1;` keeps the value; `return\n1;`, `return /*\n*/ 1;`
(block comment spanning a newline) and `return // c\n 1;` correctly reject — 7/7
accept, 4/4 reject per target, byte-identical to createParser. Full suite 42/42.
typescript.ts now clears sameLine and advances to `notLeftLeaf`; javascript.ts
remains at arrow functions (capBelow/ctxMode).
---
 examples/sljs.ts         | 29 +++++++++++++++++++++++++++++
 src/emit-portable.ts     |  5 ++++-
 src/target-go.ts         | 16 +++++++++++-----
 src/target-rust.ts       | 21 +++++++++++++--------
 src/target-ts.ts         | 16 ++++++++++------
 test/portable-targets.ts | 11 +++++++++++
 6 files changed, 78 insertions(+), 20 deletions(-)
 create mode 100644 examples/sljs.ts

diff --git a/examples/sljs.ts b/examples/sljs.ts
new file mode 100644
index 0000000..68421a5
--- /dev/null
+++ b/examples/sljs.ts
@@ -0,0 +1,29 @@
+// Exercises the `sameLine` zero-width assertion (no line terminator before the next token).
+// A `return` takes a value only when it is on the SAME line (ASI-style restricted production):
+// `return 1;` keeps the value, `return\n1;` does not. Verifies the lexer's newline-before
+// tracking — including a block comment that spans a newline.
+import {
+  token, rule, defineGrammar, left, op,
+  seq, oneOf, range, star, opt, many, altPattern, noneOf, sameLine,
+} from '../src/api.ts';
+
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(range('0', '9'), star(range('0', '9'))), { scope: 'constant.numeric' });
+const LineComment = token(seq('//', star(noneOf('\n'))), { skip: true, scope: 'comment.line' });
+const BlockComment = token(seq('/*', star(altPattern(noneOf('*'), seq('*', noneOf('/')))), '*/'), { skip: true, scope: 'comment.block' });
+
+const jsPrec = [left('+', '-'), left('*', '/')];
+const Expr = rule(($) => [Number_, Ident, ['(', $, ')'], [$, op, $]]);
+const Ret = rule(($) => [['return', opt(sameLine, Expr), ';']]);   // `return` + a SAME-LINE value
+const Stmt = rule(($) => [Ret, [Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'sljs',
+  scopeName: 'source.sljs',
+  tokens: { Ident, Number: Number_, LineComment, BlockComment },
+  prec: jsPrec,
+  rules: { Expr, Ret, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 89bfb00..079bdee 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -52,7 +52,8 @@ export type Step =
   | { t: 'altlit'; opts: Lit[] }                                // inline alternation of literals (fast path)
   | { t: 'alt'; branches: Step[][] }                            // inline alternation of sub-sequences (backtracking)
   | { t: 'not'; steps: Step[] }                                 // zero-width negative lookahead (consumes nothing)
-  | { t: 'seq'; steps: Step[] };                                // a grouped sub-sequence (e.g. a star body `(',' Expr)`)
+  | { t: 'seq'; steps: Step[] }                                 // a grouped sub-sequence (e.g. a star body `(',' Expr)`)
+  | { t: 'sameLine' };                                          // zero-width: the next token is on the same line (no preceding newline)
 export type Alt = Step[];
 
 export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
@@ -142,6 +143,7 @@ function buildIR(grammar: CstGrammar): ParserIR {
       case 'ref': return tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name };
       case 'group': { const ss = altSteps(e.body); if (ss.length !== 1) throw new Error('portable: group must reduce to a single step'); return ss[0]; }
       case 'not': return { t: 'not', steps: altSteps(e.body) };   // zero-width negative lookahead
+      case 'sameLine': return { t: 'sameLine' };                  // zero-width no-newline assertion
       case 'seq': return { t: 'seq', steps: e.items.map(stepOf) };  // grouped sub-sequence (star/sep body)
       case 'sep': return { t: 'sep', elem: stepOf(e.element), delim: e.delimiter };
       case 'quantifier':
@@ -301,6 +303,7 @@ function buildPratt(
   function stepOfPratt(e: RuleExpr): Step {
     if (e.type === 'ref' && e.name === name) return { t: 'rule', name };
     if (e.type === 'seq') return { t: 'seq', steps: e.items.map(stepOfPratt) };
+    if (e.type === 'sameLine') return { t: 'sameLine' };
     if (e.type === 'not') return { t: 'not', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
     if (e.type === 'group' && !e.capBelow && !e.ctxMode && !e.suppress && e.body.type !== 'seq') return stepOfPratt(e.body);
     if (e.type === 'sep') return { t: 'sep', elem: stepOfPratt(e.element), delim: e.delimiter };
diff --git a/src/target-go.ts b/src/target-go.ts
index 8a936d5..82b70c5 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -48,7 +48,7 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): st
   const name = (t as { name: string }).name;
   const stateful = rxTok !== undefined || tplTok !== undefined;
   if (tplTok !== undefined && name === tplTok) return '';   // template token scanned by the state machine
-  const push = (endE: string) => (t.skip ? '' : stateful ? `emit(${J(name)}, src[pos:${endE}], pos, ${endE}); ` : `toks = append(toks, Tok{${J(name)}, src[pos:${endE}], pos, ${endE}}); `);
+  const push = (endE: string) => (t.skip ? `if strings.Contains(src[pos:${endE}], "\\n") { pendingNl = true }; ` : stateful ? `emit(${J(name)}, src[pos:${endE}], pos, ${endE}); ` : `pushTok(${J(name)}, src[pos:${endE}], pos, ${endE}); `);
   const gate = rxTok !== undefined && name === rxTok ? '!prevIsValue() && ' : '';
   if (t.kind === 'run') return `\t\tif ${gate}${rangeCond('c', t.first)} {
 \t\t\te := pos + 1
@@ -81,7 +81,7 @@ function lexer(ir: ParserIR): string {
   const tpl = ir.tpl;
   const stateful = !!(rx || tpl);
   const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken, tpl?.token)).join('\n');
-  const pushPunct = stateful ? (p: string) => `emit("", ${J(p)}, pos, pos + ${p.length})` : (p: string) => `toks = append(toks, Tok{"", ${J(p)}, pos, pos + ${p.length}})`;
+  const pushPunct = stateful ? (p: string) => `emit("", ${J(p)}, pos, pos + ${p.length})` : (p: string) => `pushTok("", ${J(p)}, pos, pos + ${p.length})`;
   const puncts = ir.puncts.map((p) =>
     `\t\tif strings.HasPrefix(src[pos:], ${J(p)}) { ${pushPunct(p)}; pos += ${p.length}; continue }`).join('\n');
   const goMap = (a: string[]) => `map[string]bool{${a.map((x) => `${J(x)}: true`).join(', ')}}`;
@@ -129,7 +129,7 @@ function lexer(ir: ParserIR): string {
   const emitTail = rx ? `\n\t\tbpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true` : '';
   const emitFn = stateful ? `\temit := func(kind, text string, off, end int) {
 ${emitHooks}
-\t\ttoks = append(toks, Tok{kind, text, off, end})${emitTail}
+\t\ttoks = append(toks, Tok{kind, text, off, end, pendingNl}); pendingNl = false${emitTail}
 \t}
 \t_ = emit
 ` : '';
@@ -145,13 +145,17 @@ ${emitHooks}
 \t\t\tpos = e; continue
 \t\t}
 ` : '';
+  const pushTokFn = stateful ? '' : `\tpushTok := func(kind, text string, off, end int) { toks = append(toks, Tok{kind, text, off, end, pendingNl}); pendingNl = false }\n\t_ = pushTok\n`;
   return `${defs.length ? 'var _s string\n' + defs.join('\n') + '\n' : ''}func lex(src string) []Tok {
 \ttoks := toks[:0]
 \tn := len(src)
 \tpos := 0
-${rxState}${tplState}${emitFn}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
+\tpendingNl := false
+\t_ = pendingNl
+${rxState}${tplState}${emitFn}${pushTokFn}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
 \t\tc := int(src[pos])
-\t\tif c == 32 || c == 9 || c == 10 || c == 13 { pos++; continue }
+\t\tif c == 32 || c == 9 { pos++; continue }
+\t\tif c == 10 || c == 13 { pendingNl = true; pos++; continue }
 ${tplDispatch}${toks}
 ${puncts}
 \t\tpanic(fmt.Sprintf("lex error at %d", pos))
@@ -172,6 +176,7 @@ function stepCond(s: Step): string {
     case 'alt': return `func() bool { ${s.branches.map((br) => `{ save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${br.length ? br.map(stepCond).join(' && ') : 'true'} { return true }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('; ')}; return false }()`;
     case 'not': return `func() bool { save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); m := ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return !m }()`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
+    case 'sameLine': return `func() bool { t := peek(); return t != nil && !t.Nl }()`;
   }
 }
 
@@ -310,6 +315,7 @@ import (
 type Tok struct {
 \tKind, Text string
 \tOff, End   int
+\tNl         bool
 }
 // Arena node: an int32 index into nodes; children are a flat range in kids.
 type Node struct {
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 20f540d..4b032fb 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -51,7 +51,8 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): st
   const name = (t as { name: string }).name;
   const stateful = rxTok !== undefined || tplTok !== undefined;
   if (tplTok !== undefined && name === tplTok) return '';   // template token scanned by the state machine
-  const push = (endE: string) => (t.skip ? '' : stateful ? `st.emit(${J(name)}, &src[pos..${endE}], pos, ${endE}); ` : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE} }); `);
+  const nlVar = stateful ? 'st.pending_nl' : 'pending_nl';
+  const push = (endE: string) => (t.skip ? `if src[pos..${endE}].contains('\\n') { ${nlVar} = true; } ` : stateful ? `st.emit(${J(name)}, &src[pos..${endE}], pos, ${endE}); ` : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE}, nl: pending_nl }); pending_nl = false; `);
   const gate = rxTok !== undefined && name === rxTok ? '!st.prev_is_value() && ' : '';
   if (t.kind === 'run') return `        if ${gate}${rangeCond('c', t.first)} {
             let mut e = pos + 1;
@@ -85,7 +86,7 @@ function lexer(ir: ParserIR): string {
   const stateful = !!(rx || tpl);
   const toks = ir.tokens.map((t) => scanTok(t, defs, rx?.regexToken, tpl?.token)).join('\n');
   const puncts = ir.puncts.map((p) =>
-    `        if src[pos..].starts_with(${J(p)}) { ${stateful ? `st.emit("", &src[pos..pos + ${p.length}], pos, pos + ${p.length});` : `toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length} });`} pos += ${p.length}; continue; }`).join('\n');
+    `        if src[pos..].starts_with(${J(p)}) { ${stateful ? `st.emit("", &src[pos..pos + ${p.length}], pos, pos + ${p.length});` : `toks.push(Tok { kind: "", text: &src[pos..pos + ${p.length}], off: pos, end: pos + ${p.length}, nl: pending_nl }); pending_nl = false;`} pos += ${p.length}; continue; }`).join('\n');
   const rsArr = (a: string[]) => `&[${a.map(J).join(', ')}]`;
   // Struct fields / emit hooks / init are assembled per-feature so a grammar can have regex,
   // templates, or both share one LexState.
@@ -109,7 +110,7 @@ fn _in(set: &[&str], x: &str) -> bool { set.iter().any(|s| *s == x) }
     (false, p)
 }
 ` : '';
-  const fields = ['toks: Vec<Tok<\'a>>',
+  const fields = ['toks: Vec<Tok<\'a>>', 'pending_nl: bool',
     rx ? 'prev_text: &\'a str, prev_kind: &\'static str, bp_text: &\'a str, has_prev: bool, has_prev2: bool, paren_head: Vec<bool>, last_close: bool, last_bang: bool' : '',
     tpl ? 'template_stack: Vec<i64>' : ''].filter(Boolean).join(', ');
   const prevIsValue = rx ? `    fn prev_is_value(&self) -> bool {
@@ -132,14 +133,15 @@ fn _in(set: &[&str], x: &str) -> bool { set.iter().any(|s| *s == x) }
 impl<'a> LexState<'a> {
 ${prevIsValue}    fn emit(&mut self, kind: &'static str, text: &'a str, off: usize, end: usize) {
 ${emitHooks}
-        self.toks.push(Tok { kind, text, off, end });${emitTail}
+        self.toks.push(Tok { kind, text, off, end, nl: self.pending_nl }); self.pending_nl = false;${emitTail}
     }
 }
 ` : '';
-  const initFields = ['toks: Vec::new()',
+  const initFields = ['toks: Vec::new()', 'pending_nl: false',
     rx ? 'prev_text: "", prev_kind: "", bp_text: "", has_prev: false, has_prev2: false, paren_head: Vec::new(), last_close: false, last_bang: false' : '',
     tpl ? 'template_stack: Vec::new()' : ''].filter(Boolean).join(', ');
-  const open = stateful ? `    let mut st = LexState { ${initFields} };` : `    let mut toks: Vec<Tok> = Vec::new();`;
+  const open = stateful ? `    let mut st = LexState { ${initFields} };` : `    let mut toks: Vec<Tok> = Vec::new();\n    let mut pending_nl = false;`;
+  const nlVar = stateful ? 'st.pending_nl' : 'pending_nl';
   const tplDispatch = tpl ? `        if !st.template_stack.is_empty() && src[pos..].starts_with(${J(tpl.interpClose)}) && *st.template_stack.last().unwrap() == 0 {
             st.template_stack.pop();
             let (interp, e) = _scan_tpl_span(src, pos + ${tpl.interpClose.length});
@@ -159,7 +161,8 @@ ${open}
     let mut pos = 0usize;
     while pos < n {
         let c = b[pos] as u32;
-        if c == 32 || c == 9 || c == 10 || c == 13 { pos += 1; continue; }
+        if c == 32 || c == 9 { pos += 1; continue; }
+        if c == 10 || c == 13 { ${nlVar} = true; pos += 1; continue; }
 ${tplDispatch}${toks}
 ${puncts}
         panic!("lex error at {}", pos);
@@ -181,6 +184,7 @@ function stepCond(s: Step): string {
     case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(self, &mut kids)`;
     case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(self, &mut kids)`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
+    case 'sameLine': return `matches!(self.peek(), Some(t) if !t.nl)`;
   }
 }
 // A backtracking inline alternation rendered as an immediately-applied closure over (p, k),
@@ -205,6 +209,7 @@ function stepCondP(s: Step): string {
     case 'alt': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${altBody(s.branches)} })(p, k)`;
     case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(p, k)`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCondP).join(' && ') : 'true'})`;
+    case 'sameLine': return `matches!(p.peek(), Some(t) if !t.nl)`;
   }
 }
 
@@ -318,7 +323,7 @@ use std::io::Read;
 // Zero-alloc tokens: kind is a known grammar name (&'static str), text is a slice of the
 // source. Tok is Copy, so peek() copies pointers — no per-peek heap work.
 #[derive(Clone, Copy)]
-struct Tok<'a> { kind: &'static str, text: &'a str, off: usize, end: usize }
+struct Tok<'a> { kind: &'static str, text: &'a str, off: usize, end: usize, nl: bool }
 
 // CST nodes hold only &'static str labels (rule names / token-type tags are all literals)
 // + usize spans — no per-node String allocation.
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 41ad3f3..fcfb818 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -45,8 +45,9 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): st
   const name = (t as { name: string }).name;
   const stateful = rxTok !== undefined || tplTok !== undefined;
   if (tplTok !== undefined && name === tplTok) return '';   // template token is scanned by the state machine
-  // `emit(...)` threads the lexer state in stateful mode; a plain push otherwise.
-  const push = (endExpr: string) => (t.skip ? '' : `${stateful ? 'emit' : 'push'}(${J(name)}, src.slice(pos, ${endExpr}), pos, ${endExpr}); `);
+  // `emit(...)` threads the lexer state in stateful mode; a plain push otherwise. A skipped
+  // token (comment) still records a newline it spans, so `sameLine` sees it.
+  const push = (endExpr: string) => (t.skip ? `if (src.slice(pos, ${endExpr}).indexOf('\\n') >= 0) pendingNl = true; ` : `${stateful ? 'emit' : 'push'}(${J(name)}, src.slice(pos, ${endExpr}), pos, ${endExpr}); `);
   const gate = rxTok !== undefined && name === rxTok ? '!prevIsValue() && ' : '';
   if (t.kind === 'run') return `    if (${gate}${rangeCond('c', t.first)}) {
       let e = pos + 1;
@@ -118,7 +119,7 @@ function lexer(ir: ParserIR): string {
   const emitTail = rx ? `\n    bpText = prevText; hasPrev2 = hasPrev; prevKind = kind; prevText = text; hasPrev = true;` : '';
   const emitFn = stateful ? `  function emit(kind: string, text: string, off: number, end: number): void {
 ${emitHooks}
-    toks.push({ kind, text, off, end });${emitTail}
+    toks.push({ kind, text, off, end, nl: pendingNl }); pendingNl = false;${emitTail}
   }
 ` : '';
   // Template dispatch runs at the top of the loop, before token/punct scanning.
@@ -140,9 +141,11 @@ ${emitHooks}
   const toks: Tok[] = [];
   const n = src.length;
   let pos = 0;
-${defs.length ? '  _s = src;\n' : ''}${rxState}${tplState}${stateful ? emitFn : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end }); };\n'}  while (pos < n) {
+  let pendingNl = false;
+${defs.length ? '  _s = src;\n' : ''}${rxState}${tplState}${stateful ? emitFn : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end, nl: pendingNl }); pendingNl = false; };\n'}  while (pos < n) {
     const c = src.charCodeAt(pos);
-    if (c === 32 || c === 9 || c === 10 || c === 13) { pos++; continue; }
+    if (c === 32 || c === 9) { pos++; continue; }
+    if (c === 10 || c === 13) { pendingNl = true; pos++; continue; }
 ${tplDispatch}${toks}
 ${puncts}
     throw new Error('lex error at ' + pos + ': ' + JSON.stringify(src[pos]));
@@ -164,6 +167,7 @@ function stepCond(s: Step): string {
     case 'alt': return `(() => { ${s.branches.map((br) => `{ const sp = pos; const bk = kids.length; if (${br.length ? br.map(stepCond).join(' && ') : 'true'}) return true; pos = sp; kids.length = bk; }`).join(' ')} return false; })()`;
     case 'not': return `(() => { const sp = pos; const bk = kids.length; const m = ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = sp; kids.length = bk; return !m; })()`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
+    case 'sameLine': return `(() => { const t = peek(); return t !== null && !t.nl; })()`;
   }
 }
 
@@ -274,7 +278,7 @@ export const tsTarget: Target = {
     return `// GENERATED by emit-portable.ts (tsTarget) — parser for grammar "${ir.grammarName}".
 import { readFileSync } from 'node:fs';
 
-type Tok = { kind: string; text: string; off: number; end: number };
+type Tok = { kind: string; text: string; off: number; end: number; nl: boolean };
 type Leaf = { tokenType: string; offset: number; end: number };
 type Node = { rule: string; children: Cst[]; offset: number; end: number };
 type Cst = Node | Leaf;
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index a433a1d..e9cfc59 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -131,6 +131,17 @@ const CASES: Case[] = [
     ],
     reject: ['[1 2];', 'f(1,);', '[, 1];', 'f(1 2);'],
   },
+  {
+    // The `sameLine` zero-width assertion (no line terminator before the next token):
+    // `return` takes a value only on the same line. Also verifies the lexer's newline-before
+    // tracking across a block comment that spans a newline.
+    grammar: 'sljs', path: '../examples/sljs.ts',
+    accept: [
+      'return 1;', 'return;', 'return 1 + 2;', '1 + 2;', 'return /* c */ 1;',
+      '(a);', 'return (1);',
+    ],
+    reject: ['return\n1;', 'return\nx;', 'return /*\n*/ 1;', 'return // c\n 1;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From f807c6b5d72fc52cb8041c1764bac304348c20ef Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 04:00:32 +0800
Subject: [PATCH 17/27] emit-portable: capBelow arrow functions + fix sep
 trailing-delimiter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hardest parser construct, the wall javascript.ts has been heading toward:
assignment-level (capBelow) NUDs — arrow functions. A capExpr NUD carries the
binding power of its connector; it is parsed only when the enclosing minBp is
LOOSER than that (so `1 + () => x` needs parens), and once parsed it is "capped" —
the led loop is skipped entirely (`() => {} || a` rejects). The nud now takes minBp,
tries the capped sequences FIRST (so the `(x) => y` vs `(x)` ambiguity resolves by
longest-match — the arrow is attempted, then falls back to grouping), and signals
the cap via `_capped`. The `=>` body's ctxMode (await/yield) is treated as
transparent: the context fork is not modelled, so this covers basic arrows, not
async/await bodies.

Also fixes a latent `sep` bug surfaced by `(a,) => b`: gen-parser's sep allows a
trailing delimiter, the portable sepBy did not. Now matched in all three targets —
earlier grammars simply had no trailing-delimiter test, so the aggregate passed for
the wrong reason.

examples/arrowjs.ts verifies it across ts/go/rust — 14/14 accept (incl. trailing
commas and curried `x => y => x`), 4/4 reject, byte-identical to createParser. Full
suite 42/42. javascript.ts clears the arrow wall and advances to the next group case.
---
 examples/arrowjs.ts      | 36 ++++++++++++++++++++++++++++++++++++
 src/emit-portable.ts     | 24 ++++++++++++++++++------
 src/target-go.ts         | 14 +++++++++++---
 src/target-rust.ts       | 21 ++++++++++++++-------
 src/target-ts.ts         | 14 +++++++++++---
 test/portable-targets.ts | 12 ++++++++++++
 6 files changed, 102 insertions(+), 19 deletions(-)
 create mode 100644 examples/arrowjs.ts

diff --git a/examples/arrowjs.ts b/examples/arrowjs.ts
new file mode 100644
index 0000000..486cdaa
--- /dev/null
+++ b/examples/arrowjs.ts
@@ -0,0 +1,36 @@
+// Exercises the capBelow (assignment-level) Pratt construct — arrow functions. A `capExpr`
+// NUD is parsed only when the enclosing minBp is LOOSER than its connector's binding power
+// (so `1 + (() => x)` needs the parens) and, once parsed, admits NO led (it is "capped").
+// The `=>` body's ctxMode (await/yield) is treated as transparent here — the context fork
+// is NOT modelled, so this covers basic arrows, not async/await bodies.
+import {
+  token, rule, defineGrammar, left, right, op, capExpr, alt,
+  seq, oneOf, range, star, sep, opt, many,
+} from '../src/api.ts';
+
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(range('0', '9'), star(range('0', '9'))), { scope: 'constant.numeric' });
+
+const jsPrec = [right('='), left('||'), left('+', '-'), left('*', '/')];
+
+const Block = rule(($) => [['{', many(Stmt), '}']]);
+const Expr = rule(($) => [
+  Number_, Ident,
+  ['(', $, ')'],
+  capExpr('=', '(', opt(sep(Ident, ',')), ')', '=>', alt(Block, $)),   // (params) => body
+  capExpr('=', Ident, '=>', alt(Block, $)),                             // x => body
+  [$, op, $],
+  [$, '(', opt(sep($, ',')), ')'],                                      // call
+]);
+const Stmt = rule(($) => [Block, [Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'arrowjs',
+  scopeName: 'source.arrowjs',
+  tokens: { Ident, Number: Number_ },
+  prec: jsPrec,
+  rules: { Expr, Block, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 079bdee..12751b0 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -64,6 +64,7 @@ export type PrattRule = {
   nudToks: string[];                                  // NUD: a bare token wrapped in a node
   nudBrackets: Bracket[];                             // NUD: '(' … ')' / '[' … ']'
   nudSeqs: Step[][];                                  // NUD: a general sequence (guarded ident, class expr), tried with backtracking
+  nudCapped: Array<{ steps: Step[]; capBp: number }>; // NUD: an assignment-level capped sequence (arrow function) — parsed only when minBp < capBp, admits no led
   prefix: Array<{ op: string; rbp: number }>;         // NUD: prefix op then operand at rbp
   binary: Array<{ op: string; lbp: number; rbp: number }>;  // LED: infix op, bind iff lbp > minBp, rhs at rbp
   leds: Bracket[];                                    // LED: mixfix continuation (call/member/index), tried before operators
@@ -259,6 +260,7 @@ function buildPratt(
   const nudToks: string[] = [];
   const nudBrackets: Bracket[] = [];
   const nudSeqs: Step[][] = [];
+  const nudCapped: Array<{ steps: Step[]; capBp: number }> = [];
   let sawPrefix = false, sawBinary = false, sawPostfix = false;
   const leds: Bracket[] = [];
   const ledAccessTail: boolean[] = [];
@@ -270,15 +272,24 @@ function buildPratt(
       // NUD
       if (items.length === 1 && items[0].type === 'ref' && a.tokenNames.has(items[0].name)) { nudToks.push(items[0].name); continue; }
       if (items[0].type === 'prefix') { sawPrefix = true; continue; }
+      // A capExpr (arrow function): an assignment-level group{capBelow}. ctxMode in its body
+      // is treated as transparent (the await/yield fork is not modelled in the portable parser).
+      if (items.length === 1 && items[0].type === 'group' && items[0].capBelow !== undefined) {
+        const capBp = a.nudCapOf(items[0]);
+        if (capBp === null) throw new Error(`portable: capBelow connector '${items[0].capBelow}' has no binding power (rule ${name})`);
+        const b = items[0].body;
+        nudCapped.push({ steps: (b.type === 'seq' ? b.items : [b]).map((it) => stepOfPratt(it)), capBp });
+        continue;
+      }
       if (items[0].type === 'literal') { nudBrackets.push({ first: items[0].value, steps: items.map((it) => stepOfPratt(it)) }); continue; }
       // A single transparent group unwraps to its body (an explicit grouping of the NUD sequence).
       let nudItems = items;
-      if (items.length === 1 && items[0].type === 'group' && !items[0].capBelow && !items[0].ctxMode && !items[0].suppress) {
+      if (items.length === 1 && items[0].type === 'group' && !items[0].suppress) {
         nudItems = items[0].body.type === 'seq' ? items[0].body.items : [items[0].body];
       }
-      // capBelow / ctxMode (arrow functions, await/yield context) are a deeper construct — defer.
-      if (nudItems.some((it) => it.type === 'group' && (it.capBelow || it.ctxMode || it.suppress))) {
-        throw new Error(`portable: Pratt NUD with capBelow/ctxMode/suppress not yet in scope (rule ${name}) — arrow functions etc.`);
+      // A no-`in`/suppress group is a deeper construct — defer.
+      if (nudItems.some((it) => it.type === 'group' && it.suppress)) {
+        throw new Error(`portable: Pratt NUD with suppress (no-in context) not yet in scope (rule ${name})`);
       }
       nudSeqs.push(nudItems.map((it) => stepOfPratt(it)));   // general NUD sequence (guarded ident, class expr)
       continue;
@@ -305,7 +316,8 @@ function buildPratt(
     if (e.type === 'seq') return { t: 'seq', steps: e.items.map(stepOfPratt) };
     if (e.type === 'sameLine') return { t: 'sameLine' };
     if (e.type === 'not') return { t: 'not', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
-    if (e.type === 'group' && !e.capBelow && !e.ctxMode && !e.suppress && e.body.type !== 'seq') return stepOfPratt(e.body);
+    // ctxMode (await/yield) is transparent to the portable parser (no fork); unwrap a non-seq group.
+    if (e.type === 'group' && !e.capBelow && !e.suppress && e.body.type !== 'seq') return stepOfPratt(e.body);
     if (e.type === 'sep') return { t: 'sep', elem: stepOfPratt(e.element), delim: e.delimiter };
     if (e.type === 'quantifier' && e.kind === '?') return { t: 'opt', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
     if (e.type === 'quantifier' && e.kind === '*') return { t: 'star', step: stepOfPratt(e.body) };
@@ -319,5 +331,5 @@ function buildPratt(
   const postfix = sawPostfix
     ? [...a.opTable.entries()].filter(([, info]) => info.position === 'postfix').map(([op, info]) => ({ op, lbp: info.lbp }))
     : [];
-  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, prefix, binary, leds, ledAccessTail, postfixToks, postfix };
+  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, nudCapped, prefix, binary, leds, ledAccessTail, postfixToks, postfix };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index 82b70c5..6729ec3 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -232,8 +232,9 @@ var ${r.name}POST = map[string]int{${post}}
 var ${r.name}ATOM = map[string]bool{${atoms}}
 func parse${r.name}() int32 { return ${r.name}bp(0) }
 func ${r.name}bp(minBp int) int32 {
-\tleft := ${r.name}nud()
+\tleft := ${r.name}nud(minBp)
 \tif left < 0 { return -1 }
+\tif _capped { return left }
 \ttailClosed := false
 \tfor {
 \t\tt := peek()
@@ -256,9 +257,11 @@ ${r.postfixToks.map(postfixArm).join('\n')}
 \t}
 \treturn left
 }
-func ${r.name}nud() int32 {
+func ${r.name}nud(minBp int) int32 {
+\t_capped = false
 \tt := peek()
 \tif t == nil { return -1 }
+${r.nudCapped.map((c) => `\tif minBp < ${c.capBp} { save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'} { _capped = true; return finish(${J(r.name)}, sb, offAt(save)) }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('\n')}
 ${tplNud}\tif ${r.name}ATOM[t.Kind] {
 \t\tsb := len(scratch); scratch = append(scratch, mkLeaf(t.Kind, t.Off, t.End)); pos++
 \t\treturn finish(${J(r.name)}, sb, t.Off)
@@ -327,6 +330,7 @@ type bp struct{ lbp, rbp int }
 
 var toks []Tok
 var pos int
+var _capped bool
 var nodes []Node
 var kids []int32
 var scratch []int32
@@ -375,7 +379,11 @@ func opt(body func() bool) bool {
 }
 func sepBy(elem func() bool, delim string) bool {
 \tif !elem() { return false }
-\tfor { sp := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if matchLit(delim, "$punct") && elem() { continue }; pos = sp; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break }
+\tfor {
+\t\tsp := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
+\t\tif !matchLit(delim, "$punct") { pos = sp; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break }
+\t\tif !elem() { break }   // a trailing delimiter is allowed — keep the pushed delim and stop
+\t}
 \treturn true
 }
 func altLit(opts [][2]string) bool {
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 4b032fb..a81c3f5 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -257,7 +257,8 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
     fn ${r.name}_post(op: &str) -> Option<i64> { match op { ${postArms}${postArms ? ', ' : ''}_ => None } }
     fn ${r.name}_atom(kind: &str) -> bool { matches!(kind, ${atomArm || '""'}) }
     fn ${r.name}_bp(&mut self, min_bp: i64) -> Option<Cst> {
-        let mut left = self.${r.name}_nud()?;
+        let mut left = self.${r.name}_nud(min_bp)?;
+        if self.capped { return Some(left); }
         let mut tail_closed = false;
         loop {
             let t = match self.peek() { Some(t) => t, None => break };
@@ -274,8 +275,10 @@ ${r.postfixToks.map(postfixArm).join('\n')}
         }
         Some(left)
     }
-    fn ${r.name}_nud(&mut self) -> Option<Cst> {
+    fn ${r.name}_nud(&mut self, min_bp: i64) -> Option<Cst> {
+        self.capped = false;
         let t = self.peek()?;
+${r.nudCapped.map((c) => `        if min_bp < ${c.capBp} { let save = self.pos; let mut kids: Vec<Cst> = Vec::new(); if ${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'} { self.capped = true; return Some(self.branch(${J(r.name)}, kids, save)); } self.pos = save; }`).join('\n')}
 ${tplNud}        if Parser::${r.name}_atom(t.kind) {
             self.pos += 1;
             return Some(Cst::node(${J(r.name)}, vec![Cst::leaf(t.kind, t.off, t.end)], t.off, t.end));
@@ -337,7 +340,7 @@ fn node(rule: &'static str, kids: Vec<Cst>) -> Cst { let o = kids[0].offset; let
 
 ${lexer(ir)}
 
-struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize }
+struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize, capped: bool }
 impl<'a> Parser<'a> {
     fn peek(&self) -> Option<Tok<'a>> { if self.pos < self.toks.len() { Some(self.toks[self.pos]) } else { None } }
     fn branch(&self, rule: &'static str, kids: Vec<Cst>, save: usize) -> Cst {
@@ -363,7 +366,11 @@ impl<'a> Parser<'a> {
     }
     fn sep_by(&mut self, elem: fn(&mut Parser<'a>, &mut Vec<Cst>) -> bool, delim: &str, kids: &mut Vec<Cst>) -> bool {
         if !elem(self, kids) { return false; }
-        loop { let sp = self.pos; let before = kids.len(); if self.match_lit(delim, "$punct", kids) && elem(self, kids) { continue; } self.pos = sp; kids.truncate(before); break; }
+        loop {
+            let sp = self.pos; let before = kids.len();
+            if !self.match_lit(delim, "$punct", kids) { self.pos = sp; kids.truncate(before); break; }
+            if !elem(self, kids) { break; }   // a trailing delimiter is allowed — keep the pushed delim and stop
+        }
         true
     }
     fn alt_lit(&mut self, opts: &[(&str, &'static str)], kids: &mut Vec<Cst>) -> bool {
@@ -390,15 +397,15 @@ fn main() {
     // Self-bench: a numeric arg N times the lex+parse loop and prints ms/iteration.
     if let Some(iters) = std::env::args().nth(1).and_then(|a| a.parse::<u64>().ok()) {
         // black_box on the input + result so the optimizer can't elide the lex/parse.
-        for _ in 0..3 { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0 }; std::hint::black_box(p.parse_${ir.entry}()); }
+        for _ in 0..3 { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false }; std::hint::black_box(p.parse_${ir.entry}()); }
         let t = std::time::Instant::now();
-        for _ in 0..iters { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0 }; std::hint::black_box(p.parse_${ir.entry}()); }
+        for _ in 0..iters { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false }; std::hint::black_box(p.parse_${ir.entry}()); }
         println!("{:.4}", t.elapsed().as_secs_f64() * 1000.0 / iters as f64);
         return;
     }
     let toks = lex(&src);
     let n = toks.len();
-    let mut p = Parser { toks, pos: 0 };
+    let mut p = Parser { toks, pos: 0, capped: false };
     match p.parse_${ir.entry}() {
         Some(root) if p.pos == n => { let mut out = String::new(); write_json(&root, &mut out); print!("{}", out); }
         _ => { eprintln!("parse error (pos {}/{})", p.pos, n); std::process::exit(1); }
diff --git a/src/target-ts.ts b/src/target-ts.ts
index fcfb818..0be1cb8 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -212,8 +212,9 @@ const ${r.name}_POST: Record<string, number> = ${POST};
 const ${r.name}_ATOM = ${atom};
 function parse${r.name}(): Node | null { return ${r.name}_bp(0); }
 function ${r.name}_bp(minBp: number): Node | null {
-  let left = ${r.name}_nud();
+  let left = ${r.name}_nud(minBp);
   if (left === null) return null;
+  if (_capped) return left;   // an assignment-level arrow admits no led
   let tailClosed = false;
   for (;;) {
     const t = peek();
@@ -233,9 +234,11 @@ ${r.postfixToks.map(postfixArm).join('\n')}
   }
   return left;
 }
-function ${r.name}_nud(): Node | null {
+function ${r.name}_nud(minBp: number): Node | null {
+  _capped = false;
   const t = peek();
   if (t === null) return null;
+${r.nudCapped.map((c) => `  if (minBp < ${c.capBp}) { const save = pos; const kids: Cst[] = []; if (${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'}) { _capped = true; return branch(${J(r.name)}, kids, save); } pos = save; }`).join('\n')}
 ${tplNud}  if (${r.name}_ATOM.has(t.kind)) { pos++; return { rule: ${J(r.name)}, children: [{ tokenType: t.kind, offset: t.off, end: t.end }], offset: t.off, end: t.end }; }
 ${r.nudBrackets.map(bracketNud).join('\n')}
   const pbp = ${r.name}_PRE[t.text];
@@ -287,6 +290,7 @@ ${lexer(ir)}
 
 let toks: Tok[] = [];
 let pos = 0;
+let _capped = false;
 function peek(): Tok | null { return pos < toks.length ? toks[pos] : null; }
 function branch(rule: string, kids: Cst[], save: number): Node {
   const offset = kids.length > 0 ? kids[0].offset : (save < toks.length ? toks[save].off : 0);
@@ -320,7 +324,11 @@ function opt(body: () => boolean, kids: Cst[]): boolean {
 }
 function sepBy(elem: () => boolean, delim: string, kids: Cst[]): boolean {
   if (!elem()) return false;
-  for (;;) { const sp = pos; const before = kids.length; if (matchLit(delim, '$punct', kids) && elem()) continue; pos = sp; kids.length = before; break; }
+  for (;;) {
+    const sp = pos; const before = kids.length;
+    if (!matchLit(delim, '$punct', kids)) { pos = sp; kids.length = before; break; }
+    if (!elem()) break;   // a trailing delimiter is allowed — keep the pushed delim and stop
+  }
   return true;
 }
 function altLit(opts: [string, string][], kids: Cst[]): boolean {
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index e9cfc59..71a28fd 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -142,6 +142,18 @@ const CASES: Case[] = [
     ],
     reject: ['return\n1;', 'return\nx;', 'return /*\n*/ 1;', 'return // c\n 1;'],
   },
+  {
+    // capBelow (assignment-level) arrow functions: a NUD parsed only when minBp < the
+    // connector's bp, admitting NO led once parsed; the `(x) => y` vs `(x)` ambiguity is
+    // resolved by longest-match ordering (the arrow is tried first, falls back to grouping).
+    grammar: 'arrowjs', path: '../examples/arrowjs.ts',
+    accept: [
+      'x => x;', '(a, b) => a + b;', '() => {};', 'x = (() => 1);', 'f(() => 1, 2);',
+      '(x);', 'a + b;', 'x => y => x;', '(() => 2);', '(a) => a;', 'x = y => y;', 'foo();',
+      '(a,) => b;', '(a, b,) => a;',   // trailing comma in params (sep allows a trailing delimiter)
+    ],
+    reject: ['=> x;', 'x => ;', '1 + () => 2;', '(,) => b;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From 395ba5113d1f56668f96d9a376d9f511c90967a9 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 04:14:55 +0800
Subject: [PATCH 18/27] emit-portable: a transparent group degrades to a `seq`
 step
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A `group` whose body is a multi-item sequence (e.g. a ctxMode group wrapping a
sequence) previously threw "group must reduce to a single step". Since ctxMode is
transparent to the portable parser and a `seq` step already exists, a transparent
group now degrades to a single `seq` step (or its sole step when the body is one);
only a no-`in` `suppress` group is still deferred. Both stepOf and stepOfPratt.

No new behaviour to verify beyond the existing seq step (seqjs) — full suite 42/42,
no regression. javascript.ts clears the multi-step group and advances to the next
construct, the no-`in` `suppress` context.
---
 src/emit-portable.ts | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 12751b0..91afac1 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -142,7 +142,11 @@ function buildIR(grammar: CstGrammar): ParserIR {
     switch (e.type) {
       case 'literal': return { t: 'lit', value: e.value, ttype: litTtype(e.value) };
       case 'ref': return tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name };
-      case 'group': { const ss = altSteps(e.body); if (ss.length !== 1) throw new Error('portable: group must reduce to a single step'); return ss[0]; }
+      case 'group': {   // transparent (ctxMode is invisible to the portable parser); only no-in `suppress` is deferred
+        if (e.suppress) throw new Error('portable: group with suppress (no-in context) not yet in scope');
+        const ss = altSteps(e.body);
+        return ss.length === 1 ? ss[0] : { t: 'seq', steps: ss };
+      }
       case 'not': return { t: 'not', steps: altSteps(e.body) };   // zero-width negative lookahead
       case 'sameLine': return { t: 'sameLine' };                  // zero-width no-newline assertion
       case 'seq': return { t: 'seq', steps: e.items.map(stepOf) };  // grouped sub-sequence (star/sep body)
@@ -316,8 +320,10 @@ function buildPratt(
     if (e.type === 'seq') return { t: 'seq', steps: e.items.map(stepOfPratt) };
     if (e.type === 'sameLine') return { t: 'sameLine' };
     if (e.type === 'not') return { t: 'not', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
-    // ctxMode (await/yield) is transparent to the portable parser (no fork); unwrap a non-seq group.
-    if (e.type === 'group' && !e.capBelow && !e.suppress && e.body.type !== 'seq') return stepOfPratt(e.body);
+    // ctxMode (await/yield) is transparent to the portable parser (no fork); unwrap the group.
+    if (e.type === 'group' && !e.capBelow && !e.suppress) {
+      return e.body.type === 'seq' ? { t: 'seq', steps: e.body.items.map(stepOfPratt) } : stepOfPratt(e.body);
+    }
     if (e.type === 'sep') return { t: 'sep', elem: stepOfPratt(e.element), delim: e.delimiter };
     if (e.type === 'quantifier' && e.kind === '?') return { t: 'opt', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
     if (e.type === 'quantifier' && e.kind === '*') return { t: 'star', step: stepOfPratt(e.body) };

From 544e277f716b8222a150f1b4f6ad252ff71ea40f Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 04:23:42 +0800
Subject: [PATCH 19/27] emit-portable: precedence-gated mixfix LEDs (ternary +
 chain-rhs in/instanceof)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The portable parser's mixfix leds bound maximally tight — fine for access tails
(`.`/`(`/`[`) but wrong for a precedence-carrying led like the ternary `? :`
(`a == b ? c : d` must group as `(a == b) ? c : d`). The led loop now gates such a
led by its lbp (from the grammar's ledPrec): bind only when lbp > minBp. And a
chain-rhs led (`in`/`instanceof`) parses its trailing self-operand at the level's bp
via a new `ruleBp` step, so `a in b in c` left-chains as `(a in b) in c`. Both
derive from analyzeGrammar's ledPrecByConnector — single-sourced with the interpreter.

examples/ledjs.ts verifies it across ts/go/rust — 11/11 accept (ternary below the
operators, right-associative `a ? b : c ? d : e`, chain-rhs `in`), 4/4 reject,
byte-identical to createParser. Full suite 42/42. This is the precedence foundation
the no-`in` (suppress) context builds on next.
---
 examples/ledjs.ts        | 46 ++++++++++++++++++++++++++++++++++++++++
 src/emit-portable.ts     | 14 +++++++++---
 src/target-go.ts         |  5 +++--
 src/target-rust.ts       |  6 ++++--
 src/target-ts.ts         |  8 ++++---
 test/portable-targets.ts | 11 ++++++++++
 6 files changed, 80 insertions(+), 10 deletions(-)
 create mode 100644 examples/ledjs.ts

diff --git a/examples/ledjs.ts b/examples/ledjs.ts
new file mode 100644
index 0000000..f13831a
--- /dev/null
+++ b/examples/ledjs.ts
@@ -0,0 +1,46 @@
+// Exercises precedence-gated mixfix LEDs: the ternary `? :` (a led that binds LOOSER than the
+// operators, so `a == b ? c : d` groups as `(a == b) ? c : d`) and `in`/`instanceof` (chain-rhs
+// leds at the relational level — `a in b in c` left-chains as `(a in b) in c`). Both need the
+// led-precedence gate the portable parser previously lacked (its mixfix leds bound maximally tight).
+import {
+  token, rule, defineGrammar, left, right, op,
+  seq, oneOf, range, star, many,
+} from '../src/api.ts';
+
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(range('0', '9'), star(range('0', '9'))), { scope: 'constant.numeric' });
+
+const jsPrec = [
+  right('='),
+  left('||'),
+  left('==', '!='),
+  left('<', '>'),
+  left('+', '-'),
+  left('*', '/'),
+];
+
+const Expr = rule(($) => [
+  Number_, Ident,
+  ['(', $, ')'],
+  [$, op, $],
+  [$, '?', $, ':', $],          // ternary (binds below `||`)
+  [$, 'in', $],                 // relational chain-rhs
+  [$, 'instanceof', $],
+]);
+const Stmt = rule(($) => [[Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'ledjs',
+  scopeName: 'source.ledjs',
+  tokens: { Ident, Number: Number_ },
+  prec: jsPrec,
+  ledPrec: [
+    { connector: '?', below: '||' },
+    { connector: 'in', sameAs: '<', chainRhs: true },
+    { connector: 'instanceof', sameAs: '<', chainRhs: true },
+  ],
+  rules: { Expr, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 91afac1..3e9fce4 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -46,6 +46,7 @@ export type Step =
   | { t: 'lit'; value: string; ttype: '$keyword' | '$punct' }   // match a literal by text
   | { t: 'tok'; name: string }                                  // match a token kind
   | { t: 'rule'; name: string }                                 // call a rule, append its node
+  | { t: 'ruleBp'; name: string; bp: number }                   // call a Pratt rule at a given binding power (chain-rhs led trailing operand)
   | { t: 'star'; step: Step }                                   // repeat inner 0+
   | { t: 'opt'; steps: Step[] }                                 // optional sub-sequence
   | { t: 'sep'; elem: Step; delim: string }                     // elem (delim elem)*
@@ -69,6 +70,7 @@ export type PrattRule = {
   binary: Array<{ op: string; lbp: number; rbp: number }>;  // LED: infix op, bind iff lbp > minBp, rhs at rbp
   leds: Bracket[];                                    // LED: mixfix continuation (call/member/index), tried before operators
   ledAccessTail: boolean[];                           // parallel to leds: a "closed punct-connector" tail (member/call/index) — disabled once a postfix binds
+  ledLbp: Array<number | null>;                       // parallel to leds: precedence gate (ternary/in/instanceof) — bind only when lbp > minBp; null = bind maximally tight
   postfixToks: string[];                              // LED: a postfix token `$ X` (e.g. a tagged template), tried like a mixfix led (also an access tail)
   postfix: Array<{ op: string; lbp: number }>;        // LED: a postfix operator `$ ++` — binds iff lbp > minBp + !tailClosed, no rhs, closes the tail
 };
@@ -268,6 +270,7 @@ function buildPratt(
   let sawPrefix = false, sawBinary = false, sawPostfix = false;
   const leds: Bracket[] = [];
   const ledAccessTail: boolean[] = [];
+  const ledLbp: Array<number | null> = [];
   const postfixToks: string[] = [];
   for (const alt of alts) {
     const items = alt.type === 'seq' ? alt.items : [alt];
@@ -303,12 +306,17 @@ function buildPratt(
     if (rest[0].type === 'op') { sawBinary = true; continue; }
     if (rest[0].type === 'postfix') { sawPostfix = true; continue; }   // postfix operator (`x++`)
     if (rest[0].type === 'literal') {
+      const conn = rest[0].value;
+      const prec = a.ledPrecByConnector.get(conn);   // { lbp, rhsBp } for ternary/in/instanceof
       const steps = rest.map((it) => stepOfPratt(it));
       const last = steps[steps.length - 1];
       const lastIsOperand = last !== undefined && last.t === 'rule' && last.name === name;   // open binary/ternary operand
-      const wordConnector = /^[A-Za-z]/.test(rest[0].value);                                  // `in`/`instanceof`/`as` — not a tail
-      leds.push({ first: rest[0].value, steps });
+      // chain-rhs (`in`/`instanceof`): the trailing self-operand parses at the level's bp (left-chain).
+      if (prec && prec.rhsBp !== null && lastIsOperand) steps[steps.length - 1] = { t: 'ruleBp', name, bp: prec.rhsBp };
+      const wordConnector = /^[A-Za-z]/.test(conn);                                           // `in`/`instanceof`/`as` — not a tail
+      leds.push({ first: conn, steps });
       ledAccessTail.push(!lastIsOperand && !wordConnector);
+      ledLbp.push(prec ? prec.lbp : null);
       continue;
     }
     if (rest.length === 1 && rest[0].type === 'ref' && a.tokenNames.has(rest[0].name)) { postfixToks.push(rest[0].name); continue; }  // postfix token (tagged template)
@@ -337,5 +345,5 @@ function buildPratt(
   const postfix = sawPostfix
     ? [...a.opTable.entries()].filter(([, info]) => info.position === 'postfix').map(([op, info]) => ({ op, lbp: info.lbp }))
     : [];
-  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, nudCapped, prefix, binary, leds, ledAccessTail, postfixToks, postfix };
+  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, nudCapped, prefix, binary, leds, ledAccessTail, ledLbp, postfixToks, postfix };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index 6729ec3..428bea6 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -169,6 +169,7 @@ function stepCond(s: Step): string {
     case 'lit': return `matchLit(${J(s.value)}, ${J(s.ttype)})`;
     case 'tok': return `matchTok(${J(s.name)})`;
     case 'rule': return `callRule(parse${s.name})`;
+    case 'ruleBp': return `callRule(func() int32 { return ${s.name}bp(${s.bp}) })`;
     case 'star': return `star(func() bool { return ${stepCond(s.step)} })`;
     case 'opt': return `opt(func() bool { return ${s.steps.map(stepCond).join(' && ')} })`;
     case 'sep': return `sepBy(func() bool { return ${stepCond(s.elem)} }, ${J(s.delim)})`;
@@ -208,7 +209,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
 \t\tif ${b.steps.map(stepCond).join(' && ')} { return finish(${J(r.name)}, sb, t.Off) }
 \t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1
 \t}`;
-  const ledArm = (b: Bracket, accessTail: boolean) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}t.Text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}t.Text == ${J(b.first)} {
 \t\t\tledSave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 \t\t\tscratch = append(scratch, left)
 \t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
@@ -239,7 +240,7 @@ func ${r.name}bp(minBp int) int32 {
 \tfor {
 \t\tt := peek()
 \t\tif t == nil { break }
-${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i])).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
 \t\tif post, ok := ${r.name}POST[t.Text]; ok && !tailClosed && post > minBp {
 \t\t\tsb := len(scratch); scratch = append(scratch, left, mkLeaf("$operator", t.Off, t.End)); pos++; tailClosed = true
diff --git a/src/target-rust.ts b/src/target-rust.ts
index a81c3f5..adbc530 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -177,6 +177,7 @@ function stepCond(s: Step): string {
     case 'lit': return `self.match_lit(${J(s.value)}, ${J(s.ttype)}, &mut kids)`;
     case 'tok': return `self.match_tok(${J(s.name)}, &mut kids)`;
     case 'rule': return `self.call_rule(Parser::parse_${s.name}, &mut kids)`;
+    case 'ruleBp': return `self.call_rule(|p| p.${s.name}_bp(${s.bp}), &mut kids)`;
     case 'star': return `self.star(|p, k| ${stepCondP(s.step)}, &mut kids)`;
     case 'opt': return `self.opt(|p, k| ${s.steps.map(stepCondP).join(' && ')}, &mut kids)`;
     case 'sep': return `self.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, &mut kids)`;
@@ -202,6 +203,7 @@ function stepCondP(s: Step): string {
     case 'lit': return `p.match_lit(${J(s.value)}, ${J(s.ttype)}, k)`;
     case 'tok': return `p.match_tok(${J(s.name)}, k)`;
     case 'rule': return `p.call_rule(Parser::parse_${s.name}, k)`;
+    case 'ruleBp': return `p.call_rule(|p| p.${s.name}_bp(${s.bp}), k)`;
     case 'star': return `p.star(|p, k| ${stepCondP(s.step)}, k)`;
     case 'opt': return `p.opt(|p, k| ${s.steps.map(stepCondP).join(' && ')}, k)`;
     case 'sep': return `p.sep_by(|p, k| ${stepCondP(s.elem)}, ${J(s.delim)}, k)`;
@@ -237,7 +239,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
             if ${b.steps.map(stepCond).join(' && ')} { return Some(node(${J(r.name)}, kids)); }
             self.pos = save; return None;
         }`;
-  const ledArm = (b: Bracket, accessTail: boolean) => `            if ${accessTail ? '!tail_closed && ' : ''}t.text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `            if ${accessTail ? '!tail_closed && ' : ''}${lbp !== null ? `${lbp} > min_bp && ` : ''}t.text == ${J(b.first)} {
                 let led_save = self.pos; let mut kids: Vec<Cst> = Vec::new();
                 if ${b.steps.map(stepCond).join(' && ')} {
                     let mut full = vec![left]; full.append(&mut kids);
@@ -262,7 +264,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
         let mut tail_closed = false;
         loop {
             let t = match self.peek() { Some(t) => t, None => break };
-${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i])).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
             if let Some(plbp) = Parser::${r.name}_post(t.text) { if !tail_closed && plbp > min_bp { self.pos += 1; let op_leaf = Cst::leaf("$operator", t.off, t.end); left = node(${J(r.name)}, vec![left, op_leaf]); tail_closed = true; continue; } }
             let (lbp, rbp) = match Parser::${r.name}_bin(t.text) { Some(x) => x, None => break };
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 0be1cb8..216a0c8 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -160,6 +160,7 @@ function stepCond(s: Step): string {
     case 'lit': return `matchLit(${J(s.value)}, ${J(s.ttype)}, kids)`;
     case 'tok': return `matchTok(${J(s.name)}, kids)`;
     case 'rule': return `callRule(parse${s.name}, kids)`;
+    case 'ruleBp': return `callRule(() => ${s.name}_bp(${s.bp}), kids)`;
     case 'star': return `star(() => ${stepCond(s.step)}, kids)`;
     case 'opt': return `opt(() => ${s.steps.map(stepCond).join(' && ')}, kids)`;
     case 'sep': return `sepBy(() => ${stepCond(s.elem)}, ${J(s.delim)}, kids)`;
@@ -193,8 +194,9 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
       if (${b.steps.map(stepCond).join(' && ')}) return node(${J(r.name)}, kids);
       pos = save; return null;
     }`;
-  // Access-tail leds (member/call/index) are disabled once a postfix has closed the operand.
-  const ledArm = (b: Bracket, accessTail: boolean) => `    if (${accessTail ? '!tailClosed && ' : ''}t.text === ${J(b.first)}) {
+  // Access-tail leds (member/call/index) are disabled once a postfix has closed the operand;
+  // a precedence-gated led (ternary/in/instanceof) binds only when its lbp > minBp.
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `    if (${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}t.text === ${J(b.first)}) {
       const ledSave = pos; const kids: Cst[] = [left];
       if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.name)}, kids); continue; }
       pos = ledSave; break;
@@ -219,7 +221,7 @@ function ${r.name}_bp(minBp: number): Node | null {
   for (;;) {
     const t = peek();
     if (t === null) break;
-${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i])).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
     const post = ${r.name}_POST[t.text];
     if (!tailClosed && post !== undefined && post > minBp) { pos++; const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end }; left = { rule: ${J(r.name)}, children: [left, opLeaf], offset: left.offset, end: t.end }; tailClosed = true; continue; }
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 71a28fd..c41234d 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -154,6 +154,17 @@ const CASES: Case[] = [
     ],
     reject: ['=> x;', 'x => ;', '1 + () => 2;', '(,) => b;'],
   },
+  {
+    // Precedence-gated mixfix LEDs: ternary `? :` (binds below the operators) and the
+    // chain-rhs relational leds `in`/`instanceof` (`a in b in c` left-chains).
+    grammar: 'ledjs', path: '../examples/ledjs.ts',
+    accept: [
+      'a == b ? c : d;', 'a ? b : c ? d : e;', 'a + b ? c : d - e;', 'a in b;',
+      'a in b in c;', 'x instanceof Y;', 'a < b in c;', '1 + 2 * 3 ? 4 : 5;',
+      '(a ? b : c) + d;', 'a in b ? c : d;', 'a = b ? c : d;',
+    ],
+    reject: ['a ? b;', 'a ? : c;', 'in b;', 'a instanceof;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From ba158c0e61bc4fe3180bef95c77fe305848e4655 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 04:50:34 +0800
Subject: [PATCH 20/27] =?UTF-8?q?emit-portable:=20no-in=20suppress,=20+-qu?=
 =?UTF-8?q?antifier,=20sep/bracket=20fixes=20=E2=80=94=20javascript.ts=20n?=
 =?UTF-8?q?ow=20EMITS?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A run of constructs that together take the real javascript.ts grammar through the
whole portable emitter end-to-end:

- no-`in` (suppress) context: a `for (binding in iterable)` head parses its binding
  with the `in` led disabled (examples/noinjs.ts, 9/9+4/4 ×3). Threads a
  suppressed-connector set consumed per led loop.
- one-or-more `+` quantifier (`x+` = `x x*`) — the last buildIR throw; with it,
  javascript.ts EMITS in all three targets.
- Two latent `sep` bugs, both exposed only by the real grammar (earlier grammars
  wrapped sep in opt or never tested the shapes — the aggregate passed for the wrong
  reason): gen-parser's sep is `(element (delim element)*)?`, i.e. the WHOLE list is
  optional (empty `f()` valid) AND a trailing delimiter is allowed. sepBy now matches.
- A NUD bracket that fails now FALLS THROUGH to the next same-first-token alternative
  instead of returning null — javascript has four `new`-led NUDs.

Result: javascript.ts emits, compiles and runs in ts/go/rust, and is byte-identical to
createParser on basic JS (var/function/arrow/ternary/member-call/for-in/while/if/class/
new/template/regex/instanceof/try/switch) — 23/24 in TS, the one miss a `new a.b()`
NewTarget member-constructor CST shape. The await/yield fork (async/await) and that
new-expression edge remain. Full suite 42/42; existing gate unaffected by the shared
sep/bracket fixes.
---
 examples/noinjs.ts       | 35 +++++++++++++++++++++++++++++++++++
 src/emit-portable.ts     | 19 +++++++++----------
 src/target-go.ts         |  9 ++++++---
 src/target-rust.ts       | 18 +++++++++++-------
 src/target-ts.ts         |  9 ++++++---
 test/portable-targets.ts | 11 +++++++++++
 6 files changed, 78 insertions(+), 23 deletions(-)
 create mode 100644 examples/noinjs.ts

diff --git a/examples/noinjs.ts b/examples/noinjs.ts
new file mode 100644
index 0000000..bc413ad
--- /dev/null
+++ b/examples/noinjs.ts
@@ -0,0 +1,35 @@
+// Exercises the no-`in` (suppress) context. In a `for (binding in iterable)` head, the
+// binding is parsed with the `in` LED DISABLED — `exclude('in', Expr)` — so the `in` belongs
+// to the for-head, not to a relational expression inside the binding. Outside a for-head, `in`
+// binds normally. The portable parser threads a suppressed-connector set into the led loop.
+import {
+  token, rule, defineGrammar, left, op, exclude,
+  seq, oneOf, range, star, many,
+} from '../src/api.ts';
+
+const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
+const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
+const Ident = token(seq(idStart, star(idCont)), { identifier: true, scope: 'variable' });
+const Number_ = token(seq(range('0', '9'), star(range('0', '9'))), { scope: 'constant.numeric' });
+
+const jsPrec = [left('||'), left('<', '>'), left('+', '-')];
+
+const Expr = rule(($) => [
+  Number_, Ident,
+  ['(', $, ')'],
+  [$, op, $],
+  [$, 'in', $],
+  [$, '.', Ident],
+]);
+const ForHead = rule(($) => [['for', '(', exclude('in', Expr), 'in', Expr, ')', Stmt]]);
+const Stmt = rule(($) => [ForHead, [Expr, ';']]);
+const Program = rule(($) => [many(Stmt)]);
+
+export default defineGrammar({
+  name: 'noinjs',
+  scopeName: 'source.noinjs',
+  tokens: { Ident, Number: Number_ },
+  prec: jsPrec,
+  ledPrec: [{ connector: 'in', sameAs: '<', chainRhs: true }],
+  rules: { Expr, ForHead, Stmt, Program },
+});
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index 3e9fce4..dffa5f5 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -54,7 +54,8 @@ export type Step =
   | { t: 'alt'; branches: Step[][] }                            // inline alternation of sub-sequences (backtracking)
   | { t: 'not'; steps: Step[] }                                 // zero-width negative lookahead (consumes nothing)
   | { t: 'seq'; steps: Step[] }                                 // a grouped sub-sequence (e.g. a star body `(',' Expr)`)
-  | { t: 'sameLine' };                                          // zero-width: the next token is on the same line (no preceding newline)
+  | { t: 'sameLine' }                                           // zero-width: the next token is on the same line (no preceding newline)
+  | { t: 'suppress'; connectors: string[]; steps: Step[] };     // parse the body with these LED connectors disabled (no-`in` context)
 export type Alt = Step[];
 
 export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
@@ -144,9 +145,9 @@ function buildIR(grammar: CstGrammar): ParserIR {
     switch (e.type) {
       case 'literal': return { t: 'lit', value: e.value, ttype: litTtype(e.value) };
       case 'ref': return tokenNames.has(e.name) ? { t: 'tok', name: e.name } : { t: 'rule', name: e.name };
-      case 'group': {   // transparent (ctxMode is invisible to the portable parser); only no-in `suppress` is deferred
-        if (e.suppress) throw new Error('portable: group with suppress (no-in context) not yet in scope');
+      case 'group': {   // transparent (ctxMode is invisible to the portable parser)
         const ss = altSteps(e.body);
+        if (e.suppress && e.suppress.length) return { t: 'suppress', connectors: e.suppress, steps: ss };   // no-`in` context
         return ss.length === 1 ? ss[0] : { t: 'seq', steps: ss };
       }
       case 'not': return { t: 'not', steps: altSteps(e.body) };   // zero-width negative lookahead
@@ -156,7 +157,7 @@ function buildIR(grammar: CstGrammar): ParserIR {
       case 'quantifier':
         if (e.kind === '*') return { t: 'star', step: stepOf(e.body) };
         if (e.kind === '?') return { t: 'opt', steps: altSteps(e.body) };
-        if (e.kind === '+') throw new Error("portable: '+' not yet modeled (use '*')");
+        if (e.kind === '+') return { t: 'seq', steps: [stepOf(e.body), { t: 'star', step: stepOf(e.body) }] };   // x+ = x x*
         break;
       case 'alt': {
         if (e.items.every((it) => it.type === 'literal')) {   // fast path: all-literal alternation
@@ -289,15 +290,11 @@ function buildPratt(
         continue;
       }
       if (items[0].type === 'literal') { nudBrackets.push({ first: items[0].value, steps: items.map((it) => stepOfPratt(it)) }); continue; }
-      // A single transparent group unwraps to its body (an explicit grouping of the NUD sequence).
+      // A single transparent (non-suppress) group unwraps to its body (an explicit grouping).
       let nudItems = items;
       if (items.length === 1 && items[0].type === 'group' && !items[0].suppress) {
         nudItems = items[0].body.type === 'seq' ? items[0].body.items : [items[0].body];
       }
-      // A no-`in`/suppress group is a deeper construct — defer.
-      if (nudItems.some((it) => it.type === 'group' && it.suppress)) {
-        throw new Error(`portable: Pratt NUD with suppress (no-in context) not yet in scope (rule ${name})`);
-      }
       nudSeqs.push(nudItems.map((it) => stepOfPratt(it)));   // general NUD sequence (guarded ident, class expr)
       continue;
     }
@@ -328,13 +325,15 @@ function buildPratt(
     if (e.type === 'seq') return { t: 'seq', steps: e.items.map(stepOfPratt) };
     if (e.type === 'sameLine') return { t: 'sameLine' };
     if (e.type === 'not') return { t: 'not', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
+    if (e.type === 'group' && e.suppress && e.suppress.length) return { t: 'suppress', connectors: e.suppress, steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
     // ctxMode (await/yield) is transparent to the portable parser (no fork); unwrap the group.
-    if (e.type === 'group' && !e.capBelow && !e.suppress) {
+    if (e.type === 'group' && !e.capBelow) {
       return e.body.type === 'seq' ? { t: 'seq', steps: e.body.items.map(stepOfPratt) } : stepOfPratt(e.body);
     }
     if (e.type === 'sep') return { t: 'sep', elem: stepOfPratt(e.element), delim: e.delimiter };
     if (e.type === 'quantifier' && e.kind === '?') return { t: 'opt', steps: (e.body.type === 'seq' ? e.body.items : [e.body]).map(stepOfPratt) };
     if (e.type === 'quantifier' && e.kind === '*') return { t: 'star', step: stepOfPratt(e.body) };
+    if (e.type === 'quantifier' && e.kind === '+') return { t: 'seq', steps: [stepOfPratt(e.body), { t: 'star', step: stepOfPratt(e.body) }] };
     if (e.type === 'literal') return { t: 'lit', value: e.value, ttype: litTtype(e.value) };
     return stepOf(e);
   }
diff --git a/src/target-go.ts b/src/target-go.ts
index 428bea6..2fad500 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -178,6 +178,7 @@ function stepCond(s: Step): string {
     case 'not': return `func() bool { save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); m := ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return !m }()`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
     case 'sameLine': return `func() bool { t := peek(); return t != nil && !t.Nl }()`;
+    case 'suppress': return `func() bool { _suppressNext = map[string]bool{${s.connectors.map((c) => `${J(c)}: true`).join(', ')}}; _r := (${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}); _suppressNext = nil; return _r }()`;
   }
 }
 
@@ -207,9 +208,9 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
   const bracketNud = (b: Bracket) => `\tif t.Text == ${J(b.first)} {
 \t\tsave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 \t\tif ${b.steps.map(stepCond).join(' && ')} { return finish(${J(r.name)}, sb, t.Off) }
-\t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1
+\t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]
 \t}`;
-  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}t.Text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}!_mySup[${J(b.first)}] && t.Text == ${J(b.first)} {
 \t\t\tledSave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 \t\t\tscratch = append(scratch, left)
 \t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
@@ -233,6 +234,7 @@ var ${r.name}POST = map[string]int{${post}}
 var ${r.name}ATOM = map[string]bool{${atoms}}
 func parse${r.name}() int32 { return ${r.name}bp(0) }
 func ${r.name}bp(minBp int) int32 {
+\t_mySup := _suppressNext; _suppressNext = nil; _ = _mySup
 \tleft := ${r.name}nud(minBp)
 \tif left < 0 { return -1 }
 \tif _capped { return left }
@@ -332,6 +334,7 @@ type bp struct{ lbp, rbp int }
 var toks []Tok
 var pos int
 var _capped bool
+var _suppressNext map[string]bool
 var nodes []Node
 var kids []int32
 var scratch []int32
@@ -379,7 +382,7 @@ func opt(body func() bool) bool {
 \tsp := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if !body() { pos = sp; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }; return true
 }
 func sepBy(elem func() bool, delim string) bool {
-\tif !elem() { return false }
+\tif !elem() { return true }   // the whole separated list is optional — zero elements is valid
 \tfor {
 \t\tsp := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 \t\tif !matchLit(delim, "$punct") { pos = sp; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break }
diff --git a/src/target-rust.ts b/src/target-rust.ts
index adbc530..a05f71b 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -186,6 +186,7 @@ function stepCond(s: Step): string {
     case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(self, &mut kids)`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
     case 'sameLine': return `matches!(self.peek(), Some(t) if !t.nl)`;
+    case 'suppress': return `{ self.suppress_next = vec![${s.connectors.map(J).join(', ')}]; let _r = (${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}); self.suppress_next = Vec::new(); _r }`;
   }
 }
 // A backtracking inline alternation rendered as an immediately-applied closure over (p, k),
@@ -212,6 +213,7 @@ function stepCondP(s: Step): string {
     case 'not': return `(|p: &mut Parser<'a>, k: &mut Vec<Cst>| -> bool { ${notBody(s.steps)} })(p, k)`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCondP).join(' && ') : 'true'})`;
     case 'sameLine': return `matches!(p.peek(), Some(t) if !t.nl)`;
+    case 'suppress': return `{ p.suppress_next = vec![${s.connectors.map(J).join(', ')}]; let _r = (${s.steps.length ? s.steps.map(stepCondP).join(' && ') : 'true'}); p.suppress_next = Vec::new(); _r }`;
   }
 }
 
@@ -237,9 +239,9 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
   const bracketNud = (b: Bracket) => `        if t.text == ${J(b.first)} {
             let save = self.pos; let mut kids: Vec<Cst> = Vec::new();
             if ${b.steps.map(stepCond).join(' && ')} { return Some(node(${J(r.name)}, kids)); }
-            self.pos = save; return None;
+            self.pos = save;   // fall through to the next NUD alternative
         }`;
-  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `            if ${accessTail ? '!tail_closed && ' : ''}${lbp !== null ? `${lbp} > min_bp && ` : ''}t.text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `            if ${accessTail ? '!tail_closed && ' : ''}${lbp !== null ? `${lbp} > min_bp && ` : ''}!my_sup.iter().any(|c| *c == ${J(b.first)}) && t.text == ${J(b.first)} {
                 let led_save = self.pos; let mut kids: Vec<Cst> = Vec::new();
                 if ${b.steps.map(stepCond).join(' && ')} {
                     let mut full = vec![left]; full.append(&mut kids);
@@ -259,6 +261,8 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
     fn ${r.name}_post(op: &str) -> Option<i64> { match op { ${postArms}${postArms ? ', ' : ''}_ => None } }
     fn ${r.name}_atom(kind: &str) -> bool { matches!(kind, ${atomArm || '""'}) }
     fn ${r.name}_bp(&mut self, min_bp: i64) -> Option<Cst> {
+        let my_sup = std::mem::take(&mut self.suppress_next);
+        let _ = &my_sup;
         let mut left = self.${r.name}_nud(min_bp)?;
         if self.capped { return Some(left); }
         let mut tail_closed = false;
@@ -342,7 +346,7 @@ fn node(rule: &'static str, kids: Vec<Cst>) -> Cst { let o = kids[0].offset; let
 
 ${lexer(ir)}
 
-struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize, capped: bool }
+struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize, capped: bool, suppress_next: Vec<&'static str> }
 impl<'a> Parser<'a> {
     fn peek(&self) -> Option<Tok<'a>> { if self.pos < self.toks.len() { Some(self.toks[self.pos]) } else { None } }
     fn branch(&self, rule: &'static str, kids: Vec<Cst>, save: usize) -> Cst {
@@ -367,7 +371,7 @@ impl<'a> Parser<'a> {
         let sp = self.pos; let before = kids.len(); if !body(self, kids) { self.pos = sp; kids.truncate(before); } true
     }
     fn sep_by(&mut self, elem: fn(&mut Parser<'a>, &mut Vec<Cst>) -> bool, delim: &str, kids: &mut Vec<Cst>) -> bool {
-        if !elem(self, kids) { return false; }
+        if !elem(self, kids) { return true; }   // the whole separated list is optional — zero elements is valid
         loop {
             let sp = self.pos; let before = kids.len();
             if !self.match_lit(delim, "$punct", kids) { self.pos = sp; kids.truncate(before); break; }
@@ -399,15 +403,15 @@ fn main() {
     // Self-bench: a numeric arg N times the lex+parse loop and prints ms/iteration.
     if let Some(iters) = std::env::args().nth(1).and_then(|a| a.parse::<u64>().ok()) {
         // black_box on the input + result so the optimizer can't elide the lex/parse.
-        for _ in 0..3 { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false }; std::hint::black_box(p.parse_${ir.entry}()); }
+        for _ in 0..3 { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new() }; std::hint::black_box(p.parse_${ir.entry}()); }
         let t = std::time::Instant::now();
-        for _ in 0..iters { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false }; std::hint::black_box(p.parse_${ir.entry}()); }
+        for _ in 0..iters { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new() }; std::hint::black_box(p.parse_${ir.entry}()); }
         println!("{:.4}", t.elapsed().as_secs_f64() * 1000.0 / iters as f64);
         return;
     }
     let toks = lex(&src);
     let n = toks.len();
-    let mut p = Parser { toks, pos: 0, capped: false };
+    let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new() };
     match p.parse_${ir.entry}() {
         Some(root) if p.pos == n => { let mut out = String::new(); write_json(&root, &mut out); print!("{}", out); }
         _ => { eprintln!("parse error (pos {}/{})", p.pos, n); std::process::exit(1); }
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 216a0c8..c30e81f 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -169,6 +169,7 @@ function stepCond(s: Step): string {
     case 'not': return `(() => { const sp = pos; const bk = kids.length; const m = ${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}; pos = sp; kids.length = bk; return !m; })()`;
     case 'seq': return `(${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'})`;
     case 'sameLine': return `(() => { const t = peek(); return t !== null && !t.nl; })()`;
+    case 'suppress': return `(() => { _suppressNext = new Set([${s.connectors.map(J).join(', ')}]); const _r = (${s.steps.length ? s.steps.map(stepCond).join(' && ') : 'true'}); _suppressNext = null; return _r; })()`;
   }
 }
 
@@ -192,11 +193,11 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
   const bracketNud = (b: Bracket) => `    if (t.text === ${J(b.first)}) {
       const save = pos; const kids: Cst[] = [];
       if (${b.steps.map(stepCond).join(' && ')}) return node(${J(r.name)}, kids);
-      pos = save; return null;
+      pos = save;   // fall through to the next NUD alternative (e.g. another '${b.first}'-led form)
     }`;
   // Access-tail leds (member/call/index) are disabled once a postfix has closed the operand;
   // a precedence-gated led (ternary/in/instanceof) binds only when its lbp > minBp.
-  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `    if (${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}t.text === ${J(b.first)}) {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `    if (${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}(_mySup === null || !_mySup.has(${J(b.first)})) && t.text === ${J(b.first)}) {
       const ledSave = pos; const kids: Cst[] = [left];
       if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.name)}, kids); continue; }
       pos = ledSave; break;
@@ -214,6 +215,7 @@ const ${r.name}_POST: Record<string, number> = ${POST};
 const ${r.name}_ATOM = ${atom};
 function parse${r.name}(): Node | null { return ${r.name}_bp(0); }
 function ${r.name}_bp(minBp: number): Node | null {
+  const _mySup = _suppressNext; _suppressNext = null;   // no-in: consume the suppressed-connector set for this led loop
   let left = ${r.name}_nud(minBp);
   if (left === null) return null;
   if (_capped) return left;   // an assignment-level arrow admits no led
@@ -293,6 +295,7 @@ ${lexer(ir)}
 let toks: Tok[] = [];
 let pos = 0;
 let _capped = false;
+let _suppressNext: Set<string> | null = null;
 function peek(): Tok | null { return pos < toks.length ? toks[pos] : null; }
 function branch(rule: string, kids: Cst[], save: number): Node {
   const offset = kids.length > 0 ? kids[0].offset : (save < toks.length ? toks[save].off : 0);
@@ -325,7 +328,7 @@ function opt(body: () => boolean, kids: Cst[]): boolean {
   const sp = pos; const before = kids.length; if (!body()) { pos = sp; kids.length = before; } return true;
 }
 function sepBy(elem: () => boolean, delim: string, kids: Cst[]): boolean {
-  if (!elem()) return false;
+  if (!elem()) return true;   // the whole separated list is optional — zero elements is valid
   for (;;) {
     const sp = pos; const before = kids.length;
     if (!matchLit(delim, '$punct', kids)) { pos = sp; kids.length = before; break; }
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index c41234d..fe52c60 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -165,6 +165,17 @@ const CASES: Case[] = [
     ],
     reject: ['a ? b;', 'a ? : c;', 'in b;', 'a instanceof;'],
   },
+  {
+    // The no-`in` (suppress) context: a `for (binding in iterable)` head parses its binding
+    // with the `in` led disabled, so `in` belongs to the for-head, not the binding.
+    grammar: 'noinjs', path: '../examples/noinjs.ts',
+    accept: [
+      'for (x in y) z;', 'x in y;', 'for (a.b in c) d;', 'a in b in c;',
+      'for ((x) in y) z;', 'for (x in y) a in b;', 'for (x in a in b) z;',
+      '(a in b);', 'for (a in b) for (c in d) e;',
+    ],
+    reject: ['for (x y) z;', 'for x in y;', 'for (in y) z;', 'for (x in) z;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From 0b6d7fddf97514e255dfbc8de3e61fae78ddac2a Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 05:26:48 +0800
Subject: [PATCH 21/27] emit-portable: the real javascript.ts grammar emits to
 ts/go/rust (issue #6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The target-agnostic emitter now handles a full language end-to-end. javascript.ts —
89 rules after the [Await]/[Yield] fork — emits, compiles and runs in all three
targets, byte-identical to createParser, and is gate-maintained (28/28 accept,
6/6 reject ×3, ASCII corpus). What it took:

- Left recursion: a left-recursive non-Pratt rule (NewTarget, TS Type) now routes
  through buildPratt (atom-then-continuation), fixing the infinite recursion a plain
  rd rule hit.
- The [Await]/[Yield] context fork: emitPortableParser applies `withAwaitYield`
  exactly as createParser does, so `await`/`yield` are keywords in async/generator
  bodies and identifiers elsewhere — name-forked into $A/$Y/$AY families.
- A forked rule labels its CST node with the CANON base name (cstName), not the
  $-suffixed family name; and the $ in family names (a valid TS but not Go/Rust
  identifier) is sanitized to `_` for the emitted parse-fn names.
- Full JS whitespace (`\s`: NBSP/LS/PS/…), not just ASCII.
- A leaked `_capped` flag: it is a global, but gen-parser's `capped` is local, so a
  grouping `(arrow)` leaked the cap to the outer expression and dropped a trailing
  call (`(() => {})()`). Non-capped NUD arms now force it false.
- Two more `sep` shapes (empty list `f()`, both surfaced by the real grammar).

ts/go/rust all 28/28 on the ASCII corpus (destructuring, generators, classes,
optional chaining, async/await, labels). Byte-based go/rust use UTF-8 offsets —
identical to the JS oracle for ASCII; non-ASCII offset units differ inherently.
Full suite 42/42.
---
 src/emit-portable.ts     | 48 +++++++++++++++++++++++++++++++++-------
 src/target-go.ts         | 32 +++++++++++++++------------
 src/target-rust.ts       | 30 +++++++++++++++----------
 src/target-ts.ts         | 34 ++++++++++++++++------------
 test/portable-targets.ts | 19 ++++++++++++++++
 5 files changed, 115 insertions(+), 48 deletions(-)

diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index dffa5f5..a6a24eb 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -21,6 +21,7 @@
 // operators. buildIR THROWS on a construct outside this set rather than emit a wrong
 // parser. This is enough to derive a real JavaScript-subset parser (examples/minijs.ts).
 import type { CstGrammar, RuleExpr, TokenDecl, TokenPattern } from './types.ts';
+import { withAwaitYield } from './await-yield-fork.ts';
 import { analyzeGrammar, findEntryRule } from './grammar-analysis.ts';
 import { collectLiterals, isKeywordLiteral } from './grammar-utils.ts';
 import {
@@ -58,11 +59,12 @@ export type Step =
   | { t: 'suppress'; connectors: string[]; steps: Step[] };     // parse the body with these LED connectors disabled (no-`in` context)
 export type Alt = Step[];
 
-export type RdRule = { kind: 'rd'; name: string; alts: Alt[] };
+export type RdRule = { kind: 'rd'; name: string; cstName: string; alts: Alt[] };
 export type Bracket = { first: string; steps: Step[] };          // a literal-led sequence (grouping/array; LED call/index)
 export type PrattRule = {
   kind: 'pratt';
-  name: string;
+  name: string;       // the (possibly $A/$Y-forked) rule name — used for the parse fn names
+  cstName: string;    // the CANON name — the CST node label (a fork collapses to its base)
   nudToks: string[];                                  // NUD: a bare token wrapped in a node
   nudBrackets: Bracket[];                             // NUD: '(' … ')' / '[' … ']'
   nudSeqs: Step[][];                                  // NUD: a general sequence (guarded ident, class expr), tried with backtracking
@@ -123,7 +125,10 @@ export interface Target {
 }
 
 export function emitPortableParser(grammar: CstGrammar, target: Target): string {
-  return target.render(buildIR(grammar));
+  // Apply the [Await]/[Yield] context fork exactly as createParser does, so `await`/`yield`
+  // are keywords inside async/generator bodies and identifiers outside — name-forked into
+  // $A/$Y/$AY rule families. Every other consumer (and the portable parser) sees plain rules.
+  return target.render(buildIR(withAwaitYield(grammar)));
 }
 
 // ── buildIR: grammar + analysis → the target-agnostic parse plan ──
@@ -174,8 +179,12 @@ function buildIR(grammar: CstGrammar): ParserIR {
   }
 
   const rules: RuleIR[] = grammar.rules.map((r) => {
-    if (a.prattRules.has(r.name)) return buildPratt(r.name, r.body, a, stepOf, altSteps, litTtype);
-    return { kind: 'rd', name: r.name, alts: r.body.type === 'alt' ? r.body.items.map(altSteps) : [altSteps(r.body)] };
+    const cstName = (r as { canon?: string }).canon ?? r.name;   // a forked $A/$Y rule labels its CST node with the base name
+    // Pratt rules AND left-recursive non-Pratt rules (e.g. NewTarget, TS Type) both parse as
+    // atom-then-continuation: buildPratt detects `startsSelf` and splits accordingly, so routing
+    // left-recursive rules through it avoids the infinite left-recursion a plain rd rule would hit.
+    if (a.prattRules.has(r.name) || a.leftRecSet.has(r.name)) return buildPratt(r.name, cstName, r.body, a, stepOf, altSteps, litTtype);
+    return { kind: 'rd', name: r.name, cstName, alts: r.body.type === 'alt' ? r.body.items.map(altSteps) : [altSteps(r.body)] };
   });
 
   // Regex-vs-division context (only if the grammar declares a regex token + config).
@@ -212,7 +221,30 @@ function buildIR(grammar: CstGrammar): ParserIR {
     };
   }
 
-  return { grammarName: grammar.name ?? 'grammar', entry: findEntryRule(grammar), tokens, puncts, rules, regexCtx, tpl };
+  // The [Await]/[Yield] fork names rules `Expr$A`/`Expr$Y` — `$` is a valid TS identifier but
+  // NOT a Go/Rust one. Sanitize every rule-IDENTIFIER use (`$`→`_`) for the emitted parse-fn
+  // names; the CST node label (cstName) keeps the canon base name, so the tree is unchanged.
+  const san = (n: string) => n.replace(/\$/g, '_');
+  const sanStep = (s: Step): void => {
+    if (s.t === 'rule' || s.t === 'ruleBp') s.name = san(s.name);
+    else if (s.t === 'star') sanStep(s.step);
+    else if (s.t === 'opt' || s.t === 'not' || s.t === 'seq' || s.t === 'suppress') s.steps.forEach(sanStep);
+    else if (s.t === 'sep') sanStep(s.elem);
+    else if (s.t === 'alt') s.branches.forEach((b) => b.forEach(sanStep));
+  };
+  for (const r of rules) {
+    r.name = san(r.name);
+    if (r.kind === 'rd') r.alts.forEach((alt) => alt.forEach(sanStep));
+    else {
+      r.nudBrackets.forEach((b) => b.steps.forEach(sanStep));
+      r.nudSeqs.forEach((seq) => seq.forEach(sanStep));
+      r.nudCapped.forEach((c) => c.steps.forEach(sanStep));
+      r.leds.forEach((b) => b.steps.forEach(sanStep));
+    }
+  }
+  if (tpl) tpl.interpRule = san(tpl.interpRule);
+
+  return { grammarName: grammar.name ?? 'grammar', entry: san(findEntryRule(grammar)), tokens, puncts, rules, regexCtx, tpl };
 }
 
 // Classify a token: a fast-path shape (run/string/line/block) when one cleanly matches,
@@ -259,7 +291,7 @@ function codesToRanges(codes: number[]): CharRange[] {
 // A Pratt rule's alternatives → NUD atoms/brackets/prefix + binary + mixfix LEDs.
 // Binding powers come from the analysis (opTable/prefixOps), single-sourced with the interpreter.
 function buildPratt(
-  name: string, body: RuleExpr, a: ReturnType<typeof analyzeGrammar>,
+  name: string, cstName: string, body: RuleExpr, a: ReturnType<typeof analyzeGrammar>,
   stepOf: (e: RuleExpr) => Step, altSteps: (e: RuleExpr) => Step[],
   litTtype: (v: string) => '$keyword' | '$punct',
 ): PrattRule {
@@ -344,5 +376,5 @@ function buildPratt(
   const postfix = sawPostfix
     ? [...a.opTable.entries()].filter(([, info]) => info.position === 'postfix').map(([op, info]) => ({ op, lbp: info.lbp }))
     : [];
-  return { kind: 'pratt', name, nudToks, nudBrackets, nudSeqs, nudCapped, prefix, binary, leds, ledAccessTail, ledLbp, postfixToks, postfix };
+  return { kind: 'pratt', name, cstName, nudToks, nudBrackets, nudSeqs, nudCapped, prefix, binary, leds, ledAccessTail, ledLbp, postfixToks, postfix };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index 2fad500..0e58d86 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -154,8 +154,8 @@ ${emitHooks}
 \t_ = pendingNl
 ${rxState}${tplState}${emitFn}${pushTokFn}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
 \t\tc := int(src[pos])
-\t\tif c == 32 || c == 9 { pos++; continue }
-\t\tif c == 10 || c == 13 { pendingNl = true; pos++; continue }
+\t\tif c == 10 || c == 13 || c == 8232 || c == 8233 { pendingNl = true; pos++; continue }
+\t\tif c == 32 || c == 9 || c == 11 || c == 12 || c == 160 || c == 5760 || (c >= 8192 && c <= 8202) || c == 8239 || c == 8287 || c == 12288 || c == 65279 { pos++; continue }
 ${tplDispatch}${toks}
 ${puncts}
 \t\tpanic(fmt.Sprintf("lex error at %d", pos))
@@ -184,7 +184,7 @@ function stepCond(s: Step): string {
 
 function rdRule(r: RdRule): string {
   const alt = (steps: Step[]) =>
-    `\tif ${steps.map(stepCond).join(' && ')} { return finish(${J(r.name)}, sb, offAt(save)) }
+    `\tif ${steps.map(stepCond).join(' && ')} { return finish(${J(r.cstName)}, sb, offAt(save)) }
 \tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]`;
   return `func parse${r.name}() int32 {
 \tsave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
@@ -199,7 +199,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
 \t\tnode := matchTemplate()
 \t\tif node < 0 { return -1 }
 \t\tsb := len(scratch); scratch = append(scratch, node)
-\t\treturn finish(${J(r.name)}, sb, nodes[node].Offset)
+\t\treturn finish(${J(r.cstName)}, sb, nodes[node].Offset)
 \t}\n`
     : '';
   const bin = r.binary.map((b) => `${J(b.op)}: {${b.lbp}, ${b.rbp}}`).join(', ');
@@ -207,24 +207,24 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
   const atoms = r.nudToks.map((k) => `${J(k)}: true`).join(', ');
   const bracketNud = (b: Bracket) => `\tif t.Text == ${J(b.first)} {
 \t\tsave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
-\t\tif ${b.steps.map(stepCond).join(' && ')} { return finish(${J(r.name)}, sb, t.Off) }
+\t\tif ${b.steps.map(stepCond).join(' && ')} { return finish(${J(r.cstName)}, sb, t.Off) }
 \t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]
 \t}`;
   const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}!_mySup[${J(b.first)}] && t.Text == ${J(b.first)} {
 \t\t\tledSave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 \t\t\tscratch = append(scratch, left)
-\t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
+\t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.cstName)}, sb, nodes[left].Offset); continue }
 \t\t\tpos = ledSave; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; break
 \t\t}`;
   const postfixArm = (tok: string) => {
     const tplPart = tpl && tok === tpl.token ? `
 \t\tif !tailClosed && t.Kind == "$templateHead" {
 \t\t\tnode := matchTemplate()
-\t\t\tif node >= 0 { sb := len(scratch); scratch = append(scratch, left, node); left = finish(${J(r.name)}, sb, nodes[left].Offset); continue }
+\t\t\tif node >= 0 { sb := len(scratch); scratch = append(scratch, left, node); left = finish(${J(r.cstName)}, sb, nodes[left].Offset); continue }
 \t\t}` : '';
     return `\t\tif !tailClosed && t.Kind == ${J(tok)} {
 \t\t\tsb := len(scratch); scratch = append(scratch, left, mkLeaf(t.Kind, t.Off, t.End)); pos++
-\t\t\tleft = finish(${J(r.name)}, sb, nodes[left].Offset); continue
+\t\t\tleft = finish(${J(r.cstName)}, sb, nodes[left].Offset); continue
 \t\t}${tplPart}`;
   };
   const post = r.postfix.map((p) => `${J(p.op)}: ${p.lbp}`).join(', ');
@@ -246,7 +246,7 @@ ${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
 \t\tif post, ok := ${r.name}POST[t.Text]; ok && !tailClosed && post > minBp {
 \t\t\tsb := len(scratch); scratch = append(scratch, left, mkLeaf("$operator", t.Off, t.End)); pos++; tailClosed = true
-\t\t\tleft = finish(${J(r.name)}, sb, nodes[left].Offset); continue
+\t\t\tleft = finish(${J(r.cstName)}, sb, nodes[left].Offset); continue
 \t\t}
 \t\tinfo, ok := ${r.name}BIN[t.Text]
 \t\tif !ok || info.lbp <= minBp { break }
@@ -256,7 +256,7 @@ ${r.postfixToks.map(postfixArm).join('\n')}
 \t\trhs := ${r.name}bp(info.rbp)
 \t\tif rhs < 0 { pos = ledSave; scratch = scratch[:sb]; break }
 \t\tscratch = append(scratch, rhs)
-\t\tleft = finish(${J(r.name)}, sb, nodes[left].Offset)
+\t\tleft = finish(${J(r.cstName)}, sb, nodes[left].Offset)
 \t}
 \treturn left
 }
@@ -264,10 +264,11 @@ func ${r.name}nud(minBp int) int32 {
 \t_capped = false
 \tt := peek()
 \tif t == nil { return -1 }
-${r.nudCapped.map((c) => `\tif minBp < ${c.capBp} { save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'} { _capped = true; return finish(${J(r.name)}, sb, offAt(save)) }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('\n')}
+${r.nudCapped.map((c) => `\tif minBp < ${c.capBp} { save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'} { _capped = true; return finish(${J(r.cstName)}, sb, offAt(save)) }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('\n')}
+\t_r := func() int32 {   // non-capped: a sub-parse may leave _capped set; force it false after
 ${tplNud}\tif ${r.name}ATOM[t.Kind] {
 \t\tsb := len(scratch); scratch = append(scratch, mkLeaf(t.Kind, t.Off, t.End)); pos++
-\t\treturn finish(${J(r.name)}, sb, t.Off)
+\t\treturn finish(${J(r.cstName)}, sb, t.Off)
 \t}
 ${r.nudBrackets.map(bracketNud).join('\n')}
 \tif pbp, ok := ${r.name}PRE[t.Text]; ok {
@@ -276,10 +277,13 @@ ${r.nudBrackets.map(bracketNud).join('\n')}
 \t\toperand := ${r.name}bp(pbp)
 \t\tif operand < 0 { pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]; return -1 }
 \t\tscratch = append(scratch, operand)
-\t\treturn finish(${J(r.name)}, sb, t.Off)
+\t\treturn finish(${J(r.cstName)}, sb, t.Off)
 \t}
-${r.nudSeqs.map((seq) => `\t{ save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${seq.length ? seq.map(stepCond).join(' && ') : 'true'} { return finish(${J(r.name)}, sb, offAt(save)) }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('\n')}
+${r.nudSeqs.map((seq) => `\t{ save := pos; sb := len(scratch); nb := len(nodes); kb := len(kids); if ${seq.length ? seq.map(stepCond).join(' && ') : 'true'} { return finish(${J(r.cstName)}, sb, offAt(save)) }; pos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb] }`).join('\n')}
 \treturn -1
+\t}()
+\t_capped = false
+\treturn _r
 }`;
 }
 
diff --git a/src/target-rust.ts b/src/target-rust.ts
index a05f71b..72d163b 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -219,7 +219,7 @@ function stepCondP(s: Step): string {
 
 function rdRule(r: RdRule): string {
   const alt = (steps: Step[]) =>
-    `        { let mut kids: Vec<Cst> = Vec::new(); if ${steps.map(stepCond).join(' && ')} { return Some(self.branch(${J(r.name)}, kids, save)); } self.pos = save; }`;
+    `        { let mut kids: Vec<Cst> = Vec::new(); if ${steps.map(stepCond).join(' && ')} { return Some(self.branch(${J(r.cstName)}, kids, save)); } self.pos = save; }`;
   return `    fn parse_${r.name}(&mut self) -> Option<Cst> {
         let save = self.pos;
 ${r.alts.map(alt).join('\n')}
@@ -230,7 +230,7 @@ ${r.alts.map(alt).join('\n')}
 function prattRule(r: PrattRule, tpl: TplCfg | null): string {
   const tplNud = tpl && r.nudToks.includes(tpl.token)
     ? `        if t.kind == "$templateHead" {
-            return self.match_template().map(|n| { let (o, e) = (n.offset, n.end); Cst::node(${J(r.name)}, vec![n], o, e) });
+            return self.match_template().map(|n| { let (o, e) = (n.offset, n.end); Cst::node(${J(r.cstName)}, vec![n], o, e) });
         }\n`
     : '';
   const binArms = r.binary.map((b) => `${J(b.op)} => Some((${b.lbp}, ${b.rbp}))`).join(', ');
@@ -238,21 +238,21 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
   const atomArm = r.nudToks.map(J).join(' | ');
   const bracketNud = (b: Bracket) => `        if t.text == ${J(b.first)} {
             let save = self.pos; let mut kids: Vec<Cst> = Vec::new();
-            if ${b.steps.map(stepCond).join(' && ')} { return Some(node(${J(r.name)}, kids)); }
+            if ${b.steps.map(stepCond).join(' && ')} { return Some(node(${J(r.cstName)}, kids)); }
             self.pos = save;   // fall through to the next NUD alternative
         }`;
   const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `            if ${accessTail ? '!tail_closed && ' : ''}${lbp !== null ? `${lbp} > min_bp && ` : ''}!my_sup.iter().any(|c| *c == ${J(b.first)}) && t.text == ${J(b.first)} {
                 let led_save = self.pos; let mut kids: Vec<Cst> = Vec::new();
                 if ${b.steps.map(stepCond).join(' && ')} {
                     let mut full = vec![left]; full.append(&mut kids);
-                    left = node(${J(r.name)}, full); continue;
+                    left = node(${J(r.cstName)}, full); continue;
                 }
                 self.pos = led_save; break;
             }`;
   const postfixArm = (tok: string) => {
     const tplPart = tpl && tok === tpl.token ? `
-            if !tail_closed && t.kind == "$templateHead" { if let Some(n) = self.match_template() { left = node(${J(r.name)}, vec![left, n]); continue; } }` : '';
-    return `            if !tail_closed && t.kind == ${J(tok)} { self.pos += 1; let leaf = Cst::leaf(t.kind, t.off, t.end); left = node(${J(r.name)}, vec![left, leaf]); continue; }${tplPart}`;
+            if !tail_closed && t.kind == "$templateHead" { if let Some(n) = self.match_template() { left = node(${J(r.cstName)}, vec![left, n]); continue; } }` : '';
+    return `            if !tail_closed && t.kind == ${J(tok)} { self.pos += 1; let leaf = Cst::leaf(t.kind, t.off, t.end); left = node(${J(r.cstName)}, vec![left, leaf]); continue; }${tplPart}`;
   };
   const postArms = r.postfix.map((p) => `${J(p.op)} => Some(${p.lbp})`).join(', ');
   return `    fn parse_${r.name}(&mut self) -> Option<Cst> { self.${r.name}_bp(0) }
@@ -270,35 +270,41 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
             let t = match self.peek() { Some(t) => t, None => break };
 ${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
-            if let Some(plbp) = Parser::${r.name}_post(t.text) { if !tail_closed && plbp > min_bp { self.pos += 1; let op_leaf = Cst::leaf("$operator", t.off, t.end); left = node(${J(r.name)}, vec![left, op_leaf]); tail_closed = true; continue; } }
+            if let Some(plbp) = Parser::${r.name}_post(t.text) { if !tail_closed && plbp > min_bp { self.pos += 1; let op_leaf = Cst::leaf("$operator", t.off, t.end); left = node(${J(r.cstName)}, vec![left, op_leaf]); tail_closed = true; continue; } }
             let (lbp, rbp) = match Parser::${r.name}_bin(t.text) { Some(x) => x, None => break };
             if lbp <= min_bp { break; }
             let led_save = self.pos;
             self.pos += 1;
             let op_leaf = Cst::leaf("$operator", t.off, t.end);
             let rhs = match self.${r.name}_bp(rbp) { Some(r) => r, None => { self.pos = led_save; break; } };
-            left = node(${J(r.name)}, vec![left, op_leaf, rhs]);
+            left = node(${J(r.cstName)}, vec![left, op_leaf, rhs]);
         }
         Some(left)
     }
     fn ${r.name}_nud(&mut self, min_bp: i64) -> Option<Cst> {
         self.capped = false;
         let t = self.peek()?;
-${r.nudCapped.map((c) => `        if min_bp < ${c.capBp} { let save = self.pos; let mut kids: Vec<Cst> = Vec::new(); if ${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'} { self.capped = true; return Some(self.branch(${J(r.name)}, kids, save)); } self.pos = save; }`).join('\n')}
+${r.nudCapped.map((c) => `        if min_bp < ${c.capBp} { let save = self.pos; let mut kids: Vec<Cst> = Vec::new(); if ${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'} { self.capped = true; return Some(self.branch(${J(r.cstName)}, kids, save)); } self.pos = save; }`).join('\n')}
+        // non-capped: a sub-parse may leave capped set (grouping a capped arrow); force it false after
+        let r = self.${r.name}_nud_rest(t);
+        self.capped = false;
+        r
+    }
+    fn ${r.name}_nud_rest(&mut self, t: Tok<'a>) -> Option<Cst> {
 ${tplNud}        if Parser::${r.name}_atom(t.kind) {
             self.pos += 1;
-            return Some(Cst::node(${J(r.name)}, vec![Cst::leaf(t.kind, t.off, t.end)], t.off, t.end));
+            return Some(Cst::node(${J(r.cstName)}, vec![Cst::leaf(t.kind, t.off, t.end)], t.off, t.end));
         }
 ${r.nudBrackets.map(bracketNud).join('\n')}
         if let Some(pbp) = Parser::${r.name}_pre(t.text) {
             let save = self.pos; self.pos += 1;
             let op_leaf = Cst::leaf("$operator", t.off, t.end);
             match self.${r.name}_bp(pbp) {
-                Some(operand) => { let (o, e) = (t.off, operand.end); return Some(Cst::node(${J(r.name)}, vec![op_leaf, operand], o, e)); }
+                Some(operand) => { let (o, e) = (t.off, operand.end); return Some(Cst::node(${J(r.cstName)}, vec![op_leaf, operand], o, e)); }
                 None => { self.pos = save; return None; }
             }
         }
-${r.nudSeqs.map((seq) => `        { let save = self.pos; let mut kids: Vec<Cst> = Vec::new(); if ${seq.length ? seq.map(stepCond).join(' && ') : 'true'} { return Some(self.branch(${J(r.name)}, kids, save)); } self.pos = save; }`).join('\n')}
+${r.nudSeqs.map((seq) => `        { let save = self.pos; let mut kids: Vec<Cst> = Vec::new(); if ${seq.length ? seq.map(stepCond).join(' && ') : 'true'} { return Some(self.branch(${J(r.cstName)}, kids, save)); } self.pos = save; }`).join('\n')}
         None
     }`;
 }
diff --git a/src/target-ts.ts b/src/target-ts.ts
index c30e81f..589476e 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -144,8 +144,8 @@ ${emitHooks}
   let pendingNl = false;
 ${defs.length ? '  _s = src;\n' : ''}${rxState}${tplState}${stateful ? emitFn : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end, nl: pendingNl }); pendingNl = false; };\n'}  while (pos < n) {
     const c = src.charCodeAt(pos);
-    if (c === 32 || c === 9) { pos++; continue; }
-    if (c === 10 || c === 13) { pendingNl = true; pos++; continue; }
+    if (c === 10 || c === 13 || c === 8232 || c === 8233) { pendingNl = true; pos++; continue; }
+    if (c === 32 || c === 9 || c === 11 || c === 12 || c === 160 || c === 5760 || (c >= 8192 && c <= 8202) || c === 8239 || c === 8287 || c === 12288 || c === 65279) { pos++; continue; }
 ${tplDispatch}${toks}
 ${puncts}
     throw new Error('lex error at ' + pos + ': ' + JSON.stringify(src[pos]));
@@ -175,7 +175,7 @@ function stepCond(s: Step): string {
 
 function rdRule(r: RdRule): string {
   const alt = (steps: Step[]) =>
-    `  { const kids: Cst[] = []; if (${steps.map(stepCond).join(' && ')}) return branch(${J(r.name)}, kids, save); pos = save; }`;
+    `  { const kids: Cst[] = []; if (${steps.map(stepCond).join(' && ')}) return branch(${J(r.cstName)}, kids, save); pos = save; }`;
   return `function parse${r.name}(): Node | null {
   const save = pos;
 ${r.alts.map(alt).join('\n')}
@@ -185,28 +185,28 @@ ${r.alts.map(alt).join('\n')}
 
 function prattRule(r: PrattRule, tpl: TplCfg | null): string {
   const tplNud = tpl && r.nudToks.includes(tpl.token)
-    ? `  if (t.kind === '$templateHead') { const node = matchTemplate(); return node === null ? null : { rule: ${J(r.name)}, children: [node], offset: node.offset, end: node.end }; }\n`
+    ? `  if (t.kind === '$templateHead') { const node = matchTemplate(); return node === null ? null : { rule: ${J(r.cstName)}, children: [node], offset: node.offset, end: node.end }; }\n`
     : '';
   const BIN = `{ ${r.binary.map((b) => `${J(b.op)}: { lbp: ${b.lbp}, rbp: ${b.rbp} }`).join(', ')} }`;
   const PRE = `{ ${r.prefix.map((p) => `${J(p.op)}: ${p.rbp}`).join(', ')} }`;
   const atom = `new Set([${r.nudToks.map(J).join(', ')}])`;
   const bracketNud = (b: Bracket) => `    if (t.text === ${J(b.first)}) {
       const save = pos; const kids: Cst[] = [];
-      if (${b.steps.map(stepCond).join(' && ')}) return node(${J(r.name)}, kids);
+      if (${b.steps.map(stepCond).join(' && ')}) return node(${J(r.cstName)}, kids);
       pos = save;   // fall through to the next NUD alternative (e.g. another '${b.first}'-led form)
     }`;
   // Access-tail leds (member/call/index) are disabled once a postfix has closed the operand;
   // a precedence-gated led (ternary/in/instanceof) binds only when its lbp > minBp.
   const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `    if (${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}(_mySup === null || !_mySup.has(${J(b.first)})) && t.text === ${J(b.first)}) {
       const ledSave = pos; const kids: Cst[] = [left];
-      if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.name)}, kids); continue; }
+      if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.cstName)}, kids); continue; }
       pos = ledSave; break;
     }`;
   // A postfix token (e.g. a tagged template) binds like a mixfix led: `left X` → node(left, X). Also an access tail.
   const postfixArm = (tok: string) => {
     const tplPart = tpl && tok === tpl.token ? `
-    if (!tailClosed && t.kind === '$templateHead') { const node = matchTemplate(); if (node !== null) { left = { rule: ${J(r.name)}, children: [left, node], offset: left.offset, end: node.end }; continue; } }` : '';
-    return `    if (!tailClosed && t.kind === ${J(tok)}) { const leaf: Leaf = { tokenType: t.kind, offset: t.off, end: t.end }; pos++; left = { rule: ${J(r.name)}, children: [left, leaf], offset: left.offset, end: leaf.end }; continue; }${tplPart}`;
+    if (!tailClosed && t.kind === '$templateHead') { const node = matchTemplate(); if (node !== null) { left = { rule: ${J(r.cstName)}, children: [left, node], offset: left.offset, end: node.end }; continue; } }` : '';
+    return `    if (!tailClosed && t.kind === ${J(tok)}) { const leaf: Leaf = { tokenType: t.kind, offset: t.off, end: t.end }; pos++; left = { rule: ${J(r.cstName)}, children: [left, leaf], offset: left.offset, end: leaf.end }; continue; }${tplPart}`;
   };
   const POST = `{ ${r.postfix.map((p) => `${J(p.op)}: ${p.lbp}`).join(', ')} }`;
   return `const ${r.name}_BIN: Record<string, { lbp: number; rbp: number }> = ${BIN};
@@ -226,7 +226,7 @@ function ${r.name}_bp(minBp: number): Node | null {
 ${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
     const post = ${r.name}_POST[t.text];
-    if (!tailClosed && post !== undefined && post > minBp) { pos++; const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end }; left = { rule: ${J(r.name)}, children: [left, opLeaf], offset: left.offset, end: t.end }; tailClosed = true; continue; }
+    if (!tailClosed && post !== undefined && post > minBp) { pos++; const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end }; left = { rule: ${J(r.cstName)}, children: [left, opLeaf], offset: left.offset, end: t.end }; tailClosed = true; continue; }
     const info = ${r.name}_BIN[t.text];
     if (info === undefined || info.lbp <= minBp) break;
     const ledSave = pos;
@@ -234,7 +234,7 @@ ${r.postfixToks.map(postfixArm).join('\n')}
     const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end };
     const rhs = ${r.name}_bp(info.rbp);
     if (rhs === null) { pos = ledSave; break; }
-    left = { rule: ${J(r.name)}, children: [left, opLeaf, rhs], offset: left.offset, end: rhs.end };
+    left = { rule: ${J(r.cstName)}, children: [left, opLeaf, rhs], offset: left.offset, end: rhs.end };
   }
   return left;
 }
@@ -242,8 +242,11 @@ function ${r.name}_nud(minBp: number): Node | null {
   _capped = false;
   const t = peek();
   if (t === null) return null;
-${r.nudCapped.map((c) => `  if (minBp < ${c.capBp}) { const save = pos; const kids: Cst[] = []; if (${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'}) { _capped = true; return branch(${J(r.name)}, kids, save); } pos = save; }`).join('\n')}
-${tplNud}  if (${r.name}_ATOM.has(t.kind)) { pos++; return { rule: ${J(r.name)}, children: [{ tokenType: t.kind, offset: t.off, end: t.end }], offset: t.off, end: t.end }; }
+${r.nudCapped.map((c) => `  if (minBp < ${c.capBp}) { const save = pos; const kids: Cst[] = []; if (${c.steps.length ? c.steps.map(stepCond).join(' && ') : 'true'}) { _capped = true; return branch(${J(r.cstName)}, kids, save); } pos = save; }`).join('\n')}
+  // Below is non-capped: a sub-parse may leave _capped set (e.g. grouping a capped arrow),
+  // so force it false after — only the capped arms above produce a capped node.
+  const _r = ((): Node | null => {
+${tplNud}  if (${r.name}_ATOM.has(t.kind)) { pos++; return { rule: ${J(r.cstName)}, children: [{ tokenType: t.kind, offset: t.off, end: t.end }], offset: t.off, end: t.end }; }
 ${r.nudBrackets.map(bracketNud).join('\n')}
   const pbp = ${r.name}_PRE[t.text];
   if (pbp !== undefined) {
@@ -251,10 +254,13 @@ ${r.nudBrackets.map(bracketNud).join('\n')}
     const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end };
     const operand = ${r.name}_bp(pbp);
     if (operand === null) { pos = save; return null; }
-    return { rule: ${J(r.name)}, children: [opLeaf, operand], offset: t.off, end: operand.end };
+    return { rule: ${J(r.cstName)}, children: [opLeaf, operand], offset: t.off, end: operand.end };
   }
-${r.nudSeqs.map((seq) => `  { const save = pos; const kids: Cst[] = []; if (${seq.length ? seq.map(stepCond).join(' && ') : 'true'}) return branch(${J(r.name)}, kids, save); pos = save; }`).join('\n')}
+${r.nudSeqs.map((seq) => `  { const save = pos; const kids: Cst[] = []; if (${seq.length ? seq.map(stepCond).join(' && ') : 'true'}) return branch(${J(r.cstName)}, kids, save); pos = save; }`).join('\n')}
   return null;
+  })();
+  _capped = false;
+  return _r;
 }`;
 }
 
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index fe52c60..2863226 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -176,6 +176,25 @@ const CASES: Case[] = [
     ],
     reject: ['for (x y) z;', 'for x in y;', 'for (in y) z;', 'for (x in) z;'],
   },
+  {
+    // The REAL javascript.ts grammar (89 rules after the [Await]/[Yield] fork) — the proof
+    // that the target-agnostic emitter handles a full language end-to-end in ts/go/rust.
+    // ASCII corpus only (byte-based go/rust use UTF-8 offsets, identical to the JS oracle's
+    // UTF-16 offsets for ASCII; non-ASCII offset units differ inherently).
+    grammar: 'javascript', path: '../javascript.ts',
+    accept: [
+      'var x = 1, y = 2;', 'function f(a, b) { return a + b; }', 'const g = (x) => x * 2;',
+      'x => x + 1;', 'a ? b : c;', 'a.b.c();', 'f(g(1, 2), 3);', '[1, 2, 3].map(f);',
+      'for (let i = 0; i < n; i++) x();', 'for (const k in obj) { y(); }', 'while (x) { z(); }',
+      'if (a) b(); else c();', 'class C extends B { m() {} get p() { return 1; } }', 'a++; b--;',
+      'typeof x; void 0;', 'new Foo(1, 2); new.target;', 'a ?? b; a?.b?.c;',
+      'try { f(); } catch (e) { g(); } finally { h(); }', 'switch (x) { case 1: f(); break; default: g(); }',
+      'a instanceof B; a in obj;', '(function () {})(); (() => {})();', 'x = a && b || c;',
+      'do { x(); } while (y);', 'function* gen() { yield* o(); }', 'const { a, b: c, ...r } = o;',
+      'const [p, , q, ...z] = arr;', 'label: for (;;) { break label; }', 'async function h() { await x; }',
+    ],
+    reject: ['function (', 'a +;', 'if x {}', '{ a: }', 'for (;;', 'a ? b ;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From cd4ebc84fe3fb45b41ebe32dac5c137f7e7bd6a4 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 05:49:02 +0800
Subject: [PATCH 22/27] =?UTF-8?q?emit-portable:=20typescript.ts=20emits=20?=
 =?UTF-8?q?too=20=E2=80=94=20both=20real=20grammars=20in=20the=20gate=20(i?=
 =?UTF-8?q?ssue=20#6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The second real full language now goes through the agnostic emitter end-to-end. Two
type-grammar constructs were the wall:

- A LED with a leading `sameLine` guard (`$ sameLine '<' …`) — TS's generic-args /
  array / non-null type tails that must not cross a newline. The guard is hoisted into
  the led-arm condition (skip, don't break, so the connector can rebind).
- `notLeftLeaf`: a led skipped when the LEFT node's head-leaf text is in a word set
  (`void`/`null`/`this` can't be `.`-qualified as a type). Each target gains a
  `headLeafText` (the leftmost leaf's source text) and the led arm checks it.

typescript.ts (the most complex grammar) emits, compiles and runs in ts/go/rust, and
is gate-maintained alongside javascript.ts (13/13 accept, 4/4 reject ×3, ASCII corpus;
83.5% on the broad curated TS corpus in TS). Full suite 42/42. The agnostic emitter now
covers both full real languages — the issue-#6 goal, proven in three target languages.
---
 src/emit-portable.ts     | 24 +++++++++++++++++++-----
 src/target-go.ts         | 12 ++++++++++--
 src/target-rust.ts       | 18 ++++++++++++------
 src/target-ts.ts         | 11 +++++++++--
 test/portable-targets.ts | 14 ++++++++++++++
 5 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index a6a24eb..d631b12 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -74,6 +74,8 @@ export type PrattRule = {
   leds: Bracket[];                                    // LED: mixfix continuation (call/member/index), tried before operators
   ledAccessTail: boolean[];                           // parallel to leds: a "closed punct-connector" tail (member/call/index) — disabled once a postfix binds
   ledLbp: Array<number | null>;                       // parallel to leds: precedence gate (ternary/in/instanceof) — bind only when lbp > minBp; null = bind maximally tight
+  ledSameLine: boolean[];                             // parallel to leds: a leading `sameLine` guard (TS type tails) — the connector must be on the operand's line
+  ledNotLeftLeaf: Array<string[] | null>;             // parallel to leds: skip this led when the left node's head-leaf text is in this set (`void.x` etc.)
   postfixToks: string[];                              // LED: a postfix token `$ X` (e.g. a tagged template), tried like a mixfix led (also an access tail)
   postfix: Array<{ op: string; lbp: number }>;        // LED: a postfix operator `$ ++` — binds iff lbp > minBp + !tailClosed, no rhs, closes the tail
 };
@@ -304,9 +306,17 @@ function buildPratt(
   const leds: Bracket[] = [];
   const ledAccessTail: boolean[] = [];
   const ledLbp: Array<number | null> = [];
+  const ledSameLine: boolean[] = [];
+  const ledNotLeftLeaf: Array<string[] | null> = [];
   const postfixToks: string[] = [];
   for (const alt of alts) {
-    const items = alt.type === 'seq' ? alt.items : [alt];
+    let items = alt.type === 'seq' ? alt.items : [alt];
+    // A left-recursive continuation may carry a leading `notLeftLeaf(words)` head-leaf guard
+    // before the self `$` — strip it and attach the word set to the led it produces.
+    let nllWords: string[] | null = null;
+    if (items[0].type === 'notLeftLeaf' && items[1]?.type === 'ref' && items[1].name === name) {
+      nllWords = items[0].words; items = items.slice(1);
+    }
     const startsSelf = items[0].type === 'ref' && items[0].name === name;
     if (!startsSelf) {
       // NUD
@@ -331,9 +341,11 @@ function buildPratt(
       continue;
     }
     // LED (starts with self): `$ op $` (binary, op slot + trailing self) or `$ <lit> …` (mixfix)
-    const rest = items.slice(1);
-    if (rest[0].type === 'op') { sawBinary = true; continue; }
-    if (rest[0].type === 'postfix') { sawPostfix = true; continue; }   // postfix operator (`x++`)
+    const restAll = items.slice(1);
+    const hasSameLine = restAll[0]?.type === 'sameLine';   // a TS type tail: `$ sameLine '<' …`
+    const rest = hasSameLine ? restAll.slice(1) : restAll;
+    if (!hasSameLine && rest[0].type === 'op') { sawBinary = true; continue; }
+    if (!hasSameLine && rest[0].type === 'postfix') { sawPostfix = true; continue; }   // postfix operator (`x++`)
     if (rest[0].type === 'literal') {
       const conn = rest[0].value;
       const prec = a.ledPrecByConnector.get(conn);   // { lbp, rhsBp } for ternary/in/instanceof
@@ -346,6 +358,8 @@ function buildPratt(
       leds.push({ first: conn, steps });
       ledAccessTail.push(!lastIsOperand && !wordConnector);
       ledLbp.push(prec ? prec.lbp : null);
+      ledSameLine.push(hasSameLine);
+      ledNotLeftLeaf.push(nllWords);
       continue;
     }
     if (rest.length === 1 && rest[0].type === 'ref' && a.tokenNames.has(rest[0].name)) { postfixToks.push(rest[0].name); continue; }  // postfix token (tagged template)
@@ -376,5 +390,5 @@ function buildPratt(
   const postfix = sawPostfix
     ? [...a.opTable.entries()].filter(([, info]) => info.position === 'postfix').map(([op, info]) => ({ op, lbp: info.lbp }))
     : [];
-  return { kind: 'pratt', name, cstName, nudToks, nudBrackets, nudSeqs, nudCapped, prefix, binary, leds, ledAccessTail, ledLbp, postfixToks, postfix };
+  return { kind: 'pratt', name, cstName, nudToks, nudBrackets, nudSeqs, nudCapped, prefix, binary, leds, ledAccessTail, ledLbp, ledSameLine, ledNotLeftLeaf, postfixToks, postfix };
 }
diff --git a/src/target-go.ts b/src/target-go.ts
index 0e58d86..a654a7b 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -210,7 +210,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
 \t\tif ${b.steps.map(stepCond).join(' && ')} { return finish(${J(r.cstName)}, sb, t.Off) }
 \t\tpos = save; scratch = scratch[:sb]; nodes = nodes[:nb]; kids = kids[:kb]
 \t}`;
-  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}!_mySup[${J(b.first)}] && t.Text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null, sameLine: boolean, nll: string[] | null) => `\t\tif ${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}${sameLine ? '!t.Nl && ' : ''}${nll ? `!_inW([]string{${nll.map(J).join(', ')}}, headLeafText(left)) && ` : ''}!_mySup[${J(b.first)}] && t.Text == ${J(b.first)} {
 \t\t\tledSave := pos; sb := len(scratch); nb := len(nodes); kb := len(kids)
 \t\t\tscratch = append(scratch, left)
 \t\t\tif ${b.steps.map(stepCond).join(' && ')} { left = finish(${J(r.cstName)}, sb, nodes[left].Offset); continue }
@@ -242,7 +242,7 @@ func ${r.name}bp(minBp int) int32 {
 \tfor {
 \t\tt := peek()
 \t\tif t == nil { break }
-${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i], r.ledSameLine[i], r.ledNotLeftLeaf[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
 \t\tif post, ok := ${r.name}POST[t.Text]; ok && !tailClosed && post > minBp {
 \t\t\tsb := len(scratch); scratch = append(scratch, left, mkLeaf("$operator", t.Off, t.End)); pos++; tailClosed = true
@@ -338,6 +338,7 @@ type bp struct{ lbp, rbp int }
 var toks []Tok
 var pos int
 var _capped bool
+var _src string
 var _suppressNext map[string]bool
 var nodes []Node
 var kids []int32
@@ -412,7 +413,14 @@ func writeJSON(id int32, b *strings.Builder) {
 \tfmt.Fprintf(b, "],\\"offset\\":%d,\\"end\\":%d}", nd.Offset, nd.End)
 }
 
+func headLeafText(id int32) string {
+\tfor !nodes[id].IsLeaf && nodes[id].KidCount > 0 { id = kids[nodes[id].KidStart] }
+\treturn _src[nodes[id].Offset:nodes[id].End]
+}
+func _inW(ws []string, s string) bool { for _, w := range ws { if w == s { return true } }; return false }
+
 func parseOnce(src string) int32 {
+\t_src = src
 \ttoks = lex(src)
 \tpos = 0
 \tnodes = nodes[:0]; kids = kids[:0]; scratch = scratch[:0]
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 72d163b..5a43013 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -241,7 +241,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
             if ${b.steps.map(stepCond).join(' && ')} { return Some(node(${J(r.cstName)}, kids)); }
             self.pos = save;   // fall through to the next NUD alternative
         }`;
-  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `            if ${accessTail ? '!tail_closed && ' : ''}${lbp !== null ? `${lbp} > min_bp && ` : ''}!my_sup.iter().any(|c| *c == ${J(b.first)}) && t.text == ${J(b.first)} {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null, sameLine: boolean, nll: string[] | null) => `            if ${accessTail ? '!tail_closed && ' : ''}${lbp !== null ? `${lbp} > min_bp && ` : ''}${sameLine ? '!t.nl && ' : ''}${nll ? `!self.nll_blocked(&[${nll.map(J).join(', ')}], &left) && ` : ''}!my_sup.iter().any(|c| *c == ${J(b.first)}) && t.text == ${J(b.first)} {
                 let led_save = self.pos; let mut kids: Vec<Cst> = Vec::new();
                 if ${b.steps.map(stepCond).join(' && ')} {
                     let mut full = vec![left]; full.append(&mut kids);
@@ -268,7 +268,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
         let mut tail_closed = false;
         loop {
             let t = match self.peek() { Some(t) => t, None => break };
-${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i], r.ledSameLine[i], r.ledNotLeftLeaf[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
             if let Some(plbp) = Parser::${r.name}_post(t.text) { if !tail_closed && plbp > min_bp { self.pos += 1; let op_leaf = Cst::leaf("$operator", t.off, t.end); left = node(${J(r.cstName)}, vec![left, op_leaf]); tail_closed = true; continue; } }
             let (lbp, rbp) = match Parser::${r.name}_bin(t.text) { Some(x) => x, None => break };
@@ -352,9 +352,15 @@ fn node(rule: &'static str, kids: Vec<Cst>) -> Cst { let o = kids[0].offset; let
 
 ${lexer(ir)}
 
-struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize, capped: bool, suppress_next: Vec<&'static str> }
+struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize, capped: bool, suppress_next: Vec<&'static str>, src: &'a str }
 impl<'a> Parser<'a> {
     fn peek(&self) -> Option<Tok<'a>> { if self.pos < self.toks.len() { Some(self.toks[self.pos]) } else { None } }
+    fn head_leaf_text(&self, node: &Cst) -> &'a str {
+        let mut n = node;
+        while !n.children.is_empty() { n = &n.children[0]; }
+        &self.src[n.offset..n.end]
+    }
+    fn nll_blocked(&self, words: &[&str], node: &Cst) -> bool { let h = self.head_leaf_text(node); words.iter().any(|w| *w == h) }
     fn branch(&self, rule: &'static str, kids: Vec<Cst>, save: usize) -> Cst {
         let offset = if !kids.is_empty() { kids[0].offset } else if save < self.toks.len() { self.toks[save].off } else { 0 };
         let end = if !kids.is_empty() { kids[kids.len() - 1].end } else { offset };
@@ -409,15 +415,15 @@ fn main() {
     // Self-bench: a numeric arg N times the lex+parse loop and prints ms/iteration.
     if let Some(iters) = std::env::args().nth(1).and_then(|a| a.parse::<u64>().ok()) {
         // black_box on the input + result so the optimizer can't elide the lex/parse.
-        for _ in 0..3 { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new() }; std::hint::black_box(p.parse_${ir.entry}()); }
+        for _ in 0..3 { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new(), src: &src }; std::hint::black_box(p.parse_${ir.entry}()); }
         let t = std::time::Instant::now();
-        for _ in 0..iters { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new() }; std::hint::black_box(p.parse_${ir.entry}()); }
+        for _ in 0..iters { let toks = lex(std::hint::black_box(&src)); let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new(), src: &src }; std::hint::black_box(p.parse_${ir.entry}()); }
         println!("{:.4}", t.elapsed().as_secs_f64() * 1000.0 / iters as f64);
         return;
     }
     let toks = lex(&src);
     let n = toks.len();
-    let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new() };
+    let mut p = Parser { toks, pos: 0, capped: false, suppress_next: Vec::new(), src: &src };
     match p.parse_${ir.entry}() {
         Some(root) if p.pos == n => { let mut out = String::new(); write_json(&root, &mut out); print!("{}", out); }
         _ => { eprintln!("parse error (pos {}/{})", p.pos, n); std::process::exit(1); }
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 589476e..7984ba9 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -197,7 +197,7 @@ function prattRule(r: PrattRule, tpl: TplCfg | null): string {
     }`;
   // Access-tail leds (member/call/index) are disabled once a postfix has closed the operand;
   // a precedence-gated led (ternary/in/instanceof) binds only when its lbp > minBp.
-  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null) => `    if (${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}(_mySup === null || !_mySup.has(${J(b.first)})) && t.text === ${J(b.first)}) {
+  const ledArm = (b: Bracket, accessTail: boolean, lbp: number | null, sameLine: boolean, nll: string[] | null) => `    if (${accessTail ? '!tailClosed && ' : ''}${lbp !== null ? `${lbp} > minBp && ` : ''}${sameLine ? '!t.nl && ' : ''}${nll ? `!${J(nll)}.includes(headLeafText(left)) && ` : ''}(_mySup === null || !_mySup.has(${J(b.first)})) && t.text === ${J(b.first)}) {
       const ledSave = pos; const kids: Cst[] = [left];
       if (${b.steps.map(stepCond).join(' && ')}) { left = node(${J(r.cstName)}, kids); continue; }
       pos = ledSave; break;
@@ -223,7 +223,7 @@ function ${r.name}_bp(minBp: number): Node | null {
   for (;;) {
     const t = peek();
     if (t === null) break;
-${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i])).join('\n')}
+${r.leds.map((b, i) => ledArm(b, r.ledAccessTail[i], r.ledLbp[i], r.ledSameLine[i], r.ledNotLeftLeaf[i])).join('\n')}
 ${r.postfixToks.map(postfixArm).join('\n')}
     const post = ${r.name}_POST[t.text];
     if (!tailClosed && post !== undefined && post > minBp) { pos++; const opLeaf: Leaf = { tokenType: '$operator', offset: t.off, end: t.end }; left = { rule: ${J(r.cstName)}, children: [left, opLeaf], offset: left.offset, end: t.end }; tailClosed = true; continue; }
@@ -302,7 +302,13 @@ let toks: Tok[] = [];
 let pos = 0;
 let _capped = false;
 let _suppressNext: Set<string> | null = null;
+let _src = '';
 function peek(): Tok | null { return pos < toks.length ? toks[pos] : null; }
+function headLeafText(node: Cst): string {
+  let n: Cst = node;
+  while ('children' in n && n.children.length > 0) n = n.children[0];
+  return _src.slice(n.offset, n.end);
+}
 function branch(rule: string, kids: Cst[], save: number): Node {
   const offset = kids.length > 0 ? kids[0].offset : (save < toks.length ? toks[save].off : 0);
   const end = kids.length > 0 ? kids[kids.length - 1].end : offset;
@@ -350,6 +356,7 @@ function altLit(opts: [string, string][], kids: Cst[]): boolean {
 ${matchTemplate}${ruleFns}
 
 const src = readFileSync(0, 'utf8');
+_src = src;
 toks = lex(src);
 pos = 0;
 const root = parse${ir.entry}();
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 2863226..7dd6544 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -195,6 +195,20 @@ const CASES: Case[] = [
     ],
     reject: ['function (', 'a +;', 'if x {}', '{ a: }', 'for (;;', 'a ? b ;'],
   },
+  {
+    // The real typescript.ts grammar — the second, most complex full language proving the
+    // agnostic emitter (types, generics, interfaces, enums, assertions, variance). ASCII.
+    grammar: 'typescript', path: '../typescript.ts',
+    accept: [
+      'const a: number = 1;', 'let s: string;', 'type Alias = { a: number; b?: string };',
+      'type U = "a" | "b" | "c";', 'function gen2<T, U extends T>(x: T, y: U): T { return x; }',
+      'interface I<T> extends A<T> { m(x: T): T; }', 'const c = x as const;',
+      'function isStr(x: unknown): x is string { return true; }', 'enum E { A, B, C }',
+      'const n = maybe!;', 'let arr: number[];', 'type Fn = (x: number) => string;',
+      'class C<in out T> { value!: T; }',
+    ],
+    reject: ['interface {}', 'const x: = 1;', 'enum {}', 'a + ;'],
+  },
 ];
 
 const sortKeys = (o: unknown): unknown =>

From ca2a56bbd44fffffa58b138686e3d94cc04fffda Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 05:50:27 +0800
Subject: [PATCH 23/27] =?UTF-8?q?docs:=20README=20=E2=80=94=20the=20emitte?=
 =?UTF-8?q?d=20parser=20need=20not=20be=20JS=20(issue=20#6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Documents the target-agnostic emitter under "A language-agnostic engine": one
analysis → one IR → per-target render (Go/Rust/native, each with its own regex-free
lexer), proven by the real javascript.ts and typescript.ts grammars emitting to
ts/go/rust byte-identical to the interpreter and gate-maintained, with the Rust/Go
throughput results and the ASCII-offset boundary noted.
---
 README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/README.md b/README.md
index a8f69cf..3b2e3a9 100644
--- a/README.md
+++ b/README.md
@@ -338,6 +338,21 @@ const Regex = token(seq(
 
 [`test/agnostic.ts`](test/agnostic.ts) proves it directly — the same engine parses a toy grammar whose identifier token is `Word`, with no templates or regex. The deeper proof is [`html.ts`](html.ts): markup shares *nothing* with TypeScript's token stream, yet the same engine handles it.
 
+### The emitted parser need not be JS — Go, Rust, native
+
+The grammar also derives a **standalone parser in another language**. [`emitPortableParser(grammar, target)`](src/emit-portable.ts) runs one analysis into one language-agnostic IR, and each `Target` renders it — including its own regex-free lexer, so the output has no dependency on the JS runtime and compiles offline:
+
+```ts
+import { emitPortableParser } from './src/emit-portable.ts';
+import { goTarget } from './src/target-go.ts';
+import { rustTarget } from './src/target-rust.ts';
+
+writeFileSync('parser.go', emitPortableParser(grammar, goTarget));   // `go build`, no deps
+writeFileSync('parser.rs', emitPortableParser(grammar, rustTarget)); // `rustc`, no crates
+```
+
+The proof is the full languages: the real [`javascript.ts`](javascript.ts) and [`typescript.ts`](typescript.ts) grammars — including the `[Await]/[Yield]` fork, left recursion, the regex/division and template state machines, arrow functions, and the TS type grammar — emit to **TypeScript, Go, and Rust**, and every CST is byte-identical to the reference interpreter. [`test/portable-targets.ts`](test/portable-targets.ts) compiles and runs all three for sixteen grammars (the two real languages plus focused fixtures) on every CI run. The Rust output reaches [oxc](https://github.com/oxc-project/oxc) throughput and the Go output beats [tsgo](https://github.com/microsoft/typescript-go) on the same corpus (an arena keeps both near zero-allocation). Byte-based Go/Rust use UTF-8 offsets — identical to the JS interpreter's for ASCII; non-ASCII offset units differ inherently.
+
 ## Adding a language
 
 A new language is **one grammar file** on the unchanged engine:

From aeb4736bc45487611f4122e757443f85cd79931a Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 06:47:10 +0800
Subject: [PATCH 24/27] emit: converge to 2 target-parameterized APIs
 (emitParser reuses emitLexer)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The emit layer had three inconsistent entry points — `emitParser(grammar)` (JS,
no target), `emitLexer(grammar, st)` (JS, internal symtab), and
`emitPortableParser(grammar, target)` (lexer buried in `target.render`). Collapse
them to exactly two, both parameterized by a Target:

    emitLexer(grammar, target)  -> the lexer for that target
    emitParser(grammar, target) -> the parser, REUSING emitLexer(grammar, target)

A Target owns both halves, so a parser reuses the SAME target's lexer — jsTarget's
parser embeds jsTarget's SoA-int lexer, goTarget's parser embeds goTarget's Tok-list
lexer. No cross-target lexer format is shared, so the optimized JS path keeps its
integer-bitmask dispatch and the portable targets keep their clean byte scanner.

- src/emit.ts (new): the Target interface + the two public functions; re-exports
  jsTarget / tsTarget / goTarget / rustTarget.
- emit-parser.ts: the optimized emitter split into `emitJsLexer` (derive) +
  `emitJsParser` (embed a handed-in lexer) behind `jsTarget`. The split is pure
  refactor — re-deriving the deterministic symtab yields the identical lexer string,
  so emit-parser-verify stays byte-for-byte.
- emit-lexer.ts: `emitLexer` -> `emitSoaLexer` (frees the public name).
- emit-portable.ts + target-{ts,go,rust}.ts: `render(ir)` split into the target's
  `emitLexer`/`emitParser`; `emitPortableParser` removed (`portableIR` exported).
- ~19 callers updated to `emitParser(g, jsTarget)` / `emitParser(g, <portable>)`.

emit-parser-verify byte-identical (0 mismatches), portable-targets 16 grammars ×3 ≡
interpreter, emit-tsc-gate clean, full suite 42/42.
---
 src/emit-lexer.ts            |  2 +-
 src/emit-parser.ts           | 31 +++++++++++++++++++++++++------
 src/emit-portable.ts         | 19 +++++++------------
 src/emit.ts                  | 33 +++++++++++++++++++++++++++++++++
 src/target-go.ts             | 16 +++++++++++-----
 src/target-rust.ts           | 16 +++++++++++-----
 src/target-ts.ts             | 13 ++++++++++---
 test/cst-match-totality.ts   |  4 ++--
 test/emit-lexer-verify.ts    |  4 ++--
 test/emit-parser-bench.ts    |  4 ++--
 test/emit-parser-verify.ts   |  4 ++--
 test/emit-reject-messages.ts |  4 ++--
 test/emit-tsc-gate.ts        |  4 ++--
 test/exhaustive-edits.ts     |  4 ++--
 test/head-to-head.ts         |  4 ++--
 test/incremental-grammars.ts |  4 ++--
 test/incremental-verify.ts   |  4 ++--
 test/multi-doc.ts            |  4 ++--
 test/portable-targets.ts     | 13 +++++--------
 test/profile-vs-peers.mjs    |  6 +++---
 test/profile-vs-tsc.mjs      |  4 ++--
 test/recovery-conformance.ts |  4 ++--
 test/recovery.ts             |  4 ++--
 23 files changed, 134 insertions(+), 71 deletions(-)
 create mode 100644 src/emit.ts

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index ba09347..28f9420 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -45,7 +45,7 @@ const NON_ASCII_WS_FN =
 const nonAsciiWsConsume = (v: string, cont: boolean, indent: string): string =>
   `${indent}if (${v} > 127 && lxNonAsciiWs(${v})) { LX_WS.lastIndex = pos; const m = LX_WS.exec(source); if (m !== null) { if (m[0].includes('\\n')) pendingNl = true; pos += m[0].length;${cont ? ' continue;' : ''} } }`;
 
-export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
+export function emitSoaLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   // Out of scope: the markup / indentation / newline state machines.
   if (grammar.markup || grammar.indent || grammar.newline) return null;
   if (grammar.tokens.some(t => tokenBlockPatternSource(t) || t.blockOnly)) return null;
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 6368898..ebbc2f9 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -27,7 +27,8 @@
 import type { CstGrammar, RuleExpr, RuleDecl } from './types.ts';
 import { isKeywordLiteral, collectLiterals } from './grammar-utils.ts';
 import { analyzeGrammar, findEntryRule, type Sec } from './grammar-analysis.ts';
-import { emitLexer } from './emit-lexer.ts';
+import { emitSoaLexer } from './emit-lexer.ts';
+import type { Target } from './emit.ts';
 import { withAwaitYield } from './await-yield-fork.ts';
 
 // ── Static analysis ──
@@ -1092,7 +1093,28 @@ class Emitter {
 
 // ── Top-level emit ──
 
-export function emitParser(grammar: CstGrammar): string {
+// The `js` Target: the optimized SoA-int parser/lexer, wrapped behind the same two-method
+// Target contract as the portable ts/go/rust targets (see emit.ts). `emitJsLexer` derives the
+// standalone lexer; `emitJsParser` embeds whatever lexer source it is handed. Splitting the
+// lexer COMPUTATION from its EMBEDDING leaves the emitted bytes identical (both re-derive the
+// same deterministic symtab), so `emit-parser-verify` stays byte-for-byte.
+export const jsTarget: Target = {
+  name: 'javascript',
+  ext: 'js',
+  emitLexer: emitJsLexer,
+  emitParser: emitJsParser,
+};
+
+export function emitJsLexer(grammar: CstGrammar): string | null {
+  grammar = withAwaitYield(grammar);
+  const st = analyze(grammar).symtab;
+  return emitSoaLexer(grammar, {
+    typeKind: st.typeKind, kwLitKind: st.kwLitKind, puLitKind: st.puLitKind,
+    KIND_PUNCT: st.KIND_PUNCT, KIND_NAMED_FALLBACK: st.KIND_NAMED_FALLBACK,
+  });
+}
+
+export function emitJsParser(grammar: CstGrammar, lexSrc: string | null): string {
   // [Await]/[Yield] context: name-fork the body-reachable rule closure into $A/$Y/$AY
   // families (see await-yield-fork.ts). No-op for a grammar with no ctx markers. Done
   // HERE (not at grammar export) so the forks exist ONLY in the parser's rule identity
@@ -1127,11 +1149,8 @@ export function emitParser(grammar: CstGrammar): string {
   // The lexer: EMITTED (specialized, standalone — see emit-lexer.ts) when the grammar
   // is a plain token stream; the data-driven createLexer runtime otherwise
   // (markup/indent/newline state machines stay interpreter-only).
+  // `lexSrc` is handed in by the Target façade (emitParser reuses emitLexer) — see emit.ts.
   const st = a.symtab;
-  const lexSrc = emitLexer(grammar, {
-    typeKind: st.typeKind, kwLitKind: st.kwLitKind, puLitKind: st.puLitKind,
-    KIND_PUNCT: st.KIND_PUNCT, KIND_NAMED_FALLBACK: st.KIND_NAMED_FALLBACK,
-  });
   e.soa = lexSrc !== null;
   if (!lexSrc) {
     e.emit(`import { createLexer } from ${J(resolveLexerImport())};`);
diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index d631b12..e2ec5d8 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -1,6 +1,6 @@
 // ── emit-portable ──
 //
-// The target-agnostic emitter (issue #6). `emitPortableParser(grammar, target)` derives
+// The target-agnostic emitter (issue #6). `emitParser(grammar, target)` (see emit.ts) derives
 // a COMPLETE, self-contained parser in the target's language from the same CstGrammar the
 // TS engine uses. It is the agnosticism proof: ONE analysis → ONE intermediate form (IR)
 // → N language renderings, all producing the byte-identical CST the interpreter does.
@@ -120,17 +120,12 @@ export type ParserIR = {
   tpl: TplCfg | null;          // null unless the grammar has a template token
 };
 
-export interface Target {
-  name: string;
-  ext: string;                       // emitted file extension (no dot)
-  render(ir: ParserIR): string;      // the complete, compilable source
-}
-
-export function emitPortableParser(grammar: CstGrammar, target: Target): string {
-  // Apply the [Await]/[Yield] context fork exactly as createParser does, so `await`/`yield`
-  // are keywords inside async/generator bodies and identifiers outside — name-forked into
-  // $A/$Y/$AY rule families. Every other consumer (and the portable parser) sees plain rules.
-  return target.render(buildIR(withAwaitYield(grammar)));
+// The target-agnostic parse plan for a grammar. Applies the [Await]/[Yield] context fork
+// exactly as createParser does (so `await`/`yield` are keywords inside async/generator bodies
+// and identifiers outside — name-forked into $A/$Y/$AY rule families), then builds the IR each
+// portable Target (ts/go/rust) renders. The `Target` contract itself lives in emit.ts.
+export function portableIR(grammar: CstGrammar): ParserIR {
+  return buildIR(withAwaitYield(grammar));
 }
 
 // ── buildIR: grammar + analysis → the target-agnostic parse plan ──
diff --git a/src/emit.ts b/src/emit.ts
new file mode 100644
index 0000000..1513fb5
--- /dev/null
+++ b/src/emit.ts
@@ -0,0 +1,33 @@
+// The emit layer's public surface: exactly two APIs, both parameterized by a `Target`.
+//
+//   emitLexer(grammar, target)  → the lexer source for that target
+//   emitParser(grammar, target) → the parser source for that target, REUSING emitLexer
+//
+// A `Target` owns BOTH halves, so emitParser(grammar, target) reuses the SAME target's lexer —
+// jsTarget's parser embeds jsTarget's SoA-int lexer, goTarget's parser embeds goTarget's
+// Tok-list lexer. No cross-target lexer format is shared, so the optimized JS path keeps its
+// integer-bitmask token dispatch while the portable targets keep their clean byte scanner.
+//
+// Targets: `jsTarget` (the optimized SoA parser, emit-parser.ts) and the portable
+// `tsTarget`/`goTarget`/`rustTarget` (emit-portable.ts + target-*.ts).
+import type { CstGrammar } from './types.ts';
+
+export interface Target {
+  name: string;
+  ext: string;                                                  // emitted file extension (no dot)
+  emitLexer(grammar: CstGrammar): string | null;               // null ⇒ runtime-lexer fallback (jsTarget markup/indent grammars)
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string;   // the parser, embedding `lexerSrc`
+}
+
+export function emitLexer(grammar: CstGrammar, target: Target): string | null {
+  return target.emitLexer(grammar);
+}
+
+export function emitParser(grammar: CstGrammar, target: Target): string {
+  return target.emitParser(grammar, emitLexer(grammar, target));   // ← parser reuses lexer
+}
+
+export { jsTarget } from './emit-parser.ts';
+export { tsTarget } from './target-ts.ts';
+export { goTarget } from './target-go.ts';
+export { rustTarget } from './target-rust.ts';
diff --git a/src/target-go.ts b/src/target-go.ts
index a654a7b..8d61807 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -1,7 +1,7 @@
 // The Go Target for emit-portable. Renders the same language-agnostic ParserIR as tsTarget
 // into a self-contained Go program (Go stdlib only — the lexer is regex-free, so it compiles
 // with no module dependencies). Its CST JSON is checked byte-for-byte against the interpreter,
-// so `emitPortableParser(grammar, goTarget)` is a real, verified Go parser derived from the
+// so `emitParser(grammar, goTarget)` is a real, verified Go parser derived from the
 // same grammar definition.
 //
 // ARENA allocation (to minimise GC pressure, as tsgo does): nodes live in a flat `nodes []Node`,
@@ -9,8 +9,10 @@
 // stack. A node is an int32 index, never a heap pointer. Backtracking truncates the three
 // slices to saved lengths; the slices keep their capacity across parses (reset to len 0), so a
 // warmed parser allocates ~nothing per parse.
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
-import type { TokenPattern } from './types.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, TplCfg } from './emit-portable.ts';
+import { portableIR } from './emit-portable.ts';
+import type { Target } from './emit.ts';
+import type { TokenPattern, CstGrammar } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
@@ -290,7 +292,11 @@ ${r.nudSeqs.map((seq) => `\t{ save := pos; sb := len(scratch); nb := len(nodes);
 export const goTarget: Target = {
   name: 'go',
   ext: 'go',
-  render(ir: ParserIR): string {
+  emitLexer(grammar: CstGrammar): string {
+    return lexer(portableIR(grammar));
+  },
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string {
+    const ir = portableIR(grammar);
     const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
     const matchTemplate = ir.tpl ? `func matchTemplate() int32 {
 \tt := peek()
@@ -344,7 +350,7 @@ var nodes []Node
 var kids []int32
 var scratch []int32
 
-${lexer(ir)}
+${lexerSrc ?? ''}
 
 func peek() *Tok {
 \tif pos < len(toks) { return &toks[pos] }
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 5a43013..8d995f2 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -1,7 +1,7 @@
 // The Rust Target for emit-portable. Renders the same language-agnostic ParserIR as
 // tsTarget/goTarget into a self-contained Rust program (no external crates — the lexer is
 // regex-free, so it compiles with rustc alone, no Cargo/network). Its CST JSON is checked
-// byte-for-byte against the interpreter, so `emitPortableParser(grammar, rustTarget)` is a
+// byte-for-byte against the interpreter, so `emitParser(grammar, rustTarget)` is a
 // real, verified Rust parser derived from the same grammar definition.
 //
 // Rust ownership note: a CST node is OWNED (moved), unlike the TS/Go pointer trees. In the
@@ -11,8 +11,10 @@
 // returns it. Sub-sequence combinators (star/opt/sep) take non-capturing fn pointers
 // `fn(&mut Parser, &mut Vec<Cst>) -> bool`, threading the parser + kids as params (so nothing
 // is captured, sidestepping the borrow checker).
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
-import type { TokenPattern } from './types.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, TplCfg } from './emit-portable.ts';
+import { portableIR } from './emit-portable.ts';
+import type { Target } from './emit.ts';
+import type { TokenPattern, CstGrammar } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
@@ -312,7 +314,11 @@ ${r.nudSeqs.map((seq) => `        { let save = self.pos; let mut kids: Vec<Cst>
 export const rustTarget: Target = {
   name: 'rust',
   ext: 'rs',
-  render(ir: ParserIR): string {
+  emitLexer(grammar: CstGrammar): string {
+    return lexer(portableIR(grammar));
+  },
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string {
+    const ir = portableIR(grammar);
     const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
     const matchTemplate = ir.tpl ? `    fn match_template(&mut self) -> Option<Cst> {
         let t = self.peek()?;
@@ -350,7 +356,7 @@ impl Cst {
 // offset/end inferred from first/last child (children non-empty).
 fn node(rule: &'static str, kids: Vec<Cst>) -> Cst { let o = kids[0].offset; let e = kids[kids.len() - 1].end; Cst::node(rule, kids, o, e) }
 
-${lexer(ir)}
+${lexerSrc ?? ''}
 
 struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize, capped: bool, suppress_next: Vec<&'static str>, src: &'a str }
 impl<'a> Parser<'a> {
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 7984ba9..549b302 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -4,7 +4,10 @@
 // index LEDs), and a CST→JSON printer over stdin. It is the reference rendering — its CST
 // is checked byte-for-byte against the interpreter (createParser), so a divergence in the
 // portable logic surfaces here before Go/Rust are compiled.
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, TplCfg } from './emit-portable.ts';
+import { portableIR } from './emit-portable.ts';
+import type { Target } from './emit.ts';
+import type { CstGrammar } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
@@ -267,7 +270,11 @@ ${r.nudSeqs.map((seq) => `  { const save = pos; const kids: Cst[] = []; if (${se
 export const tsTarget: Target = {
   name: 'typescript',
   ext: 'ts',
-  render(ir: ParserIR): string {
+  emitLexer(grammar: CstGrammar): string {
+    return lexer(portableIR(grammar));
+  },
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string {
+    const ir = portableIR(grammar);
     const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
     const matchTemplate = ir.tpl ? `function matchTemplate(): Cst | null {
   const t = peek();
@@ -296,7 +303,7 @@ type Leaf = { tokenType: string; offset: number; end: number };
 type Node = { rule: string; children: Cst[]; offset: number; end: number };
 type Cst = Node | Leaf;
 
-${lexer(ir)}
+${lexerSrc ?? ''}
 
 let toks: Tok[] = [];
 let pos = 0;
diff --git a/test/cst-match-totality.ts b/test/cst-match-totality.ts
index 25c0d8b..2aab35f 100644
--- a/test/cst-match-totality.ts
+++ b/test/cst-match-totality.ts
@@ -13,7 +13,7 @@
 //   node test/cst-match-totality.ts
 import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { generateInputs } from './grammar-gen.ts';
 
 const GRAMMARS = ['typescript', 'javascript', 'typescriptreact', 'javascriptreact', 'yaml', 'html'];
@@ -52,7 +52,7 @@ for (const name of GRAMMARS) {
   const grammar = (await import(`../${name}.ts`)).default;
   const matchers = (await import(`../${name}.cst-match.ts`)).MATCHERS;
   const emPath = `/tmp/emitted-totality-${name}.mts`;
-  writeFileSync(emPath, emitParser(grammar));
+  writeFileSync(emPath, emitParser(grammar, jsTarget));
   const em = (await import(emPath + '?v=' + process.pid)) as Emitted;
   let parsed = 0;
   for (const input of generateInputs(grammar, { depth: 5, nestDepth: 5, cap: 7, fuzzRounds: 250, maxInputs: 1500, seed: 5 })) {
diff --git a/test/emit-lexer-verify.ts b/test/emit-lexer-verify.ts
index e0ab8a5..900de91 100644
--- a/test/emit-lexer-verify.ts
+++ b/test/emit-lexer-verify.ts
@@ -9,14 +9,14 @@
 //   node test/emit-lexer-verify.ts            # in-repo corpus (+ /tmp/ts-repo if present)
 import { readFileSync, writeFileSync } from 'node:fs';
 import { createLexer } from '../src/gen-lexer.ts';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { inRepoCorpus, externalTsFiles } from './emit-corpus.ts';
 
 const grammar = (await import('../typescript.ts')).default;
 
 // The reference: createLexer with the SAME intern config the emitted parser bakes.
 const EMITTED = '/tmp/emit-lexer-verify-parser.mts';
-writeFileSync(EMITTED, emitParser(grammar));
+writeFileSync(EMITTED, emitParser(grammar, jsTarget));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 const src = readFileSync(EMITTED, 'utf-8');
 if (src.includes('createLexer(')) {
diff --git a/test/emit-parser-bench.ts b/test/emit-parser-bench.ts
index 5f9a2a3..5af58a2 100644
--- a/test/emit-parser-bench.ts
+++ b/test/emit-parser-bench.ts
@@ -9,14 +9,14 @@
 //   node test/emit-parser-bench.ts            # the 4 bench files, N=20
 //   node test/emit-parser-bench.ts <N>        # custom timed-run count
 import { createParser } from '../src/gen-parser.ts';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { readFileSync, writeFileSync } from 'fs';
 
 const grammar = (await import('../typescript.ts')).default;
 const oracle = createParser(grammar);
 
 const EMITTED = '/tmp/emitted-parser.mts';
-writeFileSync(EMITTED, emitParser(grammar));
+writeFileSync(EMITTED, emitParser(grammar, jsTarget));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 
 const N = Number(process.argv[2]) || 20;
diff --git a/test/emit-parser-verify.ts b/test/emit-parser-verify.ts
index 63228c6..b3020bd 100644
--- a/test/emit-parser-verify.ts
+++ b/test/emit-parser-verify.ts
@@ -13,7 +13,7 @@
 //   node test/emit-parser-verify.ts <N>        # external sweep stride N (default ~400 files)
 import { objectify } from './emitted-obj.ts';
 import { createParser } from '../src/gen-parser.ts';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { inRepoCorpus, externalTsFiles } from './emit-corpus.ts';
 import { readFileSync, writeFileSync } from 'fs';
 
@@ -22,7 +22,7 @@ const oracle = createParser(grammar);
 
 // Emit, write to /tmp, import the standalone module.
 const EMITTED = '/tmp/emitted-parser.mts';
-writeFileSync(EMITTED, emitParser(grammar));
+writeFileSync(EMITTED, emitParser(grammar, jsTarget));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 
 type Outcome = { ok: true; cst: string } | { ok: false; err: string };
diff --git a/test/emit-reject-messages.ts b/test/emit-reject-messages.ts
index 9d549b0..28891e7 100644
--- a/test/emit-reject-messages.ts
+++ b/test/emit-reject-messages.ts
@@ -16,7 +16,7 @@
 //
 //   node test/emit-reject-messages.ts
 import { createParser } from '../src/gen-parser.ts';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { inRepoCorpus, externalTsFiles } from './emit-corpus.ts';
 import { readFileSync, writeFileSync } from 'fs';
 
@@ -24,7 +24,7 @@ const grammar = (await import('../typescript.ts')).default;
 const oracle = createParser(grammar);
 
 const EMITTED = '/tmp/emitted-parser-msg.mts';
-writeFileSync(EMITTED, emitParser(grammar));
+writeFileSync(EMITTED, emitParser(grammar, jsTarget));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 
 function errOf(parse: (s: string) => unknown, code: string): string | null {
diff --git a/test/emit-tsc-gate.ts b/test/emit-tsc-gate.ts
index e6df929..a923934 100644
--- a/test/emit-tsc-gate.ts
+++ b/test/emit-tsc-gate.ts
@@ -19,7 +19,7 @@
 // and it already paid off: the fallback editCore branch referenced cs/ceOld/
 // parenCachePos declared only in the soa branch (unreached at runtime, invisible
 // until this gate), now hoisted/gated correctly.
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { writeFileSync } from 'node:fs';
 import { execFileSync } from 'node:child_process';
 import type { CstGrammar } from '../src/types.ts';
@@ -51,7 +51,7 @@ for (const [name, path] of GRAMMARS) {
     continue;
   }
   const out = `/tmp/emit-tsc-gate-${name}.ts`;
-  writeFileSync(out, emitParser(grammar));
+  writeFileSync(out, emitParser(grammar, jsTarget));
   try {
     execFileSync('npx', ['tsc', ...TSC_FLAGS, out], { stdio: 'pipe' });
     console.log(`  ${name}: ✓ emitted parser type-checks (tsc --strict)`);
diff --git a/test/exhaustive-edits.ts b/test/exhaustive-edits.ts
index 72a8ca9..9aa404f 100644
--- a/test/exhaustive-edits.ts
+++ b/test/exhaustive-edits.ts
@@ -9,7 +9,7 @@
 //   node --max-old-space-size=4096 test/exhaustive-edits.ts
 import { writeFileSync } from 'node:fs';
 import { token, rule, defineGrammar, many, opt, sep, plus, oneOf, range, seq, star, noneOf } from '../src/api.ts';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { objectify } from './emitted-obj.ts';
 
 // A deliberately bracket-and-list-shaped grammar: parens force synthesis and
@@ -32,7 +32,7 @@ const g = defineGrammar({
 });
 
 const emPath = '/tmp/emitted-exhaustive.mts';
-writeFileSync(emPath, emitParser(g));
+writeFileSync(emPath, emitParser(g, jsTarget));
 type Cst = { root: number; errors: object[] };
 type Parser = { parse(s: string): Cst; edit(c: Cst, e: object[]): void; visit(c: Cst, fns: object): void; tree: import('./emitted-obj.ts').TreeView };
 const em = (await import(emPath + '?v=' + process.pid)) as { createParser(): Parser; __arenaStats(): { inPlaceShrink: number } };
diff --git a/test/head-to-head.ts b/test/head-to-head.ts
index 15f913b..1d84a5a 100644
--- a/test/head-to-head.ts
+++ b/test/head-to-head.ts
@@ -15,7 +15,7 @@
 // so it reads through a 16KB chunk callback (its documented large-input path).
 import { readFileSync } from 'node:fs';
 import { createRequire } from 'node:module';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { writeFileSync } from 'node:fs';
 import ts from 'typescript';
 
@@ -27,7 +27,7 @@ const TSLang = require(TS_BENCH + '/node_modules/tree-sitter-typescript').typesc
 
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-h2h.mts';
-writeFileSync(emPath, emitParser(grammar));
+writeFileSync(emPath, emitParser(grammar, jsTarget));
 const { createParser } = await import(emPath + '?v=' + process.pid);
 
 const unit = readFileSync(CORPUS, 'utf-8');
diff --git a/test/incremental-grammars.ts b/test/incremental-grammars.ts
index 404272b..9c4a780 100644
--- a/test/incremental-grammars.ts
+++ b/test/incremental-grammars.ts
@@ -13,7 +13,7 @@
 //
 //   node test/incremental-grammars.ts
 import { writeFileSync } from 'node:fs';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { generateInputs } from './grammar-gen.ts';
 import { objectify } from './emitted-obj.ts';
 
@@ -85,7 +85,7 @@ const failures: string[] = [];
 for (const name of GRAMMARS) {
   const grammar = (await import(`../${name}.ts`)).default;
   const emPath = `/tmp/emitted-incr-${name}.mts`;
-  writeFileSync(emPath, emitParser(grammar));
+  writeFileSync(emPath, emitParser(grammar, jsTarget));
   const em = (await import(emPath + '?v=' + process.pid)) as Em;
   const session = em.createParser();
   const fresh = em.createParser();
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index cd01c17..85814e1 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -8,11 +8,11 @@
 //   node test/incremental-verify.ts
 import { objectify } from './emitted-obj.ts';
 import { existsSync, readFileSync, writeFileSync } from 'node:fs';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-incremental.mts';
-writeFileSync(emPath, emitParser(grammar));
+writeFileSync(emPath, emitParser(grammar, jsTarget));
 type Edit = { start: number; end: number; text: string };
 type Cst = { root: number; errors: { offset: number; end: number; message: string }[] };
 type Parser = {
diff --git a/test/multi-doc.ts b/test/multi-doc.ts
index c3d844d..25af324 100644
--- a/test/multi-doc.ts
+++ b/test/multi-doc.ts
@@ -9,12 +9,12 @@
 //
 //   node test/multi-doc.ts
 import { writeFileSync } from 'node:fs';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { objectify } from './emitted-obj.ts';
 
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-multidoc.mts';
-writeFileSync(emPath, emitParser(grammar));
+writeFileSync(emPath, emitParser(grammar, jsTarget));
 type Edit = { start: number; end: number; text: string };
 type Cst = { root: number; errors: { offset: number; end: number; message: string }[] };
 type Parser = { parse(s: string): Cst; edit(cst: Cst, edits: Edit[]): void; visit(cst: Cst, fns: object): void; tree: import('./emitted-obj.ts').TreeView };
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 7dd6544..5732b64 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -1,4 +1,4 @@
-// Gate: the TARGET-AGNOSTIC emitter (issue #6) — `emitPortableParser(grammar, target)`
+// Gate: the TARGET-AGNOSTIC emitter (issue #6) — `emitParser(grammar, target)`
 // derives a parser in EACH target language that produces the byte-identical CST the
 // interpreter (createParser) does. The agnosticism proof by EXECUTION: every grammar is
 // rendered to TypeScript, Go, and Rust; the Go/Rust sources are COMPILED and RUN, and each
@@ -15,10 +15,7 @@
 import { execFileSync } from 'node:child_process';
 import { mkdirSync, rmSync, writeFileSync } from 'node:fs';
 import { createParser } from '../src/gen-parser.ts';
-import { emitPortableParser } from '../src/emit-portable.ts';
-import { tsTarget } from '../src/target-ts.ts';
-import { goTarget } from '../src/target-go.ts';
-import { rustTarget } from '../src/target-rust.ts';
+import { emitParser, tsTarget, goTarget, rustTarget } from '../src/emit.ts';
 import type { CstGrammar } from '../src/types.ts';
 
 type Case = { grammar: string; path: string; accept: string[]; reject: string[]; tsOnly?: boolean };
@@ -243,19 +240,19 @@ for (const c of CASES) {
   const runners: Array<{ label: string; run: (src: string) => Outcome }> = [];
 
   const tsFile = `${dir}/p.ts`;
-  writeFileSync(tsFile, emitPortableParser(grammar, tsTarget));
+  writeFileSync(tsFile, emitParser(grammar, tsTarget));
   runners.push({ label: 'typescript', run: (src) => runProc('node', [tsFile], src) });
 
   if (HAS_GO && !c.tsOnly) {
     const gdir = `${dir}/go`; mkdirSync(gdir, { recursive: true });
-    writeFileSync(`${gdir}/main.go`, emitPortableParser(grammar, goTarget));
+    writeFileSync(`${gdir}/main.go`, emitParser(grammar, goTarget));
     writeFileSync(`${gdir}/go.mod`, 'module p\n\ngo 1.21\n');
     execFileSync('go', ['build', '-o', `${gdir}/p`, '.'], { cwd: gdir, stdio: 'pipe' });
     runners.push({ label: 'go', run: (src) => runProc(`${gdir}/p`, [], src) });
   }
   if (HAS_RUST && !c.tsOnly) {
     const rfile = `${dir}/main.rs`;
-    writeFileSync(rfile, emitPortableParser(grammar, rustTarget));
+    writeFileSync(rfile, emitParser(grammar, rustTarget));
     execFileSync('rustc', ['-O', '-A', 'warnings', rfile, '-o', `${dir}/pr`], { stdio: 'pipe' });
     runners.push({ label: 'rust', run: (src) => runProc(`${dir}/pr`, [], src) });
   }
diff --git a/test/profile-vs-peers.mjs b/test/profile-vs-peers.mjs
index 421bc6a..e67a1f7 100644
--- a/test/profile-vs-peers.mjs
+++ b/test/profile-vs-peers.mjs
@@ -17,10 +17,10 @@ import { fileURLToPath } from 'node:url';
 const REPO = resolve(dirname(fileURLToPath(import.meta.url)), '..');
 const acorn = await import(REPO + '/node_modules/acorn/dist/acorn.mjs');
 const parse5 = await import(REPO + '/node_modules/parse5/dist/index.js');
-const { emitParser } = await import(REPO + '/src/emit-parser.ts');
+const { emitParser, jsTarget } = await import(REPO + '/src/emit.ts');
 
-writeFileSync('/tmp/emitted-peers-js.mjs', emitParser((await import(REPO + '/javascript.ts')).default));
-writeFileSync('/tmp/emitted-peers-html.mjs', emitParser((await import(REPO + '/html.ts')).default));
+writeFileSync('/tmp/emitted-peers-js.mjs', emitParser((await import(REPO + '/javascript.ts')).default, jsTarget));
+writeFileSync('/tmp/emitted-peers-html.mjs', emitParser((await import(REPO + '/html.ts')).default, jsTarget));
 const monoJs = await import('/tmp/emitted-peers-js.mjs?v=' + Date.now());
 const monoHtml = await import('/tmp/emitted-peers-html.mjs?v=' + Date.now());
 
diff --git a/test/profile-vs-tsc.mjs b/test/profile-vs-tsc.mjs
index b668fdd..0dc13c3 100644
--- a/test/profile-vs-tsc.mjs
+++ b/test/profile-vs-tsc.mjs
@@ -11,10 +11,10 @@ import { fileURLToPath } from 'node:url';
 
 const REPO = resolve(dirname(fileURLToPath(import.meta.url)), '..');
 const ts = (await import(REPO + '/node_modules/typescript/lib/typescript.js')).default;
-const { emitParser } = await import(REPO + '/src/emit-parser.ts');
+const { emitParser, jsTarget } = await import(REPO + '/src/emit.ts');
 const grammar = (await import(REPO + '/typescript.ts')).default;
 
-writeFileSync('/tmp/emitted-current.mjs', emitParser(grammar));
+writeFileSync('/tmp/emitted-current.mjs', emitParser(grammar, jsTarget));
 const emitted = await import('/tmp/emitted-current.mjs?v=' + Date.now());
 
 const paths = [
diff --git a/test/recovery-conformance.ts b/test/recovery-conformance.ts
index a2eda45..7c7a7f1 100644
--- a/test/recovery-conformance.ts
+++ b/test/recovery-conformance.ts
@@ -13,12 +13,12 @@
 import { writeFileSync, readFileSync } from 'node:fs';
 import { readdir } from 'fs/promises';
 import { join } from 'path';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import ts from 'typescript';
 
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-recovery-conf.mts';
-writeFileSync(emPath, emitParser(grammar));
+writeFileSync(emPath, emitParser(grammar, jsTarget));
 type Cst = { root: number; errors: { offset: number; end: number; message: string }[] };
 const em = (await import(emPath + '?v=' + process.pid)) as { createParser(): { parse(s: string): Cst } };
 const p = em.createParser();
diff --git a/test/recovery.ts b/test/recovery.ts
index 9215c46..193cae8 100644
--- a/test/recovery.ts
+++ b/test/recovery.ts
@@ -13,12 +13,12 @@
 //
 //   node test/recovery.ts
 import { existsSync, readFileSync, writeFileSync } from 'node:fs';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { objectify } from './emitted-obj.ts';
 
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-recovery.mts';
-writeFileSync(emPath, emitParser(grammar));
+writeFileSync(emPath, emitParser(grammar, jsTarget));
 type Edit = { start: number; end: number; text: string };
 type Diag = { offset: number; end: number; message: string; related?: { offset: number; end: number; message: string } };
 type Cst = { root: number; errors: Diag[] };

From 6e0df6c9f5a3f3b684030bf7c2f2810b40da0a89 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 07:14:55 +0800
Subject: [PATCH 25/27] test: move the portable-targets fixtures examples/ ->
 test/fixtures/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The thirteen grammars under examples/ are not user-facing examples — they are the
construct-isolation fixtures consumed solely by test/portable-targets.ts (each
isolates one emitter construct so a divergence pinpoints which one broke). They
belong next to their only consumer, beside test/vendor/, not in a directory whose
name promises a learning sample. No real examples were displaced; examples/ held
only fixtures and is now removed.

Mechanical: git mv to test/fixtures/, fixtures' `../src` imports -> `../../src`,
gate paths `../examples/X.ts` -> `./fixtures/X.ts`. Full suite 42/42.
---
 src/emit-portable.ts                      |  2 +-
 {examples => test/fixtures}/altjs.ts      |  2 +-
 {examples => test/fixtures}/arrowjs.ts    |  2 +-
 {examples => test/fixtures}/calc.ts       |  2 +-
 {examples => test/fixtures}/ledjs.ts      |  2 +-
 {examples => test/fixtures}/minijs.ts     |  2 +-
 {examples => test/fixtures}/noinjs.ts     |  2 +-
 {examples => test/fixtures}/nudjs.ts      |  2 +-
 {examples => test/fixtures}/postjs.ts     |  2 +-
 {examples => test/fixtures}/regexjs.ts    |  2 +-
 {examples => test/fixtures}/richtokens.ts |  2 +-
 {examples => test/fixtures}/seqjs.ts      |  2 +-
 {examples => test/fixtures}/sljs.ts       |  2 +-
 {examples => test/fixtures}/templatejs.ts |  2 +-
 test/portable-targets.ts                  | 26 +++++++++++------------
 15 files changed, 27 insertions(+), 27 deletions(-)
 rename {examples => test/fixtures}/altjs.ts (98%)
 rename {examples => test/fixtures}/arrowjs.ts (98%)
 rename {examples => test/fixtures}/calc.ts (98%)
 rename {examples => test/fixtures}/ledjs.ts (98%)
 rename {examples => test/fixtures}/minijs.ts (99%)
 rename {examples => test/fixtures}/noinjs.ts (98%)
 rename {examples => test/fixtures}/nudjs.ts (98%)
 rename {examples => test/fixtures}/postjs.ts (97%)
 rename {examples => test/fixtures}/regexjs.ts (99%)
 rename {examples => test/fixtures}/richtokens.ts (98%)
 rename {examples => test/fixtures}/seqjs.ts (98%)
 rename {examples => test/fixtures}/sljs.ts (98%)
 rename {examples => test/fixtures}/templatejs.ts (98%)

diff --git a/src/emit-portable.ts b/src/emit-portable.ts
index e2ec5d8..4bbf760 100644
--- a/src/emit-portable.ts
+++ b/src/emit-portable.ts
@@ -19,7 +19,7 @@
 // and a Pratt expression engine with operator precedence/associativity, prefix unary,
 // bracket NUDs (grouping, array), and mixfix LEDs (call / member / index) tried before
 // operators. buildIR THROWS on a construct outside this set rather than emit a wrong
-// parser. This is enough to derive a real JavaScript-subset parser (examples/minijs.ts).
+// parser. This is enough to derive a real JavaScript-subset parser (test/fixtures/minijs.ts).
 import type { CstGrammar, RuleExpr, TokenDecl, TokenPattern } from './types.ts';
 import { withAwaitYield } from './await-yield-fork.ts';
 import { analyzeGrammar, findEntryRule } from './grammar-analysis.ts';
diff --git a/examples/altjs.ts b/test/fixtures/altjs.ts
similarity index 98%
rename from examples/altjs.ts
rename to test/fixtures/altjs.ts
index d1f117d..fe409a7 100644
--- a/examples/altjs.ts
+++ b/test/fixtures/altjs.ts
@@ -5,7 +5,7 @@
 import {
   token, rule, defineGrammar, left, op,
   seq, oneOf, range, star, sep, opt, many, alt, noneOf,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const digit = range('0', '9');
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
diff --git a/examples/arrowjs.ts b/test/fixtures/arrowjs.ts
similarity index 98%
rename from examples/arrowjs.ts
rename to test/fixtures/arrowjs.ts
index 486cdaa..b4967c9 100644
--- a/examples/arrowjs.ts
+++ b/test/fixtures/arrowjs.ts
@@ -6,7 +6,7 @@
 import {
   token, rule, defineGrammar, left, right, op, capExpr, alt,
   seq, oneOf, range, star, sep, opt, many,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
 const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
diff --git a/examples/calc.ts b/test/fixtures/calc.ts
similarity index 98%
rename from examples/calc.ts
rename to test/fixtures/calc.ts
index 2bfcfff..5fb7078 100644
--- a/examples/calc.ts
+++ b/test/fixtures/calc.ts
@@ -15,7 +15,7 @@
 import {
   token, rule, defineGrammar, left, right, op, prefix,
   seq, oneOf, range, star, many,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const digit = range('0', '9');
 const identStart = oneOf(range('a', 'z'), range('A', 'Z'), '_');
diff --git a/examples/ledjs.ts b/test/fixtures/ledjs.ts
similarity index 98%
rename from examples/ledjs.ts
rename to test/fixtures/ledjs.ts
index f13831a..7148851 100644
--- a/examples/ledjs.ts
+++ b/test/fixtures/ledjs.ts
@@ -5,7 +5,7 @@
 import {
   token, rule, defineGrammar, left, right, op,
   seq, oneOf, range, star, many,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
 const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
diff --git a/examples/minijs.ts b/test/fixtures/minijs.ts
similarity index 99%
rename from examples/minijs.ts
rename to test/fixtures/minijs.ts
index 6de468a..25279e6 100644
--- a/examples/minijs.ts
+++ b/test/fixtures/minijs.ts
@@ -16,7 +16,7 @@
 import {
   token, rule, defineGrammar, left, right, op, prefix, alt,
   seq, oneOf, range, star, sep, opt, many, altPattern, noneOf, anyChar,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const digit = range('0', '9');
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
diff --git a/examples/noinjs.ts b/test/fixtures/noinjs.ts
similarity index 98%
rename from examples/noinjs.ts
rename to test/fixtures/noinjs.ts
index bc413ad..54d5395 100644
--- a/examples/noinjs.ts
+++ b/test/fixtures/noinjs.ts
@@ -5,7 +5,7 @@
 import {
   token, rule, defineGrammar, left, op, exclude,
   seq, oneOf, range, star, many,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
 const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
diff --git a/examples/nudjs.ts b/test/fixtures/nudjs.ts
similarity index 98%
rename from examples/nudjs.ts
rename to test/fixtures/nudjs.ts
index a443b7b..d9b54c6 100644
--- a/examples/nudjs.ts
+++ b/test/fixtures/nudjs.ts
@@ -6,7 +6,7 @@
 import {
   token, rule, defineGrammar, left, op,
   seq, oneOf, range, star, sep, opt, many, alt, not, noneOf,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
 const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
diff --git a/examples/postjs.ts b/test/fixtures/postjs.ts
similarity index 97%
rename from examples/postjs.ts
rename to test/fixtures/postjs.ts
index 239fdec..1ac9340 100644
--- a/examples/postjs.ts
+++ b/test/fixtures/postjs.ts
@@ -4,7 +4,7 @@
 import {
   token, rule, defineGrammar, left, right, op, prefix, postfix,
   seq, oneOf, range, star, many,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
 const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
diff --git a/examples/regexjs.ts b/test/fixtures/regexjs.ts
similarity index 99%
rename from examples/regexjs.ts
rename to test/fixtures/regexjs.ts
index b9ad82d..0f966f9 100644
--- a/examples/regexjs.ts
+++ b/test/fixtures/regexjs.ts
@@ -7,7 +7,7 @@
 import {
   token, rule, defineGrammar, left, right, op, prefix, alt,
   seq, oneOf, range, star, sep, opt, many, altPattern, noneOf, anyChar,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const digit = range('0', '9');
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
diff --git a/examples/richtokens.ts b/test/fixtures/richtokens.ts
similarity index 98%
rename from examples/richtokens.ts
rename to test/fixtures/richtokens.ts
index ed10aec..3f7bc2d 100644
--- a/examples/richtokens.ts
+++ b/test/fixtures/richtokens.ts
@@ -9,7 +9,7 @@
 import {
   token, rule, defineGrammar,
   seq, oneOf, range, star, plus, repeat, optPattern, altPattern, noneOf, anyChar, notFollowedBy, many,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const digit = range('0', '9');
 const hexDigit = oneOf(digit, range('a', 'f'), range('A', 'F'));
diff --git a/examples/seqjs.ts b/test/fixtures/seqjs.ts
similarity index 98%
rename from examples/seqjs.ts
rename to test/fixtures/seqjs.ts
index d0e40fe..b1facfd 100644
--- a/examples/seqjs.ts
+++ b/test/fixtures/seqjs.ts
@@ -4,7 +4,7 @@
 import {
   token, rule, defineGrammar, left, op,
   seq, oneOf, range, star, opt, many,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 // `many(',', $)` is the rule-level `(',' Expr)*` — a star whose body is the sequence
 // `, Expr`, exactly the shape javascript.ts uses for comma lists.
 
diff --git a/examples/sljs.ts b/test/fixtures/sljs.ts
similarity index 98%
rename from examples/sljs.ts
rename to test/fixtures/sljs.ts
index 68421a5..5c57d9e 100644
--- a/examples/sljs.ts
+++ b/test/fixtures/sljs.ts
@@ -5,7 +5,7 @@
 import {
   token, rule, defineGrammar, left, op,
   seq, oneOf, range, star, opt, many, altPattern, noneOf, sameLine,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
 const idCont = oneOf(range('a', 'z'), range('A', 'Z'), range('0', '9'), '_', '$');
diff --git a/examples/templatejs.ts b/test/fixtures/templatejs.ts
similarity index 98%
rename from examples/templatejs.ts
rename to test/fixtures/templatejs.ts
index 8cda83d..cf6f523 100644
--- a/examples/templatejs.ts
+++ b/test/fixtures/templatejs.ts
@@ -6,7 +6,7 @@
 import {
   token, rule, defineGrammar, left, right, op, prefix, alt,
   seq, oneOf, range, star, sep, opt, many, altPattern, noneOf, notFollowedBy,
-} from '../src/api.ts';
+} from '../../src/api.ts';
 
 const digit = range('0', '9');
 const idStart = oneOf(range('a', 'z'), range('A', 'Z'), '_', '$');
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 5732b64..732c011 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -21,7 +21,7 @@ import type { CstGrammar } from '../src/types.ts';
 type Case = { grammar: string; path: string; accept: string[]; reject: string[]; tsOnly?: boolean };
 const CASES: Case[] = [
   {
-    grammar: 'calc', path: '../examples/calc.ts',
+    grammar: 'calc', path: './fixtures/calc.ts',
     accept: [
       '1;', 'a;', '', '1 + 2 * 3;', '1 * 2 + 3;', '1 - 2 - 3;', 'a / b / c;', '1 + 2 + 3 + 4;',
       '-a;', '-(-a);', '- - a;', '-a * b;', '-a + b * c;', '-(a + b) * c;',
@@ -31,7 +31,7 @@ const CASES: Case[] = [
     reject: ['1 +;', '(1;', '1 2;', 'let = 1;', ') ;', '* a;', 'let x 1;'],
   },
   {
-    grammar: 'minijs', path: '../examples/minijs.ts',
+    grammar: 'minijs', path: './fixtures/minijs.ts',
     accept: [
       '1;', 'a;', '', 'x = 1 + 2 * 3;', '-a * b + 1;', '(1 + 2) * 3;',
       'foo(a, b);', 'a.b.c;', 'a[0][1];', 'f()()();', 'a.b(c).d[e];',
@@ -50,7 +50,7 @@ const CASES: Case[] = [
     // The general token-pattern matcher (stateless real-JS token tier): \u-escaped
     // identifiers, the decimal/hex number family with a boundary, both-quote strings —
     // compiled to a backtracking-free matcher in all three targets.
-    grammar: 'richtokens', path: '../examples/richtokens.ts',
+    grammar: 'richtokens', path: './fixtures/richtokens.ts',
     accept: [
       '123', '0xFF', '1_000_000', '3.14', 'foo', 'bar_$x9', '"hi"', "'single'",
       '"esc\\"q\\n"', '123 0xa foo "s" 3.14', '0xDEADbeef 42 _id $x cafe // line\n 7',
@@ -62,7 +62,7 @@ const CASES: Case[] = [
     // The STATEFUL regex-vs-division lexer: `/` is a regex in expression context, division
     // after a value. Exercises every branch of prevIsValue — after `=`/keyword/`(`-head
     // (regex) vs after value/`)`/`]`/member/call (division), plus regex escapes & classes.
-    grammar: 'regexjs', path: '../examples/regexjs.ts',
+    grammar: 'regexjs', path: './fixtures/regexjs.ts',
     accept: [
       'a / b;', 'var r = /abc/g;', 'return /re/;', 'if (x) /re/;', '(a + b) / c;',
       'a.b / c;', 'foo(x) / y;', '[1, 2] / 3;', 'var x = a / b / c;',
@@ -76,7 +76,7 @@ const CASES: Case[] = [
     // STATEFUL template literals: the `${…}` interpolation split (head/middle/tail) with a
     // brace-depth stack — adjacent/multiple holes, exprs in holes, nested templates, and a
     // nested `{…}` object inside a hole (which must NOT close the hole).
-    grammar: 'templatejs', path: '../examples/templatejs.ts',
+    grammar: 'templatejs', path: './fixtures/templatejs.ts',
     accept: [
       'var a = `hello`;', 'var b = `hi ${name}!`;', 'var c = `${x}${y}`;',
       'var d = `a${ x + 1 }b${ y * 2 }c`;', 'var e = `outer ${ `inner ${z}` } end`;',
@@ -89,7 +89,7 @@ const CASES: Case[] = [
   {
     // General (non-literal) inline alt: object keys are alt(Ident | Str | Number) — a
     // backtracking alternation of token refs inside a rule sequence.
-    grammar: 'altjs', path: '../examples/altjs.ts',
+    grammar: 'altjs', path: './fixtures/altjs.ts',
     accept: [
       '{a: 1};', '{"k": 2};', '{1: x};', '{a: 1, "b": 2, 3: c};', '{x: 1 + 2 * 3};',
       '({nested: {inner: 1}});', '{};', 'a + b;', '{k: (1 + 2)};',
@@ -99,7 +99,7 @@ const CASES: Case[] = [
   {
     // General Pratt NUD sequences: a reserved-word-guarded identifier (`not(kw)… Ident`,
     // a zero-width negative lookahead) and a quantifier-first class expression.
-    grammar: 'nudjs', path: '../examples/nudjs.ts',
+    grammar: 'nudjs', path: './fixtures/nudjs.ts',
     accept: [
       'x;', 'foo + bar;', 'class C {};', 'class {};', 'class C extends B {};',
       '@dec class C { m(){} };', 'new Foo;', 'new C();', 'a.b.c;',
@@ -111,7 +111,7 @@ const CASES: Case[] = [
     // Postfix-operator LED (`x++`/`x--`) + the access-tail closure: once a postfix binds, the
     // operand is an update expression, so a further postfix or an access tail (`.`/`[`/`(`)
     // can't attach (`a++--`, `a++.b` are ill-formed; `(a++).b` is fine).
-    grammar: 'postjs', path: '../examples/postjs.ts',
+    grammar: 'postjs', path: './fixtures/postjs.ts',
     accept: [
       'x++;', 'x--;', 'a + b++;', '++x;', 'x++ + y;', 'a.b++;', '(x)++;', '--a.b;',
       'x++ * 2;', '(a++).b;', 'x.y.z++;',
@@ -121,7 +121,7 @@ const CASES: Case[] = [
   {
     // A grouped sub-sequence `seq` step: comma lists as `star([',', $])` (e.g. `many(',', $)`),
     // the array/argument-list shape javascript.ts uses.
-    grammar: 'seqjs', path: '../examples/seqjs.ts',
+    grammar: 'seqjs', path: './fixtures/seqjs.ts',
     accept: [
       '[1, 2, 3];', '[];', '[1];', 'f(1, 2);', 'f();', '[a + b, c];',
       'f(g(1, 2), 3);', '(x);', 'f(a)(b, c);', '[[1,2],[3,4]];',
@@ -132,7 +132,7 @@ const CASES: Case[] = [
     // The `sameLine` zero-width assertion (no line terminator before the next token):
     // `return` takes a value only on the same line. Also verifies the lexer's newline-before
     // tracking across a block comment that spans a newline.
-    grammar: 'sljs', path: '../examples/sljs.ts',
+    grammar: 'sljs', path: './fixtures/sljs.ts',
     accept: [
       'return 1;', 'return;', 'return 1 + 2;', '1 + 2;', 'return /* c */ 1;',
       '(a);', 'return (1);',
@@ -143,7 +143,7 @@ const CASES: Case[] = [
     // capBelow (assignment-level) arrow functions: a NUD parsed only when minBp < the
     // connector's bp, admitting NO led once parsed; the `(x) => y` vs `(x)` ambiguity is
     // resolved by longest-match ordering (the arrow is tried first, falls back to grouping).
-    grammar: 'arrowjs', path: '../examples/arrowjs.ts',
+    grammar: 'arrowjs', path: './fixtures/arrowjs.ts',
     accept: [
       'x => x;', '(a, b) => a + b;', '() => {};', 'x = (() => 1);', 'f(() => 1, 2);',
       '(x);', 'a + b;', 'x => y => x;', '(() => 2);', '(a) => a;', 'x = y => y;', 'foo();',
@@ -154,7 +154,7 @@ const CASES: Case[] = [
   {
     // Precedence-gated mixfix LEDs: ternary `? :` (binds below the operators) and the
     // chain-rhs relational leds `in`/`instanceof` (`a in b in c` left-chains).
-    grammar: 'ledjs', path: '../examples/ledjs.ts',
+    grammar: 'ledjs', path: './fixtures/ledjs.ts',
     accept: [
       'a == b ? c : d;', 'a ? b : c ? d : e;', 'a + b ? c : d - e;', 'a in b;',
       'a in b in c;', 'x instanceof Y;', 'a < b in c;', '1 + 2 * 3 ? 4 : 5;',
@@ -165,7 +165,7 @@ const CASES: Case[] = [
   {
     // The no-`in` (suppress) context: a `for (binding in iterable)` head parses its binding
     // with the `in` led disabled, so `in` belongs to the for-head, not the binding.
-    grammar: 'noinjs', path: '../examples/noinjs.ts',
+    grammar: 'noinjs', path: './fixtures/noinjs.ts',
     accept: [
       'for (x in y) z;', 'x in y;', 'for (a.b in c) d;', 'a in b in c;',
       'for ((x) in y) z;', 'for (x in y) a in b;', 'for (x in a in b) z;',

From 84895d3cc30e75546160243f80028a21347d8f43 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 07:33:46 +0800
Subject: [PATCH 26/27] Address Copilot review: portable lexer newline parity +
 stale-API docs + .mts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All ten review comments, verified before fixing:

- Portable lexers (ts/go/rust) set newline-before for `\r`/LS/PS, but the
  interpreter (gen-lexer.ts) sets it only for `\n`. Confirmed by parse: `return\r1;`
  the oracle ACCEPTS (CR isn't newline-before) while the portable REJECTED. Fixed all
  three to set pendingNl only for `\n`; `\r`/LS/PS are plain whitespace. Added the
  `return\r1;` (accept) / `return\r\n1;` (reject) cases to the sljs gate as a guard.
  (go/rust are byte-based, so their `8232`/`8233` checks were already dead; the
  reachable bug was `\r`.)
- README's portable-emitter snippet still imported the removed `emitPortableParser`
  from src/emit-portable.ts + target-*.ts → rewritten to `emitParser` from src/emit.ts.
- calc/minijs fixture header comments referenced `emitPortableParser` → `emitParser`.
- profile-vs-tsc/peers write the now-typed emitted parser to `.mjs` and import it;
  node only strips types from `.ts`/`.mts`, so that would SyntaxError → switched the
  emitted output to `.mts` (matching the other emit harnesses).

emit-parser-verify byte-identical, portable-targets 16 grammars ×3 (incl. the new CR
cases), full suite 42/42.

Separately noted (not in scope here): the interpreter itself counts only `\n` as a
line terminator, not `\r`/LS/PS — a pre-existing JS-ASI conformance gap in the core
lexer, on near-extinct inputs.
---
 README.md                 | 10 ++++------
 src/target-go.ts          |  4 ++--
 src/target-rust.ts        |  4 ++--
 src/target-ts.ts          |  6 ++++--
 test/fixtures/calc.ts     |  2 +-
 test/fixtures/minijs.ts   |  2 +-
 test/portable-targets.ts  |  5 ++++-
 test/profile-vs-peers.mjs |  8 ++++----
 test/profile-vs-tsc.mjs   |  4 ++--
 9 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 3b2e3a9..243879e 100644
--- a/README.md
+++ b/README.md
@@ -340,15 +340,13 @@ const Regex = token(seq(
 
 ### The emitted parser need not be JS — Go, Rust, native
 
-The grammar also derives a **standalone parser in another language**. [`emitPortableParser(grammar, target)`](src/emit-portable.ts) runs one analysis into one language-agnostic IR, and each `Target` renders it — including its own regex-free lexer, so the output has no dependency on the JS runtime and compiles offline:
+The grammar also derives a **standalone parser in another language**. [`emitParser(grammar, target)`](src/emit.ts) runs one analysis into one language-agnostic IR, and each `Target` renders it — including its own regex-free lexer (`emitParser` reuses `emitLexer(grammar, target)`), so the output has no dependency on the JS runtime and compiles offline:
 
 ```ts
-import { emitPortableParser } from './src/emit-portable.ts';
-import { goTarget } from './src/target-go.ts';
-import { rustTarget } from './src/target-rust.ts';
+import { emitParser, goTarget, rustTarget } from './src/emit.ts';
 
-writeFileSync('parser.go', emitPortableParser(grammar, goTarget));   // `go build`, no deps
-writeFileSync('parser.rs', emitPortableParser(grammar, rustTarget)); // `rustc`, no crates
+writeFileSync('parser.go', emitParser(grammar, goTarget));   // `go build`, no deps
+writeFileSync('parser.rs', emitParser(grammar, rustTarget)); // `rustc`, no crates
 ```
 
 The proof is the full languages: the real [`javascript.ts`](javascript.ts) and [`typescript.ts`](typescript.ts) grammars — including the `[Await]/[Yield]` fork, left recursion, the regex/division and template state machines, arrow functions, and the TS type grammar — emit to **TypeScript, Go, and Rust**, and every CST is byte-identical to the reference interpreter. [`test/portable-targets.ts`](test/portable-targets.ts) compiles and runs all three for sixteen grammars (the two real languages plus focused fixtures) on every CI run. The Rust output reaches [oxc](https://github.com/oxc-project/oxc) throughput and the Go output beats [tsgo](https://github.com/microsoft/typescript-go) on the same corpus (an arena keeps both near zero-allocation). Byte-based Go/Rust use UTF-8 offsets — identical to the JS interpreter's for ASCII; non-ASCII offset units differ inherently.
diff --git a/src/target-go.ts b/src/target-go.ts
index 8d61807..d45ec2c 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -156,8 +156,8 @@ ${emitHooks}
 \t_ = pendingNl
 ${rxState}${tplState}${emitFn}${pushTokFn}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
 \t\tc := int(src[pos])
-\t\tif c == 10 || c == 13 || c == 8232 || c == 8233 { pendingNl = true; pos++; continue }
-\t\tif c == 32 || c == 9 || c == 11 || c == 12 || c == 160 || c == 5760 || (c >= 8192 && c <= 8202) || c == 8239 || c == 8287 || c == 12288 || c == 65279 { pos++; continue }
+\t\tif c == 10 { pendingNl = true; pos++; continue }   // only LF (10) is newline-before (matches the interpreter); CR is plain whitespace
+\t\tif c == 13 || c == 32 || c == 9 || c == 11 || c == 12 || c == 160 || c == 5760 || (c >= 8192 && c <= 8202) || c == 8239 || c == 8287 || c == 12288 || c == 65279 { pos++; continue }
 ${tplDispatch}${toks}
 ${puncts}
 \t\tpanic(fmt.Sprintf("lex error at %d", pos))
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 8d995f2..0466ce2 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -163,8 +163,8 @@ ${open}
     let mut pos = 0usize;
     while pos < n {
         let c = b[pos] as u32;
-        if c == 32 || c == 9 { pos += 1; continue; }
-        if c == 10 || c == 13 { ${nlVar} = true; pos += 1; continue; }
+        if c == 32 || c == 9 || c == 13 { pos += 1; continue; }   // CR is plain whitespace, NOT newline-before
+        if c == 10 { ${nlVar} = true; pos += 1; continue; }   // only LF (10) is newline-before (matches the interpreter)
 ${tplDispatch}${toks}
 ${puncts}
         panic!("lex error at {}", pos);
diff --git a/src/target-ts.ts b/src/target-ts.ts
index 549b302..f18c046 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -147,8 +147,10 @@ ${emitHooks}
   let pendingNl = false;
 ${defs.length ? '  _s = src;\n' : ''}${rxState}${tplState}${stateful ? emitFn : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end, nl: pendingNl }); pendingNl = false; };\n'}  while (pos < n) {
     const c = src.charCodeAt(pos);
-    if (c === 10 || c === 13 || c === 8232 || c === 8233) { pendingNl = true; pos++; continue; }
-    if (c === 32 || c === 9 || c === 11 || c === 12 || c === 160 || c === 5760 || (c >= 8192 && c <= 8202) || c === 8239 || c === 8287 || c === 12288 || c === 65279) { pos++; continue; }
+    // Only LF (char 10) sets newline-before, matching the interpreter (gen-lexer.ts: only wc === 10).
+    // CR/LS/PS are whitespace but NOT newline-before there, so a lone CR must not flip sameLine.
+    if (c === 10) { pendingNl = true; pos++; continue; }
+    if (c === 13 || c === 8232 || c === 8233 || c === 32 || c === 9 || c === 11 || c === 12 || c === 160 || c === 5760 || (c >= 8192 && c <= 8202) || c === 8239 || c === 8287 || c === 12288 || c === 65279) { pos++; continue; }
 ${tplDispatch}${toks}
 ${puncts}
     throw new Error('lex error at ' + pos + ': ' + JSON.stringify(src[pos]));
diff --git a/test/fixtures/calc.ts b/test/fixtures/calc.ts
index 5fb7078..1d04fd1 100644
--- a/test/fixtures/calc.ts
+++ b/test/fixtures/calc.ts
@@ -5,7 +5,7 @@
 // backtracking alternation, quantifiers (opt/many/sep), recursion (grouping), and —
 // the crux — a Pratt expression engine with operator PRECEDENCE and associativity
 // (`1 + 2 * 3` must group as `1 + (2 * 3)`), prefix unary, and a left-associative
-// call/postfix continuation. emitPortableParser derives a TS, Go, and Rust parser
+// call/postfix continuation. emitParser(grammar, target) derives a TS, Go, and Rust parser
 // from THIS one definition; the cross-language gate proves all three produce the
 // byte-identical CST the interpreter (createParser) does.
 //
diff --git a/test/fixtures/minijs.ts b/test/fixtures/minijs.ts
index 25279e6..21f9bfb 100644
--- a/test/fixtures/minijs.ts
+++ b/test/fixtures/minijs.ts
@@ -4,7 +4,7 @@
 // chains, arrays, and the common statement forms), so the emitted Rust parser can
 // be benchmarked against oxc on the same bytes.
 //
-// Derived from ONE definition by emitPortableParser into TypeScript, Go, and Rust;
+// Derived from ONE definition by emitParser(grammar, target) into TypeScript, Go, and Rust;
 // the cross-language gate proves all three produce the byte-identical CST that the
 // interpreter (createParser) does. The portable lexer is regex-free (char scanner
 // driven by token-pattern.ts's structural recognizers), so the Go/Rust output
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index 732c011..ca3072a 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -136,8 +136,11 @@ const CASES: Case[] = [
     accept: [
       'return 1;', 'return;', 'return 1 + 2;', '1 + 2;', 'return /* c */ 1;',
       '(a);', 'return (1);',
+      // Only `\n` is newline-before — a lone `\r` is plain whitespace, so `return` still takes its
+      // same-line operand (matches the interpreter; was a portable-lexer bug). CRLF still has the `\n`.
+      'return\r1;',
     ],
-    reject: ['return\n1;', 'return\nx;', 'return /*\n*/ 1;', 'return // c\n 1;'],
+    reject: ['return\n1;', 'return\nx;', 'return /*\n*/ 1;', 'return // c\n 1;', 'return\r\n1;'],
   },
   {
     // capBelow (assignment-level) arrow functions: a NUD parsed only when minBp < the
diff --git a/test/profile-vs-peers.mjs b/test/profile-vs-peers.mjs
index e67a1f7..801ecde 100644
--- a/test/profile-vs-peers.mjs
+++ b/test/profile-vs-peers.mjs
@@ -19,10 +19,10 @@ const acorn = await import(REPO + '/node_modules/acorn/dist/acorn.mjs');
 const parse5 = await import(REPO + '/node_modules/parse5/dist/index.js');
 const { emitParser, jsTarget } = await import(REPO + '/src/emit.ts');
 
-writeFileSync('/tmp/emitted-peers-js.mjs', emitParser((await import(REPO + '/javascript.ts')).default, jsTarget));
-writeFileSync('/tmp/emitted-peers-html.mjs', emitParser((await import(REPO + '/html.ts')).default, jsTarget));
-const monoJs = await import('/tmp/emitted-peers-js.mjs?v=' + Date.now());
-const monoHtml = await import('/tmp/emitted-peers-html.mjs?v=' + Date.now());
+writeFileSync('/tmp/emitted-peers-js.mts', emitParser((await import(REPO + '/javascript.ts')).default, jsTarget));
+writeFileSync('/tmp/emitted-peers-html.mts', emitParser((await import(REPO + '/html.ts')).default, jsTarget));
+const monoJs = await import('/tmp/emitted-peers-js.mts?v=' + Date.now());
+const monoHtml = await import('/tmp/emitted-peers-html.mts?v=' + Date.now());
 
 function time(fn, code, name, n) {
   const s = process.hrtime.bigint();
diff --git a/test/profile-vs-tsc.mjs b/test/profile-vs-tsc.mjs
index 0dc13c3..61d7382 100644
--- a/test/profile-vs-tsc.mjs
+++ b/test/profile-vs-tsc.mjs
@@ -14,8 +14,8 @@ const ts = (await import(REPO + '/node_modules/typescript/lib/typescript.js')).d
 const { emitParser, jsTarget } = await import(REPO + '/src/emit.ts');
 const grammar = (await import(REPO + '/typescript.ts')).default;
 
-writeFileSync('/tmp/emitted-current.mjs', emitParser(grammar, jsTarget));
-const emitted = await import('/tmp/emitted-current.mjs?v=' + Date.now());
+writeFileSync('/tmp/emitted-current.mts', emitParser(grammar, jsTarget));
+const emitted = await import('/tmp/emitted-current.mts?v=' + Date.now());
 
 const paths = [
   '/tmp/ts-repo/tests/cases/conformance/parser/ecmascript5/RealWorld/parserharness.ts',

From 8cca2bc0f35cf5bac12e91a2d64be2886295cd6d Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Mon, 22 Jun 2026 07:54:20 +0800
Subject: [PATCH 27/27] Fix JS line-terminator conformance across all four
 lexers (CR / LS / PS)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lexers counted only LF as a line terminator, but ECMAScript also defines CR
(U+000D), LS (U+2028), and PS (U+2029) — the set that drives ASI and the
"no LineTerminator here" restrictions. So `return\r1` was parsed `return 1`
where a conforming JS parser applies ASI (bare `return`, then `1`).

Fixed consistently in all four lexer implementations so they stay in lockstep:
- gen-lexer.ts (interpreter, the oracle): LF/CR in the ASCII path, LS/PS via the
  \s-run regex, and the comment-span check.
- emit-lexer.ts (emitted SoA/JS lexer): the same, in its codegen.
- target-ts.ts (portable, UTF-16): LF/CR/LS/PS.
- target-go.ts / target-rust.ts (portable, byte-based): LF/CR (LS/PS are multi-byte
  and fall under the documented non-ASCII offset boundary).

CRLF is unchanged (the LF already set newline-before), so the existing corpus is
unaffected — the change only reaches lone-CR and LS/PS inputs. This supersedes the
earlier direction (which had made the portable lexers match the LF-only interpreter);
now the interpreter is conforming and all four agree on the full set.

sljs gate extended: `return\r1;` / `return\r\n1;` / `return /*\r*/ 1;` reject,
`return\t1;` accepts (tab is whitespace, not a terminator) — checked across ts/go/rust.
emit-parser-verify byte-identical, portable-targets 16 grammars ×3, full suite 42/42.
---
 src/emit-lexer.ts        |  6 +++---
 src/gen-lexer.ts         | 10 ++++++----
 src/target-go.ts         |  6 +++---
 src/target-rust.ts       |  6 +++---
 src/target-ts.ts         |  9 ++++-----
 test/portable-targets.ts | 10 +++++-----
 6 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index 28f9420..c9f2921 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -43,7 +43,7 @@ const NON_ASCII_WS_FN =
 // The non-ASCII whitespace fallback, emitted at the two sites that need it (after an ASCII run,
 // and as the lead char). `cont` appends the `continue` the lead-char site needs.
 const nonAsciiWsConsume = (v: string, cont: boolean, indent: string): string =>
-  `${indent}if (${v} > 127 && lxNonAsciiWs(${v})) { LX_WS.lastIndex = pos; const m = LX_WS.exec(source); if (m !== null) { if (m[0].includes('\\n')) pendingNl = true; pos += m[0].length;${cont ? ' continue;' : ''} } }`;
+  `${indent}if (${v} > 127 && lxNonAsciiWs(${v})) { LX_WS.lastIndex = pos; const m = LX_WS.exec(source); if (m !== null) { if (/[\\n\\r\\u2028\\u2029]/.test(m[0])) pendingNl = true; pos += m[0].length;${cont ? ' continue;' : ''} } }`;
 
 export function emitSoaLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   // Out of scope: the markup / indentation / newline state machines.
@@ -390,7 +390,7 @@ export function emitSoaLexer(grammar: CstGrammar, st: LexerSymtab): string | nul
   emit(`    if (cc === 32 || (cc >= 9 && cc <= 13)) {`);
   emit(`      let wc = cc;`);
   emit(`      do {`);
-  emit(`        if (wc === 10) pendingNl = true;`);
+  emit(`        if (wc === 10 || wc === 13) pendingNl = true;`);   // JS line terminators LF/CR (LS/PS via the \\s regex below)
   emit(`        pos++;`);
   emit(`        wc = source.charCodeAt(pos);`);
   emit(`      } while (wc === 32 || (wc >= 9 && wc <= 13));`);
@@ -476,7 +476,7 @@ export function emitSoaLexer(grammar: CstGrammar, st: LexerSymtab): string | nul
       emit(`${ind}    }`);
     }
     if (m.skip) {
-      emit(`${ind}    if (m[0].includes('\\n')) pendingNl = true;`);
+      emit(`${ind}    if (/[\\n\\r\\u2028\\u2029]/.test(m[0])) pendingNl = true;`);
       emit(`${ind}    pos += m[0].length;`);
     } else {
       emit(`${ind}    const _e = pos + m[0].length;`);
diff --git a/src/gen-lexer.ts b/src/gen-lexer.ts
index 7fc06ea..a4ed10c 100644
--- a/src/gen-lexer.ts
+++ b/src/gen-lexer.ts
@@ -842,21 +842,23 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
         let wc = source.charCodeAt(pos);
         if (wc === 32 || (wc >= 9 && wc <= 13)) {
           do {
-            if (wc === 10) pendingNl = true;
+            // JS line terminators: LF, CR, LS, PS (the ECMAScript set, driving ASI / "no
+            // LineTerminator here"). LF/CR are ASCII (here); LS/PS arrive via the \s regex below.
+            if (wc === 10 || wc === 13) pendingNl = true;
             pos++;
             wc = source.charCodeAt(pos);
           } while (wc === 32 || (wc >= 9 && wc <= 13));
           if (wc > 127) {   // a Unicode space may continue the run — absorb it like the old regex did
             wsReY.lastIndex = pos;
             const wsMatch = wsReY.exec(source);
-            if (wsMatch) { if (wsMatch[0].includes('\n')) pendingNl = true; pos += wsMatch[0].length; }
+            if (wsMatch) { if (/[\n\r\u2028\u2029]/.test(wsMatch[0])) pendingNl = true; pos += wsMatch[0].length; }
           }
           continue;
         }
         if (wc > 127) {
           wsReY.lastIndex = pos;
           const wsMatch = wsReY.exec(source);
-          if (wsMatch) { if (wsMatch[0].includes('\n')) pendingNl = true; pos += wsMatch[0].length; continue; }
+          if (wsMatch) { if (/[\n\r\u2028\u2029]/.test(wsMatch[0])) pendingNl = true; pos += wsMatch[0].length; continue; }
         }
       }
 
@@ -1178,7 +1180,7 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
           if (!tm.skip) {
             push(mkNamed(tm.name, m[0], pos, tm.k));
           } else {
-            if (m[0].includes('\n')) pendingNl = true;   // a skipped comment spanning a newline still terminates the previous line
+            if (/[\n\r\u2028\u2029]/.test(m[0])) pendingNl = true;   // a skipped comment spanning a line terminator still terminates the previous line
             // An inline comment (indentation grammars) ENDS a plain scalar — flag the next token so a
             // multi-line fold won't reabsorb a post-comment line (yaml-test-suite 8XDJ / BF9H).
             if (indent?.comment && m[0].startsWith(indent.comment)) pendingComment = true;
diff --git a/src/target-go.ts b/src/target-go.ts
index d45ec2c..5d1e9b9 100644
--- a/src/target-go.ts
+++ b/src/target-go.ts
@@ -50,7 +50,7 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): st
   const name = (t as { name: string }).name;
   const stateful = rxTok !== undefined || tplTok !== undefined;
   if (tplTok !== undefined && name === tplTok) return '';   // template token scanned by the state machine
-  const push = (endE: string) => (t.skip ? `if strings.Contains(src[pos:${endE}], "\\n") { pendingNl = true }; ` : stateful ? `emit(${J(name)}, src[pos:${endE}], pos, ${endE}); ` : `pushTok(${J(name)}, src[pos:${endE}], pos, ${endE}); `);
+  const push = (endE: string) => (t.skip ? `if strings.ContainsAny(src[pos:${endE}], "\\n\\r") { pendingNl = true }; ` : stateful ? `emit(${J(name)}, src[pos:${endE}], pos, ${endE}); ` : `pushTok(${J(name)}, src[pos:${endE}], pos, ${endE}); `);
   const gate = rxTok !== undefined && name === rxTok ? '!prevIsValue() && ' : '';
   if (t.kind === 'run') return `\t\tif ${gate}${rangeCond('c', t.first)} {
 \t\t\te := pos + 1
@@ -156,8 +156,8 @@ ${emitHooks}
 \t_ = pendingNl
 ${rxState}${tplState}${emitFn}${pushTokFn}${defs.length ? '\t_s = src\n' : ''}\tfor pos < n {
 \t\tc := int(src[pos])
-\t\tif c == 10 { pendingNl = true; pos++; continue }   // only LF (10) is newline-before (matches the interpreter); CR is plain whitespace
-\t\tif c == 13 || c == 32 || c == 9 || c == 11 || c == 12 || c == 160 || c == 5760 || (c >= 8192 && c <= 8202) || c == 8239 || c == 8287 || c == 12288 || c == 65279 { pos++; continue }
+\t\tif c == 10 || c == 13 { pendingNl = true; pos++; continue }   // JS line terminators LF/CR (matches the interpreter; LS/PS are multi-byte: non-ASCII boundary)
+\t\tif c == 32 || c == 9 || c == 11 || c == 12 || c == 160 || c == 5760 || (c >= 8192 && c <= 8202) || c == 8239 || c == 8287 || c == 12288 || c == 65279 { pos++; continue }
 ${tplDispatch}${toks}
 ${puncts}
 \t\tpanic(fmt.Sprintf("lex error at %d", pos))
diff --git a/src/target-rust.ts b/src/target-rust.ts
index 0466ce2..52051a0 100644
--- a/src/target-rust.ts
+++ b/src/target-rust.ts
@@ -54,7 +54,7 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): st
   const stateful = rxTok !== undefined || tplTok !== undefined;
   if (tplTok !== undefined && name === tplTok) return '';   // template token scanned by the state machine
   const nlVar = stateful ? 'st.pending_nl' : 'pending_nl';
-  const push = (endE: string) => (t.skip ? `if src[pos..${endE}].contains('\\n') { ${nlVar} = true; } ` : stateful ? `st.emit(${J(name)}, &src[pos..${endE}], pos, ${endE}); ` : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE}, nl: pending_nl }); pending_nl = false; `);
+  const push = (endE: string) => (t.skip ? `if src[pos..${endE}].bytes().any(|c| c == 10 || c == 13) { ${nlVar} = true; } ` : stateful ? `st.emit(${J(name)}, &src[pos..${endE}], pos, ${endE}); ` : `toks.push(Tok { kind: ${J(name)}, text: &src[pos..${endE}], off: pos, end: ${endE}, nl: pending_nl }); pending_nl = false; `);
   const gate = rxTok !== undefined && name === rxTok ? '!st.prev_is_value() && ' : '';
   if (t.kind === 'run') return `        if ${gate}${rangeCond('c', t.first)} {
             let mut e = pos + 1;
@@ -163,8 +163,8 @@ ${open}
     let mut pos = 0usize;
     while pos < n {
         let c = b[pos] as u32;
-        if c == 32 || c == 9 || c == 13 { pos += 1; continue; }   // CR is plain whitespace, NOT newline-before
-        if c == 10 { ${nlVar} = true; pos += 1; continue; }   // only LF (10) is newline-before (matches the interpreter)
+        if c == 32 || c == 9 { pos += 1; continue; }
+        if c == 10 || c == 13 { ${nlVar} = true; pos += 1; continue; }   // JS line terminators LF/CR (matches the interpreter; LS/PS multi-byte: non-ASCII boundary)
 ${tplDispatch}${toks}
 ${puncts}
         panic!("lex error at {}", pos);
diff --git a/src/target-ts.ts b/src/target-ts.ts
index f18c046..dc45015 100644
--- a/src/target-ts.ts
+++ b/src/target-ts.ts
@@ -50,7 +50,7 @@ function scanTok(t: LexTok, defs: string[], rxTok?: string, tplTok?: string): st
   if (tplTok !== undefined && name === tplTok) return '';   // template token is scanned by the state machine
   // `emit(...)` threads the lexer state in stateful mode; a plain push otherwise. A skipped
   // token (comment) still records a newline it spans, so `sameLine` sees it.
-  const push = (endExpr: string) => (t.skip ? `if (src.slice(pos, ${endExpr}).indexOf('\\n') >= 0) pendingNl = true; ` : `${stateful ? 'emit' : 'push'}(${J(name)}, src.slice(pos, ${endExpr}), pos, ${endExpr}); `);
+  const push = (endExpr: string) => (t.skip ? `if (/[\\n\\r\\u2028\\u2029]/.test(src.slice(pos, ${endExpr}))) pendingNl = true; ` : `${stateful ? 'emit' : 'push'}(${J(name)}, src.slice(pos, ${endExpr}), pos, ${endExpr}); `);
   const gate = rxTok !== undefined && name === rxTok ? '!prevIsValue() && ' : '';
   if (t.kind === 'run') return `    if (${gate}${rangeCond('c', t.first)}) {
       let e = pos + 1;
@@ -147,10 +147,9 @@ ${emitHooks}
   let pendingNl = false;
 ${defs.length ? '  _s = src;\n' : ''}${rxState}${tplState}${stateful ? emitFn : '  const push = (kind: string, text: string, off: number, end: number) => { toks.push({ kind, text, off, end, nl: pendingNl }); pendingNl = false; };\n'}  while (pos < n) {
     const c = src.charCodeAt(pos);
-    // Only LF (char 10) sets newline-before, matching the interpreter (gen-lexer.ts: only wc === 10).
-    // CR/LS/PS are whitespace but NOT newline-before there, so a lone CR must not flip sameLine.
-    if (c === 10) { pendingNl = true; pos++; continue; }
-    if (c === 13 || c === 8232 || c === 8233 || c === 32 || c === 9 || c === 11 || c === 12 || c === 160 || c === 5760 || (c >= 8192 && c <= 8202) || c === 8239 || c === 8287 || c === 12288 || c === 65279) { pos++; continue; }
+    // JS line terminators LF/CR/LS/PS set newline-before, matching the interpreter (gen-lexer.ts).
+    if (c === 10 || c === 13 || c === 8232 || c === 8233) { pendingNl = true; pos++; continue; }
+    if (c === 32 || c === 9 || c === 11 || c === 12 || c === 160 || c === 5760 || (c >= 8192 && c <= 8202) || c === 8239 || c === 8287 || c === 12288 || c === 65279) { pos++; continue; }
 ${tplDispatch}${toks}
 ${puncts}
     throw new Error('lex error at ' + pos + ': ' + JSON.stringify(src[pos]));
diff --git a/test/portable-targets.ts b/test/portable-targets.ts
index ca3072a..58de024 100644
--- a/test/portable-targets.ts
+++ b/test/portable-targets.ts
@@ -135,12 +135,12 @@ const CASES: Case[] = [
     grammar: 'sljs', path: './fixtures/sljs.ts',
     accept: [
       'return 1;', 'return;', 'return 1 + 2;', '1 + 2;', 'return /* c */ 1;',
-      '(a);', 'return (1);',
-      // Only `\n` is newline-before — a lone `\r` is plain whitespace, so `return` still takes its
-      // same-line operand (matches the interpreter; was a portable-lexer bug). CRLF still has the `\n`.
-      'return\r1;',
+      '(a);', 'return (1);', 'return\t1;',
     ],
-    reject: ['return\n1;', 'return\nx;', 'return /*\n*/ 1;', 'return // c\n 1;', 'return\r\n1;'],
+    // `\r`, LS, PS are JS line terminators just like `\n` (ASI / "no LineTerminator here"), so a
+    // `return` followed by any of them takes no operand — across all four lexers (interpreter,
+    // emitted JS, portable ts/go/rust). A `\t` (tab) is whitespace but NOT a terminator → accepted above.
+    reject: ['return\n1;', 'return\nx;', 'return /*\n*/ 1;', 'return // c\n 1;', 'return\r1;', 'return\r\n1;', 'return /*\r*/ 1;'],
   },
   {
     // capBelow (assignment-level) arrow functions: a NUD parsed only when minBp < the