From 2fc1864dbc805dad45723548d933dadc2b6f4550 Mon Sep 17 00:00:00 2001 From: Dhruva Reddy Date: Fri, 1 May 2026 12:38:50 -0700 Subject: [PATCH] refactor: deterministic state-file key ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## ELI5 **Problem.** Every push rewrites `.vapi-state..json`. JavaScript's `JSON.stringify` keeps whatever order keys happened to land in — and state sections get rebuilt from multiple sources (push, pull, bootstrap) with unpredictable insertion order. Result: about half of every state diff is just lines moving up and down without any actual change. Reviewers stopped reading state diffs because they were mostly noise, which defeats the point of versioning the file. **What this fix does.** Adds a `sortedKeysReplacer` that runs during `JSON.stringify` and emits object keys alphabetically at every nesting level. Arrays stay in their original order (squad member ordering, tool destination priority, etc. are semantic). State writes go through this replacer. **Outcome you'll notice.** The first push after this lands produces a **big one-time diff** of pure reordering across every customer. That's the cost of landing the fix — please don't read the first state diff post-merge, it's churn. Every diff after that shows only real changes: new UUIDs, removed entries, hashes changing. Reviewing state files becomes useful again. --- JS's JSON.stringify honors insertion order. State sections get rebuilt from multiple sources (push, pull, bootstrap) with unpredictable insertion order, so ~half of every state-file diff is pure reorderings that hide the real changes. - src/state-serialize.ts (NEW): sortedKeysReplacer (recursive alphabetical key sort, arrays untouched) + canonicalize (also drops null/undefined leaves; reused by Stack F/G). Kept config-free so tests can import without triggering config.ts's CLI parser. - src/state.ts: saveState now passes sortedKeysReplacer to JSON.stringify. Atomic-write pattern preserved. - tests/state-key-order.test.ts: pin byte-identical serialization across insertion orders, recursion, array preservation, primitive handling, idempotence. Closes improvements.md #17. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- improvements.md | 2 +- src/state-serialize.ts | 48 ++++++++++++++++++ src/state.ts | 16 +++++- tests/state-key-order.test.ts | 96 +++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 2 deletions(-) create mode 100644 src/state-serialize.ts create mode 100644 tests/state-key-order.test.ts diff --git a/improvements.md b/improvements.md index ba9c30a..3267944 100644 --- a/improvements.md +++ b/improvements.md @@ -68,7 +68,7 @@ you which stack PR closes the row.** | 14 | Multi-file push undocumented | Discoverability | None | RESOLVED 2026-04-30 (Stack A) | | 15 | Scoped push rewrites entire state file | Pre-existing drift sweeps into focused commits | #4 | Open (Stack J planned) | | 16 | No CLI runner for simulation suites | Engine pushes them, can't run them | None | Open (Stack E planned) | -| 17 | State file key-order churn produces noisy diffs | Reorderings hide real changes | None | Open (Stack B planned) | +| 17 | State file key-order churn produces noisy diffs | Reorderings hide real changes | None | RESOLVED 2026-04-30 (Stack B) | | 18 | Structured-output `name` capped at 40 chars (no warning) | Push fails partway after partial application | None | Open (Stack D planned) | | 19 | No `maxTokens` floor warning for tool-using assistants | `maxTokens: 1` bricks the assistant silently | None | Open (Stack D planned) | | 20 | Prompt vocabulary leaks into TTS | `Reason.` becomes verbal contaminant | None | Open (Stack D heuristic planned) | diff --git a/src/state-serialize.ts b/src/state-serialize.ts new file mode 100644 index 0000000..67975d4 --- /dev/null +++ b/src/state-serialize.ts @@ -0,0 +1,48 @@ +// Pure serialization helpers for the state file (and snapshot files). +// +// Kept config-free so tests can import without triggering the CLI argument +// parser in `config.ts` (which `process.exit(1)`s when no env is supplied). + +// JSON.stringify replacer that emits object keys in alphabetical order at +// every nesting level. Without this, the state file diff includes pure +// reorderings every time a resource map gets rebuilt from multiple sources +// (push, pull, bootstrap) — about half the diff lines are insertion-order +// churn rather than semantic change. Reviewers stop reading state diffs +// closely as a result, which defeats the point of versioning the file. +// +// Arrays are returned as-is so existing array order (e.g. squad members, +// tool destinations) is preserved. +export function sortedKeysReplacer(_key: string, value: unknown): unknown { + if (value === null || typeof value !== "object" || Array.isArray(value)) { + return value; + } + const sorted: Record = {}; + for (const k of Object.keys(value as Record).sort()) { + sorted[k] = (value as Record)[k]; + } + return sorted; +} + +// Canonicalize a value: sort object keys at every level, drop null/undefined +// leaves recursively, leave array order intact. Produces a stable shape +// regardless of insertion order or transient nullish leaves the API may +// emit. Used by Stack F (content hashes) and Stack G (drift detection) — +// kept here so the helpers stay co-located. +export function canonicalize(value: unknown): unknown { + if (value === null || value === undefined) return undefined; + if (Array.isArray(value)) { + const out: unknown[] = []; + for (const item of value) { + const c = canonicalize(item); + if (c !== undefined) out.push(c); + } + return out; + } + if (typeof value !== "object") return value; + const sorted: Record = {}; + for (const k of Object.keys(value as Record).sort()) { + const c = canonicalize((value as Record)[k]); + if (c !== undefined) sorted[k] = c; + } + return sorted; +} diff --git a/src/state.ts b/src/state.ts index 152bd54..f822e92 100644 --- a/src/state.ts +++ b/src/state.ts @@ -1,8 +1,14 @@ import { existsSync, readFileSync } from "fs"; import { rename, writeFile } from "fs/promises"; import { STATE_FILE_PATH, VAPI_ENV } from "./config.ts"; +import { sortedKeysReplacer } from "./state-serialize.ts"; import type { StateFile } from "./types.ts"; +// Re-export the pure helper so callers can pull it from `state.ts` (same +// import line as loadState/saveState) without forcing the config-laden +// module on test code that just wants the serializer. +export { sortedKeysReplacer } from "./state-serialize.ts"; + // ───────────────────────────────────────────────────────────────────────────── // State Management // ───────────────────────────────────────────────────────────────────────────── @@ -57,8 +63,16 @@ export async function saveState(state: StateFile): Promise { // A crash or SIGINT mid-write leaves the original state intact rather than // truncating it. A truncated state file would silently wipe all UUID // mappings on the next load. + // sortedKeysReplacer enforces deterministic key ordering across every + // nested object so two semantically-equal state objects (with different + // insertion orders from push/pull/bootstrap merges) always serialize + // byte-identically. Without this, ~half of the state-file diff is pure + // reordering, which trains reviewers to skim past it. const tmpPath = `${STATE_FILE_PATH}.tmp`; - await writeFile(tmpPath, JSON.stringify(state, null, 2) + "\n"); + await writeFile( + tmpPath, + JSON.stringify(state, sortedKeysReplacer, 2) + "\n", + ); await rename(tmpPath, STATE_FILE_PATH); console.log(`💾 Saved state file: ${STATE_FILE_PATH}`); } diff --git a/tests/state-key-order.test.ts b/tests/state-key-order.test.ts new file mode 100644 index 0000000..d791fd0 --- /dev/null +++ b/tests/state-key-order.test.ts @@ -0,0 +1,96 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { sortedKeysReplacer } from "../src/state-serialize.ts"; + +// Stack B regression test — pin deterministic key ordering on state file +// serialization. Two semantically equal state objects with different +// insertion orders MUST serialize byte-identically. Without this, the state +// file accumulates pure-reordering diffs that hide real changes. + +test("sortedKeysReplacer emits top-level keys alphabetically", () => { + const insertedABC = { c: 3, a: 1, b: 2 }; + const insertedCBA = { a: 1, b: 2, c: 3 }; + + const serializedABC = JSON.stringify(insertedABC, sortedKeysReplacer, 2); + const serializedCBA = JSON.stringify(insertedCBA, sortedKeysReplacer, 2); + + assert.equal(serializedABC, serializedCBA); + assert.equal( + serializedABC, + `{ + "a": 1, + "b": 2, + "c": 3 +}`, + ); +}); + +test("sortedKeysReplacer recursively sorts nested objects", () => { + const a = { + assistants: { z: "uuid-z", a: "uuid-a" }, + tools: { y: "uuid-y", b: "uuid-b" }, + }; + const b = { + tools: { b: "uuid-b", y: "uuid-y" }, + assistants: { a: "uuid-a", z: "uuid-z" }, + }; + + assert.equal( + JSON.stringify(a, sortedKeysReplacer, 2), + JSON.stringify(b, sortedKeysReplacer, 2), + ); +}); + +test("sortedKeysReplacer leaves array order intact", () => { + // Array order is semantic for resource lists like `assistant_ids` — + // sorting them would corrupt squad member ordering, tool destination + // priority, etc. The replacer MUST NOT reorder arrays. + const obj = { tags: ["zebra", "apple", "mango"] }; + const result = JSON.parse(JSON.stringify(obj, sortedKeysReplacer)); + assert.deepEqual(result.tags, ["zebra", "apple", "mango"]); +}); + +test("sortedKeysReplacer handles deeply nested mixed structures", () => { + const insertion1 = { + z: { y: { x: 1, w: 2 }, v: [{ b: 1, a: 2 }, { d: 1, c: 2 }] }, + a: 0, + }; + const insertion2 = { + a: 0, + z: { v: [{ b: 1, a: 2 }, { d: 1, c: 2 }], y: { w: 2, x: 1 } }, + }; + + assert.equal( + JSON.stringify(insertion1, sortedKeysReplacer, 2), + JSON.stringify(insertion2, sortedKeysReplacer, 2), + ); +}); + +test("sortedKeysReplacer preserves null and primitive values", () => { + const obj = { + voicemailMessage: null, + name: "test", + count: 42, + enabled: true, + nothing: undefined, // JSON.stringify drops undefined naturally + }; + const serialized = JSON.stringify(obj, sortedKeysReplacer, 2); + const parsed = JSON.parse(serialized); + assert.equal(parsed.voicemailMessage, null); + assert.equal(parsed.name, "test"); + assert.equal(parsed.count, 42); + assert.equal(parsed.enabled, true); + assert.equal("nothing" in parsed, false); +}); + +test("sortedKeysReplacer is stable: serializing twice yields identical output", () => { + const state = { + credentials: { c: "1", a: "2" }, + assistants: { z: "3", b: "4" }, + tools: { y: "5", x: "6" }, + }; + + const first = JSON.stringify(state, sortedKeysReplacer, 2); + const second = JSON.stringify(JSON.parse(first), sortedKeysReplacer, 2); + assert.equal(first, second); +});