From f5f7677430cca5827561fd51208014414e171fb2 Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Sat, 2 May 2026 09:56:17 -0400 Subject: [PATCH 1/6] Added offline download feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit | # | Step | Repo / files (primary) | Effort | Depends on | |---|---|---|---|---| | 0 | Verify pinning chain | (op-run; done) | done | — | | 1.2 | Add `bucket_lookup_h` field + SDK header + master populate-if-missing | `fula-api/crates/fula-core/src/metadata.rs`, `fula-cli/src/handlers/object.rs`, `fula-client/src/encryption.rs:3243`, new `fula-crypto` HKDF helper | ~100 LOC, 2-3 days | Step 0 | | 2.1 | Master-down detection (health gate) | `fula-client/src/encryption.rs` GET, `fula-cli/src/client.rs:319-371` | ~150 LOC, 2-3 days | independent of 1.2 | | 2.2 | Local block cache (redb LRU) | new `fula-client/src/block_cache.rs` | ~200 LOC, 3-4 days | — | | 2.3 | Multi-gateway race + dynamic priority + CID verification | new `fula-client/src/gateway_fetch.rs` | ~300 LOC, 4-5 days | 2.2 | | 2.4 | Wire warm-device offline GET | `fula-client/src/encryption.rs` GET, glue 2.1+2.2+2.3 | ~150 LOC, 2-3 days | 2.1, 2.2, 2.3 | --- Cargo.lock | 13 + Cargo.toml | 6 + crates/fula-cli/src/handlers/mod.rs | 1 + crates/fula-cli/src/handlers/object.rs | 69 +- .../src/handlers/users_index_publisher.rs | 1450 +++++++++++++++++ crates/fula-client/Cargo.toml | 8 + crates/fula-client/src/block_cache.rs | 628 +++++++ crates/fula-client/src/client.rs | 62 +- crates/fula-client/src/config.rs | 13 + crates/fula-client/src/encryption.rs | 41 +- crates/fula-client/src/error.rs | 8 + crates/fula-client/src/gateway_fetch.rs | 1306 +++++++++++++++ crates/fula-client/src/health_gate.rs | 240 +++ crates/fula-client/src/lib.rs | 5 + crates/fula-core/src/bucket.rs | 361 ++++ crates/fula-core/src/metadata.rs | 135 +- crates/fula-flutter/src/api/error.rs | 8 + 17 files changed, 4344 insertions(+), 10 deletions(-) create mode 100644 
crates/fula-cli/src/handlers/users_index_publisher.rs create mode 100644 crates/fula-client/src/block_cache.rs create mode 100644 crates/fula-client/src/gateway_fetch.rs create mode 100644 crates/fula-client/src/health_gate.rs diff --git a/Cargo.lock b/Cargo.lock index ca50b8f..2cabddd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1799,6 +1799,7 @@ dependencies = [ "blake3", "bytes", "chrono", + "cid 0.11.1", "dashmap 6.1.0", "dirs", "fs2", @@ -1806,10 +1807,13 @@ dependencies = [ "futures", "hex", "mime_guess", + "parking_lot", "quick-xml", + "redb", "reqwest", "serde", "serde_json", + "sha2", "tempfile", "thiserror 2.0.17", "tokio", @@ -4209,6 +4213,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redb" +version = "2.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eca1e9d98d5a7e9002d0013e18d5a9b000aee942eb134883a82f06ebffb6c01" +dependencies = [ + "libc", +] + [[package]] name = "redox_syscall" version = "0.5.18" diff --git a/Cargo.toml b/Cargo.toml index e12ca5c..e835bbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -169,6 +169,12 @@ lru = "0.12" semver = "1.0" bitvec = "1.0" +# Embedded persistent KV (block cache, Phase 2.2 of master-independent reads). +# Pinned to 2.6.x to avoid silent file-format drift in routine cargo update. +# A 2.x bump is a deliberate decision (verify file-format compatibility before +# upgrading; cache files in production may need migration handling). 
+redb = "~2.6" + # Testing criterion = "0.5" proptest = "1.5" diff --git a/crates/fula-cli/src/handlers/mod.rs b/crates/fula-cli/src/handlers/mod.rs index 585052e..91ea72d 100644 --- a/crates/fula-cli/src/handlers/mod.rs +++ b/crates/fula-cli/src/handlers/mod.rs @@ -8,6 +8,7 @@ pub mod multipart; pub mod object; pub mod service; pub mod tagging; +pub mod users_index_publisher; pub use admin::*; pub use batch::*; diff --git a/crates/fula-cli/src/handlers/object.rs b/crates/fula-cli/src/handlers/object.rs index 19b1381..e33594f 100644 --- a/crates/fula-cli/src/handlers/object.rs +++ b/crates/fula-cli/src/handlers/object.rs @@ -130,9 +130,16 @@ pub async fn put_object( metadata = metadata.with_content_type(ct); } - // Extract user metadata (x-amz-meta-*) + // Extract user metadata (x-amz-meta-*). + // Internal Fula control headers (consumed by the handler, not stored as + // object metadata) are filtered out — they would otherwise pollute every + // object's persisted metadata. + const FULA_CONTROL_HEADERS: &[&str] = &["fula-bucket-lookup-h"]; for (name, value) in headers.iter() { if let Some(key) = name.as_str().strip_prefix("x-amz-meta-") { + if FULA_CONTROL_HEADERS.contains(&key) { + continue; + } if let Ok(v) = value.to_str() { metadata = metadata.with_user_metadata(key, v); } @@ -145,7 +152,7 @@ pub async fn put_object( tracing::error!(error = %e, key = %key, "Failed to put object"); e })?; - + tracing::debug!("Flushing bucket"); let bucket_root_cid = bucket.flush().await .map_err(|e| { @@ -153,6 +160,64 @@ pub async fn put_object( e })?; + // Phase 1.2 of master-independent reads: if the SDK included + // `x-amz-meta-fula-bucket-lookup-h` (only set on the Phase 2 manifest + // root PUT in `save_sharded_hamt_forest`), populate the bucket-level + // `bucket_lookup_h` field if currently None. Idempotent — never + // overwrites. Gated by env so we can stage the rollout: SDK always + // sends the header (cheap); master only consumes it when ready. 
+ // + // Failures are non-fatal — bad/missing headers must not break uploads. + // Placement: AFTER bucket.flush() (so the flush has already replaced + // the DashMap entry) and BEFORE persist_registry_with_token (so the + // updated field gets serialized into the registry CBOR on this same + // request, no extra IPFS write). + let buckets_index_enabled = std::env::var("FULA_BUCKET_LOOKUP_H_ENABLED") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false); + if buckets_index_enabled { + if let Some(hex_str) = headers + .get("x-amz-meta-fula-bucket-lookup-h") + .and_then(|v| v.to_str().ok()) + { + match hex::decode(hex_str) { + Ok(bytes) if bytes.len() == 16 => { + let mut lookup_h = [0u8; 16]; + lookup_h.copy_from_slice(&bytes); + match state.bucket_manager.populate_lookup_h_if_missing( + &session.hashed_user_id, + &bucket_name, + lookup_h, + ) { + Ok(true) => tracing::debug!( + bucket = %bucket_name, + "Populated bucket_lookup_h (Phase 1.2)" + ), + Ok(false) => { /* already set; idempotent skip */ } + // BucketNotFound on a successful PUT to a real bucket + // is an internal-consistency violation — promote to + // error level so operators notice the signal. + Err(e) => tracing::error!( + error = %e, + bucket = %bucket_name, + user = %session.hashed_user_id, + "populate_lookup_h_if_missing failed on a bucket that just accepted a PUT" + ), + } + } + Ok(other) => tracing::warn!( + actual_len = other.len(), + "x-amz-meta-fula-bucket-lookup-h: expected 16-byte hex (32 chars), got {} bytes", + other.len() + ), + Err(e) => tracing::warn!( + error = %e, + "Failed to hex-decode x-amz-meta-fula-bucket-lookup-h" + ), + } + } + } + // Persist the bucket registry so the new root CID survives restarts. // This MUST succeed — otherwise the new tree root is lost on restart. // Use the user's JWT for pinning service authentication. 
diff --git a/crates/fula-cli/src/handlers/users_index_publisher.rs b/crates/fula-cli/src/handlers/users_index_publisher.rs new file mode 100644 index 0000000..448cd56 --- /dev/null +++ b/crates/fula-cli/src/handlers/users_index_publisher.rs @@ -0,0 +1,1450 @@ +//! Phase 3.2 master-side users-index publisher. +//! +//! Builds a global users-index CBOR mapping every active user's +//! `userKey` (= `hashed_user_id`) to that user's per-user +//! `bucketsIndex` CID, pins it via the existing pinning chain +//! (cluster), and publishes the new CID via IPNS for SDK clients to +//! resolve during master-down cold-starts. +//! +//! This module owns three responsibilities: +//! +//! 1. **State persistence** (this file, A1) — a tiny 3-line text file +//! that survives master restarts: `(latest_global_cid, sequence, +//! updated_at_unix)`. Crash safety mirrored from +//! `BucketManager::persist_registry_internal` (atomic write + +//! `.bak` backup). Sequence is monotonic; it only increments. +//! +//! 2. **Tick logic** (A2 — coming next) — snapshot +//! `BucketManager.buckets`, build per-user bucketsIndex CBORs +//! only for users whose state changed since the last tick (diff +//! cache), build the global users-index CBOR, pin both via cluster. +//! +//! 3. **IPNS publish + internal endpoints** (A3 — after A2) — call +//! kubo `/api/v0/name/publish`; expose `GET /_internal/users-index-state` +//! for the daily chain cron in `mainnet-reward-server`. +//! +//! Background-task lifecycle mirrors `handlers::locks::start_sweeper`: +//! one `tokio::spawn` from `server::run_server` after `AppState` is +//! wrapped in `Arc`. The task lives for the process lifetime. 
+ +#![allow(dead_code)] // A3 will consume `internal_token` + +use anyhow::Result as AnyResult; +use cid::Cid; +use fula_blockstore::{BlockStore, PinStore}; +use fula_core::{metadata::BucketMetadata, BucketManager}; +use parking_lot::{Mutex, RwLock}; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, HashMap}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +/// State that persists across master restarts. Single source of truth +/// for "what did we last successfully publish?". Written **after** a +/// successful pin + IPNS publish. Read on startup. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct PersistedState { + /// CID of the most recently pinned global users-index CBOR. + /// `None` = nothing has been published yet (fresh master). + pub global_cid: Option, + /// Monotonic sequence number embedded in the most recent global + /// users-index CBOR's payload. Always increments. SDK clients + /// reject responses with a regression as a replay defense. + pub sequence: u64, + /// Wall-clock seconds-since-epoch when the most recent publish + /// committed. Used for diagnostics and for the + /// `/_internal/users-index-state` HTTP response. + pub updated_at_unix: u64, +} + +impl Default for PersistedState { + fn default() -> Self { + Self { + global_cid: None, + sequence: 0, + updated_at_unix: 0, + } + } +} + +impl PersistedState { + /// Load state from `path`. Returns `Ok(default)` if the file + /// doesn't exist (fresh master). Returns an error on any other + /// I/O failure or parse problem — the caller surfaces this so + /// the operator can fix it (e.g., truncated file from a + /// half-completed write). 
+ /// + /// Format: 3 lines separated by `\n`: + /// line 1 = CID string (or empty for `None`) + /// line 2 = sequence (u64 decimal) + /// line 3 = updated_at_unix (u64 decimal); optional — older + /// two-line files parse to `updated_at_unix = 0` + pub fn load(path: &Path) -> Result { + let raw = match std::fs::read_to_string(path) { + Ok(s) => s, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + return Ok(Self::default()); + } + Err(e) => return Err(PersistError::Io(e)), + }; + Self::parse(&raw) + } + + fn parse(raw: &str) -> Result { + let mut lines = raw.lines(); + let cid_line = lines.next().unwrap_or("").trim(); + let seq_line = lines.next().unwrap_or("").trim(); + let ts_line = lines.next().unwrap_or("").trim(); + + let global_cid = if cid_line.is_empty() { + None + } else { + Some(cid_line.parse::().map_err(|e| { + PersistError::Parse(format!("invalid CID '{}': {}", cid_line, e)) + })?) + }; + + let sequence: u64 = if seq_line.is_empty() { + 0 + } else { + seq_line.parse().map_err(|e| { + PersistError::Parse(format!("invalid sequence '{}': {}", seq_line, e)) + })? + }; + + let updated_at_unix: u64 = if ts_line.is_empty() { + 0 + } else { + ts_line.parse().map_err(|e| { + PersistError::Parse(format!("invalid updated_at '{}': {}", ts_line, e)) + })? + }; + + Ok(Self { + global_cid, + sequence, + updated_at_unix, + }) + } + + fn serialize(&self) -> String { + format!( + "{}\n{}\n{}\n", + self.global_cid.map_or(String::new(), |c| c.to_string()), + self.sequence, + self.updated_at_unix + ) + } + + /// Atomically write to `path`. If `path` already exists, copy it + /// to `path.bak` first (mirrors `BucketManager::persist_registry_internal`'s + /// backup pattern). Tolerates missing parent directory by creating + /// it; tolerates missing existing file by skipping the backup. 
+ pub fn save(&self, path: &Path) -> Result<(), PersistError> { + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent).map_err(PersistError::Io)?; + } + } + + // Backup the previous state file before overwriting. This + // mirrors the fula-bucket-registry persistence pattern; if a + // crash interrupts the write, the operator can recover from + // the .bak. + if path.exists() { + let backup_path = with_bak_suffix(path); + // Best-effort backup; failure to back up should not block + // the main write (we'd rather lose the .bak than the + // primary). Surfaces only as a tracing log. + if let Err(e) = std::fs::copy(path, &backup_path) { + tracing::warn!( + error = %e, + backup_path = %backup_path.display(), + "users-index state-file backup failed; continuing with primary write" + ); + } + } + + // Atomic rename: write to a tmp sibling then rename onto the + // target. On most filesystems this is atomic; on Windows it + // requires the destination to be removable, which our + // backup-first step makes safe. + let tmp_path = path.with_extension("tmp"); + std::fs::write(&tmp_path, self.serialize()).map_err(PersistError::Io)?; + std::fs::rename(&tmp_path, path).map_err(PersistError::Io)?; + Ok(()) + } + + /// Compose the next state from a successful publish: + /// increment sequence, set new CID, refresh timestamp. 
+ pub fn next(&self, new_cid: Cid) -> Self { + Self { + global_cid: Some(new_cid), + sequence: self.sequence.saturating_add(1), + updated_at_unix: now_unix(), + } + } +} + +fn with_bak_suffix(path: &Path) -> PathBuf { + let mut s = path.as_os_str().to_owned(); + s.push(".bak"); + PathBuf::from(s) +} + +fn now_unix() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0) +} + +#[derive(Debug, thiserror::Error)] +pub enum PersistError { + #[error("io error: {0}")] + Io(#[from] std::io::Error), + + #[error("parse error: {0}")] + Parse(String), +} + +// ============================================================ +// CBOR data structures (Phase 3.2.a) +// ============================================================ + +/// Per-user `bucketsIndex` CBOR. Pinned per user; one CBOR per user +/// per snapshot if their state changed. Map keys are either: +/// - 32-hex BLAKE3-derived `bucketLookupH` (Phase 1.2 blinded form) +/// - plaintext bucket name (Phase 1.2 lazy-migration legacy form) +/// `legacy=true` distinguishes the latter so SDK cold-start can +/// dispatch correctly. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct UserBucketsIndex { + pub v: u32, + /// `BTreeMap` for **deterministic** key ordering — same input + /// must produce byte-identical CBOR (and thus the same CID) + /// across master restarts and across hosts. dag-cbor sorts map + /// keys but using BTreeMap upstream is belt-and-suspenders. + pub buckets: BTreeMap, + pub updated_at_unix: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct BucketEntry { + /// CID of the user's per-bucket forest manifest (Prolly Tree + /// root from `BucketMetadata.root_cid`). Stored as string so + /// the CBOR doesn't grow IPLD-link semantics that would change + /// the recursive-pin walk. 
+ pub manifest: String, + /// `true` ⇔ map key is plaintext `bucket_name` (Phase 1.2 hadn't + /// run for this bucket yet — i.e., user hasn't uploaded with a + /// Phase-1.2-aware client since the field was introduced). SDK + /// lookup falls through from blinded-key to legacy-name on miss. + pub legacy: bool, +} + +/// Global users-index CBOR. Master pins one per snapshot; the CID +/// is published via IPNS (every flush) and to the chain anchor +/// (every 12h). +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct GlobalUsersIndex { + pub v: u32, + /// Monotonic publisher sequence. Replay defense: SDK persists + /// `highest_seen_sequence`; rejects payloads with regression. + pub sequence: u64, + pub updated_at_unix: u64, + /// `userKey_hex` (32 hex chars = 16-byte hashed_user_id) → + /// per-user bucketsIndex CID (string). BTreeMap for determinism. + pub users: BTreeMap, +} + +// ============================================================ +// Per-user diff cache +// ============================================================ + +/// One row of the publisher's diff cache. The publisher uses +/// `content_hash` to detect "this user's bucket set changed since +/// the last tick" without re-pinning a brand-new CBOR every time. +/// +/// `content_hash` is BLAKE3 over a deterministic encoding of the +/// user's complete bucket set — see [`compute_user_content_hash`]. +/// Changing any bucket's name, root_cid, or bucket_lookup_h +/// triggers a rebuild on the next tick. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct PerUserDiffEntry { + pub content_hash: [u8; 32], + pub buckets_index_cid: Cid, +} + +/// Build a per-user `bucketsIndex` CBOR from that user's full +/// bucket list. Pure — no I/O. The caller pins the resulting CBOR +/// via `BlockStore::put_ipld` + `PinStore::pin_with_token`. 
+pub fn build_user_buckets_index( + buckets: &[BucketMetadata], + now_unix: u64, +) -> UserBucketsIndex { + let mut entries: BTreeMap = BTreeMap::new(); + for b in buckets { + let (key, legacy) = match b.bucket_lookup_h { + Some(h) => (hex::encode(h), false), + None => (b.name.clone(), true), + }; + entries.insert( + key, + BucketEntry { + manifest: b.root_cid.to_string(), + legacy, + }, + ); + } + UserBucketsIndex { + v: 2, + buckets: entries, + updated_at_unix: now_unix, + } +} + +/// Build the global users-index CBOR from a per-user CID map. +/// `entries` is `userKey_hex (32 hex) → bucketsIndexCid`. +pub fn build_global_users_index( + entries: &BTreeMap, + sequence: u64, + now_unix: u64, +) -> GlobalUsersIndex { + let users: BTreeMap = entries + .iter() + .map(|(uk, cid)| (uk.clone(), cid.to_string())) + .collect(); + GlobalUsersIndex { + v: 1, + sequence, + updated_at_unix: now_unix, + users, + } +} + +/// Compute a deterministic content hash over a user's full bucket +/// set. Used for diff-cache lookups: if this hash matches the +/// cached value, skip rebuilding+re-pinning the per-user CBOR. +/// +/// Encoding: each bucket contributes the byte-concatenation of +/// `name_bytes || 0x00 || root_cid_bytes || 0x00 || lookup_h_bytes_or_marker`. +/// Buckets are sorted by `name` first (BLAKE3 is itself +/// order-sensitive). Domain separator at the start defends against +/// cross-namespace collisions. 
+pub(crate) fn compute_user_content_hash(buckets: &[BucketMetadata]) -> [u8; 32] { + let mut sorted: Vec<&BucketMetadata> = buckets.iter().collect(); + sorted.sort_by(|a, b| a.name.cmp(&b.name)); + + let mut hasher = blake3::Hasher::new(); + hasher.update(b"fula:users-index-publisher:user-content-hash:v1"); + for b in &sorted { + hasher.update(b.name.as_bytes()); + hasher.update(&[0u8]); + hasher.update(&b.root_cid.to_bytes()); + hasher.update(&[0u8]); + match b.bucket_lookup_h { + Some(h) => { + hasher.update(b"H"); + hasher.update(&h); + } + None => { + hasher.update(b"N"); + } + } + hasher.update(&[0u8]); + } + let h = hasher.finalize(); + let mut out = [0u8; 32]; + out.copy_from_slice(h.as_bytes()); + out +} + +// ============================================================ +// Publisher configuration +// ============================================================ + +#[derive(Clone, Debug)] +pub struct PublisherConfig { + /// How often the publisher tick fires when there are changes. + /// Default 5 min — matches the user-facing latency expectation + /// for cross-device-fresh-data when using the IPNS path. + pub flush_interval: Duration, + /// Cap on the per-user pin operations the first tick fires per + /// second. The first tick after deploy has to pin every user's + /// bucketsIndex CBOR (cache is empty), so for large user sets + /// this can be tens of thousands of pin requests. Throttle to + /// avoid swamping the pinning-service. + pub first_publish_max_pins_per_sec: u32, + /// IPNS record lifetime. 36h gives a 24h margin over the 12h + /// chain-cron cadence — see plan section 3.2.b. + pub ipns_lifetime: Duration, + /// IPNS DHT cache TTL hint for resolvers. 15min keeps the SDK's + /// IPNS lookup latency low without aggressive re-fetch. + pub ipns_ttl: Duration, + /// Kubo IPNS key NAME (kubo's local label, e.g., + /// `fula-users-index`). Distinct from the IPNS NAME (libp2p + /// public-key hash) that clients use. See plan 3.2.b. 
+ pub ipns_key_name: String, + /// Path to the persisted `(global_cid, sequence, updated_at)` + /// state file. Mirrors the `registry_cid_path` pattern. + pub state_file_path: PathBuf, + /// Kubo HTTP API URL (e.g., `http://localhost:5001`). Used for + /// `/api/v0/name/publish`. + pub ipfs_api_url: String, + /// Internal-endpoint shared-secret token. Disabled (returns 503) + /// if not set. Required in production. + pub internal_token: Option, +} + +impl PublisherConfig { + pub fn default_for(state_file_path: PathBuf, ipfs_api_url: String) -> Self { + Self { + flush_interval: Duration::from_secs(300), + first_publish_max_pins_per_sec: 100, + ipns_lifetime: Duration::from_secs(36 * 3600), + ipns_ttl: Duration::from_secs(15 * 60), + ipns_key_name: "fula-users-index".to_string(), + state_file_path, + ipfs_api_url, + internal_token: None, + } + } +} + +// ============================================================ +// In-memory latest-published view (read by /_internal/users-index-state) +// ============================================================ + +/// Snapshot of the last-published state. Updated under a write lock +/// inside the publisher tick. Read by the internal HTTP endpoint +/// without blocking the publisher. +#[derive(Clone, Debug, Default)] +pub struct LatestPublished { + pub global_cid: Option, + pub sequence: u64, + pub updated_at_unix: u64, +} + +impl From<&PersistedState> for LatestPublished { + fn from(p: &PersistedState) -> Self { + Self { + global_cid: p.global_cid, + sequence: p.sequence, + updated_at_unix: p.updated_at_unix, + } + } +} + +// ============================================================ +// Publisher skeleton +// ============================================================ + +/// The publisher. Generic over the block store so tests can use +/// `MemoryBlockStore` while production uses `FlexibleBlockStore`. 
+pub struct UsersIndexPublisher { + config: PublisherConfig, + bucket_manager: Arc>, + block_store: Arc, + /// Per-user diff cache — owner_id → (content_hash, bucketsIndexCid). + /// `Mutex` (not `RwLock`) because the tick is the only writer and + /// the lock window is tiny (a HashMap insert). + diff_cache: Mutex>, + /// Mirror of the on-disk state, refreshed after every successful + /// publish. Read by the internal endpoint. + latest: RwLock, + /// Serializes `run_tick` invocations so a periodic firing and an + /// admin `publish-now` call (A3) never race the rename of the state + /// file or produce two competing `sequence` values for the same + /// underlying state. Tokio mutex (not parking_lot) because the tick + /// holds it across `await`s on the pin chain. + tick_lock: tokio::sync::Mutex<()>, +} + +/// Outcome of a single `run_tick` call. Useful for tests and for +/// observability counters. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TickOutcome { + /// Number of distinct users whose per-user CBOR was rebuilt and + /// re-pinned this tick. Always equal to `total_users` on the + /// first tick (cache is empty). + pub changed_users: usize, + /// Total number of users in `BucketManager.buckets` at this tick. + pub total_users: usize, + /// CID of the global users-index CBOR pinned this tick. + pub global_cid: Cid, + /// Sequence number embedded in the global CBOR's payload. + pub sequence: u64, + /// `true` iff the global users-index actually changed (i.e., at + /// least one user changed OR the cache was empty). When `false` + /// the publisher could in principle skip the global rebuild — + /// but for simplicity the current implementation always rebuilds + /// the global CBOR. Field kept for future optimization. + pub global_rebuilt: bool, +} + +impl UsersIndexPublisher { + /// Construct from config + handles to the bucket manager and + /// block store. Loads existing state-file on-disk; fresh master + /// starts with `PersistedState::default()`. 
+ pub fn open( + config: PublisherConfig, + bucket_manager: Arc>, + block_store: Arc, + ) -> Result { + let persisted = PersistedState::load(&config.state_file_path)?; + let latest = LatestPublished::from(&persisted); + Ok(Self { + config, + bucket_manager, + block_store, + diff_cache: Mutex::new(HashMap::new()), + latest: RwLock::new(latest), + tick_lock: tokio::sync::Mutex::new(()), + }) + } + + /// Snapshot of the last successful publish. Cheap-clone via the + /// underlying RwLock read guard. + pub fn latest(&self) -> LatestPublished { + self.latest.read().clone() + } + + /// Read the on-disk persisted state directly (bypasses the + /// in-memory `latest` cache). Used by tests and by the startup + /// chain-cross-check (see plan 3.2.b advisor note). + pub fn read_persisted(&self) -> Result { + PersistedState::load(&self.config.state_file_path) + } + + /// Number of entries in the diff cache. Test-only accessor. + #[cfg(test)] + fn diff_cache_len(&self) -> usize { + self.diff_cache.lock().len() + } + + /// Atomically write the next state to disk and update the + /// in-memory `latest` mirror. Called by `run_tick` AFTER a + /// successful pin — the documented order is "pin → persist" + /// (IPNS publish lands in A3, between these two). A crash + /// between pin and persist leaks the orphan-pinned CBOR; + /// cluster GC reaps it; on-chain `require(newSequence > sequence)` + /// keeps sequence monotonic regardless. (Advisor note, plan 3.2.a.) + fn commit_state(&self, next: PersistedState) -> Result<(), PersistError> { + next.save(&self.config.state_file_path)?; + *self.latest.write() = LatestPublished::from(&next); + Ok(()) + } + + /// Run one publisher tick: snapshot the bucket manager, rebuild + /// per-user CBORs only for users whose `content_hash` changed + /// since the last tick, build the global users-index CBOR, pin + /// both via the `PinStore` (cluster), persist the new state. 
+ /// + /// IPNS publishing lands in A3 — this method does not call kubo's + /// `name/publish`. Tests assert the pin chain and the persisted + /// state; the IPNS step will plug in afterward without changing + /// the contract here. + /// + /// **Concurrency.** `BucketManager.buckets` is a `DashMap`; we + /// snapshot to a `Vec` in one synchronous block (no `await` while + /// the iterator is alive — that would be a shard-guard-deadlock + /// hazard). + pub async fn run_tick(&self) -> AnyResult { + // Single-tick-at-a-time. The periodic scheduler and the + // admin `publish-now` (A3) will both invoke run_tick; this + // ensures they never race the rename of the state file or + // emit two competing `sequence` values from the same + // starting state. + let _guard = self.tick_lock.lock().await; + + // 1. Snapshot every user's full bucket set. `list_buckets` + // iterates the DashMap and clones each value; drops the + // iterator before returning, so no shard guard survives + // into our subsequent `await`s. + let snapshot: Vec = self.bucket_manager.list_buckets(); + + // 2. Group by owner_id. + let mut by_user: HashMap> = HashMap::new(); + for b in snapshot { + by_user.entry(b.owner_id.clone()).or_default().push(b); + } + let total_users = by_user.len(); + let now = now_unix(); + + // 3. For each user: compute content_hash; if cache miss or + // diff, rebuild + pin per-user CBOR. 
+ let max_concurrent = self + .config + .first_publish_max_pins_per_sec + .max(1) as usize; + let to_rebuild: Vec<(String, Vec)> = { + let cache = self.diff_cache.lock(); + by_user + .iter() + .filter_map(|(owner_id, buckets)| { + let hash = compute_user_content_hash(buckets); + let unchanged = cache + .get(owner_id) + .map(|e| e.content_hash == hash) + .unwrap_or(false); + if unchanged { + None + } else { + Some((owner_id.clone(), buckets.clone())) + } + }) + .collect() + // cache guard drops here, before any `await` + }; + + // Buffer-unordered keeps at most `max_concurrent` pin ops in + // flight at any time (advisor's first-publish throttle). + let block_store = Arc::clone(&self.block_store); + let pin_results: Vec> = { + use futures::stream::{self, StreamExt}; + stream::iter(to_rebuild.into_iter().map(|(owner_id, buckets)| { + let bs = Arc::clone(&block_store); + async move { + let hash = compute_user_content_hash(&buckets); + let cbor = build_user_buckets_index(&buckets, now); + let cid = bs.put_ipld(&cbor).await?; + bs.pin(&cid, Some("fula-users-index-per-user")) + .await?; + Ok::<_, anyhow::Error>((owner_id, hash, cid)) + } + })) + .buffer_unordered(max_concurrent) + .collect() + .await + }; + + let mut changed_users = 0usize; + for r in pin_results { + let (owner_id, hash, cid) = r?; + self.diff_cache.lock().insert( + owner_id, + PerUserDiffEntry { + content_hash: hash, + buckets_index_cid: cid, + }, + ); + changed_users += 1; + } + + // Prune diff-cache rows for users who disappeared from + // `BucketManager` since the last tick (deleted account, + // user deleted all their buckets, etc.). Without this, the + // cache would grow forever AND — critically — a removed + // user would keep appearing in published globals because + // the early-return below would never fire a rebuild for a + // pure-deletion tick. We track `users_pruned` to fold + // deletions into the rebuild trigger. 
+ let users_pruned = { + let mut cache = self.diff_cache.lock(); + let before = cache.len(); + cache.retain(|owner_id, _| by_user.contains_key(owner_id)); + before - cache.len() + }; + + let prior = self.latest.read().clone(); + + // 4. Skip-if-no-change: every user's cache row matched AND + // no users were pruned AND we've already published at + // least once → tick is a no-op. Returning early avoids + // pin/unpin churn and keeps `sequence` from advancing + // for free, so the 12h chain cron sees the same + // `(cid, sequence)` and skips the on-chain publish. + // Including `users_pruned == 0` is load-bearing: a + // pure-deletion tick has `changed_users == 0` but MUST + // rebuild so the deleted user disappears from the + // published global. + if changed_users == 0 && users_pruned == 0 && prior.global_cid.is_some() { + return Ok(TickOutcome { + changed_users: 0, + total_users, + global_cid: prior.global_cid.expect("checked is_some"), + sequence: prior.sequence, + global_rebuilt: false, + }); + } + + // 5. Build the user → bucketsIndexCid map from the now-up-to-date + // cache. Iterating `by_user.keys()` ensures we include every + // user even if their cache row was already up to date. + let mut user_to_cid: BTreeMap = BTreeMap::new(); + let cache_snapshot = self.diff_cache.lock().clone(); + for owner_id in by_user.keys() { + if let Some(entry) = cache_snapshot.get(owner_id) { + user_to_cid.insert(owner_id.clone(), entry.buckets_index_cid); + } + } + + // 6. Build + pin global users-index CBOR. Sequence increments + // relative to the last persisted state; new state is committed + // only after the pin succeeds. + let next_sequence = prior.sequence.saturating_add(1); + let global = build_global_users_index(&user_to_cid, next_sequence, now); + let global_cid = self.block_store.put_ipld(&global).await?; + self.block_store + .pin(&global_cid, Some("fula-users-index-global")) + .await?; + + // 7. Best-effort unpin previous global. 
Failure is fine —
+        //    cluster GC will eventually reap it.
+        if let Some(prev) = prior.global_cid {
+            if prev != global_cid {
+                if let Err(e) = self.block_store.unpin(&prev).await {
+                    tracing::debug!(
+                        prev = %prev,
+                        error = %e,
+                        "users-index publisher: unpin previous global failed (best-effort; cluster GC will reap)"
+                    );
+                }
+            }
+        }
+
+        // 8. Persist new state. (A3 will insert IPNS publish between
+        //    pin and persist; commit_state stays last so a crash mid-
+        //    IPNS leaves us in a recoverable place.)
+        let next_state = PersistedState {
+            global_cid: Some(global_cid),
+            sequence: next_sequence,
+            updated_at_unix: now,
+        };
+        self.commit_state(next_state)?;
+
+        Ok(TickOutcome {
+            changed_users,
+            total_users,
+            global_cid,
+            sequence: next_sequence,
+            global_rebuilt: true,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use cid::multihash::Multihash;
+    use fula_blockstore::MemoryBlockStore;
+    use fula_core::metadata::Owner;
+    use tempfile::TempDir;
+
+    /// Deterministic test CID: 32 zero bytes with `seed` in byte 0,
+    /// wrapped as a blake3 (0x1e) multihash inside a dag-cbor (0x71)
+    /// CIDv1 — distinct seeds yield distinct, reproducible CIDs.
+    fn fixture_cid(seed: u8) -> Cid {
+        let mut bytes = [0u8; 32];
+        bytes[0] = seed;
+        let mh = Multihash::<64>::wrap(0x1e /* blake3 */, &bytes).unwrap();
+        Cid::new_v1(0x71 /* dag-cbor */, mh)
+    }
+
+    /// Build a synthetic `BucketMetadata` for the **pure** (no-IPFS)
+    /// builder + content-hash tests. Uses `BucketMetadata::new` so the
+    /// struct stays in sync with field additions. Real `run_tick`
+    /// integration tests use `create_bucket_for_user` instead so they
+    /// exercise the real DashMap insertion path.
+    fn bucket_meta(
+        owner_id: &str,
+        name: &str,
+        root_seed: u8,
+        lookup_h: Option<[u8; 16]>,
+    ) -> BucketMetadata {
+        let mut m = BucketMetadata::new(
+            name.to_string(),
+            owner_id.to_string(),
+            fixture_cid(root_seed),
+        );
+        m.bucket_lookup_h = lookup_h;
+        m
+    }
+
+    /// Construct a publisher backed by `MemoryBlockStore` for tests.
+    /// Returns `(publisher, store, manager)` so individual tests can
+    /// poke at the manager (insert buckets etc.) and inspect the
+    /// store (verify pins).
+ fn fixture_publisher( + path: PathBuf, + ) -> ( + UsersIndexPublisher, + Arc, + Arc>, + ) { + let store = Arc::new(MemoryBlockStore::new()); + let manager = Arc::new(BucketManager::new(Arc::clone(&store))); + let publisher = UsersIndexPublisher::open( + fixture_config(path), + Arc::clone(&manager), + Arc::clone(&store), + ) + .expect("open"); + (publisher, store, manager) + } + + // ============================================================ + // PersistedState round-trip + // ============================================================ + + #[test] + fn test_persisted_state_default_is_empty() { + let s = PersistedState::default(); + assert!(s.global_cid.is_none()); + assert_eq!(s.sequence, 0); + assert_eq!(s.updated_at_unix, 0); + } + + #[test] + fn test_load_missing_file_returns_default() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("nonexistent.state"); + let s = PersistedState::load(&path).expect("missing file is not an error"); + assert_eq!(s, PersistedState::default()); + } + + #[test] + fn test_save_then_load_roundtrip() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let cid = fixture_cid(0xab); + let s = PersistedState { + global_cid: Some(cid), + sequence: 42, + updated_at_unix: 1_700_000_000, + }; + s.save(&path).expect("save"); + let loaded = PersistedState::load(&path).expect("load"); + assert_eq!(loaded, s); + } + + #[test] + fn test_save_creates_parent_directory() { + // Mirrors `persist_registry_internal`'s parent-creation + // behavior — operators may configure a path under a missing + // directory; the publisher must not fail. 
+        let dir = TempDir::new().unwrap();
+        let nested = dir.path().join("sub").join("dir").join("state.txt");
+        let s = PersistedState::default();
+        s.save(&nested).expect("save");
+        assert!(nested.exists());
+    }
+
+    #[test]
+    fn test_save_creates_bak_on_overwrite() {
+        // Critical for crash recovery: the previous state file must
+        // be backed up to .bak before being overwritten, so a half-
+        // completed write doesn't lose the prior valid state.
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let s1 = PersistedState {
+            global_cid: Some(fixture_cid(1)),
+            sequence: 1,
+            updated_at_unix: 100,
+        };
+        s1.save(&path).expect("save 1");
+
+        let s2 = PersistedState {
+            global_cid: Some(fixture_cid(2)),
+            sequence: 2,
+            updated_at_unix: 200,
+        };
+        s2.save(&path).expect("save 2");
+
+        // `with_bak_suffix` derives the sibling backup path
+        // (presumably `state.txt.bak` — confirm in the impl).
+        let bak = with_bak_suffix(&path);
+        assert!(bak.exists(), ".bak file must be created on overwrite");
+        let bak_loaded = PersistedState::load(&bak).expect("load bak");
+        assert_eq!(bak_loaded, s1, ".bak must hold the previous state");
+
+        let primary_loaded = PersistedState::load(&path).expect("load primary");
+        assert_eq!(primary_loaded, s2);
+    }
+
+    #[test]
+    fn test_first_save_does_not_create_bak() {
+        // No prior file → no .bak created. Avoids leaving a stray
+        // empty file on first write.
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let s = PersistedState::default();
+        s.save(&path).expect("save");
+        let bak = with_bak_suffix(&path);
+        assert!(!bak.exists(), ".bak must NOT exist on first write");
+    }
+
+    #[test]
+    fn test_parse_two_line_legacy_format() {
+        // Forward-tolerant: an older two-line file (CID + sequence,
+        // no timestamp) must parse with `updated_at = 0`. This isn't
+        // a current production format, but the parser is permissive.
+        let cid = fixture_cid(7);
+        let raw = format!("{}\n5\n", cid);
+        let s = PersistedState::parse(&raw).expect("parse");
+        assert_eq!(s.global_cid, Some(cid));
+        assert_eq!(s.sequence, 5);
+        assert_eq!(s.updated_at_unix, 0);
+    }
+
+    #[test]
+    fn test_parse_empty_lines_are_treated_as_missing() {
+        // An empty-string CID line means "nothing published yet."
+        // An empty sequence line means seq=0. Tolerates the
+        // edge case where a pre-publish state file gets persisted.
+        let s = PersistedState::parse("\n\n\n").expect("parse");
+        assert_eq!(s, PersistedState::default());
+    }
+
+    #[test]
+    fn test_parse_corrupt_cid_returns_error() {
+        // Malformed CID text must surface as Parse, not a silent default.
+        let raw = "not-a-cid\n0\n";
+        let result = PersistedState::parse(raw);
+        assert!(matches!(result, Err(PersistError::Parse(_))));
+    }
+
+    #[test]
+    fn test_parse_corrupt_sequence_returns_error() {
+        let cid = fixture_cid(1);
+        let raw = format!("{}\nnot-a-number\n", cid);
+        let result = PersistedState::parse(&raw);
+        assert!(matches!(result, Err(PersistError::Parse(_))));
+    }
+
+    #[test]
+    fn test_next_increments_sequence() {
+        let s = PersistedState {
+            global_cid: Some(fixture_cid(1)),
+            sequence: 99,
+            updated_at_unix: 1_700_000_000,
+        };
+        let next_cid = fixture_cid(2);
+        let n = s.next(next_cid);
+        assert_eq!(n.global_cid, Some(next_cid));
+        assert_eq!(n.sequence, 100, "sequence must increment exactly once");
+        assert!(
+            n.updated_at_unix >= 1_700_000_000,
+            "timestamp must be monotonic-or-equal"
+        );
+    }
+
+    #[test]
+    fn test_next_from_default_starts_at_one() {
+        // First-ever publish: sequence transitions from 0 → 1.
+        let initial = PersistedState::default();
+        let n = initial.next(fixture_cid(0));
+        assert_eq!(n.sequence, 1);
+    }
+
+    #[test]
+    fn test_next_saturating_at_max() {
+        // Defensive: if sequence somehow reaches u64::MAX (impossible
+        // in practice but worth not panicking on), `saturating_add`
+        // keeps us from overflow.
+        let s = PersistedState {
+            global_cid: Some(fixture_cid(1)),
+            sequence: u64::MAX,
+            updated_at_unix: 0,
+        };
+        let n = s.next(fixture_cid(2));
+        assert_eq!(n.sequence, u64::MAX);
+    }
+
+    // ============================================================
+    // UsersIndexPublisher::open + commit_state
+    // ============================================================
+
+    /// Minimal publisher config for tests: state file at `state_path`,
+    /// endpoint string is a placeholder (these tests run against
+    /// `MemoryBlockStore`, so the URL is presumably never dialed —
+    /// NOTE(review): confirm).
+    fn fixture_config(state_path: PathBuf) -> PublisherConfig {
+        PublisherConfig::default_for(state_path, "http://localhost:5001".to_string())
+    }
+
+    #[test]
+    fn test_open_with_empty_state_starts_fresh() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, _store, _manager) = fixture_publisher(path);
+        let latest = publisher.latest();
+        assert!(latest.global_cid.is_none());
+        assert_eq!(latest.sequence, 0);
+    }
+
+    #[test]
+    fn test_open_with_existing_state_loads_it() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+
+        // Write existing state, then open
+        let prior = PersistedState {
+            global_cid: Some(fixture_cid(0xaa)),
+            sequence: 17,
+            updated_at_unix: 1_700_000_000,
+        };
+        prior.save(&path).expect("seed");
+
+        let (publisher, _store, _manager) = fixture_publisher(path);
+        let latest = publisher.latest();
+        assert_eq!(latest.global_cid, Some(fixture_cid(0xaa)));
+        assert_eq!(latest.sequence, 17);
+        assert_eq!(latest.updated_at_unix, 1_700_000_000);
+    }
+
+    #[test]
+    fn test_commit_state_updates_disk_and_memory() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, _store, _manager) = fixture_publisher(path.clone());
+
+        let next = PersistedState {
+            global_cid: Some(fixture_cid(1)),
+            sequence: 1,
+            updated_at_unix: 1_700_000_001,
+        };
+        publisher.commit_state(next.clone()).expect("commit");
+
+        // In-memory `latest` reflects the commit.
+        let latest = publisher.latest();
+        assert_eq!(latest.global_cid, next.global_cid);
+        assert_eq!(latest.sequence, next.sequence);
+
+        // On-disk file matches.
+        let disk = PersistedState::load(&path).expect("reload");
+        assert_eq!(disk, next);
+    }
+
+    #[test]
+    fn test_commit_state_survives_subsequent_open() {
+        // The crash-recovery path: master commits state, then
+        // restarts. New publisher instance must see the committed
+        // state.
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+
+        {
+            let (publisher, _store, _manager) = fixture_publisher(path.clone());
+            let next = PersistedState {
+                global_cid: Some(fixture_cid(0xee)),
+                sequence: 12,
+                updated_at_unix: 1_700_000_012,
+            };
+            publisher.commit_state(next).expect("commit");
+            // publisher drops here, simulating master restart
+        }
+
+        // Fresh instance against the same file must observe the commit.
+        let (publisher, _store, _manager) = fixture_publisher(path);
+        let latest = publisher.latest();
+        assert_eq!(latest.global_cid, Some(fixture_cid(0xee)));
+        assert_eq!(latest.sequence, 12);
+    }
+
+    #[test]
+    fn test_open_returns_error_on_corrupt_state_file() {
+        // Operator must be told if the state file is corrupt rather
+        // than silently starting with a default that would re-issue
+        // already-used sequence numbers.
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        // Both lines corrupt, so either parse failure path surfaces.
+        std::fs::write(&path, "not-a-cid\nnot-a-number\n").expect("seed");
+
+        let store = Arc::new(MemoryBlockStore::new());
+        let manager = Arc::new(BucketManager::new(Arc::clone(&store)));
+        let result = UsersIndexPublisher::open(fixture_config(path), manager, store);
+        assert!(matches!(result, Err(PersistError::Parse(_))));
+    }
+
+    // ============================================================
+    // Phase 3.2 A2 — pure CBOR builders + content-hash determinism
+    // ============================================================
+
+    #[test]
+    fn test_build_user_buckets_index_empty() {
+        let cbor = build_user_buckets_index(&[], 1_700_000_000);
+        assert_eq!(cbor.v, 2);
+        assert!(cbor.buckets.is_empty());
+        assert_eq!(cbor.updated_at_unix, 1_700_000_000);
+    }
+
+    #[test]
+    fn test_build_user_buckets_index_legacy_only() {
+        // Bucket with `bucket_lookup_h = None` → legacy plaintext key.
+        let buckets = vec![bucket_meta("alice", "photos", 1, None)];
+        let cbor = build_user_buckets_index(&buckets, 1_700_000_000);
+        assert_eq!(cbor.buckets.len(), 1);
+        let entry = cbor.buckets.get("photos").expect("photos under plaintext key");
+        assert!(entry.legacy, "missing lookup_h → must be legacy");
+        assert_eq!(entry.manifest, fixture_cid(1).to_string());
+    }
+
+    #[test]
+    fn test_build_user_buckets_index_blinded_only() {
+        let h = [0x42u8; 16];
+        let buckets = vec![bucket_meta("alice", "photos", 1, Some(h))];
+        let cbor = build_user_buckets_index(&buckets, 1_700_000_000);
+        assert_eq!(cbor.buckets.len(), 1);
+        let entry = cbor.buckets.get(&hex::encode(h)).expect("blinded key");
+        assert!(!entry.legacy, "lookup_h present → must NOT be legacy");
+        assert!(
+            !cbor.buckets.contains_key("photos"),
+            "blinded entry must not also leak under plaintext name"
+        );
+    }
+
+    #[test]
+    fn test_build_user_buckets_index_mixed_legacy_and_blinded() {
+        // One bucket migrated, one not.
Both appear in the CBOR
+        // under their respective key types (Phase 1.2 lazy-
+        // migration semantics).
+        let h = [0xaau8; 16];
+        let buckets = vec![
+            bucket_meta("alice", "photos", 1, Some(h)),
+            bucket_meta("alice", "tax-2024", 2, None),
+        ];
+        let cbor = build_user_buckets_index(&buckets, 1_700_000_000);
+        assert_eq!(cbor.buckets.len(), 2);
+        let blinded = cbor.buckets.get(&hex::encode(h)).expect("blinded entry");
+        assert!(!blinded.legacy);
+        let legacy = cbor
+            .buckets
+            .get("tax-2024")
+            .expect("legacy entry under plaintext name");
+        assert!(legacy.legacy);
+    }
+
+    #[test]
+    fn test_compute_user_content_hash_is_deterministic() {
+        // Same inputs in any iteration order must produce the same
+        // hash. Critical: dag-cbor maps + the diff cache both rely
+        // on this for determinism.
+        let h = [0x11u8; 16];
+        let a = vec![
+            bucket_meta("alice", "photos", 1, Some(h)),
+            bucket_meta("alice", "videos", 2, None),
+        ];
+        // Same two buckets, reversed order.
+        let b = vec![
+            bucket_meta("alice", "videos", 2, None),
+            bucket_meta("alice", "photos", 1, Some(h)),
+        ];
+        assert_eq!(compute_user_content_hash(&a), compute_user_content_hash(&b));
+    }
+
+    #[test]
+    fn test_compute_user_content_hash_differs_on_root_cid_change() {
+        // Same bucket name, different root_cid → different hash.
+        // This is what triggers a re-pin on the next tick.
+        let a = vec![bucket_meta("alice", "photos", 1, None)];
+        let b = vec![bucket_meta("alice", "photos", 2, None)];
+        assert_ne!(compute_user_content_hash(&a), compute_user_content_hash(&b));
+    }
+
+    #[test]
+    fn test_compute_user_content_hash_differs_on_lookup_h_change() {
+        // None → Some([..]) is the lazy-migration path. The
+        // content_hash MUST detect this so the publisher rebuilds
+        // the per-user CBOR (replacing legacy entry with blinded).
+        let a = vec![bucket_meta("alice", "photos", 1, None)];
+        let b = vec![bucket_meta("alice", "photos", 1, Some([0u8; 16]))];
+        assert_ne!(compute_user_content_hash(&a), compute_user_content_hash(&b));
+    }
+
+    #[test]
+    fn test_build_global_users_index_sorted_by_userkey() {
+        // BTreeMap ordering — same input produces same byte-output
+        // and same CID across master restarts/hosts.
+        // FIX(review): the `<String, Cid>` parameters were lost to
+        // angle-bracket stripping in the patch text; reconstructed
+        // from the insert calls below — confirm against the
+        // `build_global_users_index` signature.
+        let mut entries: BTreeMap<String, Cid> = BTreeMap::new();
+        entries.insert("zzz_user".to_string(), fixture_cid(1));
+        entries.insert("aaa_user".to_string(), fixture_cid(2));
+        let cbor = build_global_users_index(&entries, 5, 1_700_000_000);
+        assert_eq!(cbor.v, 1);
+        assert_eq!(cbor.sequence, 5);
+        // First key in the BTreeMap iteration is the lex-smallest.
+        let first = cbor.users.keys().next().expect("nonempty");
+        assert_eq!(first, "aaa_user");
+    }
+
+    // ============================================================
+    // Phase 3.2 A2 — run_tick orchestration tests
+    // ============================================================
+    //
+    // run_tick tests use the real `create_bucket_for_user` /
+    // `delete_bucket_for_user` / `populate_lookup_h_if_missing` API
+    // to seed `BucketManager` — no private-field reach-in. Root CIDs
+    // are whatever the freshly-built forest produces; tests assert
+    // *behavior* (sequence advance, pin/unpin, diff-cache state),
+    // not exact CID values.
+
+    /// Seed one bucket for `user_id` through the public manager API.
+    async fn create_user_bucket(
+        // FIX(review): `<MemoryBlockStore>` reconstructed (stripped in
+        // the patch text) to match `fixture_publisher`'s manager type.
+        manager: &BucketManager<MemoryBlockStore>,
+        user_id: &str,
+        bucket_name: &str,
+    ) {
+        manager
+            .create_bucket_for_user(
+                user_id,
+                bucket_name.to_string(),
+                Owner::new(user_id),
+            )
+            .await
+            .expect("create_bucket_for_user");
+    }
+
+    #[tokio::test]
+    async fn test_run_tick_first_publish_pins_global_and_per_user() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, store, manager) = fixture_publisher(path);
+
+        // Two users, three buckets total.
+        create_user_bucket(&manager, "alice", "photos").await;
+        create_user_bucket(&manager, "alice", "videos").await;
+        create_user_bucket(&manager, "bob", "docs").await;
+
+        let outcome = publisher.run_tick().await.expect("tick");
+        assert_eq!(outcome.total_users, 2);
+        assert_eq!(outcome.changed_users, 2);
+        assert!(outcome.global_rebuilt);
+        assert_eq!(outcome.sequence, 1);
+
+        // The global CBOR is pinned and retrievable.
+        assert!(store.is_pinned(&outcome.global_cid).await.unwrap());
+
+        // After the first tick, the persisted state mirrors the in-memory.
+        let persisted = publisher.read_persisted().expect("read");
+        assert_eq!(persisted.global_cid, Some(outcome.global_cid));
+        assert_eq!(persisted.sequence, 1);
+
+        // Decode the global CBOR and verify both users are present.
+        let global_cbor: GlobalUsersIndex =
+            store.get_ipld(&outcome.global_cid).await.expect("global");
+        assert_eq!(global_cbor.users.len(), 2);
+        assert!(global_cbor.users.contains_key("alice"));
+        assert!(global_cbor.users.contains_key("bob"));
+    }
+
+    #[tokio::test]
+    async fn test_run_tick_idempotent_skips_when_no_changes() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, _store, manager) = fixture_publisher(path);
+
+        create_user_bucket(&manager, "alice", "photos").await;
+        let first = publisher.run_tick().await.expect("first");
+        assert_eq!(first.sequence, 1);
+
+        // Second tick — nothing changed in the manager.
+        let second = publisher.run_tick().await.expect("second");
+        assert_eq!(second.changed_users, 0);
+        assert!(!second.global_rebuilt, "no-change tick must NOT rebuild");
+        assert_eq!(second.sequence, 1, "sequence must NOT advance on no-op");
+        assert_eq!(second.global_cid, first.global_cid);
+    }
+
+    #[tokio::test]
+    async fn test_run_tick_advances_sequence_on_real_change() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, _store, manager) = fixture_publisher(path);
+
+        create_user_bucket(&manager, "alice", "photos").await;
+        let first = publisher.run_tick().await.expect("first");
+
+        // Add a new bucket → user content_hash changes → re-pin.
+        create_user_bucket(&manager, "alice", "videos").await;
+        let second = publisher.run_tick().await.expect("second");
+
+        assert_eq!(second.changed_users, 1);
+        assert_eq!(second.sequence, 2, "sequence advances by exactly 1");
+        assert_ne!(second.global_cid, first.global_cid);
+    }
+
+    #[tokio::test]
+    async fn test_run_tick_diff_cache_prunes_deleted_users() {
+        // Pure-deletion tick: every surviving user's content_hash
+        // matches cache (changed_users == 0), but the global MUST
+        // still rebuild so the deleted user disappears from the
+        // published map. This guards against the early-return
+        // that previously fired on `changed_users == 0` alone.
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, store, manager) = fixture_publisher(path);
+
+        create_user_bucket(&manager, "alice", "photos").await;
+        create_user_bucket(&manager, "bob", "docs").await;
+        let first = publisher.run_tick().await.expect("first");
+        assert_eq!(publisher.diff_cache_len(), 2);
+
+        // Verify both users present in the first global.
+        let first_global: GlobalUsersIndex =
+            store.get_ipld(&first.global_cid).await.expect("first global");
+        assert!(first_global.users.contains_key("alice"));
+        assert!(first_global.users.contains_key("bob"));
+
+        // Delete bob's bucket — bob disappears from BucketManager.
+        manager
+            .delete_bucket_for_user("bob", "docs")
+            .await
+            .expect("delete");
+        let second = publisher.run_tick().await.expect("second");
+        assert_eq!(
+            publisher.diff_cache_len(),
+            1,
+            "diff cache must shrink when a user disappears"
+        );
+        assert_eq!(second.changed_users, 0, "no per-user CBOR rebuilt");
+        assert!(
+            second.global_rebuilt,
+            "pure-deletion tick MUST rebuild global"
+        );
+        assert_eq!(
+            second.sequence, 2,
+            "deletion-only tick advances sequence (chain cron must observe new state)"
+        );
+        assert_ne!(
+            second.global_cid, first.global_cid,
+            "global CID must change when membership changes"
+        );
+
+        let second_global: GlobalUsersIndex =
+            store.get_ipld(&second.global_cid).await.expect("second global");
+        assert!(second_global.users.contains_key("alice"));
+        assert!(
+            !second_global.users.contains_key("bob"),
+            "deleted user MUST disappear from published global"
+        );
+        // Idempotency: alice's content didn't change, so her per-
+        // user `bucketsIndexCid` MUST be byte-identical across the
+        // two globals. If this drifts, something in the diff-cache
+        // logic is silently re-pinning unchanged users.
+        assert_eq!(
+            first_global.users["alice"], second_global.users["alice"],
+            "unchanged user's bucketsIndex CID must be stable across deletion ticks"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_run_tick_after_restart_rebuilds_with_advanced_sequence() {
+        // Crash-recovery scenario: master commits state, restarts.
+        // The new publisher's in-memory diff cache is empty, so
+        // every user looks "changed" on the first tick and the
+        // sequence advances by 1.
The per-user `bucketsIndexCid`s
+        // are deterministic CIDs over the same content, so the
+        // pin operations are idempotent — but the global CBOR
+        // embeds a fresh `sequence` + `updated_at_unix`, so its
+        // CID changes. Documented expected behavior.
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+
+        let first_global_cid;
+        {
+            let (publisher, _store, manager) = fixture_publisher(path.clone());
+            create_user_bucket(&manager, "alice", "photos").await;
+            create_user_bucket(&manager, "bob", "docs").await;
+            let first = publisher.run_tick().await.expect("first tick");
+            assert_eq!(first.sequence, 1);
+            first_global_cid = first.global_cid;
+        } // publisher drops, simulating master restart
+
+        // Re-open against the same state file AND a *fresh*
+        // BucketManager. We re-create the same buckets so the
+        // post-restart manager mirrors what `load_registry` would
+        // produce in production (same owner_ids + bucket_names).
+        let (publisher, _store, manager) = fixture_publisher(path);
+        create_user_bucket(&manager, "alice", "photos").await;
+        create_user_bucket(&manager, "bob", "docs").await;
+
+        // State persisted before restart is loaded.
+        assert_eq!(publisher.latest().sequence, 1);
+        assert_eq!(publisher.latest().global_cid, Some(first_global_cid));
+
+        // First post-restart tick: cache is empty → every user
+        // gets a re-pin. Sequence advances exactly once.
+        let second = publisher.run_tick().await.expect("post-restart tick");
+        assert_eq!(second.changed_users, 2, "empty cache → all users re-pinned");
+        assert_eq!(second.total_users, 2);
+        assert_eq!(
+            second.sequence, 2,
+            "sequence advances by exactly 1 across restart"
+        );
+        assert!(second.global_rebuilt);
+    }
+
+    #[tokio::test]
+    async fn test_run_tick_legacy_to_blinded_replaces_entry() {
+        // Phase 3.2.1(d) backward-compat scenario: write under old
+        // client (no lookup_h), then again under new client (with
+        // lookup_h).
The published CBOR must contain a single
+        // blinded entry for the bucket — NOT both legacy and blinded.
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, store, manager) = fixture_publisher(path);
+
+        create_user_bucket(&manager, "alice", "photos").await;
+        let first = publisher.run_tick().await.expect("first");
+
+        // Simulate the upgrade: populate lookup_h via the public
+        // helper (this is what the PUT handler calls in production
+        // when a Phase-1.2-aware client uploads).
+        let h = [0x77u8; 16];
+        let changed = manager
+            .populate_lookup_h_if_missing("alice", "photos", h)
+            .expect("populate ok");
+        assert!(changed, "must transition None → Some");
+
+        let second = publisher.run_tick().await.expect("second");
+        assert_eq!(second.changed_users, 1);
+        assert_ne!(second.global_cid, first.global_cid);
+
+        // Fetch and decode the per-user CBOR via the global. There
+        // should be exactly ONE entry — keyed under the blinded
+        // hex of `h`, not under "photos".
+        let global_cbor: GlobalUsersIndex =
+            store.get_ipld(&second.global_cid).await.expect("global");
+        // Alice is the only user in this fixture, so first key == alice.
+        let alice_user_key = global_cbor
+            .users
+            .keys()
+            .next()
+            .expect("alice should be present");
+        let alice_buckets_cid: Cid = global_cbor.users[alice_user_key]
+            .parse()
+            .expect("parse cid");
+        let user_cbor: UserBucketsIndex = store
+            .get_ipld(&alice_buckets_cid)
+            .await
+            .expect("user buckets");
+        assert_eq!(
+            user_cbor.buckets.len(),
+            1,
+            "exactly one bucket — legacy must NOT coexist with blinded"
+        );
+        assert!(
+            user_cbor.buckets.contains_key(&hex::encode(h)),
+            "blinded key present"
+        );
+        assert!(
+            !user_cbor.buckets.contains_key("photos"),
+            "plaintext name must NOT appear after migration"
+        );
+        let entry = user_cbor.buckets.get(&hex::encode(h)).unwrap();
+        assert!(!entry.legacy);
+    }
+
+    // NOTE: there is intentionally no `test_run_tick_unpins_previous_global` test.
+    // `MemoryBlockStore::unpin` is a no-op (memory.rs:108-111) and `is_pinned`
+    // resolves to `has_block`, so the in-memory backend can't observe a
+    // pin/unpin distinction. The unpin call itself is exercised — code path
+    // executes — but observability requires a real `IpfsPinning` or `Cluster`
+    // backend (covered in Phase 3.6 staging-mirror verification step 8).
+    // Adding a counting `PinStore` wrapper here would be ~80 LOC of scaffolding
+    // for one assertion; not worth it.
+
+    #[tokio::test]
+    async fn test_run_tick_no_users_first_publish_emits_empty_global() {
+        // Edge case: master starts up with zero buckets. First tick
+        // still publishes (so the SDK can fetch and find an empty
+        // user map without falling back to chain).
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("state.txt");
+        let (publisher, _store, _manager) = fixture_publisher(path);
+
+        let outcome = publisher.run_tick().await.expect("tick");
+        assert_eq!(outcome.total_users, 0);
+        assert_eq!(outcome.changed_users, 0);
+        assert!(outcome.global_rebuilt, "first publish must run even on empty");
+        assert_eq!(outcome.sequence, 1);
+    }
+}
diff --git a/crates/fula-client/Cargo.toml b/crates/fula-client/Cargo.toml
index 42cdde4..017b095 100644
--- a/crates/fula-client/Cargo.toml
+++ b/crates/fula-client/Cargo.toml
@@ -48,6 +48,14 @@ reqwest = { workspace = true }
 uuid = { workspace = true }
 fs2 = "0.4"
 dirs = "5"
+# Embedded persistent KV for the BlockCache (Phase 2.2 of master-independent reads).
+# Native-only — wasm builds skip the cache (no persistent storage there anyway).
+redb = { workspace = true }
+# `cid` is used by block_cache.rs / gateway_fetch.rs for content-addressed keys.
+cid = { workspace = true }
+# CID verification on gateway-fetched bytes (Phase 2.3 of master-independent reads).
+sha2 = { workspace = true }
+# Mutex for per-gateway state in gateway_fetch (Phase 2.3).
+parking_lot = { workspace = true }
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 # WASM: disable default features (no tokio), enable wasm feature
diff --git a/crates/fula-client/src/block_cache.rs b/crates/fula-client/src/block_cache.rs
new file mode 100644
index 0000000..52e84ca
--- /dev/null
+++ b/crates/fula-client/src/block_cache.rs
@@ -0,0 +1,628 @@
+//! Persistent LRU block cache (Phase 2.2 of master-independent reads).
+//!
+//! Stores **encrypted** IPFS blocks fetched from the gateway race so that,
+//! during a master outage, re-reading the same file is served entirely
+//! from local disk instead of re-fetching from public gateways.
+//!
+//! Backed by a single redb file (ACID, no separate DB process). Two
+//! tables:
+//! - `blocks`: CID multihash bytes → encrypted block bytes
+//! - `meta`: CID multihash bytes → last-access unix-millis (for LRU)
+//!
+//! # Concurrency model
+//!
+//! - **One SDK instance per cache path.** redb requires exclusive access
+//!   to its file. Constructing two `BlockCache`s pointing at the same
+//!   path returns [`BlockCacheError::AlreadyOpen`].
+//! - **Concurrent get/put are safe** within a single instance via
+//!   redb's ACID transactions.
+//! - **Eviction is serialized** by an internal async mutex so concurrent
+//!   `put`s that all cross the budget don't all run eviction at once.
+//!
+//! # Eviction policy
+//!
+//! When `put` would push the cache over `max_bytes`, evict to a
+//! **80 %-of-budget low watermark** (rather than exactly the budget).
+//! That amortizes the eviction cost — without it, every put just-over
+//! the threshold would pay mutex + write-txn overhead to evict a single
+//! tiny entry.
+//!
+//! # Security
+//!
+//! The cache stores **encrypted** block bytes content-addressed by their
+//! IPFS CID. It does **not** verify CID-on-insert — CID verification is
+//! the caller's responsibility (Phase 2.3 enforces it before calling
+//! `put`.
The cache makes no security promises about content secrecy
+//! beyond what file-system permissions provide.
+//!
+//! # Backward compatibility
+//!
+//! Phase 2.2 is purely additive new infrastructure. No existing data is
+//! touched; there is no migration. The cache is opt-in via SDK config
+//! (Phase 2.4 wires it in). A first-time-ever open creates an empty
+//! redb file at the configured path.
+
+#![cfg(not(target_arch = "wasm32"))]
+
+use bytes::Bytes;
+use cid::Cid;
+use redb::{Database, ReadableTable, ReadableTableMetadata, TableDefinition};
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::Arc;
+use std::time::{SystemTime, UNIX_EPOCH};
+use tokio::sync::Mutex;
+
+const BLOCKS: TableDefinition<&[u8], &[u8]> = TableDefinition::new("blocks");
+const META: TableDefinition<&[u8], u64> = TableDefinition::new("meta");
+
+/// Eviction low-watermark: when triggered, free space until usage is at
+/// or below this fraction of `max_bytes`. 80 % is the industry-standard
+/// "evict-once-amortize-many-puts" point.
+const EVICT_LOW_WATERMARK_NUMERATOR: u64 = 80;
+const EVICT_LOW_WATERMARK_DENOMINATOR: u64 = 100;
+
+/// Errors specific to the block cache. Surfaced separately from
+/// `ClientError` so tests can match without coupling to the global
+/// error enum.
+#[derive(Debug, thiserror::Error)]
+pub enum BlockCacheError {
+    /// Another process (or a previously-leaked instance in the same
+    /// process) holds the redb file lock.
+    ///
+    /// Only one `BlockCache` may be open per path at a time.
+    #[error("block cache file is already open by another instance: {path}")]
+    AlreadyOpen { path: PathBuf },
+
+    /// The cache file exists but is not a valid redb database.
+    ///
+    /// The caller decides whether to delete and recreate it. We do not
+    /// auto-delete — losing hundreds of MB of cache silently is a
+    /// foot-gun.
+    #[error("block cache file is corrupt: {0}")]
+    Corrupt(String),
+
+    /// A `put` was attempted with a block whose size exceeds the cache
+    /// budget. The cache cannot accept it; the caller should fetch
+    /// directly without caching.
+    #[error("block size {size} bytes exceeds cache budget {budget} bytes")]
+    BlockTooLarge { size: u64, budget: u64 },
+
+    /// Any other redb-layer failure, carried as its display string.
+    #[error("redb error: {0}")]
+    Redb(String),
+
+    #[error("io error: {0}")]
+    Io(#[from] std::io::Error),
+}
+
+// FIX(review): the `<redb::…Error>` source-type parameters on the five
+// `From` impls below were lost to angle-bracket stripping in the patch
+// text ("impl From for BlockCacheError" is not valid Rust). Restored
+// from each `fn from` argument type.
+impl From<redb::DatabaseError> for BlockCacheError {
+    fn from(e: redb::DatabaseError) -> Self {
+        // Single classification policy applied wherever DatabaseError
+        // surfaces (both inside `open()`'s map_err and via `?` in
+        // future callers). Lock-style errors → AlreadyOpen; corruption
+        // markers → Corrupt; everything else → generic Redb.
+        let s = e.to_string();
+        let lower = s.to_lowercase();
+        // "lock" is a substring of "locked", so one check covers both.
+        if lower.contains("in use") || lower.contains("lock") {
+            // Path is unknown at this conversion site — caller will see
+            // the message but lose the path. `open()` constructs
+            // AlreadyOpen directly with the path; this is the fallback
+            // for any other call site that uses `?`.
+            BlockCacheError::AlreadyOpen { path: PathBuf::new() }
+        } else if lower.contains("corrupt") || lower.contains("checksum") {
+            BlockCacheError::Corrupt(s)
+        } else {
+            BlockCacheError::Redb(s)
+        }
+    }
+}
+
+impl From<redb::TransactionError> for BlockCacheError {
+    fn from(e: redb::TransactionError) -> Self {
+        BlockCacheError::Redb(e.to_string())
+    }
+}
+impl From<redb::TableError> for BlockCacheError {
+    fn from(e: redb::TableError) -> Self {
+        BlockCacheError::Redb(e.to_string())
+    }
+}
+impl From<redb::StorageError> for BlockCacheError {
+    fn from(e: redb::StorageError) -> Self {
+        BlockCacheError::Redb(e.to_string())
+    }
+}
+impl From<redb::CommitError> for BlockCacheError {
+    fn from(e: redb::CommitError) -> Self {
+        BlockCacheError::Redb(e.to_string())
+    }
+}
+
+/// LRU block cache backed by a single redb file.
+///
+/// Cheap-clone via `Arc`: clones share the same database, so a `put`
+/// observed by one clone is immediately visible to all others.
+#[derive(Clone)]
+pub struct BlockCache {
+    inner: Arc<BlockCacheInner>,
+}
+
+struct BlockCacheInner {
+    db: Database,
+    max_bytes: u64,
+    /// Live byte counter, kept in sync with the BLOCKS table on every
+    /// `put` / eviction. Re-synced from the table on `open()` to recover
+    /// from any prior abort that left the counter desynced.
+    current_bytes: AtomicU64,
+    /// Serializes eviction passes so concurrent over-budget puts don't
+    /// each run their own eviction.
+    evict_lock: Mutex<()>,
+}
+
+impl BlockCache {
+    /// Open or create the block cache at `path` with a budget of
+    /// `max_bytes` total stored block-bytes.
+    ///
+    /// On open, scans the BLOCKS table to compute the current byte
+    /// count (recovers from any earlier abort that left the in-memory
+    /// counter desynced).
+    pub fn open(path: impl AsRef<Path>, max_bytes: u64) -> Result<Self, BlockCacheError> {
+        let path = path.as_ref();
+        if let Some(parent) = path.parent() {
+            if !parent.as_os_str().is_empty() {
+                std::fs::create_dir_all(parent)?;
+            }
+        }
+
+        let db = Database::create(path).map_err(|e| {
+            // redb returns a specific variant for "another process holds
+            // the lock" — but the variant name has shifted across redb
+            // versions. Do a string check as a portability hedge and
+            // map to AlreadyOpen so callers don't have to read redb
+            // source to interpret it.
+            let s = e.to_string().to_lowercase();
+            if s.contains("in use") || s.contains("locked") || s.contains("lock") {
+                BlockCacheError::AlreadyOpen { path: path.to_path_buf() }
+            } else {
+                BlockCacheError::from(e)
+            }
+        })?;
+
+        // Ensure tables exist (idempotent — opening a non-existent
+        // table inside a write txn creates it).
+        let init_txn = db.begin_write()?;
+        {
+            let _ = init_txn.open_table(BLOCKS)?;
+            let _ = init_txn.open_table(META)?;
+        }
+        init_txn.commit()?;
+
+        // Re-sync the byte counter by scanning. One-time cost at startup;
+        // eliminates the class of bugs where a prior abort desynced the
+        // atomic counter.
+        let mut total: u64 = 0;
+        {
+            let read = db.begin_read()?;
+            let table = read.open_table(BLOCKS)?;
+            let iter = table.iter()?;
+            for entry in iter {
+                let (_, val) = entry?;
+                total += val.value().len() as u64;
+            }
+        }
+
+        Ok(BlockCache {
+            inner: Arc::new(BlockCacheInner {
+                db,
+                max_bytes,
+                current_bytes: AtomicU64::new(total),
+                evict_lock: Mutex::new(()),
+            }),
+        })
+    }
+
+    /// Configured budget in bytes.
+    pub fn max_bytes(&self) -> u64 {
+        self.inner.max_bytes
+    }
+
+    /// Approximate current byte usage. Eventually consistent under
+    /// concurrent writes (the next read after all writes settle is
+    /// exact).
+    pub fn current_bytes(&self) -> u64 {
+        self.inner.current_bytes.load(Ordering::Acquire)
+    }
+
+    /// Number of cached blocks. O(1) approximation via the underlying
+    /// table length.
+    pub fn entry_count(&self) -> Result<u64, BlockCacheError> {
+        let read = self.inner.db.begin_read()?;
+        let table = read.open_table(BLOCKS)?;
+        Ok(table.len()?)
+    }
+
+    /// Look up a block by its CID. Returns `None` if not cached.
+    /// Updates the last-access timestamp on hit (for LRU ordering).
+    ///
+    /// PERF: this currently uses a write txn to update last-access on
+    /// hit, which serializes against other writers. Phase 2.4 will
+    /// expose this in the hot read path; if profiling shows contention,
+    /// switch to deferred or probabilistic access-time updates (e.g.,
+    /// buffer in-memory and flush periodically, or update on 1-in-N
+    /// reads). LRU is approximate by definition.
+    pub fn get(&self, cid: &Cid) -> Result<Option<Bytes>, BlockCacheError> {
+        let key = cid.to_bytes();
+        // Single write txn so the get-then-update-meta is atomic; under
+        // concurrent get/put the timestamp ordering stays consistent.
+ let txn = self.inner.db.begin_write()?; + let result = { + let blocks = txn.open_table(BLOCKS)?; + let val = blocks.get(key.as_slice())?; + val.map(|v| Bytes::copy_from_slice(v.value())) + }; + if result.is_some() { + let mut meta = txn.open_table(META)?; + meta.insert(key.as_slice(), now_ms())?; + } + txn.commit()?; + Ok(result) + } + + /// Insert (or overwrite) a block. Triggers LRU eviction down to the + /// 80 %-of-budget low watermark if this insert would cross + /// `max_bytes`. + /// + /// Idempotent under repeat-inserts of the same CID with identical + /// bytes — `current_bytes` accounting tracks the net delta. + pub async fn put(&self, cid: &Cid, data: &[u8]) -> Result<(), BlockCacheError> { + let new_size = data.len() as u64; + if new_size > self.inner.max_bytes { + // A single block larger than the entire budget can't be + // cached. Surface as a typed variant so Phase 2.4 can + // dispatch on it ("skip caching, fetch directly"). + return Err(BlockCacheError::BlockTooLarge { + size: new_size, + budget: self.inner.max_bytes, + }); + } + + // Eviction: if this insert would push us over budget, evict + // (under the lock) until we're at the low watermark. Note the + // budget check uses the *current* size, not the post-insert + // size — over-tightening to "fit exactly" leads to churn. + let cur = self.inner.current_bytes.load(Ordering::Acquire); + if cur + new_size > self.inner.max_bytes { + let _guard = self.inner.evict_lock.lock().await; + // Re-check under the lock — another concurrent put may have + // already evicted enough. 
+            let cur = self.inner.current_bytes.load(Ordering::Acquire);
+            if cur + new_size > self.inner.max_bytes {
+                let target = (self.inner.max_bytes * EVICT_LOW_WATERMARK_NUMERATOR
+                    / EVICT_LOW_WATERMARK_DENOMINATOR)
+                    .saturating_sub(new_size);
+                self.evict_to(target)?;
+            }
+        }
+
+        let key = cid.to_bytes();
+        let now = now_ms();
+        let txn = self.inner.db.begin_write()?;
+        let prior_size: u64 = {
+            let mut blocks = txn.open_table(BLOCKS)?;
+            let prior = blocks
+                .get(key.as_slice())?
+                .map(|v| v.value().len() as u64)
+                .unwrap_or(0);
+            blocks.insert(key.as_slice(), data)?;
+            prior
+        };
+        {
+            let mut meta = txn.open_table(META)?;
+            meta.insert(key.as_slice(), now)?;
+        }
+        txn.commit()?;
+
+        // Adjust the byte counter by net delta. Idempotent for
+        // identical re-inserts (delta = 0).
+        if new_size > prior_size {
+            self.inner
+                .current_bytes
+                .fetch_add(new_size - prior_size, Ordering::AcqRel);
+        } else if prior_size > new_size {
+            self.inner
+                .current_bytes
+                .fetch_sub(prior_size - new_size, Ordering::AcqRel);
+        }
+        Ok(())
+    }
+
+    /// Evict LRU entries until `current_bytes <= target_bytes`. Caller
+    /// must hold `evict_lock`. Atomic via a single redb write txn.
+    fn evict_to(&self, target_bytes: u64) -> Result<(), BlockCacheError> {
+        // Snapshot meta entries sorted by last-access ascending. At
+        // 256 MiB / 1 KiB blocks this is ~256 k entries — a few hundred
+        // microseconds. Acceptable.
+        let txn = self.inner.db.begin_write()?;
+        let mut entries: Vec<(Vec<u8>, u64)> = {
+            let meta = txn.open_table(META)?;
+            meta.iter()?
+                .filter_map(Result::ok)
+                .map(|(k, v)| (k.value().to_vec(), v.value()))
+                .collect()
+        };
+        entries.sort_by_key(|(_, ts)| *ts);
+
+        let mut bytes_freed: u64 = 0;
+        let mut evicted_keys: Vec<Vec<u8>> = Vec::new();
+        let cur = self.inner.current_bytes.load(Ordering::Acquire);
+        let need = cur.saturating_sub(target_bytes);
+
+        {
+            let mut blocks = txn.open_table(BLOCKS)?;
+            let mut meta = txn.open_table(META)?;
+            for (key, _ts) in entries {
+                if bytes_freed >= need {
+                    break;
+                }
+                let block_size = blocks
+                    .get(key.as_slice())?
+                    .map(|v| v.value().len() as u64)
+                    .unwrap_or(0);
+                blocks.remove(key.as_slice())?;
+                meta.remove(key.as_slice())?;
+                bytes_freed = bytes_freed.saturating_add(block_size);
+                evicted_keys.push(key);
+            }
+        }
+        txn.commit()?;
+
+        self.inner
+            .current_bytes
+            .fetch_sub(bytes_freed, Ordering::AcqRel);
+        tracing::debug!(
+            evicted = evicted_keys.len(),
+            bytes_freed = bytes_freed,
+            target = target_bytes,
+            "block_cache: LRU eviction complete"
+        );
+        Ok(())
+    }
+}
+
+fn now_ms() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map(|d| d.as_millis() as u64)
+        .unwrap_or(0)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use cid::multihash::Multihash;
+    use std::time::Duration;
+    use tempfile::TempDir;
+
+    /// Build a deterministic CID from a small u64 seed for test fixtures.
+ fn test_cid(seed: u64) -> Cid { + let mut bytes = [0u8; 32]; + bytes[..8].copy_from_slice(&seed.to_le_bytes()); + let mh = Multihash::<64>::wrap(0x1e /* blake3 */, &bytes).unwrap(); + Cid::new_v1(0x55 /* raw */, mh) + } + + fn open_cache(dir: &TempDir, max: u64) -> BlockCache { + BlockCache::open(dir.path().join("cache.redb"), max).expect("open") + } + + #[tokio::test] + async fn test_put_get_roundtrip() { + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + + let cid = test_cid(1); + let data = b"hello world"; + cache.put(&cid, data).await.expect("put"); + + let got = cache.get(&cid).expect("get").expect("hit"); + assert_eq!(got.as_ref(), data); + } + + #[tokio::test] + async fn test_get_missing_returns_none() { + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + + let cid = test_cid(42); + assert!(cache.get(&cid).expect("get").is_none()); + } + + #[tokio::test] + async fn test_persistence_across_open_close() { + // Backward-compat-critical: an existing on-disk cache must + // survive an SDK restart and serve cached blocks. + let dir = TempDir::new().unwrap(); + let cid = test_cid(7); + let data = b"persistent block bytes"; + + { + let cache = open_cache(&dir, 1024 * 1024); + cache.put(&cid, data).await.expect("put"); + // drop happens at end of scope + } + { + let cache = open_cache(&dir, 1024 * 1024); + let got = cache.get(&cid).expect("get").expect("survived restart"); + assert_eq!(got.as_ref(), data); + // current_bytes is correctly re-synced from the DB on open. + assert_eq!(cache.current_bytes(), data.len() as u64); + } + } + + #[tokio::test] + async fn test_idempotent_put_does_not_grow() { + // Re-inserting the same CID with identical bytes must not double-count. 
+ let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + let cid = test_cid(5); + let data = vec![0u8; 4096]; + + cache.put(&cid, &data).await.expect("put 1"); + let after_first = cache.current_bytes(); + cache.put(&cid, &data).await.expect("put 2"); + let after_second = cache.current_bytes(); + + assert_eq!(after_first, data.len() as u64); + assert_eq!(after_second, after_first, "re-insert must not grow current_bytes"); + assert_eq!(cache.entry_count().expect("count"), 1); + } + + #[tokio::test] + async fn test_eviction_on_overflow_keeps_size_under_budget() { + // Insert N blocks of size B each, with budget = (N/2) * B. + // After all inserts settle, current_bytes <= max_bytes * 100/100. + // (We aim for the 80 % low watermark on each eviction.) + let dir = TempDir::new().unwrap(); + let block_size = 16 * 1024; // 16 KiB + let n_blocks = 20; + let budget = (n_blocks as u64 / 2) * block_size; // ~10 blocks fit + + let cache = open_cache(&dir, budget); + + for i in 0..n_blocks { + let cid = test_cid(i); + let data = vec![i as u8; block_size as usize]; + cache.put(&cid, &data).await.expect("put"); + } + + let cur = cache.current_bytes(); + assert_eq!(cache.max_bytes(), budget, "max_bytes accessor returns the configured budget"); + assert!( + cur <= budget, + "current_bytes {} must be <= max_bytes {}", + cur, + budget + ); + // We had eviction (otherwise current_bytes would equal n_blocks * block_size). + assert!( + cur < (n_blocks as u64) * block_size, + "expected at least one eviction; current={}, total-without-evict={}", + cur, + (n_blocks as u64) * block_size + ); + } + + #[tokio::test] + async fn test_lru_oldest_evicted_first() { + // Insert 3 blocks; access #0 to refresh it; insert a 4th to + // trigger eviction. The evicted block must be #1 (oldest + // last-access), NOT #0 (just accessed). + let dir = TempDir::new().unwrap(); + let block_size = 1024; + // Budget exactly 3 blocks — the 4th insert must evict. 
+ let cache = open_cache(&dir, 3 * block_size); + + let data = vec![0u8; block_size as usize]; + cache.put(&test_cid(0), &data).await.expect("put 0"); + // Sleep 5ms so timestamps are reliably ordered. + tokio::time::sleep(Duration::from_millis(5)).await; + cache.put(&test_cid(1), &data).await.expect("put 1"); + tokio::time::sleep(Duration::from_millis(5)).await; + cache.put(&test_cid(2), &data).await.expect("put 2"); + tokio::time::sleep(Duration::from_millis(5)).await; + + // Refresh #0 → it becomes the most-recently-accessed. + let _ = cache.get(&test_cid(0)).expect("get 0").expect("hit 0"); + tokio::time::sleep(Duration::from_millis(5)).await; + + // Insert #3 → must evict (low-watermark = 80% of 3 = 2.4 blocks + // of budget; eviction frees enough to fit the new 1-block). + cache.put(&test_cid(3), &data).await.expect("put 3"); + + // #1 (oldest) should be gone; #0 (refreshed) should still be + // present. + assert!( + cache.get(&test_cid(0)).expect("get").is_some(), + "refreshed #0 must survive eviction" + ); + assert!( + cache.get(&test_cid(1)).expect("get").is_none(), + "oldest #1 must be evicted" + ); + } + + #[tokio::test] + async fn test_concurrent_puts_no_corruption_under_eviction() { + // The hard concurrency case: K concurrent puts, each within + // budget alone, but K-puts collectively over-budget. Verify + // post-condition: current_bytes <= max_bytes. + let dir = TempDir::new().unwrap(); + let block_size = 4 * 1024; // 4 KiB + let n_concurrent = 16; + // Budget = half of total → at least half must be evicted. 
+ let budget = (n_concurrent as u64 / 2) * block_size; + + let cache = open_cache(&dir, budget); + + let mut handles = Vec::new(); + for i in 0..n_concurrent { + let cache = cache.clone(); + let data = vec![i as u8; block_size as usize]; + let cid = test_cid(i); + handles.push(tokio::spawn(async move { + cache.put(&cid, &data).await + })); + } + for h in handles { + h.await.expect("task panicked").expect("put failed"); + } + + // The mutex + watermark policy guarantees we never permanently + // exceed budget — even though briefly between checks we might + // see a transient overshoot. + let cur = cache.current_bytes(); + assert!( + cur <= budget, + "post-concurrency current_bytes {} > budget {}", + cur, + budget + ); + } + + #[tokio::test] + async fn test_block_too_large_returns_typed_error() { + // A block larger than the entire cache budget must surface as + // BlockTooLarge — not as a generic Redb(...) string error — + // so Phase 2.4 can dispatch on it cleanly ("skip caching, + // fetch directly"). + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024); // 1 KiB budget + + let cid = test_cid(99); + let big_block = vec![0u8; 4096]; // 4 KiB > 1 KiB budget + + match cache.put(&cid, &big_block).await { + Err(BlockCacheError::BlockTooLarge { size, budget }) => { + assert_eq!(size, 4096); + assert_eq!(budget, 1024); + } + other => panic!("expected BlockTooLarge, got {:?}", other), + } + } + + #[tokio::test] + async fn test_idempotent_open_after_clean_shutdown() { + // Simulates: SDK opens cache, writes, drops cleanly, re-opens. + // This is the common case for short-lived CLIs. + let dir = TempDir::new().unwrap(); + for round in 0..3 { + let cache = open_cache(&dir, 1024 * 1024); + let cid = test_cid(round as u64 * 100); + let data = vec![round as u8; 256]; + cache.put(&cid, &data).await.expect("put"); + assert!(cache.get(&cid).expect("get").is_some()); + // current_bytes should equal data sizes accumulated across rounds. 
+            assert!(cache.current_bytes() >= 256);
+        }
+    }
+}
diff --git a/crates/fula-client/src/client.rs b/crates/fula-client/src/client.rs
index 7ccbf51..af49186 100644
--- a/crates/fula-client/src/client.rs
+++ b/crates/fula-client/src/client.rs
@@ -2,11 +2,13 @@
 use crate::{
     Config, ClientError, Result,
+    health_gate::{HealthGate, GateDecision},
     types::*,
 };
 use bytes::Bytes;
 use reqwest::{Client, Response, header};
 use std::collections::HashMap;
+use std::sync::Arc;
 use tracing::{debug, instrument};
 
 /// Fula storage client
@@ -14,6 +16,12 @@ use tracing::{debug, instrument};
 pub struct FulaClient {
     config: Config,
     http: Client,
+    /// Phase 2.1 of master-independent reads. `Some` when
+    /// `Config::health_gate_enabled = true`; shared across all clones via
+    /// `Arc` so a failure observed in one task immediately silences the
+    /// rest. `None` when the feature is off — request path then runs
+    /// exactly as before (backward-compat).
+    health_gate: Option<Arc<HealthGate>>,
 }
 
 impl FulaClient {
@@ -34,7 +42,13 @@ impl FulaClient {
 
         let http = builder.build().map_err(ClientError::Http)?;
 
-        Ok(Self { config, http })
+        let health_gate = if config.health_gate_enabled {
+            Some(Arc::new(HealthGate::new(config.health_gate_ttl)))
+        } else {
+            None
+        };
+
+        Ok(Self { config, http, health_gate })
     }
 
     /// Create with default configuration
@@ -460,8 +474,24 @@ impl FulaClient {
         headers: Option<HashMap<String, String>>,
         body: Option<Bytes>,
     ) -> Result<Response> {
+        // Phase 2.1: consult the health gate before sending. When Down
+        // within TTL, short-circuit with MasterUnreachable so the caller
+        // doesn't pay the per-read timeout. When the TTL has elapsed the
+        // gate auto-allows a probe through.
+ if let Some(gate) = &self.health_gate { + if let GateDecision::ShortCircuit { down_for_secs } = gate.decide() { + debug!( + method = %method, + path = %path, + "health gate Down → short-circuiting (down_for_secs={})", + down_for_secs + ); + return Err(ClientError::MasterUnreachable { down_for_secs }); + } + } + let url = format!("{}{}", self.config.endpoint, path); - + let mut req = match method { "GET" => self.http.get(&url), "PUT" => self.http.put(&url), @@ -494,10 +524,36 @@ impl FulaClient { } debug!("Sending {} request to {}", method, url); - let response = req.send().await?; + let response = match req.send().await { + Ok(r) => r, + Err(e) => { + // Connection-level error (refused, RST, DNS, timeout). Treat + // as a master-down signal for the gate's purposes. Returning + // the original error preserves caller diagnostics. + if let Some(gate) = &self.health_gate { + gate.record_failure(); + } + return Err(ClientError::Http(e)); + } + }; // Check for errors let status = response.status(); + + // Phase 2.1: classify the response status for the health gate. + // 5xx → master-side failure → record_failure + // 4xx → request-level (auth, not-found, precondition, etc.); NOT + // a master-down signal — the server responded, the request + // was just bad. Don't touch the gate. + // 2xx/3xx → success → record_success (also clears any prior Down) + if let Some(gate) = &self.health_gate { + if status.is_server_error() { + gate.record_failure(); + } else if status.is_success() { + gate.record_success(); + } + } + if !status.is_success() { // 412 Precondition Failed surfaces as ConcurrentModification so // callers using If-Match / If-None-Match can retry distinctly. 
diff --git a/crates/fula-client/src/config.rs b/crates/fula-client/src/config.rs index d400f06..b9187cd 100644 --- a/crates/fula-client/src/config.rs +++ b/crates/fula-client/src/config.rs @@ -43,6 +43,17 @@ pub struct Config { /// mid-stream per-chunk AEAD + size check in the engine itself, so the /// ceiling is an allocation guard, not a security boundary. pub buffered_download_max_bytes: u64, + + /// Phase 2.1 of master-independent reads: enable the master health + /// gate. Off by default (backward-compat). When on, the SDK observes + /// request outcomes and short-circuits with `MasterUnreachable` after + /// two consecutive failures, instead of paying the per-read timeout. + pub health_gate_enabled: bool, + + /// TTL of the `Down` state when `health_gate_enabled = true`. After + /// this duration elapses, the next request is allowed through as a + /// probe (without resetting state — only an observed success resets). + pub health_gate_ttl: Duration, } impl Default for Config { @@ -58,6 +69,8 @@ impl Default for Config { multipart_chunk_size: 256 * 1024, // 256 KB (must be < 1MB for IPFS) per_chunk_download_timeout: Duration::from_secs(300), // 5 min buffered_download_max_bytes: 256 * 1024 * 1024, // 256 MB + health_gate_enabled: false, // backward-compat: off by default + health_gate_ttl: Duration::from_secs(30), } } } diff --git a/crates/fula-client/src/encryption.rs b/crates/fula-client/src/encryption.rs index 7269d01..dc79cfb 100644 --- a/crates/fula-client/src/encryption.rs +++ b/crates/fula-client/src/encryption.rs @@ -2902,6 +2902,26 @@ impl EncryptedClient { Ok(()) } + /// Phase 1.2 of master-independent reads: compute the blinded bucket + /// lookup key as hex for the `x-amz-meta-fula-bucket-lookup-h` header. + /// + /// `bucket_lookup_h = BLAKE3(MetadataKey || bucket_name)[..16]`, where + /// `MetadataKey = derive_path_key("fula-metadata-v1")`. Hex-encoded + /// (32 chars). The 16-byte truncation matches master's `hashed_user_id` + /// convention. 
Master never sees `MetadataKey`. + /// + /// Attached on every manifest root commit (sharded v7, monolithic v4, + /// and the v1→v7 migration path) so master's put_object handler can + /// populate `BucketMetadata.bucket_lookup_h` regardless of which forest + /// format the SDK is using. Idempotent on master's side. + fn compute_bucket_lookup_h_hex(&self, bucket: &str) -> String { + let metadata_key = self.encryption.key_manager.derive_path_key("fula-metadata-v1"); + let mut input = metadata_key.as_bytes().to_vec(); + input.extend_from_slice(bucket.as_bytes()); + let hash = blake3::hash(&input); + hex::encode(&hash.as_bytes()[..16]) + } + /// Save the private forest index for a bucket (monolithic v4 format with AAD+sequence) pub async fn save_forest(&self, bucket: &str, forest: &PrivateForest) -> Result<()> { let forest_dek = self.encryption.key_manager.derive_path_key(&format!("forest:{}", bucket)); @@ -2924,8 +2944,11 @@ impl EncryptedClient { let data = encrypted.to_bytes() .map_err(ClientError::Encryption)?; + // Phase 1.2: monolithic v4 forest is also a manifest-root commit. + // Same header semantics as save_sharded_hamt_forest's Phase 2 PUT. let metadata = ObjectMetadata::new() - .with_content_type("application/octet-stream"); + .with_content_type("application/octet-stream") + .with_metadata("fula-bucket-lookup-h", &self.compute_bucket_lookup_h_hex(bucket)); let put_result = self.inner.put_object_with_metadata_conditional( bucket, @@ -3237,8 +3260,11 @@ impl EncryptedClient { let data = encrypted_manifest.to_bytes() .map_err(ClientError::Encryption)?; + // Phase 1.2: sharded HAMT v7 manifest root commit. See + // compute_bucket_lookup_h_hex for header semantics. 
let metadata = ObjectMetadata::new() - .with_content_type("application/octet-stream"); + .with_content_type("application/octet-stream") + .with_metadata("fula-bucket-lookup-h", &self.compute_bucket_lookup_h_hex(bucket)); let put_result = self.inner.put_object_with_metadata_conditional( bucket, @@ -3956,11 +3982,20 @@ impl EncryptedClient { // our HEAD (or GET) and this PUT loses the race — we defer and retry // next session. Crucial because the in-process `migration_lock.write()` // is NOT held during load-time-triggered migration. + // + // Phase 1.2: v1→v7 migration is a manifest-root commit. Attach the + // bucket-lookup-h header so master can populate `bucket_lookup_h` + // here too — otherwise users who migrate to v7 via this path (rather + // than save_sharded_hamt_forest) would never get their lookup_h set. let put_result = match self.inner.put_object_with_metadata_conditional( bucket, &index_key, Bytes::from(manifest_data), - Some(ObjectMetadata::new().with_content_type("application/octet-stream")), + Some( + ObjectMetadata::new() + .with_content_type("application/octet-stream") + .with_metadata("fula-bucket-lookup-h", &self.compute_bucket_lookup_h_hex(bucket)), + ), Some(&v1_etag), None, ).await { diff --git a/crates/fula-client/src/error.rs b/crates/fula-client/src/error.rs index f042329..39920a8 100644 --- a/crates/fula-client/src/error.rs +++ b/crates/fula-client/src/error.rs @@ -98,6 +98,14 @@ pub enum ClientError { /// re-enter the load-time migration path. #[error("Migration lock held for bucket {bucket} (expires at {expires_at} ms)")] MigrationLockHeld { bucket: String, expires_at: i64 }, + + /// Phase 2.1 of master-independent reads: the SDK's health gate + /// observed master is unreachable and short-circuited the request. + /// Phase 2.4 will catch this variant and trigger the gateway-race + /// fallback. Standalone (Phase 2.1 only), this turns "wait 3s for + /// timeout" into "fast-fail with a clear signal." 
+ #[error("Master unreachable (health gate; down for ~{down_for_secs}s)")] + MasterUnreachable { down_for_secs: u64 }, } impl ClientError { diff --git a/crates/fula-client/src/gateway_fetch.rs b/crates/fula-client/src/gateway_fetch.rs new file mode 100644 index 0000000..5c20ff4 --- /dev/null +++ b/crates/fula-client/src/gateway_fetch.rs @@ -0,0 +1,1306 @@ +//! Multi-gateway race + CID verification (Phase 2.3 of master-independent reads). +//! +//! When master is unreachable (per the [`crate::health_gate`]), Phase 2.4 +//! routes reads through this module: race fetches against several public +//! IPFS gateways with dynamic priority, verify each response's bytes +//! against the requested CID's multihash, return the first verified hit. +//! +//! # Default gateway list (post-Step-0) +//! +//! Six gateways in quality-priority order. **`ipfs.cloud.fx.land/gateway/` +//! is intentionally NOT in the default list** — Step 0 verification on +//! 2026-05-01 found it returns HTTP 500 with `{"error":"Error fetching +//! content from IPFS"}` for dag-cbor (codec 0x71) CIDs while serving +//! raw codec correctly. Forest manifest pages and bucket Prolly Tree +//! roots — the metadata SDK cold-start needs — are dag-cbor. Including +//! it in the race would burn a slot returning 500s for half of all +//! requests. Re-add it via `Config::gateway_fallback_urls` once the +//! gateway-side codec bug is fixed (one-line config change, no code +//! change). +//! +//! # CID verification +//! +//! Every fetched body is re-hashed against the multihash declared in +//! the requested CID. We support standard IPFS multihash codes: +//! +//! - `0x1e` (BLAKE3) — used by encrypted SDK chunk uploads via +//! `block/put?mhtype=blake3` (`fula-blockstore::ipfs::put_block_raw`) +//! - `0x12` (SHA2-256) — used by IPFS UnixFS chunked uploads +//! (`block/put` default; `add?cid-version=1`) +//! +//! Any other multihash code → [`VerifyError::UnsupportedHashCode`]. We +//! 
deliberately do NOT support `cid_utils::create_cid`'s quirky +//! `sha2_256(blake3(data))` scheme because that path is only used by +//! the in-memory `MemoryBlockStore` (test backend) and never produces +//! CIDs that round-trip to public IPFS gateways. +//! +//! # Backward compatibility +//! +//! Phase 2.3 is purely additive new infrastructure. No callers exist +//! yet — Phase 2.4 wires this into [`crate::encryption`]'s GET path. +//! Existing reads against a healthy master continue to behave exactly +//! as before. + +#![cfg(not(target_arch = "wasm32"))] + +use bytes::Bytes; +use cid::Cid; +use parking_lot::Mutex; +use std::time::{Duration, Instant}; + +/// IPFS multihash code for BLAKE3-256. +const MULTIHASH_BLAKE3: u64 = 0x1e; +/// IPFS multihash code for SHA2-256. +const MULTIHASH_SHA2_256: u64 = 0x12; + +/// Default decay time-constant. After `TAU`, a penalty of 1.0 decays +/// to ~0.37; after `3 * TAU` (~3 minutes for default 60s) it's at ~5%. +/// Configurable per `effective_priority` call so tests can use a much +/// shorter TAU without sleeping for minutes. +pub(crate) const DEFAULT_DECAY_TAU: Duration = Duration::from_secs(60); + +/// Cooldown after a CID-verification failure. A gateway returning +/// content that fails CID verification is "returned wrong bytes" — +/// a strong-signal event. Penalty alone (decay TAU=60s) returns to +/// ~5% in 3 min, which is too fast to trust again. Cooldown enforces +/// a hard 5-min skip period before the gateway can re-enter the race. +pub(crate) const VERIFY_FAILURE_COOLDOWN: Duration = Duration::from_secs(300); + +/// Default per-gateway request timeout. The default reqwest timeout +/// (30s) is too generous for K=3 racing; if Cloudflare hangs, we want +/// the race to give up on it within 10s and let dweb.link's faster +/// response win. +pub(crate) const DEFAULT_FETCH_TIMEOUT: Duration = Duration::from_secs(10); + +/// Default ordered list of public gateway URL templates. 
Lower index =
+/// higher base priority. Phase 2.3 races the top K (default 3) of these
+/// in parallel; Phase 2.4 will use this directly.
+///
+/// Each template uses a literal `{cid}` placeholder that gets substituted
+/// with the requested CID's `to_string()` form. All six gateways speak
+/// standard `/ipfs/` URL conventions.
+pub fn default_gateway_urls() -> Vec<String> {
+    vec![
+        "https://cloudflare-ipfs.com/ipfs/{cid}".to_string(),
+        "https://dweb.link/ipfs/{cid}".to_string(),
+        "https://ipfs.io/ipfs/{cid}".to_string(),
+        "https://trustless-gateway.link/ipfs/{cid}".to_string(),
+        "https://4everland.io/ipfs/{cid}".to_string(),
+        "https://gateway.pinata.cloud/ipfs/{cid}".to_string(),
+    ]
+}
+
+/// Errors specific to gateway-fetched body verification.
+#[derive(Debug, thiserror::Error, PartialEq, Eq)]
+pub enum VerifyError {
+    /// The CID's multihash code is one we don't know how to verify.
+    /// Returned for codes other than BLAKE3 (0x1e) and SHA2-256 (0x12).
+    #[error("unsupported multihash code: 0x{code:x}")]
+    UnsupportedHashCode { code: u64 },
+
+    /// The fetched bytes hash to a different digest than the CID's
+    /// multihash. Possible causes: gateway returned wrong content,
+    /// in-flight tampering, or the gateway has the wrong block under
+    /// this CID (shouldn't happen since CIDs are content-addressed).
+    #[error("digest mismatch (CID hash code 0x{code:x})")]
+    DigestMismatch { code: u64 },
+}
+
+/// Re-hash `data` using the algorithm declared in `cid`'s multihash and
+/// compare against the CID's digest. Returns `Ok(())` if the bytes
+/// content-address to the CID, otherwise [`VerifyError`].
+///
+/// This is the security boundary: a successful return means the bytes
+/// are exactly what the CID claims they are. Callers MUST refuse to
+/// hand the bytes to downstream consumers if this fails.
+pub fn verify_cid_against_bytes(cid: &Cid, data: &[u8]) -> Result<(), VerifyError> {
+    let mh = cid.hash();
+    let code = mh.code();
+    let expected_digest = mh.digest();
+    match code {
+        MULTIHASH_BLAKE3 => {
+            let actual = blake3::hash(data);
+            if actual.as_bytes().as_slice() == expected_digest {
+                Ok(())
+            } else {
+                Err(VerifyError::DigestMismatch { code })
+            }
+        }
+        MULTIHASH_SHA2_256 => {
+            use sha2::{Digest, Sha256};
+            let mut hasher = Sha256::new();
+            hasher.update(data);
+            let actual = hasher.finalize();
+            if actual.as_slice() == expected_digest {
+                Ok(())
+            } else {
+                Err(VerifyError::DigestMismatch { code })
+            }
+        }
+        other => Err(VerifyError::UnsupportedHashCode { code: other }),
+    }
+}
+
+// ============================================================
+// Gateway pool data structures (Checkpoint A skeleton).
+// Behavior (penalty math, decay, fetch, race) lands in subsequent
+// checkpoints.
+// ============================================================
+
+/// Per-gateway runtime state. Penalty + cooldown + last-observed
+/// timestamp drive the dynamic priority calculation in Checkpoint B.
+#[derive(Debug)]
+pub(crate) struct GatewayState {
+    /// 0.0 = healthy, 1.0 = fully sidelined. Decays toward 0 over time.
+    pub(crate) penalty: f32,
+    /// Bumped on each failure; reset on each success. Used by the
+    /// circuit-breaker open rule (Checkpoint C).
+    pub(crate) consecutive_failures: u32,
+    /// When the last priority observation / state mutation happened.
+    /// Used to compute decay lazily on the next read.
+    pub(crate) last_observed_at: Instant,
+    /// Hard skip until this instant if Some — set on HTTP 429 with a
+    /// `Retry-After` header. Bypasses penalty math entirely while in
+    /// effect.
+    pub(crate) cooldown_until: Option<Instant>,
+}
+
+impl GatewayState {
+    fn fresh() -> Self {
+        Self {
+            penalty: 0.0,
+            consecutive_failures: 0,
+            last_observed_at: Instant::now(),
+            cooldown_until: None,
+        }
+    }
+}
+
+/// One entry in the gateway pool.
Cheap-clone via `Arc` (Checkpoint C
+/// will wrap `GatewayPool` in `Arc` so all SDK clones share state).
+#[derive(Debug)]
+pub(crate) struct Gateway {
+    /// URL template containing `{cid}` placeholder.
+    pub(crate) url_template: String,
+    /// Index in the configured list (0 = highest base priority).
+    pub(crate) base_priority: u8,
+    pub(crate) state: Mutex<GatewayState>,
+}
+
+impl Gateway {
+    fn new(url_template: String, base_priority: u8) -> Self {
+        Self {
+            url_template,
+            base_priority,
+            state: Mutex::new(GatewayState::fresh()),
+        }
+    }
+
+    /// Substitute the `{cid}` placeholder with the requested CID's
+    /// canonical string form.
+    pub(crate) fn url_for(&self, cid: &Cid) -> String {
+        self.url_template
+            .replace("{cid}", &cid.to_string())
+    }
+
+    /// Record a successful fetch + verify. Halves the penalty and
+    /// resets the consecutive-failure counter.
+    pub(crate) fn record_success(&self) {
+        let mut s = self.state.lock();
+        s.penalty *= 0.5;
+        s.consecutive_failures = 0;
+        s.last_observed_at = Instant::now();
+    }
+
+    /// Record a transient failure (5xx, timeout, connection error).
+    /// Bumps penalty by 0.3 (capped at 1.0) and increments the
+    /// consecutive-failure counter.
+    pub(crate) fn record_transient_failure(&self) {
+        let mut s = self.state.lock();
+        s.penalty = (s.penalty + 0.3).min(1.0);
+        s.consecutive_failures = s.consecutive_failures.saturating_add(1);
+        s.last_observed_at = Instant::now();
+    }
+
+    /// Record a CID-verification failure. Strong-signal event — the
+    /// gateway returned bytes that don't content-address to the CID.
+    /// Sets penalty to 1.0 AND a 5-minute cooldown. Cooldown is the
+    /// primary defense (skips this gateway from the race entirely);
+    /// penalty=1.0 is the diagnostic signal that recovers via decay
+    /// after cooldown lifts.
+ pub(crate) fn record_verify_failure(&self) { + let mut s = self.state.lock(); + s.penalty = 1.0; + s.consecutive_failures = s.consecutive_failures.saturating_add(1); + s.last_observed_at = Instant::now(); + s.cooldown_until = Some(Instant::now() + VERIFY_FAILURE_COOLDOWN); + } + + /// Record an HTTP 429 with `Retry-After` header. Sets cooldown + /// without changing penalty — rate limits are a load-shedding + /// signal, not a quality issue. + pub(crate) fn record_rate_limit(&self, retry_after: Duration) { + let mut s = self.state.lock(); + s.cooldown_until = Some(Instant::now() + retry_after); + } + + /// Compute the effective priority at `now` using the given decay + /// time-constant. Lower = higher priority (matches sort order). + /// + /// **Pure** — does NOT mutate state. Decay is computed lazily + /// from `(penalty, last_observed_at)`. Only events + /// (`record_success` / `record_transient_failure` / etc.) update + /// `last_observed_at`. This makes decay a function of + /// time-since-last-event, which is the property we want. + pub(crate) fn effective_priority(&self, now: Instant, tau: Duration) -> f32 { + let (penalty, last_obs) = { + let s = self.state.lock(); + (s.penalty, s.last_observed_at) + }; + let elapsed = now.saturating_duration_since(last_obs).as_secs_f32(); + let tau_secs = tau.as_secs_f32().max(0.001); + let decayed = penalty * (-elapsed / tau_secs).exp(); + self.base_priority as f32 + decayed * 3.0 + } + + /// True iff the gateway is in an active cooldown window. Phase 2.3 + /// Checkpoint C will use this to filter cooldowned gateways out of + /// the race candidate set entirely (rather than letting them + /// participate as f32::INFINITY losers). + pub(crate) fn is_in_cooldown(&self, now: Instant) -> bool { + let s = self.state.lock(); + s.cooldown_until.map_or(false, |until| now < until) + } +} + +/// Outcome of a single-gateway fetch+verify attempt. 
+#[derive(Debug, thiserror::Error)] +pub enum FetchError { + /// 5xx, request timeout, connection-level error. Caller bumps + /// the gateway's penalty. + #[error("transient gateway failure: {0}")] + Transient(String), + + /// HTTP 429 with `Retry-After` parsed. Caller sets cooldown_until + /// = now + retry_after; no penalty change. + #[error("rate limited (retry after {retry_after_secs}s)")] + RateLimited { retry_after_secs: u64 }, + + /// HTTP 404 / 410. Request-level outcome — gateway responded + /// correctly, content just isn't there. No penalty change. + #[error("content not found at gateway")] + NotFound, + + /// Gateway returned bytes that don't hash to the requested CID's + /// multihash. Caller calls `record_verify_failure` (penalty=1.0 + /// + 5-min cooldown). + #[error("CID verification failed: {0}")] + VerifyFailed(#[from] VerifyError), +} + +/// Fetch a single CID from one specific gateway with timeout + CID +/// verification. Used by the race in Checkpoint C; testable directly. +/// +/// On HTTP 200: reads body, verifies via `verify_cid_against_bytes`. +/// On HTTP 429: parses `Retry-After` (decimal-seconds form; falls +/// back to a 60-second default if the header is missing/unparseable). +/// On HTTP 4xx (other than 429) and 5xx: surfaces as the appropriate +/// `FetchError` variant. +/// +/// Note: this function does NOT call any of the gateway's `record_*` +/// methods. The race orchestrator (Checkpoint C) does that based on +/// the returned `FetchError` variant. Keeping the side-effects in +/// the orchestrator makes per-gateway behavior easier to test. 
+pub(crate) async fn fetch_one(
+    gateway: &Gateway,
+    cid: &Cid,
+    http: &reqwest::Client,
+    timeout: Duration,
+) -> Result<Bytes, FetchError> {
+    let url = gateway.url_for(cid);
+    let resp = http
+        .get(&url)
+        .timeout(timeout)
+        .send()
+        .await
+        .map_err(|e| FetchError::Transient(format!("send: {}", e)))?;
+
+    let status = resp.status();
+
+    if status.is_success() {
+        let body = resp
+            .bytes()
+            .await
+            .map_err(|e| FetchError::Transient(format!("body read: {}", e)))?;
+        verify_cid_against_bytes(cid, &body)?;
+        Ok(body)
+    } else if status.as_u16() == 429 {
+        // Retry-After: HTTP/1.1 spec allows either delta-seconds or
+        // an HTTP-date. Most public gateways emit delta-seconds. Fall
+        // back to a 60-second default for missing/unparseable headers
+        // so we don't loop hot against a rate-limiter.
+        let retry_after_secs = resp
+            .headers()
+            .get("retry-after")
+            .and_then(|v| v.to_str().ok())
+            .and_then(|s| s.trim().parse::<u64>().ok())
+            .unwrap_or(60);
+        Err(FetchError::RateLimited { retry_after_secs })
+    } else if status.as_u16() == 404 || status.as_u16() == 410 {
+        Err(FetchError::NotFound)
+    } else {
+        Err(FetchError::Transient(format!("HTTP {}", status.as_u16())))
+    }
+}
+
+/// A pool of gateways racing the same CID fetch. Constructed once per
+/// `FulaClient` and shared across all clones via `Arc` (Checkpoint C).
+#[derive(Debug)]
+pub struct GatewayPool {
+    pub(crate) gateways: Vec<Gateway>,
+    /// How many gateways to race in parallel for a single CID. Default
+    /// 3 — catches the median Cloudflare-occasional-blip without paying
+    /// the latency cost of a single sequential request, while not
+    /// wasting bandwidth on K-1 cancelled losers.
+    pub(crate) race_concurrency: usize,
+}
+
+impl GatewayPool {
+    /// Construct a pool with the default 6-gateway list and race
+    /// concurrency K=3. Use this in production unless an operator has
+    /// overridden via [`Config::gateway_fallback_urls`](crate::Config).
+    pub fn default_pool() -> Self {
+        let gateways = default_gateway_urls()
+            .into_iter()
+            .enumerate()
+            .map(|(i, url)| Gateway::new(url, i as u8))
+            .collect();
+        Self {
+            gateways,
+            race_concurrency: 3,
+        }
+    }
+
+    /// Construct a pool from explicit URL templates. Use this for tests
+    /// (against `wiremock`) or operator overrides.
+    pub fn with_gateways(urls: Vec<String>, race_concurrency: usize) -> Self {
+        let gateways = urls
+            .into_iter()
+            .enumerate()
+            .map(|(i, url)| Gateway::new(url, i as u8))
+            .collect();
+        Self {
+            gateways,
+            race_concurrency,
+        }
+    }
+
+    /// Number of gateways in the pool.
+    pub fn len(&self) -> usize {
+        self.gateways.len()
+    }
+
+    /// True if no gateways are configured (effectively disables
+    /// gateway-race fallback).
+    pub fn is_empty(&self) -> bool {
+        self.gateways.is_empty()
+    }
+
+    /// Select gateways eligible to race RIGHT NOW. Filters out
+    /// cooldowned gateways entirely (rather than letting them
+    /// participate as f32::INFINITY losers — a sentinel-value hack),
+    /// sorts the remaining by effective priority ascending (lower =
+    /// faster path), and takes the top `race_concurrency`.
+    ///
+    /// PERF: this calls `effective_priority` twice per gateway during
+    /// sort comparisons (O(n log n) calls total). For the default 6
+    /// gateways, that's ~30 evaluations — acceptable. If the pool
+    /// grows past ~20 gateways, swap to `sort_by_cached_key` with a
+    /// `Reverse(NotNan)` wrapper or pre-compute `(priority, gateway)`
+    /// tuples and sort those.
+    ///
+    /// Returns an empty `Vec` if every gateway is in cooldown.
+    /// Callers (`fetch_verified`) interpret empty as
+    /// [`GatewayPoolError::AllUnavailable`] — a "try later" signal,
+    /// distinct from [`GatewayPoolError::AllFailed`] (racers ran but
+    /// all returned errors).
+    pub(crate) fn select_for_race(&self, now: Instant) -> Vec<&Gateway> {
+        let mut alive: Vec<&Gateway> = self
+            .gateways
+            .iter()
+            .filter(|g| !g.is_in_cooldown(now))
+            .collect();
+        alive.sort_by(|a, b| {
+            a.effective_priority(now, DEFAULT_DECAY_TAU)
+                .total_cmp(&b.effective_priority(now, DEFAULT_DECAY_TAU))
+        });
+        alive.truncate(self.race_concurrency);
+        alive
+    }
+
+    /// Fetch a CID by racing the top-K eligible gateways in parallel.
+    /// Returns the first verified body. Cancels in-flight losers via
+    /// `Drop` of the spawned futures (reqwest cancels the underlying
+    /// HTTP request on `Response::drop`, releasing the socket).
+    ///
+    /// Per-racer outcomes update the racer's penalty/cooldown state
+    /// via the orchestrator (here) — `fetch_one` itself is pure.
+    ///
+    /// PERF: each settled future synchronously locks the per-gateway
+    /// `parking_lot::Mutex` to update penalty/cooldown. Two
+    /// simultaneous `fetch_verified` calls hitting the same gateway
+    /// will briefly contend on that lock. Negligible for v1; revisit
+    /// if profiling shows lock contention under heavy parallel-race
+    /// load.
+    pub async fn fetch_verified(
+        &self,
+        cid: &Cid,
+        http: &reqwest::Client,
+    ) -> Result<Bytes, GatewayPoolError> {
+        use futures::stream::FuturesUnordered;
+        use futures::StreamExt;
+
+        let now = Instant::now();
+        let candidates = self.select_for_race(now);
+        if candidates.is_empty() {
+            return Err(GatewayPoolError::AllUnavailable);
+        }
+
+        // Spawn one future per candidate. Each future returns a tuple
+        // (gateway_index_in_pool, fetch_result) so the post-race state
+        // mutation can apply to the right gateway.
+        let mut in_flight: FuturesUnordered<_> = candidates
+            .iter()
+            .enumerate()
+            .map(|(i, g)| {
+                let g_ref = *g;
+                async move {
+                    let r = fetch_one(g_ref, cid, http, DEFAULT_FETCH_TIMEOUT).await;
+                    (i, g_ref, r)
+                }
+            })
+            .collect();
+
+        let mut errors: Vec<String> = Vec::new();
+
+        while let Some((_idx, g, result)) = in_flight.next().await {
+            match result {
+                Ok(body) => {
+                    g.record_success();
+                    // Drop in_flight to cancel remaining racers.
+                    drop(in_flight);
+                    return Ok(body);
+                }
+                Err(FetchError::Transient(msg)) => {
+                    g.record_transient_failure();
+                    errors.push(format!("transient: {}", msg));
+                }
+                Err(FetchError::RateLimited { retry_after_secs }) => {
+                    g.record_rate_limit(Duration::from_secs(retry_after_secs));
+                    errors.push(format!("rate-limited (retry {}s)", retry_after_secs));
+                }
+                Err(FetchError::NotFound) => {
+                    // Request-level outcome — no penalty change. But
+                    // a 404 from this gateway means the content isn't
+                    // there; collect for diagnostic, race continues.
+                    errors.push("not-found".to_string());
+                }
+                Err(FetchError::VerifyFailed(ve)) => {
+                    g.record_verify_failure();
+                    errors.push(format!("verify-failed: {}", ve));
+                }
+            }
+        }
+
+        Err(GatewayPoolError::AllFailed { errors })
+    }
+}
+
+/// Outcome of a multi-gateway race. Distinct from `FetchError`
+/// because the race aggregates per-gateway results.
+#[derive(Debug, thiserror::Error)]
+pub enum GatewayPoolError {
+    /// Every gateway in the pool is currently in a cooldown window
+    /// (recent rate-limit or verify-failure). This is a "try again
+    /// later" signal — short-term unavailable, not a failure of the
+    /// content itself.
+    #[error("all gateways are in cooldown; retry later")]
+    AllUnavailable,
+
+    /// All eligible gateways were raced and all returned errors
+    /// (transient, rate-limited, not-found, or verify-failed).
+    /// `errors` lists the per-racer outcomes for diagnostic logging.
+ #[error("all gateway racers failed: {errors:?}")] + AllFailed { errors: Vec }, +} + +#[cfg(test)] +mod tests { + use super::*; + use cid::multihash::Multihash; + use sha2::{Digest, Sha256}; + + /// Build a raw-codec CID with a BLAKE3 multihash over `data`. This + /// matches what `block/put?mhtype=blake3` produces (the encrypted + /// SDK chunk-upload path). + fn cid_blake3(data: &[u8]) -> Cid { + let h = blake3::hash(data); + let mh = Multihash::<64>::wrap(MULTIHASH_BLAKE3, h.as_bytes()).unwrap(); + Cid::new_v1(0x55 /* raw */, mh) + } + + /// Build a dag-cbor CID with a SHA2-256 multihash over `data`. This + /// matches what `block/put` (default) and `add?cid-version=1` + /// produce. + fn cid_sha2(data: &[u8]) -> Cid { + let mut hasher = Sha256::new(); + hasher.update(data); + let digest = hasher.finalize(); + let mh = Multihash::<64>::wrap(MULTIHASH_SHA2_256, digest.as_slice()).unwrap(); + Cid::new_v1(0x71 /* dag-cbor */, mh) + } + + // ============================================================ + // default_gateway_urls + // ============================================================ + + #[test] + fn test_default_gateway_urls_list_is_six_entries() { + let urls = default_gateway_urls(); + assert_eq!(urls.len(), 6); + } + + #[test] + fn test_default_gateway_urls_does_not_include_fula_gateway() { + // Step-0 finding (2026-05-01): ipfs.cloud.fx.land/gateway/ + // returns 500 on dag-cbor codec. Default list MUST NOT include + // it until that codec bug is fixed. + let urls = default_gateway_urls(); + for url in &urls { + assert!( + !url.contains("ipfs.cloud.fx.land"), + "fula gateway must not be in default list (dag-cbor codec bug); found: {}", + url + ); + } + } + + #[test] + fn test_default_gateway_urls_quality_order() { + // Cloudflare is slot 0 (lowest latency, generous rate limits). + // Pinata is the last fallback. Verify the published quality + // order so a reorder is a deliberate change. 
+ let urls = default_gateway_urls(); + assert!(urls[0].contains("cloudflare-ipfs.com")); + assert!(urls[1].contains("dweb.link")); + assert!(urls[2].contains("ipfs.io")); + assert!(urls[3].contains("trustless-gateway.link")); + assert!(urls[4].contains("4everland.io")); + assert!(urls[5].contains("gateway.pinata.cloud")); + } + + #[test] + fn test_default_gateway_urls_have_cid_placeholder() { + for url in default_gateway_urls() { + assert!( + url.contains("{cid}"), + "url must have {{cid}} placeholder: {}", + url + ); + } + } + + // ============================================================ + // verify_cid_against_bytes + // ============================================================ + + #[test] + fn test_verify_blake3_match_passes() { + // The encrypted SDK's chunk-upload path produces blake3+raw + // CIDs. Verification of correct bytes against such a CID must + // pass. + let data = b"hello blake3 world"; + let cid = cid_blake3(data); + verify_cid_against_bytes(&cid, data).expect("blake3 verify on matching bytes"); + } + + #[test] + fn test_verify_blake3_mismatch_rejects() { + // Tampered bytes must be rejected with DigestMismatch. + let original = b"original content"; + let cid = cid_blake3(original); + + let tampered = b"tampered content"; + match verify_cid_against_bytes(&cid, tampered) { + Err(VerifyError::DigestMismatch { code }) => { + assert_eq!(code, MULTIHASH_BLAKE3); + } + other => panic!("expected DigestMismatch, got {:?}", other), + } + } + + #[test] + fn test_verify_sha2_match_passes() { + // IPFS UnixFS / standard `block/put` paths produce sha2-256 + // multihash CIDs. Verification of correct bytes must pass. 
+ let data = b"hello sha2 world"; + let cid = cid_sha2(data); + verify_cid_against_bytes(&cid, data).expect("sha2 verify on matching bytes"); + } + + #[test] + fn test_verify_sha2_mismatch_rejects() { + let original = b"sha2 original"; + let cid = cid_sha2(original); + let tampered = b"sha2 tampered"; + match verify_cid_against_bytes(&cid, tampered) { + Err(VerifyError::DigestMismatch { code }) => { + assert_eq!(code, MULTIHASH_SHA2_256); + } + other => panic!("expected DigestMismatch, got {:?}", other), + } + } + + #[test] + fn test_verify_unsupported_hash_code_rejects() { + // SHA3-256 (code 0x16) is NOT in our supported set. Even if + // the bytes "match" via sha2/blake3, we must refuse rather + // than fake a verification we can't actually perform. + let data = b"sha3 test"; + // Build a CID with an arbitrary code we don't support. + let mh = Multihash::<64>::wrap(0x16 /* sha3-256 */, &[0u8; 32]).unwrap(); + let cid = Cid::new_v1(0x55, mh); + match verify_cid_against_bytes(&cid, data) { + Err(VerifyError::UnsupportedHashCode { code }) => { + assert_eq!(code, 0x16); + } + other => panic!("expected UnsupportedHashCode, got {:?}", other), + } + } + + #[test] + fn test_verify_empty_data_against_empty_blake3() { + // Edge case: empty body. blake3("") has a well-defined digest; + // verification must work on length-0 inputs without panic. 
+ let cid = cid_blake3(b""); + verify_cid_against_bytes(&cid, b"").expect("empty bytes verify"); + } + + // ============================================================ + // GatewayPool skeleton (Checkpoint A — structure only) + // ============================================================ + + #[test] + fn test_default_pool_has_six_gateways() { + let pool = GatewayPool::default_pool(); + assert_eq!(pool.len(), 6); + assert_eq!(pool.race_concurrency, 3); + } + + #[test] + fn test_pool_with_gateways_sets_concurrency() { + let pool = GatewayPool::with_gateways( + vec!["https://test1.example/ipfs/{cid}".to_string()], + 2, + ); + assert_eq!(pool.len(), 1); + assert_eq!(pool.race_concurrency, 2); + } + + #[test] + fn test_pool_assigns_base_priority_by_index() { + let pool = GatewayPool::default_pool(); + for (i, g) in pool.gateways.iter().enumerate() { + assert_eq!(g.base_priority as usize, i); + } + } + + #[test] + fn test_gateway_url_for_substitutes_cid() { + let g = Gateway::new("https://example.test/ipfs/{cid}".to_string(), 0); + let cid = cid_blake3(b"x"); + let url = g.url_for(&cid); + assert!(url.contains(&cid.to_string())); + assert!(!url.contains("{cid}")); + } + + #[test] + fn test_gateway_state_starts_healthy() { + let g = Gateway::new("https://x/{cid}".to_string(), 0); + let s = g.state.lock(); + assert_eq!(s.penalty, 0.0); + assert_eq!(s.consecutive_failures, 0); + assert!(s.cooldown_until.is_none()); + } + + // ============================================================ + // Checkpoint B: per-gateway penalty math + cooldown + // ============================================================ + + #[test] + fn test_record_success_halves_penalty_and_resets_counter() { + let g = Gateway::new("https://x/{cid}".to_string(), 0); + // Pre-condition: simulate an existing penalty + { + let mut s = g.state.lock(); + s.penalty = 0.6; + s.consecutive_failures = 3; + } + g.record_success(); + let s = g.state.lock(); + assert!((s.penalty - 0.3).abs() < f32::EPSILON, 
"penalty must be halved"); + assert_eq!(s.consecutive_failures, 0); + } + + #[test] + fn test_record_transient_failure_caps_at_one() { + let g = Gateway::new("https://x/{cid}".to_string(), 0); + // Five consecutive failures must cap penalty at exactly 1.0 + // (3 * 0.3 = 0.9, then +0.3 → 1.0; further +0.3 stays at 1.0). + for _ in 0..5 { + g.record_transient_failure(); + } + let s = g.state.lock(); + assert!(s.penalty <= 1.0 + f32::EPSILON, "penalty must cap at 1.0"); + assert!(s.penalty > 0.99, "penalty must reach 1.0 after 5 failures"); + assert_eq!(s.consecutive_failures, 5); + } + + #[test] + fn test_record_verify_failure_pegs_penalty_and_sets_cooldown() { + let g = Gateway::new("https://x/{cid}".to_string(), 0); + let before = Instant::now(); + g.record_verify_failure(); + let s = g.state.lock(); + assert_eq!(s.penalty, 1.0); + let cd = s.cooldown_until.expect("cooldown must be set"); + // Cooldown should be ~5 minutes from now. + let target = before + VERIFY_FAILURE_COOLDOWN; + // Allow slack for the time elapsed during the test. + assert!(cd >= target - Duration::from_secs(1)); + assert!(cd <= target + Duration::from_secs(2)); + } + + #[test] + fn test_record_rate_limit_sets_cooldown_only() { + // Rate-limit cooldown must NOT change penalty (load-shedding, + // not a quality issue). + let g = Gateway::new("https://x/{cid}".to_string(), 0); + let pre_penalty = g.state.lock().penalty; + g.record_rate_limit(Duration::from_secs(30)); + let s = g.state.lock(); + assert_eq!(s.penalty, pre_penalty, "rate limit must not change penalty"); + assert!(s.cooldown_until.is_some()); + } + + #[test] + fn test_is_in_cooldown_transitions_through_expiry() { + let g = Gateway::new("https://x/{cid}".to_string(), 0); + // Fresh: not in cooldown. 
+ assert!(!g.is_in_cooldown(Instant::now())); + + g.record_rate_limit(Duration::from_millis(50)); + assert!(g.is_in_cooldown(Instant::now()), "must be in cooldown immediately after rate-limit"); + + std::thread::sleep(Duration::from_millis(80)); + assert!( + !g.is_in_cooldown(Instant::now()), + "cooldown must auto-expire after the retry-after duration" + ); + } + + #[test] + fn test_effective_priority_reflects_decayed_penalty() { + // With a fast TAU (100ms), penalty=1.0 should decay to ~0.37 + // after 1 TAU and ~0.05 after 3 TAUs. + let g = Gateway::new("https://x/{cid}".to_string(), 2); + // Force penalty to 1.0 directly so we have a known starting + // point (record_transient_failure also bumps last_observed_at, + // which we want to set to "just now" anyway). + g.record_transient_failure(); + g.record_transient_failure(); + g.record_transient_failure(); + g.record_transient_failure(); + let now = Instant::now(); + let tau = Duration::from_millis(100); + + // Immediately after, penalty ~1.0, so effective_priority + // ≈ base_priority + 1.0 * 3.0 = 5.0. + let pri_now = g.effective_priority(now, tau); + assert!( + pri_now > 4.5 && pri_now < 5.5, + "expected ~5.0 (base 2 + penalty*3), got {}", + pri_now + ); + + // After 1 TAU: decay factor exp(-1) ≈ 0.368. Penalty ≈ 0.368, + // effective ≈ 2.0 + 1.1 = ~3.1. + let one_tau_later = now + tau; + let pri_after = g.effective_priority(one_tau_later, tau); + assert!( + pri_after > 2.8 && pri_after < 3.3, + "expected ~3.1 (base 2 + 0.37*3), got {}", + pri_after + ); + + // After 5 TAUs: decay factor exp(-5) ≈ 0.0067. Penalty essentially + // gone, effective ≈ base_priority = 2.0. + let five_tau_later = now + tau * 5; + let pri_far_later = g.effective_priority(five_tau_later, tau); + assert!( + pri_far_later >= 2.0 && pri_far_later < 2.1, + "expected ~2.0 after 5 TAUs of decay, got {}", + pri_far_later + ); + } + + #[test] + fn test_effective_priority_does_not_mutate_state() { + // Decay-on-read is pure. 
Calling effective_priority multiple + // times must NOT advance last_observed_at — that's what makes + // decay a function of time-since-last-event, not + // time-since-last-read. + let g = Gateway::new("https://x/{cid}".to_string(), 0); + g.record_transient_failure(); + let pre_obs = g.state.lock().last_observed_at; + let pre_pen = g.state.lock().penalty; + + let _ = g.effective_priority(Instant::now() + Duration::from_secs(1), Duration::from_secs(60)); + let _ = g.effective_priority(Instant::now() + Duration::from_secs(2), Duration::from_secs(60)); + + let post_obs = g.state.lock().last_observed_at; + let post_pen = g.state.lock().penalty; + + assert_eq!(pre_obs, post_obs, "last_observed_at must not change on read"); + assert_eq!(pre_pen, post_pen, "penalty must not change on read"); + } + + // ============================================================ + // Checkpoint B: fetch_one against wiremock + // ============================================================ + + #[tokio::test] + async fn test_fetch_one_success_with_matching_bytes() { + use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let data = b"hello fetch_one"; + let cid = cid_blake3(data); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(data.as_ref())) + .mount(&server) + .await; + + let gw = Gateway::new(format!("{}/ipfs/{{cid}}", server.uri()), 0); + let http = reqwest::Client::new(); + let body = fetch_one(&gw, &cid, &http, Duration::from_secs(5)) + .await + .expect("fetch_one ok"); + assert_eq!(body.as_ref(), data); + } + + #[tokio::test] + async fn test_fetch_one_tampered_bytes_rejected() { + // The gateway returns bytes that DON'T hash to the requested + // CID. fetch_one must reject with VerifyFailed — the security + // boundary that defends against malicious or buggy gateways. 
+ use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let cid_data = b"original content"; + let cid = cid_blake3(cid_data); + let tampered = b"tampered content"; + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(tampered.as_ref())) + .mount(&server) + .await; + + let gw = Gateway::new(format!("{}/ipfs/{{cid}}", server.uri()), 0); + let http = reqwest::Client::new(); + match fetch_one(&gw, &cid, &http, Duration::from_secs(5)).await { + Err(FetchError::VerifyFailed(VerifyError::DigestMismatch { .. })) => { /* ok */ } + other => panic!("expected VerifyFailed/DigestMismatch, got {:?}", other), + } + } + + #[tokio::test] + async fn test_fetch_one_404_returns_not_found() { + use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let cid = cid_blake3(b"some content"); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(404)) + .mount(&server) + .await; + + let gw = Gateway::new(format!("{}/ipfs/{{cid}}", server.uri()), 0); + let http = reqwest::Client::new(); + match fetch_one(&gw, &cid, &http, Duration::from_secs(5)).await { + Err(FetchError::NotFound) => { /* ok */ } + other => panic!("expected NotFound, got {:?}", other), + } + } + + #[tokio::test] + async fn test_fetch_one_503_returns_transient() { + use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let cid = cid_blake3(b"some content"); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(503)) + .mount(&server) + .await; + + let gw = Gateway::new(format!("{}/ipfs/{{cid}}", server.uri()), 0); + let http = reqwest::Client::new(); + match fetch_one(&gw, &cid, &http, 
Duration::from_secs(5)).await { + Err(FetchError::Transient(_)) => { /* ok */ } + other => panic!("expected Transient, got {:?}", other), + } + } + + #[tokio::test] + async fn test_fetch_one_429_with_retry_after_returns_rate_limited() { + use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let cid = cid_blake3(b"some content"); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(429).insert_header("Retry-After", "42")) + .mount(&server) + .await; + + let gw = Gateway::new(format!("{}/ipfs/{{cid}}", server.uri()), 0); + let http = reqwest::Client::new(); + match fetch_one(&gw, &cid, &http, Duration::from_secs(5)).await { + Err(FetchError::RateLimited { retry_after_secs }) => { + assert_eq!(retry_after_secs, 42, "must parse Retry-After header"); + } + other => panic!("expected RateLimited, got {:?}", other), + } + } + + // ============================================================ + // Checkpoint C: race orchestration + circuit breaker + // ============================================================ + + #[test] + fn test_select_for_race_filters_cooldowned() { + // 3 gateways. Put gateway 1 in cooldown. select_for_race + // returns gateways 0 and 2 only. + let pool = GatewayPool::with_gateways( + vec![ + "https://g0/{cid}".to_string(), + "https://g1/{cid}".to_string(), + "https://g2/{cid}".to_string(), + ], + 3, + ); + pool.gateways[1].record_rate_limit(Duration::from_secs(60)); + + let alive = pool.select_for_race(Instant::now()); + assert_eq!(alive.len(), 2); + assert!(alive.iter().any(|g| g.url_template.contains("g0"))); + assert!(alive.iter().any(|g| g.url_template.contains("g2"))); + assert!(!alive.iter().any(|g| g.url_template.contains("g1"))); + } + + #[test] + fn test_select_for_race_takes_top_k_by_priority() { + // 6 gateways with K=2. 
select_for_race returns the 2 with + // lowest effective priority (= highest quality), which for + // a fresh pool is just gateways 0 and 1 (base_priority 0, 1). + let pool = GatewayPool::with_gateways( + vec![ + "https://g0/{cid}".to_string(), + "https://g1/{cid}".to_string(), + "https://g2/{cid}".to_string(), + "https://g3/{cid}".to_string(), + "https://g4/{cid}".to_string(), + "https://g5/{cid}".to_string(), + ], + 2, + ); + let racers = pool.select_for_race(Instant::now()); + assert_eq!(racers.len(), 2); + assert_eq!(racers[0].base_priority, 0); + assert_eq!(racers[1].base_priority, 1); + } + + #[test] + fn test_select_for_race_penalty_demotes_gateway() { + // Pile penalty on the top-priority gateway. After enough + // failures, its effective priority should fall below the + // next ones, and select_for_race should pick the others + // first. + let pool = GatewayPool::with_gateways( + vec![ + "https://g0/{cid}".to_string(), + "https://g1/{cid}".to_string(), + "https://g2/{cid}".to_string(), + ], + 2, + ); + // 4 transient failures on g0 → penalty caps near 1.0, + // effective priority ≈ 0 + 1.0*3 = 3.0. + // g1 base = 1, g2 base = 2. + for _ in 0..4 { + pool.gateways[0].record_transient_failure(); + } + let racers = pool.select_for_race(Instant::now()); + // The first two slots should be g1 and g2 (priorities 1 and 2), + // ahead of the penalized g0 (effective ~3.0). 
+ assert_eq!(racers.len(), 2); + assert!( + racers[0].url_template.contains("g1") || racers[0].url_template.contains("g2"), + "penalized g0 must not be top of race; got {}", + racers[0].url_template + ); + } + + #[test] + fn test_select_for_race_empty_when_all_cooled_down() { + let pool = GatewayPool::with_gateways( + vec!["https://g0/{cid}".to_string(), "https://g1/{cid}".to_string()], + 3, + ); + for g in &pool.gateways { + g.record_rate_limit(Duration::from_secs(60)); + } + let alive = pool.select_for_race(Instant::now()); + assert!(alive.is_empty()); + } + + #[tokio::test] + async fn test_fetch_verified_first_gateway_wins() { + use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let data = b"race-winner-content"; + let cid = cid_blake3(data); + + // Two mock gateways: one fast 200, one slow 200. + let server_fast = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(data.as_ref())) + .mount(&server_fast) + .await; + + let server_slow = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with( + ResponseTemplate::new(200) + .set_body_bytes(data.as_ref()) + .set_delay(Duration::from_secs(2)), + ) + .mount(&server_slow) + .await; + + let pool = GatewayPool::with_gateways( + vec![ + format!("{}/ipfs/{{cid}}", server_fast.uri()), + format!("{}/ipfs/{{cid}}", server_slow.uri()), + ], + 2, + ); + let http = reqwest::Client::new(); + + let body = pool.fetch_verified(&cid, &http).await.expect("race ok"); + assert_eq!(body.as_ref(), data); + } + + #[tokio::test] + async fn test_fetch_verified_falls_through_failed_gateway_to_succeeding_one() { + use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let data = b"second-gateway-saves-the-day"; + let cid = cid_blake3(data); + + // First gateway always returns 503 (fast); second returns 
200 + // with matching bytes (slightly delayed) — this 150ms delay + // is for race-DETERMINISM in the test, not realism. Without + // it, both responses are instant and the FuturesUnordered + // arrival order is timing-dependent: when 200 wins the wire, + // the 503 future is cancelled by `drop(in_flight)` before its + // `record_transient_failure` can run, and the test's + // assert-on-503-penalty-bump becomes flaky. + let server_503 = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(503)) + .mount(&server_503) + .await; + + let server_ok = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with( + ResponseTemplate::new(200) + .set_body_bytes(data.as_ref()) + .set_delay(Duration::from_millis(150)), + ) + .mount(&server_ok) + .await; + + let pool = GatewayPool::with_gateways( + vec![ + format!("{}/ipfs/{{cid}}", server_503.uri()), + format!("{}/ipfs/{{cid}}", server_ok.uri()), + ], + 2, + ); + let http = reqwest::Client::new(); + + let body = pool.fetch_verified(&cid, &http).await.expect("fallback ok"); + assert_eq!(body.as_ref(), data); + + // The 503 gateway should have its penalty bumped. 
+ let s = pool.gateways[0].state.lock(); + assert!(s.penalty > 0.0, "503 gateway must be penalized"); + assert_eq!(s.consecutive_failures, 1); + } + + #[tokio::test] + async fn test_fetch_verified_all_failed_returns_aggregate_error() { + use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let cid = cid_blake3(b"unreachable"); + + let server_a = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(503)) + .mount(&server_a) + .await; + let server_b = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(404)) + .mount(&server_b) + .await; + + let pool = GatewayPool::with_gateways( + vec![ + format!("{}/ipfs/{{cid}}", server_a.uri()), + format!("{}/ipfs/{{cid}}", server_b.uri()), + ], + 2, + ); + let http = reqwest::Client::new(); + + match pool.fetch_verified(&cid, &http).await { + Err(GatewayPoolError::AllFailed { errors }) => { + assert_eq!(errors.len(), 2, "must aggregate per-gateway errors"); + } + other => panic!("expected AllFailed, got {:?}", other), + } + } + + #[tokio::test] + async fn test_fetch_verified_all_unavailable_when_pool_in_cooldown() { + let pool = GatewayPool::with_gateways( + vec!["http://test.invalid/ipfs/{cid}".to_string()], + 1, + ); + pool.gateways[0].record_rate_limit(Duration::from_secs(60)); + + let cid = cid_blake3(b"x"); + let http = reqwest::Client::new(); + match pool.fetch_verified(&cid, &http).await { + Err(GatewayPoolError::AllUnavailable) => { /* ok */ } + other => panic!("expected AllUnavailable, got {:?}", other), + } + } + + #[tokio::test] + async fn test_fetch_verified_tampered_response_records_verify_failure() { + // Race a gateway that returns tampered bytes alone — the race + // must fail (no verified body), AND the gateway's state must + // record a verify failure (penalty=1.0, cooldown set). 
+ use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let cid_data = b"original"; + let cid = cid_blake3(cid_data); + let tampered = b"NOT THE SAME"; + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(tampered.as_ref())) + .mount(&server) + .await; + + let pool = GatewayPool::with_gateways( + vec![format!("{}/ipfs/{{cid}}", server.uri())], + 1, + ); + let http = reqwest::Client::new(); + let result = pool.fetch_verified(&cid, &http).await; + assert!(matches!(result, Err(GatewayPoolError::AllFailed { .. }))); + + // Critical security assertion: the gateway is now in cooldown, + // so a future race won't include it for ~5 min. + let s = pool.gateways[0].state.lock(); + assert_eq!(s.penalty, 1.0, "verify failure pegs penalty at 1.0"); + assert!(s.cooldown_until.is_some(), "verify failure sets cooldown"); + let cooldown_remaining = s + .cooldown_until + .unwrap() + .saturating_duration_since(Instant::now()); + assert!( + cooldown_remaining > Duration::from_secs(290) + && cooldown_remaining <= VERIFY_FAILURE_COOLDOWN + Duration::from_secs(1), + "cooldown should be ~5 min; got {:?}", + cooldown_remaining + ); + } + + #[tokio::test] + async fn test_fetch_one_429_without_retry_after_uses_default() { + // Some gateways return 429 without a Retry-After header. + // We must not treat that as a parseable 0-second retry — + // the default 60s ensures we don't loop hot. 
+ use wiremock::matchers::{method, path_regex}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let cid = cid_blake3(b"some content"); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex(r"/ipfs/.+")) + .respond_with(ResponseTemplate::new(429)) + .mount(&server) + .await; + + let gw = Gateway::new(format!("{}/ipfs/{{cid}}", server.uri()), 0); + let http = reqwest::Client::new(); + match fetch_one(&gw, &cid, &http, Duration::from_secs(5)).await { + Err(FetchError::RateLimited { retry_after_secs }) => { + assert_eq!(retry_after_secs, 60, "missing header → 60s default"); + } + other => panic!("expected RateLimited, got {:?}", other), + } + } +} diff --git a/crates/fula-client/src/health_gate.rs b/crates/fula-client/src/health_gate.rs new file mode 100644 index 0000000..a5e2024 --- /dev/null +++ b/crates/fula-client/src/health_gate.rs @@ -0,0 +1,240 @@ +//! Master health gate (Phase 2.1 of master-independent reads). +//! +//! Lock-free, lazy-probed state machine that tracks whether the master S3 +//! endpoint is reachable. The SDK consults the gate inside its HTTP request +//! path: when the gate is `Up`, requests proceed normally; when `Down`, +//! requests short-circuit with `Error::MasterUnreachable` for the configured +//! TTL, avoiding the per-read timeout tax that would otherwise degrade the +//! fast path under any flaky network. +//! +//! ## Design +//! +//! - **Lazy probing.** No eager init probe (which would waste a roundtrip on +//! every SDK construction when master is up — the common case). Failures +//! are observed inside normal traffic; once the gate trips, periodic +//! "probe" attempts are allowed through after the TTL expires. +//! +//! - **2-consecutive-failure threshold.** A single 5xx on a single bucket is +//! not a master-down signal — it's a request-level issue. The gate only +//! trips after **two** consecutive failures across any requests. This +//! 
prevents one transient error from sidelining the whole client. +//! +//! - **Lock-free atomic state.** `state_ms` is an `AtomicU64` representing +//! either `0` (Up) or the unix-millis when the gate flipped Down. +//! `consecutive_failures` is an `AtomicU32`. No `Mutex` / `RwLock` +//! contention even when 50 in-flight requests all fail simultaneously. +//! +//! - **Phase 2.1 ships the gate; Phase 2.4 wires it into a fallback to the +//! gateway race.** Standalone, the gate just turns "3-second-timeout per +//! read" into "fast-fail with `MasterUnreachable`" when Down. + +use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +/// Threshold for flipping from `Up` to `Down`. One transient 5xx on a single +/// bucket isn't the same as "master is unreachable" — only two consecutive +/// signals trip the gate. +const CONSECUTIVE_FAILURE_THRESHOLD: u32 = 2; + +/// State machine for master reachability. +/// +/// `state_ms == 0` → currently `Up`. +/// `state_ms != 0` → flipped `Down` at this unix-millis timestamp. +/// +/// Cheap to clone via `Arc`; shared across all `FulaClient` clones so a +/// failure observed in one task immediately silences the rest. +pub struct HealthGate { + state_ms: AtomicU64, + consecutive_failures: AtomicU32, + ttl: Duration, +} + +impl HealthGate { + /// Create a new gate with the given TTL. Starts in the `Up` state. + pub fn new(ttl: Duration) -> Self { + Self { + state_ms: AtomicU64::new(0), + consecutive_failures: AtomicU32::new(0), + ttl, + } + } + + /// Decide whether a request to master should be sent or short-circuited. + /// + /// Returns: + /// - `GateDecision::Allow` — gate is `Up`, OR `Down` but `now > since + ttl` + /// (the TTL elapsed; this request is the next "probe"). + /// - `GateDecision::ShortCircuit { down_for_secs }` — gate is `Down` and + /// within the TTL; caller should fail fast with `MasterUnreachable`. 
+ pub fn decide(&self) -> GateDecision { + let down_at = self.state_ms.load(Ordering::Acquire); + if down_at == 0 { + return GateDecision::Allow; + } + let now = now_ms(); + let elapsed = now.saturating_sub(down_at); + if elapsed >= self.ttl.as_millis() as u64 { + // TTL elapsed — let this request through as a probe. Don't + // reset the gate yet; reset only on observed success. + GateDecision::Allow + } else { + GateDecision::ShortCircuit { + down_for_secs: elapsed / 1000, + } + } + } + + /// Record a successful master interaction. Resets the failure counter + /// and clears the `Down` timestamp (gate returns to `Up`). + pub fn record_success(&self) { + self.consecutive_failures.store(0, Ordering::Release); + self.state_ms.store(0, Ordering::Release); + } + + /// Record a master-side failure (connection refused / RST / 5xx / + /// request timeout). Increments the consecutive-failure counter; once + /// the threshold is reached, flips the gate to `Down(now)`. + /// + /// 4xx responses are NOT failures for gate purposes — they're + /// request-level issues, not master-down signals. + pub fn record_failure(&self) { + let prior = self.consecutive_failures.fetch_add(1, Ordering::AcqRel); + if prior + 1 >= CONSECUTIVE_FAILURE_THRESHOLD { + // Threshold crossed (or exceeded). Flip to `Down` if not already. + // Only update timestamp on the first transition this window so + // that repeated failures don't keep extending the TTL. + let _ = self.state_ms.compare_exchange( + 0, + now_ms(), + Ordering::AcqRel, + Ordering::Acquire, + ); + } + } +} + +/// Decision returned by `HealthGate::decide`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum GateDecision { + /// Caller should send the request to master normally. + Allow, + /// Caller should fail fast with `Error::MasterUnreachable`. + ShortCircuit { down_for_secs: u64 }, +} + +/// Current unix-time in milliseconds. 
Wall-clock based (so SystemTime +/// adjustments can shift the gate's perceived "since" — acceptable here +/// since we only compare durations, and a clock jump is at worst a slight +/// TTL anomaly). +fn now_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_state_is_up() { + // A freshly-constructed gate must be `Up`. Lazy probing means we + // never assume master is down without observation. + let gate = HealthGate::new(Duration::from_secs(30)); + assert_eq!(gate.decide(), GateDecision::Allow); + } + + #[test] + fn test_one_failure_does_not_trip() { + // S1 from advisor: a single failure must NOT sideline the gate. + // One-off 5xx on a bucket-level operation is not "master is down." + let gate = HealthGate::new(Duration::from_secs(30)); + gate.record_failure(); + assert_eq!( + gate.decide(), + GateDecision::Allow, + "one failure must not flip the gate" + ); + } + + #[test] + fn test_two_consecutive_failures_trip_to_down() { + // CONSECUTIVE_FAILURE_THRESHOLD = 2. Two consecutive failures + // (across any requests) flip the gate. + let gate = HealthGate::new(Duration::from_secs(30)); + gate.record_failure(); + gate.record_failure(); + match gate.decide() { + GateDecision::ShortCircuit { down_for_secs: _ } => { /* ok */ } + other => panic!("expected ShortCircuit, got {:?}", other), + } + } + + #[test] + fn test_success_resets_consecutive_counter() { + // A success between failures must reset the counter so a second + // failure (after the success) doesn't pile on with the first. + let gate = HealthGate::new(Duration::from_secs(30)); + gate.record_failure(); + gate.record_success(); + gate.record_failure(); + // Only ONE failure since the last success — must not be down. 
+ assert_eq!(gate.decide(), GateDecision::Allow); + } + + #[test] + fn test_success_clears_down_state() { + // When the gate is Down and a probe (after TTL or first attempt + // that gets through) succeeds, the gate must return to Up. + let gate = HealthGate::new(Duration::from_secs(30)); + gate.record_failure(); + gate.record_failure(); + assert!(matches!(gate.decide(), GateDecision::ShortCircuit { .. })); + gate.record_success(); + assert_eq!(gate.decide(), GateDecision::Allow); + } + + #[test] + fn test_down_state_expires_after_ttl() { + // After TTL elapses, the gate allows the next request through + // as a probe (without resetting state — only success resets). + // Use a very short TTL to keep the test fast. + let gate = HealthGate::new(Duration::from_millis(50)); + gate.record_failure(); + gate.record_failure(); + assert!(matches!(gate.decide(), GateDecision::ShortCircuit { .. })); + + std::thread::sleep(Duration::from_millis(80)); + + assert_eq!( + gate.decide(), + GateDecision::Allow, + "after TTL, next decide() must allow a probe" + ); + // State is still Down until a probe succeeds (verify by observing + // that consecutive_failures hasn't auto-reset). + let down_at = gate.state_ms.load(Ordering::Acquire); + assert!(down_at > 0, "state remains Down until success observed"); + } + + #[test] + fn test_concurrent_failures_idempotent() { + // Two threads recording failures concurrently must not produce + // unexpected state. Even with N concurrent failures, the gate is + // either Up (if total < threshold) or Down (if >= threshold). + use std::sync::Arc; + let gate = Arc::new(HealthGate::new(Duration::from_secs(30))); + + let mut handles = Vec::new(); + for _ in 0..8 { + let g = gate.clone(); + handles.push(std::thread::spawn(move || g.record_failure())); + } + for h in handles { + h.join().unwrap(); + } + // 8 failures > threshold(2), so gate must be Down. + assert!(matches!(gate.decide(), GateDecision::ShortCircuit { .. 
})); + } +} diff --git a/crates/fula-client/src/lib.rs b/crates/fula-client/src/lib.rs index cf3e047..3a233e1 100644 --- a/crates/fula-client/src/lib.rs +++ b/crates/fula-client/src/lib.rs @@ -37,10 +37,15 @@ //! } //! ``` +#[cfg(not(target_arch = "wasm32"))] +mod block_cache; mod client; mod config; mod encryption; mod error; +#[cfg(not(target_arch = "wasm32"))] +mod gateway_fetch; +mod health_gate; mod multipart; mod types; #[cfg(not(target_arch = "wasm32"))] diff --git a/crates/fula-core/src/bucket.rs b/crates/fula-core/src/bucket.rs index 17868c5..c3633b7 100644 --- a/crates/fula-core/src/bucket.rs +++ b/crates/fula-core/src/bucket.rs @@ -1002,6 +1002,44 @@ impl BucketManager { .collect() } + /// Populate `BucketMetadata.bucket_lookup_h` for a user-scoped bucket + /// **only if currently `None`**. Idempotent — never overwrites an + /// existing value. Sets the dirty flag on success; the caller is + /// responsible for triggering registry persistence (typically via the + /// existing `persist_registry_with_token` call in the put_object handler). + /// + /// This is called by master's PUT handler when the SDK includes the + /// `x-amz-meta-fula-bucket-lookup-h` control header on a Phase 2 + /// manifest root PUT (the moment of forest commit). + /// + /// Returns `Ok(true)` if the field was newly populated, `Ok(false)` if it + /// was already set. `Err(BucketNotFound)` if the bucket doesn't exist. + pub fn populate_lookup_h_if_missing( + &self, + user_id: &str, + bucket_name: &str, + lookup_h: [u8; 16], + ) -> Result { + let internal_key = Self::scoped_bucket_key(user_id, bucket_name); + + // Mutate within a sync block; DashMap shard guard never crosses an + // await. Persistence is intentionally NOT triggered here — the put + // handler already calls `persist_registry_with_token` post-flush, + // which picks up the new value via the dirty flag. 
+ match self.buckets.get_mut(&internal_key) { + Some(mut entry) => { + if entry.bucket_lookup_h.is_some() { + Ok(false) + } else { + entry.bucket_lookup_h = Some(lookup_h); + self.dirty.store(true, std::sync::atomic::Ordering::Relaxed); + Ok(true) + } + } + None => Err(CoreError::BucketNotFound(bucket_name.to_string())), + } + } + /// Find a bucket by display name that contains a specific object key /// /// Uses the secondary name index for O(1) lookup of matching buckets @@ -1517,4 +1555,327 @@ mod tests { N ); } + + // ============================================================ + // Phase 1.2 (master-independent reads) — populate_lookup_h_if_missing + // ============================================================ + + #[tokio::test] + async fn test_populate_lookup_h_if_missing_happy_path() { + // First-ever populate on a freshly-created bucket: sets the field, + // returns Ok(true), and marks the manager dirty so the next + // persist_registry call serializes the new value. + let store = Arc::new(MemoryBlockStore::new()); + let manager = BucketManager::new(store); + let user_id = "userA"; + let bucket_name = "photos"; + let owner = Owner::new(user_id); + + manager + .create_bucket_for_user(user_id, bucket_name.to_string(), owner) + .await + .expect("create_bucket_for_user"); + + // Pre-condition: bucket exists, lookup_h is None. + let pre = manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata exists"); + assert_eq!(pre.bucket_lookup_h, None); + + // create_bucket_for_user calls persist_registry which clears dirty; + // populate should re-set it. 
+ manager + .dirty + .store(false, std::sync::atomic::Ordering::Relaxed); + + let h: [u8; 16] = [ + 0xab, 0xcd, 0xef, 0x12, 0x34, 0x56, 0x78, 0x9a, + 0xbc, 0xde, 0xf0, 0x11, 0x22, 0x33, 0x44, 0x55, + ]; + let changed = manager + .populate_lookup_h_if_missing(user_id, bucket_name, h) + .expect("populate ok"); + assert!(changed, "first call must report changed=true"); + + // Post-condition: field is now Some(h), dirty flag is set. + let post = manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata exists"); + assert_eq!(post.bucket_lookup_h, Some(h)); + assert!( + manager.dirty.load(std::sync::atomic::Ordering::Relaxed), + "dirty flag must be set after a real write" + ); + } + + #[tokio::test] + async fn test_populate_lookup_h_if_missing_idempotent() { + // Second call with a DIFFERENT value must NOT overwrite. Returns + // Ok(false) and preserves the original. Dirty flag isn't set on + // the no-op (so we don't churn the registry on every PUT for an + // already-migrated bucket). + let store = Arc::new(MemoryBlockStore::new()); + let manager = BucketManager::new(store); + let user_id = "userB"; + let bucket_name = "documents"; + let owner = Owner::new(user_id); + + manager + .create_bucket_for_user(user_id, bucket_name.to_string(), owner) + .await + .expect("create_bucket_for_user"); + + let original_h: [u8; 16] = [1u8; 16]; + let other_h: [u8; 16] = [2u8; 16]; + + // First populate → sets the field. + let changed = manager + .populate_lookup_h_if_missing(user_id, bucket_name, original_h) + .expect("populate ok"); + assert!(changed); + + // Reset dirty so we can detect whether the no-op call sets it again. + manager + .dirty + .store(false, std::sync::atomic::Ordering::Relaxed); + + // Second populate with a different value → idempotent skip. 
+ let changed = manager + .populate_lookup_h_if_missing(user_id, bucket_name, other_h) + .expect("populate idempotent"); + assert!(!changed, "second call must report changed=false"); + + // Original value preserved; dirty flag NOT set by the no-op. + let post = manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata exists"); + assert_eq!( + post.bucket_lookup_h, + Some(original_h), + "idempotent: original value must NOT be overwritten" + ); + assert!( + !manager.dirty.load(std::sync::atomic::Ordering::Relaxed), + "no-op call must not set dirty flag" + ); + } + + #[tokio::test] + async fn test_populate_lookup_h_bucket_not_found() { + // Calling on a bucket that doesn't exist returns BucketNotFound. + // Master's handler treats this as a non-fatal warn, but the API + // contract here is: explicit error, not silent success. + let store = Arc::new(MemoryBlockStore::new()); + let manager = BucketManager::new(store); + + let h: [u8; 16] = [0u8; 16]; + let result = manager.populate_lookup_h_if_missing("ghost-user", "ghost-bucket", h); + + assert!(matches!(result, Err(CoreError::BucketNotFound(ref n)) if n == "ghost-bucket")); + } + + #[tokio::test] + async fn test_legacy_bucket_lazy_migrates_when_new_client_sends_header() { + // SCENARIO 2 from the rollout matrix: + // - Bucket was created BEFORE Phase 1.2 ships (old data; old client + // SDK; no `bucket_lookup_h` field in the persisted CBOR — i.e. + // deserialized as None via #[serde(default)]). + // - User upgrades their fula-client SDK and writes again. + // - New SDK sends `x-amz-meta-fula-bucket-lookup-h` on the Phase 2 + // manifest root PUT. Master's handler calls populate. + // - Bucket's `bucket_lookup_h` lazy-migrates from None → Some(_) + // and persists. Subsequent reads (incl. Phase 3.2 chain + // publication) see the blinded key. 
+ // + // This test simulates that journey end-to-end through the master's + // BucketManager: persist + reload to mimic server restart between + // the old and new client uploads. + let tmp = std::env::temp_dir().join(format!( + "fula-phase12-legacy-{}.cid", + std::process::id() + )); + let store = Arc::new(MemoryBlockStore::new()); + let user_id = "userL"; // L = Legacy + let bucket_name = "fula-metadata"; + let owner = Owner::new(user_id); + + // (1) Old client created the bucket pre-Phase-1.2. + { + let manager = BucketManager::with_persistence(store.clone(), &tmp); + manager + .create_bucket_for_user(user_id, bucket_name.to_string(), owner) + .await + .expect("legacy create"); + // Old code never set bucket_lookup_h. Verify. + let pre = manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata"); + assert_eq!( + pre.bucket_lookup_h, None, + "legacy bucket must start with no lookup_h" + ); + manager.persist_registry().await.expect("legacy persist"); + } + + // (2) Server restarts. New code loads the legacy CBOR. + let new_manager = BucketManager::with_persistence(store, &tmp); + let count = new_manager.load_registry().await.expect("reload"); + assert_eq!(count, 1); + let after_reload = new_manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata after reload"); + assert_eq!( + after_reload.bucket_lookup_h, None, + "legacy CBOR must round-trip with lookup_h=None" + ); + + // (3) New client uploads. Master receives the header → populates. 
+ let h: [u8; 16] = [ + 0x7c, 0x68, 0xbe, 0x81, 0x43, 0xaf, 0x5b, 0xa2, + 0x12, 0xa3, 0x6f, 0x81, 0x23, 0x20, 0x37, 0xf5, + ]; + let changed = new_manager + .populate_lookup_h_if_missing(user_id, bucket_name, h) + .expect("lazy populate"); + assert!(changed, "first populate on a legacy bucket must change"); + + let after_populate = new_manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata after populate"); + assert_eq!(after_populate.bucket_lookup_h, Some(h)); + + // (4) Persist the migration → durable. + new_manager + .persist_registry() + .await + .expect("post-migration persist"); + + // (5) The next time the same client (or any other) writes, populate + // is a no-op (idempotent — never overwrites). + let other_h: [u8; 16] = [9u8; 16]; + let changed = new_manager + .populate_lookup_h_if_missing(user_id, bucket_name, other_h) + .expect("idempotent"); + assert!(!changed); + let still = new_manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata still present"); + assert_eq!(still.bucket_lookup_h, Some(h), "must NOT overwrite migrated value"); + + // Cleanup + let _ = std::fs::remove_file(&tmp); + let _ = std::fs::remove_file(tmp.with_extension("cid.bak")); + } + + #[tokio::test] + async fn test_old_client_without_header_leaves_bucket_intact() { + // SCENARIO 1 from the rollout matrix: + // - Existing user with old fula-client SDK, post-server-update. + // - Old SDK does NOT send `x-amz-meta-fula-bucket-lookup-h`. + // - Master's handler: header absent → populate never called. + // - Bucket continues to function normally; `bucket_lookup_h` + // stays None. + // - Phase 3.2 publisher will emit this bucket with `legacy=true` + // + plaintext bucket name; SDK cold-start falls back to plain + // bucket-name lookup. 
+ // + // This test verifies the BucketManager side: a bucket without a + // populate call still works for all read/list/persist operations, + // and stays in the legacy (None) state across persist + reload. + let tmp = std::env::temp_dir().join(format!( + "fula-phase12-oldclient-{}.cid", + std::process::id() + )); + let store = Arc::new(MemoryBlockStore::new()); + let user_id = "userO"; // O = Old client + let bucket_name = "videos"; + let owner = Owner::new(user_id); + + let manager = BucketManager::with_persistence(store.clone(), &tmp); + manager + .create_bucket_for_user(user_id, bucket_name.to_string(), owner) + .await + .expect("create"); + + // Simulate many writes from an old client — no populate call ever + // runs. The bucket continues to function; lookup_h stays None. + for _ in 0..3 { + // (in production, each iteration would be a put_object handler + // call without the header — here we just persist to mimic the + // post-flush registry update that handler normally triggers.) + manager.persist_registry().await.expect("persist"); + } + + let pre_reload = manager + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata"); + assert_eq!(pre_reload.bucket_lookup_h, None); + + // Reload simulates server restart with old-client data still in flight. + let reloaded = BucketManager::with_persistence(store, &tmp); + reloaded.load_registry().await.expect("reload"); + let post_reload = reloaded + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata after reload"); + assert_eq!( + post_reload.bucket_lookup_h, None, + "old-client buckets stay in legacy state until upgraded" + ); + assert_eq!(post_reload.name, bucket_name); + + // Cleanup + let _ = std::fs::remove_file(&tmp); + let _ = std::fs::remove_file(tmp.with_extension("cid.bak")); + } + + #[tokio::test] + async fn test_populate_lookup_h_persists_through_registry_roundtrip() { + // The lookup_h must survive: populate → persist_registry → reload + // from IPFS → still Some(h). 
This is the end-to-end backward-compat + // safety: a Phase-1.2-populated bucket round-trips through master + // restart correctly. + let tmp = std::env::temp_dir().join(format!( + "fula-phase12-{}.cid", + std::process::id() + )); + let store = Arc::new(MemoryBlockStore::new()); + let manager = BucketManager::with_persistence(store.clone(), &tmp); + let user_id = "userC"; + let bucket_name = "videos"; + let owner = Owner::new(user_id); + + manager + .create_bucket_for_user(user_id, bucket_name.to_string(), owner) + .await + .expect("create"); + + let h: [u8; 16] = [ + 0x9d, 0xfb, 0x19, 0x47, 0xe5, 0x31, 0x5e, 0x62, + 0xc1, 0x1f, 0x2c, 0xe4, 0x77, 0xc2, 0x80, 0x97, + ]; + manager + .populate_lookup_h_if_missing(user_id, bucket_name, h) + .expect("populate"); + + // Persist (CID file written via with_persistence). + manager.persist_registry().await.expect("persist"); + + // Reload into a fresh manager. + let reloaded = BucketManager::with_persistence(store, &tmp); + let count = reloaded.load_registry().await.expect("reload"); + assert_eq!(count, 1); + + let restored = reloaded + .get_bucket_metadata_for_user(user_id, bucket_name) + .expect("metadata after reload"); + assert_eq!( + restored.bucket_lookup_h, + Some(h), + "lookup_h must survive registry persist + reload" + ); + + // Cleanup + let _ = std::fs::remove_file(&tmp); + let _ = std::fs::remove_file(tmp.with_extension("cid.bak")); + } } diff --git a/crates/fula-core/src/metadata.rs b/crates/fula-core/src/metadata.rs index 1592733..d748d60 100644 --- a/crates/fula-core/src/metadata.rs +++ b/crates/fula-core/src/metadata.rs @@ -228,12 +228,23 @@ pub struct BucketMetadata { /// Object count (cached) pub object_count: u64, - + /// Total size in bytes (cached) pub total_size: u64, - + /// Last modified timestamp pub last_modified: DateTime, + + /// Blinded lookup key for the per-user bucketsIndex CBOR published in + /// Phase 3 chain snapshots. 
Computed client-side as + /// `BLAKE3(MetadataKey || bucket_name)` truncated to 16 bytes (matches + /// `hashed_user_id`'s 128-bit convention). `None` for buckets created + /// before this field was added; populated lazily on the next forest + /// flush via `BucketManager::populate_lookup_h_if_missing`. + /// `#[serde(default)]` makes existing `fula-bucket-registry` CBOR blocks + /// deserialize fine without migration. + #[serde(default)] + pub bucket_lookup_h: Option<[u8; 16]>, } impl BucketMetadata { @@ -253,6 +264,7 @@ impl BucketMetadata { object_count: 0, total_size: 0, last_modified: now, + bucket_lookup_h: None, } } @@ -390,4 +402,123 @@ mod tests { assert_eq!(bucket.name, "my-bucket"); assert!(!bucket.versioning_enabled); } + + // ============================================================ + // Phase 1.2 (master-independent reads) — bucket_lookup_h tests + // ============================================================ + + #[test] + fn test_bucket_lookup_h_default_is_none() { + // Newly-created BucketMetadata must have bucket_lookup_h = None. + // The field is populated lazily on the next forest flush via the SDK header. + let cid = fula_blockstore::cid_utils::create_cid( + b"root", + fula_blockstore::cid_utils::CidCodec::DagCbor, + ); + let bucket = BucketMetadata::new("b".to_string(), "owner".to_string(), cid); + assert_eq!(bucket.bucket_lookup_h, None); + } + + #[test] + fn test_bucket_lookup_h_dagcbor_roundtrip() { + // BucketMetadata with Some(...) and None must both round-trip cleanly + // through dag-cbor (the production registry format). 
+ let cid = fula_blockstore::cid_utils::create_cid( + b"root", + fula_blockstore::cid_utils::CidCodec::DagCbor, + ); + + // None case + let none_bucket = BucketMetadata::new("b1".into(), "owner".into(), cid); + let bytes = serde_ipld_dagcbor::to_vec(&none_bucket).expect("serialize None"); + let restored: BucketMetadata = + serde_ipld_dagcbor::from_slice(&bytes).expect("deserialize None"); + assert_eq!(restored.bucket_lookup_h, None); + assert_eq!(restored.name, "b1"); + + // Some case + let mut some_bucket = BucketMetadata::new("b2".into(), "owner".into(), cid); + let h: [u8; 16] = [ + 0xd2, 0xe4, 0xc4, 0x3d, 0xa6, 0x60, 0xe0, 0xb8, + 0x5e, 0x7b, 0x08, 0xb6, 0x98, 0x91, 0x26, 0xb3, + ]; + some_bucket.bucket_lookup_h = Some(h); + let bytes = serde_ipld_dagcbor::to_vec(&some_bucket).expect("serialize Some"); + let restored: BucketMetadata = + serde_ipld_dagcbor::from_slice(&bytes).expect("deserialize Some"); + assert_eq!(restored.bucket_lookup_h, Some(h)); + assert_eq!(restored.name, "b2"); + } + + #[test] + fn test_bucket_lookup_h_legacy_cbor_deserializes_to_none() { + // BACKWARD-COMPAT GOLD STANDARD (Phase 1.2 hard-constraint #1): + // existing fula-bucket-registry blocks pinned to IPFS BEFORE this + // field was added must deserialize cleanly into the new struct, + // with bucket_lookup_h = None. Production data must not break. + // + // We simulate this by defining a struct with the same shape as + // BucketMetadata but WITHOUT the new field, serializing it via + // dag-cbor, then deserializing as the new BucketMetadata. The + // #[serde(default)] on the new field is what makes this work. 
+ #[derive(Serialize, Deserialize)] + struct LegacyBucketMetadata { + name: String, + created_at: DateTime, + owner_id: String, + #[serde(with = "cid_serde")] + root_cid: Cid, + #[serde(default)] + versioning_enabled: bool, + #[serde(default)] + default_storage_class: StorageClass, + #[serde(default)] + tags: HashMap, + cors_config: Option, + #[serde(default)] + lifecycle_rules: Vec, + object_count: u64, + total_size: u64, + last_modified: DateTime, + // NOTE: deliberately no `bucket_lookup_h` field — this is the + // pre-Phase-1.2 shape. + } + + let cid = fula_blockstore::cid_utils::create_cid( + b"root", + fula_blockstore::cid_utils::CidCodec::DagCbor, + ); + let now = Utc::now(); + let legacy = LegacyBucketMetadata { + name: "videos".to_string(), + created_at: now, + owner_id: "9797dfb1947e5315e62c11f2ce477c28".to_string(), + root_cid: cid, + versioning_enabled: false, + default_storage_class: StorageClass::default(), + tags: HashMap::new(), + cors_config: None, + lifecycle_rules: Vec::new(), + object_count: 2984, + total_size: 764_932_382, + last_modified: now, + }; + + let legacy_bytes = + serde_ipld_dagcbor::to_vec(&legacy).expect("serialize legacy bucket"); + + // Deserialize the legacy bytes as the NEW BucketMetadata struct. + // This is exactly what happens at runtime when master loads a + // pre-Phase-1.2 fula-bucket-registry block from IPFS. + let modern: BucketMetadata = + serde_ipld_dagcbor::from_slice(&legacy_bytes).expect("legacy → modern"); + + assert_eq!(modern.name, "videos"); + assert_eq!(modern.owner_id, "9797dfb1947e5315e62c11f2ce477c28"); + assert_eq!(modern.object_count, 2984); + assert_eq!(modern.total_size, 764_932_382); + // The critical assertion — Phase 1.2's serde(default) preserves + // the no-migration property for existing CBOR registries. 
+ assert_eq!(modern.bucket_lookup_h, None); + } } diff --git a/crates/fula-flutter/src/api/error.rs b/crates/fula-flutter/src/api/error.rs index 6e0e2f8..d082dae 100644 --- a/crates/fula-flutter/src/api/error.rs +++ b/crates/fula-flutter/src/api/error.rs @@ -123,6 +123,14 @@ impl From for FulaError { ClientError::MigrationLockHeld { bucket, expires_at } => FulaError::InvalidResponse( format!("migration lock held for bucket {} (expires at {} ms)", bucket, expires_at), ), + // Phase 2.1 of master-independent reads: surface as a Network + // error to existing Flutter callers — the closest existing + // category, since the master is effectively unreachable. + // Phase 2.4 catches this variant earlier and falls back to the + // gateway race before reaching this conversion. + ClientError::MasterUnreachable { down_for_secs } => FulaError::Network( + format!("master unreachable (health gate; down for ~{}s)", down_for_secs), + ), } } } From 8d14a2024850a9bf37db78e8278e8a7202390e03 Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Sat, 2 May 2026 11:42:01 -0400 Subject: [PATCH 2/6] cold start --- Cargo.lock | 1 + crates/fula-cli/Cargo.toml | 3 + crates/fula-cli/src/handlers/internal.rs | 527 ++++++++++++++++++ crates/fula-cli/src/handlers/mod.rs | 1 + .../src/handlers/users_index_publisher.rs | 419 +++++++++++++- crates/fula-cli/src/routes.rs | 22 +- crates/fula-cli/src/server.rs | 16 +- crates/fula-cli/src/state.rs | 122 ++++ 8 files changed, 1106 insertions(+), 5 deletions(-) create mode 100644 crates/fula-cli/src/handlers/internal.rs diff --git a/Cargo.lock b/Cargo.lock index 2cabddd..4c0dfda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1787,6 +1787,7 @@ dependencies = [ "url", "urlencoding", "uuid", + "wiremock", ] [[package]] diff --git a/crates/fula-cli/Cargo.toml b/crates/fula-cli/Cargo.toml index f8b11f7..7973b4d 100644 --- a/crates/fula-cli/Cargo.toml +++ b/crates/fula-cli/Cargo.toml @@ -82,3 +82,6 @@ tempfile = { workspace = true } rstest = { workspace = true } 
tokio-test = "0.4" reqwest = { workspace = true } +# Used by users_index_publisher A3 tests to mock kubo's +# /api/v0/name/publish HTTP endpoint without spinning up a real IPFS daemon. +wiremock = { workspace = true } diff --git a/crates/fula-cli/src/handlers/internal.rs b/crates/fula-cli/src/handlers/internal.rs new file mode 100644 index 0000000..aeeb3c6 --- /dev/null +++ b/crates/fula-cli/src/handlers/internal.rs @@ -0,0 +1,527 @@ +//! Phase 3.2 A3 internal endpoints. +//! +//! Two endpoints, both bearer-token-protected: +//! +//! - `GET /_internal/users-index-state` — returns the latest published +//! `(global_cid, sequence, updated_at_unix)` so the 12h chain cron in +//! `mainnet-reward-server` can fetch and submit on-chain. +//! - `POST /_internal/publish-now` — fires a publisher tick on +//! demand. Useful for deploy verification. +//! +//! ## Auth +//! +//! Bearer token from `users_index_publisher.config.internal_token`. +//! When `internal_token = None`: every request returns **503** +//! ("internal endpoints disabled"). Fail-closed: an operator who +//! forgets to set the token doesn't accidentally expose an unauthed +//! state-readout endpoint. +//! +//! When `internal_token = Some(t)`: +//! - missing/wrong bearer → **401** +//! - correct bearer → **200** +//! +//! ## Wiring +//! +//! Routes are added to a dedicated branch in `routes.rs` so they bypass +//! the user-JWT auth middleware and use a small bearer-token check +//! instead. Endpoints return 503 when the publisher itself is `None` +//! (publisher feature disabled at startup) — this is the regression +//! check for "publisher disabled = byte-identical legacy behavior". 
+
+use crate::handlers::users_index_publisher::UsersIndexPublisher;
+use crate::AppState;
+use axum::{
+    extract::State,
+    http::{HeaderMap, StatusCode},
+    response::{IntoResponse, Response},
+    Json,
+};
+use fula_blockstore::FlexibleBlockStore;
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+
+/// `GET /_internal/users-index-state` response body. Designed for the
+/// chain cron — single deserialize, no fancy error envelopes.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
+pub struct UsersIndexStateResponse {
+    /// Latest published global users-index CID, or `null` if no
+    /// publish has succeeded yet (fresh master).
+    pub cid: Option<String>,
+    /// Monotonic sequence embedded in the most recent global CBOR.
+    pub sequence: u64,
+    /// Wall-clock timestamp of the last successful publish.
+    pub updated_at_unix: u64,
+    /// IPNS key name (e.g., `fula-users-index`). Operators verify
+    /// against their kubo `key list` output. Logged-only — clients
+    /// resolve via the IPNS NAME (libp2p key hash), not this label.
+    pub ipns_key_name: String,
+}
+
+/// `POST /_internal/publish-now` response body.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
+pub struct PublishNowResponse {
+    pub global_cid: String,
+    pub sequence: u64,
+    pub changed_users: usize,
+    pub total_users: usize,
+    pub global_rebuilt: bool,
+}
+
+/// Bearer-token check. Returns:
+/// - `Ok(())` when the publisher is configured AND the bearer matches.
+/// - `Err(503)` when the publisher OR `internal_token` is unset
+///   (fail-closed; documented in module doc).
+/// - `Err(401)` when the bearer is missing/wrong but auth IS configured.
+fn authenticate( + publisher: Option<&Arc>>, + headers: &HeaderMap, +) -> Result<(), Response> { + let publisher = match publisher { + Some(p) => p, + None => { + return Err(( + StatusCode::SERVICE_UNAVAILABLE, + "users-index publisher disabled", + ) + .into_response()); + } + }; + let configured = match publisher.config().internal_token.as_deref() { + Some(t) if !t.is_empty() => t, + _ => { + return Err(( + StatusCode::SERVICE_UNAVAILABLE, + "internal endpoints disabled (no internal_token configured)", + ) + .into_response()); + } + }; + let presented = headers + .get("authorization") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.strip_prefix("Bearer ")) + .unwrap_or(""); + // Constant-time compare to defend against timing oracles. + if !constant_time_eq(presented.as_bytes(), configured.as_bytes()) { + return Err((StatusCode::UNAUTHORIZED, "invalid or missing bearer token").into_response()); + } + Ok(()) +} + +fn constant_time_eq(a: &[u8], b: &[u8]) -> bool { + if a.len() != b.len() { + return false; + } + let mut diff: u8 = 0; + for (x, y) in a.iter().zip(b.iter()) { + diff |= x ^ y; + } + diff == 0 +} + +/// `GET /_internal/users-index-state` +pub async fn users_index_state( + State(state): State>, + headers: HeaderMap, +) -> Response { + if let Err(resp) = authenticate(state.users_index_publisher.as_ref(), &headers) { + return resp; + } + let publisher = state + .users_index_publisher + .as_ref() + .expect("authenticate already proved Some"); + let latest = publisher.latest(); + let body = UsersIndexStateResponse { + cid: latest.global_cid.map(|c| c.to_string()), + sequence: latest.sequence, + updated_at_unix: latest.updated_at_unix, + ipns_key_name: publisher.config().ipns_key_name.clone(), + }; + (StatusCode::OK, Json(body)).into_response() +} + +/// `POST /_internal/publish-now` +pub async fn publish_now( + State(state): State>, + headers: HeaderMap, +) -> Response { + if let Err(resp) = authenticate(state.users_index_publisher.as_ref(), &headers) { + 
return resp;
+    }
+    let publisher = state
+        .users_index_publisher
+        .as_ref()
+        .expect("authenticate already proved Some");
+    match publisher.run_tick().await {
+        Ok(outcome) => {
+            let body = PublishNowResponse {
+                global_cid: outcome.global_cid.to_string(),
+                sequence: outcome.sequence,
+                changed_users: outcome.changed_users,
+                total_users: outcome.total_users,
+                global_rebuilt: outcome.global_rebuilt,
+            };
+            (StatusCode::OK, Json(body)).into_response()
+        }
+        Err(e) => {
+            tracing::error!(error = %e, "users-index publish-now failed");
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                format!("publish failed: {}", e),
+            )
+                .into_response()
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::handlers::users_index_publisher::PublisherConfig;
+    use axum::body::to_bytes;
+    use axum::http::{Method, Request};
+    use axum::Router;
+    use fula_blockstore::MemoryBlockStore;
+    use fula_core::BucketManager;
+    use std::path::PathBuf;
+    use std::time::Duration;
+    use tempfile::TempDir;
+    use tower::ServiceExt;
+
+    /// Build a router exposing only the two internal endpoints — no
+    /// JWT auth middleware in the way. Mirrors what `routes.rs` will
+    /// wire, minus everything irrelevant to these endpoints.
+    fn build_internal_router(state: Arc<AppState>) -> Router {
+        Router::new()
+            .route(
+                "/_internal/users-index-state",
+                axum::routing::get(users_index_state),
+            )
+            .route(
+                "/_internal/publish-now",
+                axum::routing::post(publish_now),
+            )
+            .with_state(state)
+    }
+
+    /// Build an `AppState` with a publisher backed by `MemoryBlockStore`,
+    /// optionally wrapping it in an `Arc` to satisfy the FlexibleBlockStore
+    /// type that AppState expects.
+    async fn fixture_state(internal_token: Option<String>, with_publisher: bool) -> Arc<AppState> {
+        // `keep()` returns the PathBuf and disables the TempDir's
+        // auto-delete-on-drop. Files persist for the test process; the
+        // OS cleans them up on next reboot if anything is left.
+ let dir = TempDir::new().unwrap().keep(); + let state_path: PathBuf = dir.join("state.txt"); + + let inner = FlexibleBlockStore::Memory(MemoryBlockStore::new()); + let block_store = Arc::new(inner); + let bucket_manager = Arc::new(BucketManager::new(Arc::clone(&block_store))); + + let users_index_publisher = if with_publisher { + let config = PublisherConfig { + flush_interval: Duration::from_secs(300), + first_publish_max_pins_per_sec: 100, + ipns_lifetime: Duration::from_secs(36 * 3600), + ipns_ttl: Duration::from_secs(15 * 60), + ipns_key_name: "fula-users-index".to_string(), + state_file_path: state_path, + ipfs_api_url: "http://localhost:5001".to_string(), + internal_token, + }; + // No IPNS publisher — the internal endpoints don't depend on it. + let p = UsersIndexPublisher::open_without_ipns( + config, + Arc::clone(&bucket_manager), + Arc::clone(&block_store), + ) + .expect("open"); + Some(Arc::new(p)) + } else { + None + }; + + let config = crate::config::GatewayConfig::default(); + Arc::new(AppState { + config, + block_store, + bucket_manager, + multipart_manager: Arc::new(crate::multipart::MultipartManager::new(60)), + lock_store: crate::handlers::locks::LockStore::new(), + users_index_publisher, + }) + } + + #[tokio::test] + async fn test_state_endpoint_503_when_publisher_disabled() { + // Publisher = None. Operators who deploy without flipping the + // env flag MUST get a 503, not a 500 or unauthed leak. + let state = fixture_state(None, false).await; + let app = build_internal_router(state); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/_internal/users-index-state") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE); + } + + #[tokio::test] + async fn test_state_endpoint_503_when_no_token() { + // Publisher is on but `internal_token = None`. Fail-closed. 
+ let state = fixture_state(None, true).await; + let app = build_internal_router(state); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/_internal/users-index-state") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE); + } + + #[tokio::test] + async fn test_state_endpoint_401_on_wrong_token() { + let state = fixture_state(Some("supersecret".to_string()), true).await; + let app = build_internal_router(state); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/_internal/users-index-state") + .header("authorization", "Bearer wrongtoken") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); + } + + #[tokio::test] + async fn test_state_endpoint_401_on_missing_bearer_prefix() { + let state = fixture_state(Some("supersecret".to_string()), true).await; + let app = build_internal_router(state); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/_internal/users-index-state") + // No "Bearer " prefix. + .header("authorization", "supersecret") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); + } + + #[tokio::test] + async fn test_state_endpoint_200_with_correct_token_returns_default_state() { + // Fresh publisher, never ticked → cid is null, sequence is 0. + // Verifies the JSON shape AND the "fresh" semantics. 
+ let state = fixture_state(Some("supersecret".to_string()), true).await; + let app = build_internal_router(state); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/_internal/users-index-state") + .header("authorization", "Bearer supersecret") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); + let body: UsersIndexStateResponse = serde_json::from_slice(&bytes).unwrap(); + assert_eq!(body.cid, None); + assert_eq!(body.sequence, 0); + assert_eq!(body.updated_at_unix, 0); + assert_eq!(body.ipns_key_name, "fula-users-index"); + } + + #[tokio::test] + async fn test_publish_now_runs_tick_and_returns_outcome() { + // After publish-now succeeds, a follow-up GET reads the + // newly-committed state. Round-trip verification. + let token = "supersecret".to_string(); + let state = fixture_state(Some(token.clone()), true).await; + let app = build_internal_router(Arc::clone(&state)); + + // Trigger publish-now. + let resp = app + .clone() + .oneshot( + Request::builder() + .method(Method::POST) + .uri("/_internal/publish-now") + .header("authorization", format!("Bearer {}", token)) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); + let body: PublishNowResponse = serde_json::from_slice(&bytes).unwrap(); + assert_eq!(body.sequence, 1); + assert!(body.global_rebuilt); + + // GET the state — must reflect the just-published values. 
+ let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/_internal/users-index-state") + .header("authorization", format!("Bearer {}", token)) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); + let state_body: UsersIndexStateResponse = serde_json::from_slice(&bytes).unwrap(); + assert_eq!(state_body.cid, Some(body.global_cid)); + assert_eq!(state_body.sequence, 1); + } + + #[tokio::test] + async fn test_publish_now_503_when_publisher_disabled() { + // Same fail-closed contract as the GET endpoint. + let state = fixture_state(None, false).await; + let app = build_internal_router(state); + let resp = app + .oneshot( + Request::builder() + .method(Method::POST) + .uri("/_internal/publish-now") + .header("authorization", "Bearer anything") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE); + } + + #[test] + fn test_constant_time_eq_correct() { + assert!(constant_time_eq(b"hello", b"hello")); + assert!(!constant_time_eq(b"hello", b"hellP")); + assert!(!constant_time_eq(b"hello", b"hell")); + assert!(!constant_time_eq(b"", b"x")); + assert!(constant_time_eq(b"", b"")); + } + + /// Drive the **real** router from `routes::create_router` to verify + /// `/_internal/*` actually bypasses the user-JWT `auth_middleware`. + /// If the router merge accidentally inherited the parent's auth + /// layer, this test fails (auth_middleware would respond with a + /// 403 "Authentication required" S3 error before reaching our + /// handler). The 503/SERVICE_UNAVAILABLE we expect comes from + /// `authenticate()` in this module — proof the request reached us. + #[tokio::test] + async fn test_internal_route_bypasses_user_jwt_auth() { + // auth_enabled=true: this is what production uses. 
A request + // to a normal S3 route without a JWT would 403. The internal + // route must reach our handler instead. + let dir = TempDir::new().unwrap().keep(); + let state_path: PathBuf = dir.join("state.txt"); + let inner = FlexibleBlockStore::Memory(MemoryBlockStore::new()); + let block_store = Arc::new(inner); + let bucket_manager = Arc::new(BucketManager::new(Arc::clone(&block_store))); + + let mut config = crate::config::GatewayConfig::default(); + config.auth_enabled = true; + config.jwt_secret = Some("test-secret".to_string()); + + let state = Arc::new(AppState { + config, + block_store, + bucket_manager, + multipart_manager: Arc::new(crate::multipart::MultipartManager::new(60)), + lock_store: crate::handlers::locks::LockStore::new(), + // Publisher disabled — we expect 503, not 401 (no token) + // and not 403 (S3 auth would trigger if middleware leaked). + users_index_publisher: None, + }); + + let _ = state_path; // silence unused; only here to mirror prod path layout + + let app = crate::routes::create_router(Arc::clone(&state)); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/_internal/users-index-state") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!( + resp.status(), + StatusCode::SERVICE_UNAVAILABLE, + "internal route MUST bypass user-JWT auth — got status {}", + resp.status() + ); + } + + /// Backward-compat regression: when the publisher is disabled + /// (default for un-flagged deploys), the existing S3 routes must + /// still respond. Specifically, the `auth_enabled=false` dev-mode + /// path serves `/healthz` without any token. If publisher wiring + /// somehow broke healthz, an old fula-client deployed against + /// the new master would lose its container health check. 
+ #[tokio::test] + async fn test_publisher_disabled_does_not_break_existing_routes() { + let inner = FlexibleBlockStore::Memory(MemoryBlockStore::new()); + let block_store = Arc::new(inner); + let bucket_manager = Arc::new(BucketManager::new(Arc::clone(&block_store))); + + let mut config = crate::config::GatewayConfig::default(); + config.auth_enabled = false; // dev mode, no JWT required + config.jwt_secret = Some("test-secret".to_string()); + + let state = Arc::new(AppState { + config, + block_store, + bucket_manager, + multipart_manager: Arc::new(crate::multipart::MultipartManager::new(60)), + lock_store: crate::handlers::locks::LockStore::new(), + users_index_publisher: None, + }); + + let app = crate::routes::create_router(Arc::clone(&state)); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/healthz") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!( + resp.status(), + StatusCode::OK, + "existing /healthz must still respond when publisher is disabled" + ); + } +} diff --git a/crates/fula-cli/src/handlers/mod.rs b/crates/fula-cli/src/handlers/mod.rs index 91ea72d..216822e 100644 --- a/crates/fula-cli/src/handlers/mod.rs +++ b/crates/fula-cli/src/handlers/mod.rs @@ -3,6 +3,7 @@ pub mod admin; pub mod batch; pub mod bucket; +pub mod internal; pub mod locks; pub mod multipart; pub mod object; diff --git a/crates/fula-cli/src/handlers/users_index_publisher.rs b/crates/fula-cli/src/handlers/users_index_publisher.rs index 448cd56..1fa72d6 100644 --- a/crates/fula-cli/src/handlers/users_index_publisher.rs +++ b/crates/fula-cli/src/handlers/users_index_publisher.rs @@ -39,6 +39,7 @@ use std::collections::{BTreeMap, HashMap}; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use tracing::{info, warn}; /// State that persists across master restarts. Single source of truth /// for "what did we last successfully publish?". 
Written **after** a @@ -437,6 +438,98 @@ impl From<&PersistedState> for LatestPublished { } } +// ============================================================ +// IPNS publisher (kubo HTTP API client) +// ============================================================ + +/// Kubo `/api/v0/name/publish` response body. We only care about +/// `Name` (= the IPNS NAME, libp2p key hash) for logging — clients +/// resolve via the configured IPNS NAME, not via this response. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +pub struct IpnsPublishResponse { + #[serde(rename = "Name")] + pub name: String, + #[serde(rename = "Value")] + pub value: String, +} + +/// Thin client over kubo's `/api/v0/name/publish`. Plain HTTP POST, +/// no auth (kubo's API is localhost-only by default). Failures are +/// surfaced via `Result` and the caller decides what to do — for +/// the publisher tick, an IPNS failure logs at `warn!` and lets the +/// commit proceed (chain backup at 12h still works). +#[derive(Clone)] +pub struct IpnsPublisher { + client: reqwest::Client, + api_url: String, +} + +impl IpnsPublisher { + /// Construct a publisher targeting `api_url` (e.g., + /// `http://localhost:5001`). The client uses kubo's default + /// timeout; the caller is responsible for outer timeouts if + /// needed (advisor noted: don't add inner backoff/timeout). + pub fn new(api_url: String) -> Self { + Self { + client: reqwest::Client::new(), + api_url, + } + } + + /// Construct from an existing `reqwest::Client` (test hook — + /// lets wiremock-based tests inject a client with custom timeouts + /// if needed; production uses [`new`]). + #[doc(hidden)] + pub fn with_client(client: reqwest::Client, api_url: String) -> Self { + Self { client, api_url } + } + + /// Publish `cid` under IPNS `key_name` with the given lifetime + /// + DHT-cache TTL. + /// + /// Kubo's API: `POST /api/v0/name/publish?arg=&key=&lifetime=&ttl=`. + /// Lifetime/ttl are Go duration strings (`36h`, `15m`, …). 
+ /// Returns the `(Name, Value)` from the response — `Name` is the + /// IPNS NAME (libp2p public-key hash). `Value` is the path the + /// IPNS record now resolves to (the input CID, prefixed with + /// `/ipfs/`). + pub async fn publish( + &self, + cid: &Cid, + key_name: &str, + lifetime: Duration, + ttl: Duration, + ) -> AnyResult { + let url = format!( + "{}/api/v0/name/publish?arg={}&key={}&lifetime={}&ttl={}", + self.api_url.trim_end_matches('/'), + cid, + urlencoding::encode(key_name), + format_go_duration(lifetime), + format_go_duration(ttl), + ); + let resp = self.client.post(&url).send().await?; + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + anyhow::bail!( + "kubo /api/v0/name/publish failed: status={}, body={}", + status, + body + ); + } + let body: IpnsPublishResponse = resp.json().await?; + Ok(body) + } +} + +/// Format a `Duration` as a Go-style duration string accepted by +/// kubo (`s` is universally accepted; we don't need +/// pretty-formatting). E.g., `36h` → `129600s`. Kubo accepts both. +fn format_go_duration(d: Duration) -> String { + format!("{}s", d.as_secs()) +} + // ============================================================ // Publisher skeleton // ============================================================ @@ -447,6 +540,10 @@ pub struct UsersIndexPublisher { config: PublisherConfig, bucket_manager: Arc>, block_store: Arc, + /// Optional IPNS publisher. `None` disables IPNS — useful for + /// tests that exercise just the pin/persist path, and for + /// operators who want the chain-backup path only. + ipns_publisher: Option, /// Per-user diff cache — owner_id → (content_hash, bucketsIndexCid). /// `Mutex` (not `RwLock`) because the tick is the only writer and /// the lock window is tiny (a HashMap insert). @@ -488,10 +585,38 @@ impl UsersIndexPublisher { /// Construct from config + handles to the bucket manager and /// block store. 
Loads existing state-file on-disk; fresh master /// starts with `PersistedState::default()`. + /// + /// IPNS is enabled by default (constructed from `config.ipfs_api_url`). + /// Tests may disable it via [`open_without_ipns`] to exercise the + /// pin/persist path independently. pub fn open( config: PublisherConfig, bucket_manager: Arc>, block_store: Arc, + ) -> Result { + let ipns_publisher = Some(IpnsPublisher::new(config.ipfs_api_url.clone())); + Self::open_with_ipns(config, bucket_manager, block_store, ipns_publisher) + } + + /// Construct without IPNS. Tick still pins + persists; the chain + /// path (12h cron in `mainnet-reward-server`) still works. Useful + /// for operators who don't want the kubo IPNS hop, and for the + /// pin/persist-only unit tests. + pub fn open_without_ipns( + config: PublisherConfig, + bucket_manager: Arc>, + block_store: Arc, + ) -> Result { + Self::open_with_ipns(config, bucket_manager, block_store, None) + } + + /// Internal constructor — also used by tests to inject a + /// wiremock-backed IPNS client. + pub fn open_with_ipns( + config: PublisherConfig, + bucket_manager: Arc>, + block_store: Arc, + ipns_publisher: Option, ) -> Result { let persisted = PersistedState::load(&config.state_file_path)?; let latest = LatestPublished::from(&persisted); @@ -499,6 +624,7 @@ impl UsersIndexPublisher { config, bucket_manager, block_store, + ipns_publisher, diff_cache: Mutex::new(HashMap::new()), latest: RwLock::new(latest), tick_lock: tokio::sync::Mutex::new(()), @@ -511,6 +637,13 @@ impl UsersIndexPublisher { self.latest.read().clone() } + /// Read-only access to the publisher config. Used by the internal + /// HTTP endpoints to surface `internal_token` (auth check) and + /// `ipns_key_name` (response field). + pub fn config(&self) -> &PublisherConfig { + &self.config + } + /// Read the on-disk persisted state directly (bypasses the /// in-memory `latest` cache). 
Used by tests and by the startup /// chain-cross-check (see plan 3.2.b advisor note). @@ -705,9 +838,50 @@ impl UsersIndexPublisher { } } - // 8. Persist new state. (A3 will insert IPNS publish between - // pin and persist; commit_state stays last so a crash mid- - // IPNS leaves us in a recoverable place.) + // 8. IPNS publish (best-effort). Order is documented as + // "pin → IPNS → persist" (plan 3.2.b + advisor): an IPNS + // publish failure does NOT abort the commit because the + // chain-backup cron at 12h still works. If the publish + // succeeds but persist fails, the next tick republishes + // the same CID under sequence+1 — IPNS is idempotent on + // `(cid, sequence)`. If we flipped the order to + // persist-then-IPNS, a crash mid-IPNS would leave an + // advanced on-disk sequence pointing at a CID never + // published. Don't flip. + if let Some(ipns) = &self.ipns_publisher { + match ipns + .publish( + &global_cid, + &self.config.ipns_key_name, + self.config.ipns_lifetime, + self.config.ipns_ttl, + ) + .await + { + Ok(resp) => { + info!( + cid = %global_cid, + sequence = next_sequence, + ipns_name = %resp.name, + ipns_value = %resp.value, + "users-index publisher: IPNS publish succeeded" + ); + } + Err(e) => { + warn!( + cid = %global_cid, + sequence = next_sequence, + error = %e, + "users-index publisher: IPNS publish failed (best-effort; chain backup at 12h still works; next tick will retry)" + ); + } + } + } + + // 9. Persist new state. commit_state is last so a crash mid- + // IPNS leaves us in a recoverable place — the next tick + // will retry IPNS with the same content (and on-chain + // sequence enforcement keeps things monotonic regardless). let next_state = PersistedState { global_cid: Some(global_cid), sequence: next_sequence, @@ -723,6 +897,68 @@ impl UsersIndexPublisher { global_rebuilt: true, }) } + + /// Test-only accessor: read the IPNS publisher's API URL. 
+ #[cfg(test)] + fn ipns_api_url_for_test(&self) -> Option { + self.ipns_publisher.as_ref().map(|p| p.api_url.clone()) + } +} + +/// Spawn a background task that calls `publisher.run_tick()` on +/// `flush_interval`. Mirrors `handlers::locks::start_sweeper`: +/// holds an `Arc` to the publisher, lives for the process lifetime. +/// +/// `MissedTickBehavior::Delay` ensures that if a single tick takes +/// unusually long (e.g., master kubo blocked), the next tick fires +/// after a fresh `flush_interval` rather than firing back-to-back to +/// "catch up" — bursts can swamp the pinning service. The first tick +/// is gated by an immediate `interval.tick().await` at the top of +/// the loop, which fires after one interval has elapsed; if you want +/// the first tick at startup, log + call run_tick once before the +/// loop. We do NOT do that here: the operator's sequence-of-events +/// at master startup is `BucketManager.load_registry → spawn this +/// task → first tick fires after flush_interval` so the registry +/// has time to load and persist before the publisher reads from it. +pub fn start_publisher_loop( + publisher: Arc>, +) { + let interval_dur = publisher.config.flush_interval; + tokio::spawn(async move { + let mut interval = tokio::time::interval(interval_dur); + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + // Skip the first tick (which fires immediately) — see fn doc. 
+ interval.tick().await; + loop { + interval.tick().await; + match publisher.run_tick().await { + Ok(outcome) => { + if outcome.global_rebuilt { + info!( + sequence = outcome.sequence, + changed_users = outcome.changed_users, + total_users = outcome.total_users, + cid = %outcome.global_cid, + "users-index publisher: tick committed new global" + ); + } else { + tracing::debug!( + sequence = outcome.sequence, + total_users = outcome.total_users, + "users-index publisher: tick was no-op" + ); + } + } + Err(e) => { + warn!(error = %e, "users-index publisher: tick failed; will retry on next interval"); + } + } + } + }); + info!( + interval_secs = interval_dur.as_secs(), + "users-index publisher loop started" + ); } #[cfg(test)] @@ -1447,4 +1683,181 @@ mod tests { assert!(outcome.global_rebuilt, "first publish must run even on empty"); assert_eq!(outcome.sequence, 1); } + + // ============================================================ + // Phase 3.2 A3 — IPNS publisher tests (wiremock) + // ============================================================ + + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + /// Construct a publisher that targets `mock_url` for IPNS calls + /// (instead of a real kubo). The mock has full control over + /// success/failure responses. + fn fixture_publisher_with_ipns( + state_path: PathBuf, + ipns_api_url: String, + ) -> ( + UsersIndexPublisher, + Arc, + Arc>, + ) { + let store = Arc::new(MemoryBlockStore::new()); + let manager = Arc::new(BucketManager::new(Arc::clone(&store))); + let mut config = fixture_config(state_path); + // Speed up: short lifetime/ttl in tests (kubo accepts them + // but our format function is tested below). 
+ config.ipns_lifetime = Duration::from_secs(60); + config.ipns_ttl = Duration::from_secs(15); + let ipns = IpnsPublisher::new(ipns_api_url); + let publisher = UsersIndexPublisher::open_with_ipns( + config, + Arc::clone(&manager), + Arc::clone(&store), + Some(ipns), + ) + .expect("open"); + (publisher, store, manager) + } + + #[test] + fn test_format_go_duration() { + assert_eq!(format_go_duration(Duration::from_secs(36 * 3600)), "129600s"); + assert_eq!(format_go_duration(Duration::from_secs(15 * 60)), "900s"); + assert_eq!(format_go_duration(Duration::from_secs(0)), "0s"); + } + + #[tokio::test] + async fn test_ipns_publisher_success() { + let mock = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/api/v0/name/publish")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "Name": "k51qzi5uqu5dh-mock", + "Value": "/ipfs/QmFakeCidValue", + }))) + .mount(&mock) + .await; + + let publisher = IpnsPublisher::new(mock.uri()); + let cid = fixture_cid(0xab); + let resp = publisher + .publish( + &cid, + "fula-users-index", + Duration::from_secs(36 * 3600), + Duration::from_secs(15 * 60), + ) + .await + .expect("publish"); + assert_eq!(resp.name, "k51qzi5uqu5dh-mock"); + assert_eq!(resp.value, "/ipfs/QmFakeCidValue"); + } + + #[tokio::test] + async fn test_ipns_publisher_propagates_5xx_error() { + let mock = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/api/v0/name/publish")) + .respond_with(ResponseTemplate::new(500).set_body_string("internal error")) + .mount(&mock) + .await; + + let publisher = IpnsPublisher::new(mock.uri()); + let cid = fixture_cid(0xab); + let result = publisher + .publish( + &cid, + "fula-users-index", + Duration::from_secs(60), + Duration::from_secs(15), + ) + .await; + assert!(result.is_err(), "5xx must surface as error"); + let err = format!("{}", result.unwrap_err()); + assert!(err.contains("status=500"), "error message exposes status"); + } + + #[tokio::test] + async fn 
test_run_tick_calls_ipns_with_correct_cid_and_sequence() { + // Verifies the integration point: run_tick fires kubo's + // /api/v0/name/publish with the freshly-built global CID. + let mock = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/api/v0/name/publish")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "Name": "k51qzi5uqu5dh-mock", + "Value": "/ipfs/QmIgnored", + }))) + .expect(1) // exactly one IPNS publish per tick + .mount(&mock) + .await; + + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let (publisher, _store, manager) = + fixture_publisher_with_ipns(path, mock.uri()); + + create_user_bucket(&manager, "alice", "photos").await; + let outcome = publisher.run_tick().await.expect("tick"); + assert_eq!(outcome.sequence, 1); + // wiremock's expect(1) verifies on drop that exactly one + // request hit the IPNS endpoint. + } + + #[tokio::test] + async fn test_run_tick_succeeds_when_ipns_5xx() { + // Operating-state matrix: kubo IPNS endpoint returns 500. + // The tick MUST still return Ok, the persisted state MUST + // still advance, and the global CID MUST still be pinned. + // Otherwise a flaky kubo blocks the entire publisher, + // which blocks subsequent writes on master. + let mock = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/api/v0/name/publish")) + .respond_with(ResponseTemplate::new(500).set_body_string("kubo down")) + .mount(&mock) + .await; + + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let (publisher, store, manager) = + fixture_publisher_with_ipns(path.clone(), mock.uri()); + + create_user_bucket(&manager, "alice", "photos").await; + let outcome = publisher.run_tick().await.expect("tick still Ok on IPNS 5xx"); + assert_eq!(outcome.sequence, 1); + assert!(outcome.global_rebuilt); + + // Pin happened → block exists in store. 
+ assert!(store.is_pinned(&outcome.global_cid).await.unwrap()); + + // Persist happened → state file reflects new sequence. + let persisted = PersistedState::load(&path).expect("load"); + assert_eq!(persisted.sequence, 1); + assert_eq!(persisted.global_cid, Some(outcome.global_cid)); + } + + #[tokio::test] + async fn test_run_tick_no_ipns_configured_still_pins_and_persists() { + // open_without_ipns: tick still pins + persists; chain backup + // path is the only publish channel. Useful regression check + // for operators who deploy without IPNS. + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let store = Arc::new(MemoryBlockStore::new()); + let manager = Arc::new(BucketManager::new(Arc::clone(&store))); + let publisher = UsersIndexPublisher::open_without_ipns( + fixture_config(path.clone()), + Arc::clone(&manager), + Arc::clone(&store), + ) + .expect("open"); + + create_user_bucket(&manager, "alice", "photos").await; + let outcome = publisher.run_tick().await.expect("tick"); + assert_eq!(outcome.sequence, 1); + assert!(outcome.global_rebuilt); + assert!(publisher.ipns_api_url_for_test().is_none()); + } } diff --git a/crates/fula-cli/src/routes.rs b/crates/fula-cli/src/routes.rs index e9f0e24..6d9e703 100644 --- a/crates/fula-cli/src/routes.rs +++ b/crates/fula-cli/src/routes.rs @@ -25,6 +25,25 @@ pub fn create_router(state: Arc) -> Router { // Public routes that must bypass auth (e.g., container health checks) let public = Router::new().route("/healthz", get(handlers::healthz)); + // Phase 3.2 internal endpoints. Bearer-token-protected at the + // handler level (see handlers::internal::authenticate). They + // bypass the user-JWT auth middleware so the chain cron in + // mainnet-reward-server can consume them with a shared secret, + // not a JWT. When the publisher is disabled OR the token is + // unset, both endpoints fail-closed with 503. 
+ let internal = Router::new() + .route( + "/_internal/users-index-state", + get(handlers::internal::users_index_state), + ) + .route( + "/_internal/publish-now", + post(handlers::internal::publish_now), + ) + .layer(axum_middleware::from_fn(middleware::request_id_middleware)) + .layer(axum_middleware::from_fn(middleware::logging_middleware)) + .with_state(state.clone()); + // Admin routes (protected by admin middleware) let admin = Router::new() .route("/admin/users/{user_id}/buckets", get(handlers::list_user_buckets)) @@ -82,10 +101,11 @@ pub fn create_router(state: Arc) -> Router { )) .with_state(state.clone()); - // Combine public, admin, and private, then apply shared layers + // Combine public, admin, internal, and private, then apply shared layers Router::new() .merge(public) .merge(admin) + .merge(internal) .merge(private) .layer(cors) .layer(TraceLayer::new_for_http()) diff --git a/crates/fula-cli/src/server.rs b/crates/fula-cli/src/server.rs index e9c05de..bd33944 100644 --- a/crates/fula-cli/src/server.rs +++ b/crates/fula-cli/src/server.rs @@ -1,6 +1,6 @@ //! Server startup and lifecycle -use crate::handlers::locks; +use crate::handlers::{locks, users_index_publisher}; use crate::{AppState, GatewayConfig, routes}; use std::net::SocketAddr; use std::sync::Arc; @@ -16,6 +16,13 @@ pub async fn run_server(config: GatewayConfig) -> anyhow::Result<()> { // lives for the lifetime of the process. locks::start_sweeper(state.lock_store.clone()); + // Phase 3.2 — spawn the users-index publisher loop iff the env + // flag enabled the publisher at AppState construction time. When + // disabled, this is a no-op and nothing about S3 routing changes. 
+ if let Some(publisher) = state.users_index_publisher.clone() { + users_index_publisher::start_publisher_loop(publisher); + } + // Create router let app = routes::create_router(state); @@ -41,6 +48,13 @@ pub async fn run_server_with_shutdown( locks::start_sweeper(state.lock_store.clone()); + // Phase 3.2 — spawn the users-index publisher loop iff the env + // flag enabled the publisher at AppState construction time. When + // disabled, this is a no-op and nothing about S3 routing changes. + if let Some(publisher) = state.users_index_publisher.clone() { + users_index_publisher::start_publisher_loop(publisher); + } + let app = routes::create_router(state); let addr = config.bind_addr(); diff --git a/crates/fula-cli/src/state.rs b/crates/fula-cli/src/state.rs index 243c797..817a3d5 100644 --- a/crates/fula-cli/src/state.rs +++ b/crates/fula-cli/src/state.rs @@ -34,6 +34,14 @@ pub struct AppState { /// In-memory advisory lock store used to serialize v1 -> v7 forest /// migrations across devices. TTL-bounded; process-local only. pub lock_store: crate::handlers::locks::LockStore, + /// Phase 3.2 master-side users-index publisher. `None` when the + /// `FULA_USERS_INDEX_PUBLISHER_ENABLED` env flag is unset (default). + /// When `None`, the `/_internal/users-index-state` endpoint + /// returns 503; existing S3 handlers behave byte-identically to + /// pre-Phase-3 deploys. + pub users_index_publisher: Option< + Arc>, + >, } impl AppState { @@ -118,12 +126,23 @@ impl AppState { // after AppState is wrapped in an Arc. let lock_store = crate::handlers::locks::LockStore::new(); + // Phase 3.2 users-index publisher — env-flag-gated so day-one + // deploys behave byte-identically to pre-Phase-3 builds. + // Operators flip `FULA_USERS_INDEX_PUBLISHER_ENABLED=1` after + // canary verification. 
+ let users_index_publisher = build_users_index_publisher( + &config, + Arc::clone(&bucket_manager), + Arc::clone(&block_store), + ); + Ok(Self { config, block_store, bucket_manager, multipart_manager, lock_store, + users_index_publisher, }) } @@ -207,6 +226,109 @@ impl UserSession { } } +/// Phase 3.2 users-index publisher constructor — env-flag-gated. +/// +/// Returns `None` when `FULA_USERS_INDEX_PUBLISHER_ENABLED` is unset +/// or "0"/"false". When enabled: +/// +/// | Env var | Default | +/// |---------------------------------------------|------------------------------------------------------------| +/// | `FULA_USERS_INDEX_STATE_PATH` | `/var/lib/fula-gateway/users_index_state.txt` | +/// | `FULA_USERS_INDEX_FLUSH_INTERVAL_SECS` | 300 | +/// | `FULA_USERS_INDEX_INTERNAL_TOKEN` | (none → endpoints fail-closed with 503) | +/// | `FULA_USERS_INDEX_IPNS_KEY_NAME` | `fula-users-index` | +/// | `FULA_USERS_INDEX_IPNS_LIFETIME_SECS` | 129600 (36h) | +/// | `FULA_USERS_INDEX_IPNS_TTL_SECS` | 900 (15m) | +/// | `FULA_USERS_INDEX_IPNS_DISABLED` | unset → IPNS enabled | +/// | `FULA_USERS_INDEX_FIRST_PUBLISH_PINS_PER_S` | 100 | +fn build_users_index_publisher( + config: &GatewayConfig, + bucket_manager: Arc>, + block_store: Arc, +) -> Option>> { + use crate::handlers::users_index_publisher::{ + IpnsPublisher, PublisherConfig, UsersIndexPublisher, + }; + use std::time::Duration; + + let enabled = std::env::var("FULA_USERS_INDEX_PUBLISHER_ENABLED") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false); + if !enabled { + info!("users-index publisher: disabled (FULA_USERS_INDEX_PUBLISHER_ENABLED unset)"); + return None; + } + + let state_file_path = std::env::var("FULA_USERS_INDEX_STATE_PATH") + .unwrap_or_else(|_| "/var/lib/fula-gateway/users_index_state.txt".to_string()) + .into(); + let flush_interval = Duration::from_secs( + std::env::var("FULA_USERS_INDEX_FLUSH_INTERVAL_SECS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(300), + ); + let 
ipns_lifetime = Duration::from_secs( + std::env::var("FULA_USERS_INDEX_IPNS_LIFETIME_SECS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(36 * 3600), + ); + let ipns_ttl = Duration::from_secs( + std::env::var("FULA_USERS_INDEX_IPNS_TTL_SECS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(15 * 60), + ); + let ipns_key_name = std::env::var("FULA_USERS_INDEX_IPNS_KEY_NAME") + .unwrap_or_else(|_| "fula-users-index".to_string()); + let internal_token = std::env::var("FULA_USERS_INDEX_INTERNAL_TOKEN").ok().filter(|s| !s.is_empty()); + let first_publish_max_pins_per_sec = std::env::var("FULA_USERS_INDEX_FIRST_PUBLISH_PINS_PER_S") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(100); + + let pub_config = PublisherConfig { + flush_interval, + first_publish_max_pins_per_sec, + ipns_lifetime, + ipns_ttl, + ipns_key_name: ipns_key_name.clone(), + state_file_path, + ipfs_api_url: config.ipfs_url.clone(), + internal_token: internal_token.clone(), + }; + + let ipns_disabled = std::env::var("FULA_USERS_INDEX_IPNS_DISABLED") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false); + let ipns = if ipns_disabled { + warn!("users-index publisher: IPNS disabled (FULA_USERS_INDEX_IPNS_DISABLED=1) — chain backup is the only publish channel"); + None + } else { + Some(IpnsPublisher::new(config.ipfs_url.clone())) + }; + + match UsersIndexPublisher::open_with_ipns(pub_config, bucket_manager, block_store, ipns) { + Ok(p) => { + info!( + flush_interval_secs = flush_interval.as_secs(), + ipns_key_name = %ipns_key_name, + internal_token_set = internal_token.is_some(), + "users-index publisher: enabled" + ); + Some(Arc::new(p)) + } + Err(e) => { + warn!( + error = %e, + "users-index publisher: failed to open state file; publisher disabled for this run" + ); + None + } + } +} + /// Admin session information #[derive(Clone, Debug)] pub struct AdminSession { From 5e0e2828bb447bca55a4be2b5b6bec9f51e8c5d2 Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Mon, 4 
May 2026 11:44:21 -0400 Subject: [PATCH 3/6] Tolerate per-user pin failures and add wire helpers Make users-index publishing tolerant of individual per-user pin failures: collect per-user pin results without aborting the tick, surface a failed_users count in TickOutcome/PublishNowResponse, emit per-user and tick-level warnings, and add comprehensive tests (including a FaultyBlockStore) for partial/failing/all-fail/retry scenarios. Extract and unit-test HTTP-layer helpers for Phase 1.2: control-header filtering and parse_bucket_lookup_h_header (with explicit error enum) so the lookup_h header is handled cleanly and not persisted as user metadata. Enhance fula-client block cache: add KEY_TO_CID mapping for offline-fallback, resolver hot-start METADATA rows and accessors, debug impls, and store/load helpers for users_index state. Update fula-client Cargo.toml to add serde_ipld_dagcbor and sha3 (tests). Misc: wire failed_users through publish_now response and small cleanup/refactors to object header handling. 
--- Cargo.lock | 3 + crates/fula-cli/src/handlers/internal.rs | 10 + crates/fula-cli/src/handlers/object.rs | 274 ++- .../src/handlers/users_index_publisher.rs | 562 +++++- crates/fula-client/Cargo.toml | 10 + crates/fula-client/src/block_cache.rs | 451 ++++- crates/fula-client/src/client.rs | 1049 +++++++++- crates/fula-client/src/config.rs | 183 +- crates/fula-client/src/encryption.rs | 881 +++++++- crates/fula-client/src/error.rs | 86 + crates/fula-client/src/gateway_fetch.rs | 32 +- crates/fula-client/src/health_gate.rs | 235 ++- crates/fula-client/src/lib.rs | 21 + crates/fula-client/src/registry_resolver.rs | 1785 +++++++++++++++++ crates/fula-client/src/types.rs | 76 + crates/fula-flutter/Cargo.toml | 5 + crates/fula-flutter/src/api/client.rs | 170 +- crates/fula-flutter/src/api/error.rs | 74 + crates/fula-flutter/src/api/types.rs | 72 + crates/fula-flutter/src/frb_generated.rs | 20 + crates/fula-js/src/lib.rs | 204 +- 21 files changed, 6103 insertions(+), 100 deletions(-) create mode 100644 crates/fula-client/src/registry_resolver.rs diff --git a/Cargo.lock b/Cargo.lock index 4c0dfda..a3e6cfd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1813,8 +1813,10 @@ dependencies = [ "redb", "reqwest", "serde", + "serde_ipld_dagcbor", "serde_json", "sha2", + "sha3", "tempfile", "thiserror 2.0.17", "tokio", @@ -1921,6 +1923,7 @@ dependencies = [ "parking_lot", "serde", "serde_json", + "tempfile", "thiserror 2.0.17", "tokio", "wasm-bindgen-futures", diff --git a/crates/fula-cli/src/handlers/internal.rs b/crates/fula-cli/src/handlers/internal.rs index aeeb3c6..2fe7ef8 100644 --- a/crates/fula-cli/src/handlers/internal.rs +++ b/crates/fula-cli/src/handlers/internal.rs @@ -63,6 +63,15 @@ pub struct PublishNowResponse { pub global_cid: String, pub sequence: u64, pub changed_users: usize, + /// Number of users whose per-user CBOR pin failed this tick. 
+ /// Surfaces the per-user-error-tolerance count from + /// `TickOutcome.failed_users` so an operator clicking + /// "publish now" in the admin UI sees per-user pin failures + /// without tailing logs. A non-zero value means the published + /// global may exclude one or more users (or carry their prior + /// CIDs). The per-user `warn!` lines inside `run_tick` identify + /// WHICH users failed; this field is the count for surfacing. + pub failed_users: usize, pub total_users: usize, pub global_rebuilt: bool, } @@ -159,6 +168,7 @@ pub async fn publish_now( global_cid: outcome.global_cid.to_string(), sequence: outcome.sequence, changed_users: outcome.changed_users, + failed_users: outcome.failed_users, total_users: outcome.total_users, global_rebuilt: outcome.global_rebuilt, }; diff --git a/crates/fula-cli/src/handlers/object.rs b/crates/fula-cli/src/handlers/object.rs index e33594f..6863a07 100644 --- a/crates/fula-cli/src/handlers/object.rs +++ b/crates/fula-cli/src/handlers/object.rs @@ -130,14 +130,13 @@ pub async fn put_object( metadata = metadata.with_content_type(ct); } - // Extract user metadata (x-amz-meta-*). - // Internal Fula control headers (consumed by the handler, not stored as - // object metadata) are filtered out — they would otherwise pollute every - // object's persisted metadata. - const FULA_CONTROL_HEADERS: &[&str] = &["fula-bucket-lookup-h"]; + // Extract user metadata (x-amz-meta-*). Internal Fula control + // headers (consumed by the handler, not stored as object metadata) + // are filtered out via `is_fula_control_header` — they would + // otherwise pollute every object's persisted metadata. 
for (name, value) in headers.iter() { if let Some(key) = name.as_str().strip_prefix("x-amz-meta-") { - if FULA_CONTROL_HEADERS.contains(&key) { + if is_fula_control_header(key) { continue; } if let Ok(v) = value.to_str() { @@ -180,10 +179,8 @@ pub async fn put_object( .get("x-amz-meta-fula-bucket-lookup-h") .and_then(|v| v.to_str().ok()) { - match hex::decode(hex_str) { - Ok(bytes) if bytes.len() == 16 => { - let mut lookup_h = [0u8; 16]; - lookup_h.copy_from_slice(&bytes); + match parse_bucket_lookup_h_header(hex_str) { + Ok(lookup_h) => { match state.bucket_manager.populate_lookup_h_if_missing( &session.hashed_user_id, &bucket_name, @@ -205,12 +202,12 @@ pub async fn put_object( ), } } - Ok(other) => tracing::warn!( - actual_len = other.len(), + Err(BucketLookupHError::WrongLength { actual }) => tracing::warn!( + actual_len = actual, "x-amz-meta-fula-bucket-lookup-h: expected 16-byte hex (32 chars), got {} bytes", - other.len() + actual ), - Err(e) => tracing::warn!( + Err(BucketLookupHError::InvalidHex(e)) => tracing::warn!( error = %e, "Failed to hex-decode x-amz-meta-fula-bucket-lookup-h" ), @@ -768,6 +765,255 @@ fn parse_etag_list(s: &str) -> impl Iterator + '_ { }) } +// ============================================================ +// Phase 1.2 wire-path helpers (master-side) +// ============================================================ +// +// These are extracted out of the put_object handler so the +// header-parsing + control-header-filter logic can be unit-tested +// without spinning up the full HTTP server stack. Audit follow-up +// item #5: cover the wire path beyond the BucketManager-direct +// integration test in users_index_publisher.rs. + +/// Internal Fula control headers (consumed by handler logic, NOT +/// persisted as object metadata). The list is `pub(crate)` so it +/// can be referenced from sibling modules; tests below assert it +/// stays in lockstep with the handler's filtering. 
+pub(crate) const FULA_CONTROL_HEADERS: &[&str] = &["fula-bucket-lookup-h"]; + +/// Returns `true` if the given x-amz-meta key (already stripped of +/// the `x-amz-meta-` prefix) is a Fula control header — meaning it +/// should NOT end up in `ObjectMetadata.user_metadata` even though +/// it's a perfectly valid `x-amz-meta-*` name. +pub(crate) fn is_fula_control_header(stripped_key: &str) -> bool { + FULA_CONTROL_HEADERS.contains(&stripped_key) +} + +/// Parse error for the `x-amz-meta-fula-bucket-lookup-h` header +/// value. Three failure modes today; expanding this enum is +/// backward-compatible (the handler matches exhaustively). +#[derive(Debug)] +pub(crate) enum BucketLookupHError { + /// hex::decode failed — non-hex characters in the value. + InvalidHex(hex::FromHexError), + /// Decoded byte length wasn't 16 (the only legal width per + /// Phase 1.2 spec — `userKey`-equivalent 128-bit blinded key). + WrongLength { actual: usize }, +} + +impl From for BucketLookupHError { + fn from(e: hex::FromHexError) -> Self { + BucketLookupHError::InvalidHex(e) + } +} + +/// Parse `x-amz-meta-fula-bucket-lookup-h` header value into a +/// 16-byte fixed array. Pure: no I/O, no allocations beyond the +/// transient hex::decode buffer. Used by `put_object` to convert +/// the wire-format string into the format +/// `BucketManager::populate_lookup_h_if_missing` expects. +pub(crate) fn parse_bucket_lookup_h_header( + hex_str: &str, +) -> Result<[u8; 16], BucketLookupHError> { + let bytes = hex::decode(hex_str)?; + if bytes.len() != 16 { + return Err(BucketLookupHError::WrongLength { actual: bytes.len() }); + } + let mut out = [0u8; 16]; + out.copy_from_slice(&bytes); + Ok(out) +} + +#[cfg(test)] +mod phase_1_2_wire_tests { + //! Phase 1.2 wire-path tests. Covers what the existing + //! `users_index_publisher::test_run_tick_legacy_to_blinded_replaces_entry` + //! test does NOT cover: the HTTP-layer header extraction + + //! 
parsing logic that sits between an SDK request and a + //! `populate_lookup_h_if_missing` call. + + use super::*; + use axum::http::{HeaderMap, HeaderName, HeaderValue}; + + #[test] + fn control_header_filter_includes_lookup_h() { + // Audit gold: the lookup_h header IS recognized as a control + // header. If someone removes it from FULA_CONTROL_HEADERS the + // header would leak into user_metadata storage on every PUT. + assert!(is_fula_control_header("fula-bucket-lookup-h")); + } + + #[test] + fn control_header_filter_excludes_arbitrary_user_metadata() { + // Defensive: an app's own metadata keys must NOT be filtered. + assert!(!is_fula_control_header("content-language")); + assert!(!is_fula_control_header("x-fula-encrypted")); + assert!(!is_fula_control_header("")); + } + + #[test] + fn parse_lookup_h_accepts_valid_32_char_hex() { + // Mirrors what `compute_bucket_lookup_h_hex` produces in the + // SDK: 32 lowercase hex chars = 16 bytes. + let valid = "deadbeefcafebabefeedfacef00dbabe"; + let parsed = parse_bucket_lookup_h_header(valid).expect("valid 32-char hex"); + assert_eq!(parsed.len(), 16); + assert_eq!(parsed[0], 0xde); + assert_eq!(parsed[15], 0xbe); + } + + #[test] + fn parse_lookup_h_accepts_uppercase_hex() { + // hex::decode is case-insensitive; we don't normalize. + let valid = "DEADBEEFCAFEBABEFEEDFACEF00DBABE"; + let parsed = parse_bucket_lookup_h_header(valid).expect("uppercase ok"); + assert_eq!(parsed[0], 0xde); + } + + #[test] + fn parse_lookup_h_rejects_too_short() { + // 30 hex chars = 15 bytes — one short. + let too_short = "deadbeefcafebabefeedfacef00dba"; + match parse_bucket_lookup_h_header(too_short) { + Err(BucketLookupHError::WrongLength { actual: 15 }) => {} + other => panic!("expected WrongLength{{actual:15}}, got {:?}", other), + } + } + + #[test] + fn parse_lookup_h_rejects_too_long() { + // 34 hex chars = 17 bytes — one byte over. 
+ let too_long = "deadbeefcafebabefeedfacef00dbabe11"; + match parse_bucket_lookup_h_header(too_long) { + Err(BucketLookupHError::WrongLength { actual: 17 }) => {} + other => panic!("expected WrongLength{{actual:17}}, got {:?}", other), + } + } + + #[test] + fn parse_lookup_h_rejects_non_hex_chars() { + // 'z' is not a valid hex char; even at correct length this + // fails with InvalidHex. + let bad_chars = "zzadbeefcafebabefeedfacef00dbabe"; + match parse_bucket_lookup_h_header(bad_chars) { + Err(BucketLookupHError::InvalidHex(_)) => {} + other => panic!("expected InvalidHex, got {:?}", other), + } + } + + #[test] + fn parse_lookup_h_rejects_empty_string() { + // An empty header value reaches us as "" — must not parse + // to a zero-byte array. + match parse_bucket_lookup_h_header("") { + Err(BucketLookupHError::WrongLength { actual: 0 }) => {} + other => panic!("expected WrongLength{{actual:0}}, got {:?}", other), + } + } + + #[test] + fn parse_lookup_h_rejects_odd_length_hex() { + // 31 chars — odd-length is invalid per hex spec; hex::decode + // returns OddLength, which we surface as InvalidHex. + let odd = "deadbeefcafebabefeedfacef00dbab"; + match parse_bucket_lookup_h_header(odd) { + Err(BucketLookupHError::InvalidHex(_)) => {} + other => panic!("expected InvalidHex (odd length), got {:?}", other), + } + } + + /// End-to-end-ish wire-path simulation: from a real `HeaderMap` + /// (as the put_object handler would receive), extract: + /// - the user_metadata that should be persisted (lookup_h MUST + /// NOT appear there) + /// - the parsed lookup_h bytes (MUST equal what the SDK sent) + /// + /// This is the critical regression guard for "old client uploads + /// without header → no populate" vs "new client uploads with + /// header → populate fires with correct bytes". The integration + /// with `BucketManager` and the publisher is already covered by + /// `users_index_publisher::test_run_tick_legacy_to_blinded_replaces_entry`. 
+ #[test] + fn old_client_no_header_means_no_populate() { + let mut headers = HeaderMap::new(); + // Old client sends content-type and a user metadata key; no + // lookup_h header. + headers.insert( + HeaderName::from_static("content-type"), + HeaderValue::from_static("image/jpeg"), + ); + headers.insert( + HeaderName::from_static("x-amz-meta-myapp-tag"), + HeaderValue::from_static("vacation"), + ); + + // Wire-path step 1: lookup_h header absent → handler skips populate. + let lookup_h_present = headers.get("x-amz-meta-fula-bucket-lookup-h").is_some(); + assert!(!lookup_h_present, "no header on old-client PUT"); + + // Wire-path step 2: user_metadata extraction filters control + // headers (none to filter here, but the loop must include the + // app's own tag). + let mut user_meta: Vec<(String, String)> = Vec::new(); + for (name, value) in headers.iter() { + if let Some(key) = name.as_str().strip_prefix("x-amz-meta-") { + if is_fula_control_header(key) { + continue; + } + if let Ok(v) = value.to_str() { + user_meta.push((key.to_string(), v.to_string())); + } + } + } + assert_eq!(user_meta, vec![("myapp-tag".to_string(), "vacation".to_string())]); + } + + #[test] + fn new_client_header_parses_and_does_not_leak_into_user_metadata() { + let mut headers = HeaderMap::new(); + headers.insert( + HeaderName::from_static("content-type"), + HeaderValue::from_static("image/jpeg"), + ); + headers.insert( + HeaderName::from_static("x-amz-meta-fula-bucket-lookup-h"), + HeaderValue::from_static("aabbccddeeff00112233445566778899"), + ); + headers.insert( + HeaderName::from_static("x-amz-meta-myapp-tag"), + HeaderValue::from_static("vacation"), + ); + + // Wire-path step 1: lookup_h header parses to expected bytes. 
+ let hex_str = headers + .get("x-amz-meta-fula-bucket-lookup-h") + .and_then(|v| v.to_str().ok()) + .expect("present"); + let parsed = parse_bucket_lookup_h_header(hex_str).expect("valid hex"); + assert_eq!(parsed, [0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, 0x11, + 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99]); + + // Wire-path step 2: user_metadata extraction MUST drop the + // lookup_h header and keep the app's own tag. + let mut user_meta: Vec<(String, String)> = Vec::new(); + for (name, value) in headers.iter() { + if let Some(key) = name.as_str().strip_prefix("x-amz-meta-") { + if is_fula_control_header(key) { + continue; + } + if let Ok(v) = value.to_str() { + user_meta.push((key.to_string(), v.to_string())); + } + } + } + assert_eq!( + user_meta, + vec![("myapp-tag".to_string(), "vacation".to_string())], + "lookup_h header must NOT leak into user_metadata" + ); + } +} + #[cfg(test)] mod conditional_tests { use super::{match_if_match, match_if_none_match}; diff --git a/crates/fula-cli/src/handlers/users_index_publisher.rs b/crates/fula-cli/src/handlers/users_index_publisher.rs index 1fa72d6..5f1c699 100644 --- a/crates/fula-cli/src/handlers/users_index_publisher.rs +++ b/crates/fula-cli/src/handlers/users_index_publisher.rs @@ -27,7 +27,13 @@ //! one `tokio::spawn` from `server::run_server` after `AppState` is //! wrapped in `Arc`. The task lives for the process lifetime. -#![allow(dead_code)] // A3 will consume `internal_token` +// `dead_code` is permitted for module-level helpers that are exercised +// only in tests (e.g. `ipns_api_url_for_test`) or that are reserved for +// the planned Phase 3.3 SDK-side caller (e.g. structured config getters). +// Production paths (`run_tick`, `start_publisher_loop`, internal HTTP +// handlers) DO consume every field; this allow simply silences the +// warning chatter on the test-only accessors. 
+#![allow(dead_code)] use anyhow::Result as AnyResult; use cid::Cid; @@ -567,6 +573,22 @@ pub struct TickOutcome { /// re-pinned this tick. Always equal to `total_users` on the /// first tick (cache is empty). pub changed_users: usize, + /// Number of users whose per-user CBOR pin attempt failed this + /// tick. Per-user failures are tolerated: the tick continues with + /// the users that succeeded, the global is rebuilt with whatever + /// state the diff-cache currently holds (which means failed users + /// retain their PRIOR `bucketsIndexCid` if they had one, and are + /// absent from the published global if they had no prior pin). + /// Failed users are retried on the next tick because their + /// `content_hash` still mismatches the cache row. + /// + /// Operators monitor this field: a sustained non-zero value + /// across many ticks indicates a user whose data triggers a + /// pinning-service edge case and warrants investigation. The + /// publisher loop also emits a `warn!` line per failed user + /// inside `run_tick` (with the user_id and full error chain) so + /// the failing user is identifiable from logs alone. + pub failed_users: usize, /// Total number of users in `BucketManager.buckets` at this tick. pub total_users: usize, /// CID of the global users-index CBOR pinned this tick. @@ -734,18 +756,27 @@ impl UsersIndexPublisher { // Buffer-unordered keeps at most `max_concurrent` pin ops in // flight at any time (advisor's first-publish throttle). + // + // Per-user error tolerance: each task returns + // `(owner_id, AnyResult<(hash, cid)>)` so the outer loop can + // identify WHICH user failed and log it. Without this, an + // anyhow `?` in the inner closure would drop the owner_id + // and the loop level would only see an opaque error. 
let block_store = Arc::clone(&self.block_store); - let pin_results: Vec> = { + let pin_results: Vec<(String, AnyResult<([u8; 32], Cid)>)> = { use futures::stream::{self, StreamExt}; stream::iter(to_rebuild.into_iter().map(|(owner_id, buckets)| { let bs = Arc::clone(&block_store); async move { - let hash = compute_user_content_hash(&buckets); - let cbor = build_user_buckets_index(&buckets, now); - let cid = bs.put_ipld(&cbor).await?; - bs.pin(&cid, Some("fula-users-index-per-user")) - .await?; - Ok::<_, anyhow::Error>((owner_id, hash, cid)) + let inner: AnyResult<([u8; 32], Cid)> = async { + let hash = compute_user_content_hash(&buckets); + let cbor = build_user_buckets_index(&buckets, now); + let cid = bs.put_ipld(&cbor).await?; + bs.pin(&cid, Some("fula-users-index-per-user")).await?; + Ok((hash, cid)) + } + .await; + (owner_id, inner) } })) .buffer_unordered(max_concurrent) @@ -753,17 +784,43 @@ impl UsersIndexPublisher { .await }; + // Per-user error tolerance: a single user's pin failure must + // NOT abort the tick. Today's behavior (abort on first error) + // means at scale a single corrupted user blocks every user's + // cold-start visibility. With tolerance: + // - succeeded users update their diff_cache row + // - failed users keep their PRIOR diff_cache row (or have + // none if never succeeded) + // - global is rebuilt from the cache as it stands + // - failed users retry on the next tick because their + // `content_hash` still mismatches the (un-updated) cache row + // + // The `warn!` per failure carries owner_id + full anyhow chain + // so an operator can identify the failing user and root cause + // without combing through thread-of-execution traces. 
let mut changed_users = 0usize; - for r in pin_results { - let (owner_id, hash, cid) = r?; - self.diff_cache.lock().insert( - owner_id, - PerUserDiffEntry { - content_hash: hash, - buckets_index_cid: cid, - }, - ); - changed_users += 1; + let mut failed_users = 0usize; + for (owner_id, r) in pin_results { + match r { + Ok((hash, cid)) => { + self.diff_cache.lock().insert( + owner_id, + PerUserDiffEntry { + content_hash: hash, + buckets_index_cid: cid, + }, + ); + changed_users += 1; + } + Err(e) => { + failed_users += 1; + warn!( + user = %owner_id, + error = %e, + "users-index publisher: per-user pin failed; user will retry on next tick" + ); + } + } } // Prune diff-cache rows for users who disappeared from @@ -796,6 +853,14 @@ impl UsersIndexPublisher { if changed_users == 0 && users_pruned == 0 && prior.global_cid.is_some() { return Ok(TickOutcome { changed_users: 0, + // `failed_users` IS surfaced even on the no-op path — + // operators need to see "we tried to advance state for + // these N users this tick but couldn't" even when the + // global itself is unchanged. Without this, repeated + // failures on the same user would be invisible at the + // tick-outcome layer (only via the per-user warn! line + // inside run_tick). + failed_users, total_users, global_cid: prior.global_cid.expect("checked is_some"), sequence: prior.sequence, @@ -891,6 +956,7 @@ impl UsersIndexPublisher { Ok(TickOutcome { changed_users, + failed_users, total_users, global_cid, sequence: next_sequence, @@ -933,10 +999,28 @@ pub fn start_publisher_loop( interval.tick().await; match publisher.run_tick().await { Ok(outcome) => { + // Tick-level failure surfacing: when ≥ 1 user's + // pin failed but the tick otherwise progressed, + // emit a warn so the failure is visible at the + // loop layer (the per-user warn! inside run_tick + // identifies WHICH user; this one summarizes the + // shape so a log scraper / alerting rule can + // count `failed_users` per tick). 
+ if outcome.failed_users > 0 { + warn!( + sequence = outcome.sequence, + changed_users = outcome.changed_users, + failed_users = outcome.failed_users, + total_users = outcome.total_users, + global_rebuilt = outcome.global_rebuilt, + "users-index publisher: tick had per-user pin failures; failed users will retry next tick" + ); + } if outcome.global_rebuilt { info!( sequence = outcome.sequence, changed_users = outcome.changed_users, + failed_users = outcome.failed_users, total_users = outcome.total_users, cid = %outcome.global_cid, "users-index publisher: tick committed new global" @@ -1404,11 +1488,14 @@ mod tests { // *behavior* (sequence advance, pin/unpin, diff-cache state), // not exact CID values. - async fn create_user_bucket( - manager: &BucketManager, + async fn create_user_bucket( + manager: &BucketManager, user_id: &str, bucket_name: &str, - ) { + ) + where + S: fula_blockstore::BlockStore + fula_blockstore::PinStore + 'static, + { manager .create_bucket_for_user( user_id, @@ -1860,4 +1947,437 @@ mod tests { assert!(outcome.global_rebuilt); assert!(publisher.ipns_api_url_for_test().is_none()); } + + // ============================================================ + // Per-user error tolerance (Phase 3.2 production hardening) + // ============================================================ + // + // Before this hardening: a single user's pin failure aborted the + // ENTIRE tick (the `for r in pin_results { let (...) = r?; }` + // pattern at the per-user collection step). At scale this means + // one corrupted user blocks every user's cold-start visibility. + // + // After: per-user failures are tolerated. The tick continues with + // succeeded users, the global is rebuilt from whatever the + // diff_cache currently holds (failed users keep their PRIOR cache + // row if any), and failed users naturally retry on the next tick + // because their `content_hash` still mismatches the unchanged + // cache row. 
+ // + // The four scenarios below come from the advisor's required + // matrix: + // 1. Partial failure → succeeded users in global, failed users + // not in global, sequence advances. + // 2. All-unchanged + 1 new-but-failing → no rebuild needed, + // sequence does NOT advance (early-return path), and the + // "stale-but-consistent" property holds: prior global keeps + // serving prior CIDs. + // 3. All-fail-first-tick → empty global, sequence = 1, + // failed_users = N (deliberate empty-global semantic, same + // as zero-users-on-first-tick). + // 4. Failed user retries successfully on next tick → eventually + // appears in global. + + /// Test-only fault-injecting block store. Wraps `MemoryBlockStore` + /// and fails `put_ipld` whenever the serialized CBOR bytes contain + /// the configured marker substring. Tests set up a fault by + /// naming a bucket with the marker; the per-user CBOR for that + /// user contains the bucket name (Phase 1.2 legacy mode keys + /// entries by plaintext name when `bucket_lookup_h = None`), so + /// `put_ipld(&UserBucketsIndex)` for that user fails with the + /// marker present. + /// + /// **Why content-driven, not order-driven.** Production failures + /// are content-driven (a specific user's data triggers a + /// pinning-service edge case). Substring matching captures that + /// failure shape and stays robust to any future refactor of + /// `buffer_unordered` ordering inside `run_tick`. + /// + /// The marker is also (incidentally) present in `BucketRegistry` + /// CBORs that `BucketManager::persist_registry` writes, but that + /// failure is caught by `create_bucket_for_user` (line 909-911 + /// in bucket.rs) and only logged at warn level — the in-memory + /// `BucketManager.buckets` is updated regardless, which is what + /// the publisher reads. 
+    #[derive(Clone)]
+    struct FaultyBlockStore {
+        inner: Arc<MemoryBlockStore>,
+        fail_marker: Arc<Mutex<Option<Vec<u8>>>>,
+    }
+
+    impl FaultyBlockStore {
+        fn new(inner: Arc<MemoryBlockStore>) -> Self {
+            Self {
+                inner,
+                fail_marker: Arc::new(Mutex::new(None)),
+            }
+        }
+
+        /// Configure the marker. `Some(s)` causes `put_ipld` to fail
+        /// when serialized bytes contain `s`. `None` clears injection.
+        fn set_fail_marker(&self, marker: Option<&str>) {
+            *self.fail_marker.lock() = marker.map(|s| s.as_bytes().to_vec());
+        }
+
+        /// Test helper: clone the inner store handle to inspect what
+        /// got pinned (since FaultyBlockStore.pin delegates).
+        fn inner(&self) -> Arc<MemoryBlockStore> {
+            Arc::clone(&self.inner)
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl fula_blockstore::BlockStore for FaultyBlockStore {
+        async fn put_block(&self, data: &[u8]) -> fula_blockstore::Result<Cid> {
+            self.inner.put_block(data).await
+        }
+        async fn get_block(&self, cid: &Cid) -> fula_blockstore::Result<Vec<u8>> {
+            self.inner.get_block(cid).await
+        }
+        async fn has_block(&self, cid: &Cid) -> fula_blockstore::Result<bool> {
+            self.inner.has_block(cid).await
+        }
+        async fn delete_block(&self, cid: &Cid) -> fula_blockstore::Result<()> {
+            self.inner.delete_block(cid).await
+        }
+        async fn block_size(&self, cid: &Cid) -> fula_blockstore::Result<u64> {
+            self.inner.block_size(cid).await
+        }
+        async fn put_ipld(
+            &self,
+            data: &T,
+        ) -> fula_blockstore::Result<Cid> {
+            // Delegate to the inner store first so the bytes are
+            // available for marker inspection via `get_block`. This
+            // avoids depending on serde_ipld_dagcbor directly (which
+            // isn't a fula-cli direct dep). The "block stored but
+            // not pinned" outcome models real production failures
+            // where a block reaches kubo but the cluster pin call
+            // fails — which is exactly the failure-mode this
+            // tolerance work guards against.
+            let cid = self.inner.put_ipld(data).await?;
+            // Snapshot the marker out of the parking_lot mutex guard
+            // before any `.await`. parking_lot's `MutexGuard` is not
+            // `Send`, so holding it across an await point makes the
+            // future non-Send and tokio refuses to spawn it.
+            let marker_snapshot: Option<Vec<u8>> = self.fail_marker.lock().clone();
+            if let Some(marker) = marker_snapshot {
+                if !marker.is_empty() {
+                    let bytes = self.inner.get_block(&cid).await?;
+                    if bytes.windows(marker.len()).any(|w| w == marker.as_slice()) {
+                        return Err(fula_blockstore::BlockStoreError::PinFailed(
+                            "test-injected fault: marker substring present in stored block".into(),
+                        ));
+                    }
+                }
+            }
+            Ok(cid)
+        }
+        async fn get_ipld(
+            &self,
+            cid: &Cid,
+        ) -> fula_blockstore::Result {
+            self.inner.get_ipld(cid).await
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl fula_blockstore::PinStore for FaultyBlockStore {
+        async fn pin(&self, cid: &Cid, name: Option<&str>) -> fula_blockstore::Result<()> {
+            self.inner.pin(cid, name).await
+        }
+        async fn pin_with_token(
+            &self,
+            cid: &Cid,
+            name: Option<&str>,
+            token: &str,
+        ) -> fula_blockstore::Result<()> {
+            self.inner.pin_with_token(cid, name, token).await
+        }
+        async fn unpin(&self, cid: &Cid) -> fula_blockstore::Result<()> {
+            self.inner.unpin(cid).await
+        }
+        async fn is_pinned(&self, cid: &Cid) -> fula_blockstore::Result<bool> {
+            self.inner.is_pinned(cid).await
+        }
+        async fn list_pins(&self) -> fula_blockstore::Result<Vec<Cid>> {
+            self.inner.list_pins().await
+        }
+        async fn pin_status(&self, cid: &Cid) -> fula_blockstore::Result {
+            self.inner.pin_status(cid).await
+        }
+    }
+
+    /// Marker substring used by the per-user-error-tolerance tests.
+ /// Picked to be: + /// - lowercase letters + hyphens only → passes + /// `validate_bucket_name` so it can be a real bucket name + /// - long enough (19 chars) that a false-positive substring + /// match in random CBOR bytes is implausible + const FAULT_MARKER: &str = "fault-inject-bucket"; + + fn fixture_publisher_with_faulty_store( + path: PathBuf, + ) -> ( + UsersIndexPublisher, + Arc, + Arc>, + ) { + let inner = Arc::new(MemoryBlockStore::new()); + let faulty = Arc::new(FaultyBlockStore::new(Arc::clone(&inner))); + let manager = Arc::new(BucketManager::new(Arc::clone(&faulty))); + let publisher = UsersIndexPublisher::open_without_ipns( + fixture_config(path), + Arc::clone(&manager), + Arc::clone(&faulty), + ) + .expect("open"); + (publisher, faulty, manager) + } + + #[tokio::test] + async fn test_run_tick_partial_failure_publishes_succeeded_users() { + // Scenario 1: alice has a normal bucket, bob has a bucket + // whose name contains FAULT_MARKER. Bob's per-user CBOR + // pin fails. Alice's succeeds. The tick continues, advances + // sequence, and the published global contains alice but + // NOT bob. + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let (publisher, store, manager) = fixture_publisher_with_faulty_store(path); + + store.set_fail_marker(Some(FAULT_MARKER)); + + create_user_bucket(&manager, "alice", "photos").await; + // Bob's bucket name contains the marker. The per-user CBOR + // for bob is keyed by plaintext bucket name (Phase 1.2 legacy + // mode), so the marker substring lands in the CBOR bytes. 
+ create_user_bucket(&manager, "bob", FAULT_MARKER).await; + + let outcome = publisher + .run_tick() + .await + .expect("tick MUST return Ok despite per-user pin failure"); + + assert_eq!( + outcome.changed_users, 1, + "exactly one user's CBOR was newly pinned (alice)" + ); + assert_eq!( + outcome.failed_users, 1, + "exactly one user's pin failed (bob)" + ); + assert_eq!(outcome.total_users, 2); + assert!( + outcome.global_rebuilt, + "global must be rebuilt to reflect alice's commit" + ); + assert_eq!(outcome.sequence, 1); + + // Decode the global CBOR: alice present, bob absent. + let inner = store.inner(); + let global: GlobalUsersIndex = + inner.get_ipld(&outcome.global_cid).await.expect("global"); + assert!( + global.users.contains_key("alice"), + "alice's userKey must be in published global" + ); + assert!( + !global.users.contains_key("bob"), + "bob's userKey must NOT be in published global (his pin failed)" + ); + } + + #[tokio::test] + async fn test_run_tick_failed_user_keeps_prior_cid_in_global() { + // Scenario 2 (advisor-mandated rigor): tick 1 — alice + bob + // both succeed. Tick 2 — alice gets a new bucket (will succeed), + // bob gets a marker bucket (will fail). The "stale-but- + // consistent" property: bob's entry in tick 2's published + // global must equal bob's PRIOR CID (from tick 1), NOT his + // new failed-pin CID. + // + // This guards against a future refactor that might + // accidentally republish bob with a stale-or-empty entry. If + // that happens, cold-start would point at content that isn't + // pinned, breaking bob's reads. + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let (publisher, store, manager) = fixture_publisher_with_faulty_store(path); + + // Tick 1: both users succeed. 
+ create_user_bucket(&manager, "alice", "photos").await; + create_user_bucket(&manager, "bob", "docs").await; + let first = publisher.run_tick().await.expect("first tick"); + assert_eq!(first.changed_users, 2); + assert_eq!(first.failed_users, 0); + + // Capture bob's PRIOR per-user bucketsIndex CID. + let inner = store.inner(); + let first_global: GlobalUsersIndex = + inner.get_ipld(&first.global_cid).await.expect("first global"); + // CIDs in the global are stored as strings (not Cid), so + // clone for comparison after the next get_ipld call. + let bob_prior_cid = first_global.users["bob"].clone(); + let alice_prior_cid = first_global.users["alice"].clone(); + + // Defensive sanity: bob's prior CID's bytes are present in + // the inner store. If a future refactor made `bob_prior_cid` + // a default/empty Cid, the equality assertion below would + // pass for the wrong reason. This catches that. + let bob_prior_cid_parsed: Cid = bob_prior_cid.parse().expect("parse prior cid"); + assert!( + inner.get_block(&bob_prior_cid_parsed).await.is_ok(), + "bob's prior bucketsIndex CID must reference real bytes (sanity)" + ); + + // Now turn on fault injection. + store.set_fail_marker(Some(FAULT_MARKER)); + + // Alice gets a new (clean) bucket → her CBOR rebuilds + pins OK. + create_user_bucket(&manager, "alice", "videos").await; + // Bob gets a marker bucket → his per-user CBOR pin fails. + create_user_bucket(&manager, "bob", FAULT_MARKER).await; + + let second = publisher.run_tick().await.expect("second tick"); + assert_eq!( + second.changed_users, 1, + "alice's CBOR rebuild succeeded; bob's failed" + ); + assert_eq!( + second.failed_users, 1, + "bob's pin failed" + ); + assert!( + second.global_rebuilt, + "alice's change forces global rebuild" + ); + assert_eq!(second.sequence, 2, "sequence advances on real change"); + assert_ne!( + second.global_cid, first.global_cid, + "global CID must change because alice changed" + ); + + // Decode tick 2's global. 
bob's entry MUST be his PRIOR cid; + // alice's entry MUST be her new cid. + let second_global: GlobalUsersIndex = + inner.get_ipld(&second.global_cid).await.expect("second global"); + assert_eq!( + second_global.users["bob"], bob_prior_cid, + "stale-but-consistent: bob's failed pin must NOT erase his prior CID; \ + cold-start serves bob's prior bucketsIndex (still pinned + accessible)" + ); + assert_ne!( + second_global.users["alice"], alice_prior_cid, + "alice's CID changed because her content changed and her pin succeeded" + ); + } + + #[tokio::test] + async fn test_run_tick_all_users_fail_first_tick_publishes_empty_global() { + // Scenario 3: every user's pin fails on the first tick. + // No prior state to preserve → publisher proceeds to publish + // an EMPTY global (same code path as "zero users on first + // tick", which the existing + // `test_run_tick_no_users_first_publish_emits_empty_global` + // test already pins down). + // + // Operators see this as a nonzero `failed_users` in TickOutcome + // + per-user `warn!` lines. The empty-global publish itself + // is not a regression: the next tick when users start + // succeeding republishes with non-empty global, sequence + // advances. The chain anchor cron eventually submits the + // first non-empty CID. No data corruption, no stuck state. + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let (publisher, store, manager) = fixture_publisher_with_faulty_store(path); + + store.set_fail_marker(Some(FAULT_MARKER)); + + // Both users have marker buckets → both pins fail. + create_user_bucket(&manager, "alice", FAULT_MARKER).await; + // Different bucket name to ensure two distinct users (BucketManager + // accepts duplicate names per-user but we want two USERS). 
+ let bob_bucket_name = format!("{}-2", FAULT_MARKER); + create_user_bucket(&manager, "bob", &bob_bucket_name).await; + + let outcome = publisher + .run_tick() + .await + .expect("tick MUST return Ok even when every per-user pin fails"); + + assert_eq!( + outcome.changed_users, 0, + "no per-user CBOR was successfully pinned" + ); + assert_eq!(outcome.failed_users, 2); + assert_eq!(outcome.total_users, 2); + assert!( + outcome.global_rebuilt, + "first publish must run even when every user failed (same as zero-users path)" + ); + assert_eq!(outcome.sequence, 1); + + let inner = store.inner(); + let global: GlobalUsersIndex = + inner.get_ipld(&outcome.global_cid).await.expect("global"); + assert_eq!( + global.users.len(), + 0, + "global has zero users — every user's CBOR pin failed" + ); + } + + #[tokio::test] + async fn test_run_tick_failed_user_retries_on_next_tick() { + // Scenario 4: bob fails on tick 1. Marker is cleared between + // ticks. On tick 2, bob's content_hash STILL mismatches his + // (unupdated) diff_cache row, so he's in `to_rebuild`. His + // pin succeeds this time; he appears in tick 2's global. + let dir = TempDir::new().unwrap(); + let path = dir.path().join("state.txt"); + let (publisher, store, manager) = fixture_publisher_with_faulty_store(path); + + // Set up: alice clean, bob with marker. + create_user_bucket(&manager, "alice", "photos").await; + create_user_bucket(&manager, "bob", FAULT_MARKER).await; + + // Tick 1: marker active → bob fails. + store.set_fail_marker(Some(FAULT_MARKER)); + let first = publisher.run_tick().await.expect("first tick"); + assert_eq!(first.changed_users, 1); + assert_eq!(first.failed_users, 1); + + let inner = store.inner(); + let first_global: GlobalUsersIndex = + inner.get_ipld(&first.global_cid).await.expect("first global"); + assert!( + !first_global.users.contains_key("bob"), + "bob absent from tick 1's global (failed pin)" + ); + + // Tick 2: clear the marker. 
bob's content_hash still doesn't + // match the (empty) cache row, so he's re-attempted. Pin + // succeeds this time → bob is in the global. + store.set_fail_marker(None); + let second = publisher.run_tick().await.expect("second tick"); + assert_eq!( + second.changed_users, 1, + "bob's retry succeeded; alice was unchanged" + ); + assert_eq!(second.failed_users, 0); + assert!(second.global_rebuilt); + assert_eq!(second.sequence, 2); + + let second_global: GlobalUsersIndex = + inner.get_ipld(&second.global_cid).await.expect("second global"); + assert!( + second_global.users.contains_key("bob"), + "bob present in tick 2's global (retry succeeded)" + ); + assert!( + second_global.users.contains_key("alice"), + "alice still present (unchanged across the two ticks)" + ); + } } diff --git a/crates/fula-client/Cargo.toml b/crates/fula-client/Cargo.toml index 017b095..5543805 100644 --- a/crates/fula-client/Cargo.toml +++ b/crates/fula-client/Cargo.toml @@ -56,6 +56,12 @@ cid = { workspace = true } sha2 = { workspace = true } # Mutex for per-gateway state in gateway_fetch (Phase 2.3). parking_lot = { workspace = true } +# Phase 3.3 cold-start hybrid resolver — parses the master-published +# global users-index dag-cbor payload directly (the resolver doesn't +# go through the full BlockStore trait, so it needs the codec by hand). +# `serde_json` for the chain-side eth_call request body is already +# pulled in via the cross-platform top-level deps. +serde_ipld_dagcbor = { workspace = true } [target.'cfg(target_arch = "wasm32")'.dependencies] # WASM: disable default features (no tokio), enable wasm feature @@ -72,6 +78,10 @@ tokio = { workspace = true } tokio-test = "0.4" wiremock = { workspace = true } tempfile = { workspace = true } +# Phase 3.3 — verifies the hardcoded `latest()` ABI selector matches +# `Keccak256("latest()")[..4]`. Tests-only; the prod build hardcodes +# the 4-byte constant to avoid a runtime crypto dep on the hot path. 
+sha3 = "0.10" [features] default = [] diff --git a/crates/fula-client/src/block_cache.rs b/crates/fula-client/src/block_cache.rs index 52e84ca..b2d6595 100644 --- a/crates/fula-client/src/block_cache.rs +++ b/crates/fula-client/src/block_cache.rs @@ -56,6 +56,40 @@ use tokio::sync::Mutex; const BLOCKS: TableDefinition<&[u8], &[u8]> = TableDefinition::new("blocks"); const META: TableDefinition<&[u8], u64> = TableDefinition::new("meta"); +/// Phase 2.4 lookup table: maps `(bucket, key)` (hashed with a +/// domain separator) → CID bytes. Used by the offline-fallback path +/// to translate an S3-key request into the IPFS CID it can fetch via +/// the gateway race. Populated as a side-effect of master-up reads +/// in `FulaClient::get_object_with_offline_fallback`. +/// +/// Key format: `BLAKE3("fula:block-cache:key-to-cid:v1" || bucket || 0x00 || key)[..32]` +/// — fixed 32 bytes, collision-resistant, fast B-tree lookup. Value: +/// raw CID bytes (the same encoding used as the BLOCKS table key, so +/// a `KEY_TO_CID` lookup directly gives the bytes needed to query +/// BLOCKS or to construct a `Cid` for the gateway race). +const KEY_TO_CID: TableDefinition<&[u8], &[u8]> = TableDefinition::new("key_to_cid"); + +/// Phase 3.3.5 small-key-value metadata table. Stores resolver +/// hot-start state across SDK restarts: +/// - `users_index/cid` → CID bytes (cid.to_bytes()) +/// - `users_index/sequence` → u64 BE +/// - `users_index/observed_at_unix` → u64 BE +/// +/// Three rows, ~80 bytes total. The cached `(cid, sequence)` seeds +/// the resolver's replay-defense floor on construction; a fresh +/// `observed_at` lets the resolver short-circuit IPNS+chain when +/// the entry is within `ResolverConfig::soft_ttl`. +/// +/// Schema versioning: deliberately omitted in 3.3.5 (advisor cut). +/// When a v2 schema lands, add a `metadata.schema_id` constant + +/// drop-on-mismatch logic together with the real migration story. 
+const METADATA: TableDefinition<&[u8], &[u8]> = TableDefinition::new("metadata"); + +/// Metadata row keys (string literals stored as `&[u8]`). +const META_USERS_INDEX_CID: &[u8] = b"users_index/cid"; +const META_USERS_INDEX_SEQUENCE: &[u8] = b"users_index/sequence"; +const META_USERS_INDEX_OBSERVED_AT: &[u8] = b"users_index/observed_at_unix"; + /// Eviction low-watermark: when triggered, free space until usage is at /// or below this fraction of `max_bytes`. 80 % is the industry-standard /// "evict-once-amortize-many-puts" point. @@ -142,7 +176,7 @@ impl From for BlockCacheError { /// /// Cheap-clone via `Arc`: clones share the same database, so a `put` /// observed by one clone is immediately visible to all others. -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct BlockCache { inner: Arc, } @@ -159,6 +193,19 @@ struct BlockCacheInner { evict_lock: Mutex<()>, } +// `redb::Database` doesn't implement `Debug`, so we hand-roll a +// minimal `Debug` for `BlockCacheInner` that prints just the +// observable knobs. Required because `UsersIndexResolver` derives +// `Debug` and now holds an `Option>`. +impl std::fmt::Debug for BlockCacheInner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("BlockCacheInner") + .field("max_bytes", &self.max_bytes) + .field("current_bytes", &self.current_bytes.load(Ordering::Acquire)) + .finish_non_exhaustive() + } +} + impl BlockCache { /// Open or create the block cache at `path` with a budget of /// `max_bytes` total stored block-bytes. @@ -194,6 +241,14 @@ impl BlockCache { { let _ = init_txn.open_table(BLOCKS)?; let _ = init_txn.open_table(META)?; + // Phase 2.4 — additive table. An older redb file written + // before Phase 2.4 will not have it; opening it here + // creates it lazily without touching BLOCKS / META data. + let _ = init_txn.open_table(KEY_TO_CID)?; + // Phase 3.3.5 — resolver hot-start metadata. 
Same + // additive-on-open pattern; older Phase 2.x cache files + // gain it transparently on next open. + let _ = init_txn.open_table(METADATA)?; } init_txn.commit()?; @@ -221,7 +276,18 @@ impl BlockCache { }) } + // The three accessors below — `max_bytes`, `current_bytes`, + // `entry_count` — are public monitoring API for SDK consumers + // (apps that want to surface "cache 240 / 256 MiB used" UI, or + // for ops dashboards). The fula-client crate itself doesn't call + // them internally, hence the `#[allow(dead_code)]` to silence + // the workspace-default warning. Phase 19 (`HealthCallback` / + // `ReadFreshness`) will likely expose these via a typed status + // struct rather than direct field access; keep the accessors + // public until then so app integrators have a stable surface. + /// Configured budget in bytes. + #[allow(dead_code)] pub fn max_bytes(&self) -> u64 { self.inner.max_bytes } @@ -229,12 +295,14 @@ impl BlockCache { /// Approximate current byte usage. Eventually consistent under /// concurrent writes (the next read after all writes settle is /// exact). + #[allow(dead_code)] pub fn current_bytes(&self) -> u64 { self.inner.current_bytes.load(Ordering::Acquire) } /// Number of cached blocks. O(1) approximation via the underlying /// table length. + #[allow(dead_code)] pub fn entry_count(&self) -> Result { let read = self.inner.db.begin_read()?; let table = read.open_table(BLOCKS)?; @@ -336,6 +404,139 @@ impl BlockCache { Ok(()) } + /// Phase 2.4 — record an `(bucket, key) → cid` mapping observed + /// during a successful master-up read. Lets the offline-fallback + /// path translate a future S3-key request into the IPFS CID it + /// can fetch via the gateway race. + /// + /// Idempotent on repeated calls with the same arguments. The + /// underlying redb table grows unbounded today (one entry per + /// distinct `(bucket, key)` tuple ever observed). 
At expected + /// scale (a few thousand objects per device) this is fine; if + /// growth becomes an issue, eviction can be added at the same + /// point as block-cache LRU eviction in a future iteration. + /// Note that the mapping is small (~40 bytes per entry vs. + /// kilobytes for typical block payloads), so the BLOCKS table's + /// LRU pressure dominates space concerns by orders of magnitude. + pub fn record_key_cid( + &self, + bucket: &str, + key: &str, + cid: &Cid, + ) -> Result<(), BlockCacheError> { + let lookup_key = derive_key_cid_lookup(bucket, key); + let cid_bytes = cid.to_bytes(); + let txn = self.inner.db.begin_write()?; + { + let mut table = txn.open_table(KEY_TO_CID)?; + table.insert(lookup_key.as_slice(), cid_bytes.as_slice())?; + } + txn.commit()?; + Ok(()) + } + + /// Phase 2.4 — look up a previously-observed CID for `(bucket, key)`. + /// Returns `None` if the SDK has not seen this object during a + /// master-up read yet (the cold-start case, which the wrapper + /// surfaces as `MasterUnreachable` so Phase 3.3 can take over). + pub fn lookup_cid(&self, bucket: &str, key: &str) -> Result, BlockCacheError> { + let lookup_key = derive_key_cid_lookup(bucket, key); + let read = self.inner.db.begin_read()?; + let table = read.open_table(KEY_TO_CID)?; + match table.get(lookup_key.as_slice())? { + Some(v) => { + let bytes = v.value(); + // Round-trip through Cid to validate; corrupt entries + // are rare (would mean redb bit-flip) but failing + // closed is safer than serving a malformed CID to the + // gateway race. + Cid::try_from(bytes) + .map(Some) + .map_err(|e| BlockCacheError::Corrupt(format!("invalid CID in KEY_TO_CID: {}", e))) + } + None => Ok(None), + } + } + + /// Phase 3.3.5 — persist the resolver's last successful resolve + /// so a future SDK process can skip the IPNS+chain dance when + /// it's still fresh AND seed the replay-defense floor across + /// restarts. 
+ /// + /// Single redb write transaction (atomic across the three rows). + /// Crate-private: apps must not plant resolver state directly. + pub(crate) fn store_users_index_state( + &self, + cid: &Cid, + sequence: u64, + observed_at_unix: u64, + ) -> Result<(), BlockCacheError> { + let cid_bytes = cid.to_bytes(); + let txn = self.inner.db.begin_write()?; + { + let mut table = txn.open_table(METADATA)?; + table.insert(META_USERS_INDEX_CID, cid_bytes.as_slice())?; + table.insert(META_USERS_INDEX_SEQUENCE, sequence.to_be_bytes().as_slice())?; + table.insert( + META_USERS_INDEX_OBSERVED_AT, + observed_at_unix.to_be_bytes().as_slice(), + )?; + } + txn.commit()?; + Ok(()) + } + + /// Phase 3.3.5 — load the resolver hot-start state. Returns + /// `None` if any of the three rows is missing or malformed + /// (treats partial writes as if the cache were empty — the + /// resolver then falls through to a full IPNS+chain resolve). + pub(crate) fn load_users_index_state( + &self, + ) -> Result, BlockCacheError> { + let read = self.inner.db.begin_read()?; + let table = read.open_table(METADATA)?; + + let cid_bytes = match table.get(META_USERS_INDEX_CID)? { + Some(v) => v.value().to_vec(), + None => return Ok(None), + }; + let cid = match Cid::try_from(cid_bytes.as_slice()) { + Ok(c) => c, + // Malformed → treat as no state (defensive). Don't + // surface as Corrupt — that would block all hot-start + // reads on a single bad row instead of degrading to a + // fresh resolve. + Err(e) => { + tracing::warn!(error = %e, "users-index metadata: invalid CID; treating as empty"); + return Ok(None); + } + }; + + let seq_bytes = match table.get(META_USERS_INDEX_SEQUENCE)? { + Some(v) => v.value().to_vec(), + None => return Ok(None), + }; + let observed_bytes = match table.get(META_USERS_INDEX_OBSERVED_AT)? 
{ + Some(v) => v.value().to_vec(), + None => return Ok(None), + }; + if seq_bytes.len() != 8 || observed_bytes.len() != 8 { + tracing::warn!("users-index metadata: malformed length; treating as empty"); + return Ok(None); + } + + let mut seq = [0u8; 8]; + seq.copy_from_slice(&seq_bytes); + let mut obs = [0u8; 8]; + obs.copy_from_slice(&observed_bytes); + + Ok(Some(( + cid, + u64::from_be_bytes(seq), + u64::from_be_bytes(obs), + ))) + } + /// Evict LRU entries until `current_bytes <= target_bytes`. Caller /// must hold `evict_lock`. Atomic via a single redb write txn. fn evict_to(&self, target_bytes: u64) -> Result<(), BlockCacheError> { @@ -396,6 +597,26 @@ fn now_ms() -> u64 { .unwrap_or(0) } +/// Phase 2.4 — derive the redb-key for the KEY_TO_CID table. +/// +/// `BLAKE3("fula:block-cache:key-to-cid:v1" || bucket || 0x00 || key)[..32]`. +/// Domain separator pins the namespace; the embedded `0x00` between +/// bucket and key forecloses any ambiguity from S3 keys that contain +/// `/` (a single concatenation without separator could collide +/// `bucket=foo, key=bar` with `bucket=foo/bar, key=`). 32-byte output +/// is fixed-length for fast B-tree lookups. 
+fn derive_key_cid_lookup(bucket: &str, key: &str) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(b"fula:block-cache:key-to-cid:v1"); + hasher.update(bucket.as_bytes()); + hasher.update(&[0u8]); + hasher.update(key.as_bytes()); + let h = hasher.finalize(); + let mut out = [0u8; 32]; + out.copy_from_slice(h.as_bytes()); + out +} + #[cfg(test)] mod tests { use super::*; @@ -625,4 +846,232 @@ mod tests { assert!(cache.current_bytes() >= 256); } } + + // ============================================================ + // Phase 2.4 — KEY_TO_CID lookup table tests + // ============================================================ + + #[tokio::test] + async fn test_record_and_lookup_key_cid_roundtrip() { + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + + let cid = test_cid(123); + cache + .record_key_cid("photos", "vacation/dsc_001.jpg", &cid) + .expect("record"); + + let got = cache + .lookup_cid("photos", "vacation/dsc_001.jpg") + .expect("lookup") + .expect("present"); + assert_eq!(got, cid, "round-trip yields the exact CID"); + } + + #[tokio::test] + async fn test_lookup_missing_key_returns_none() { + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + let got = cache.lookup_cid("photos", "never-seen.jpg").expect("lookup"); + assert!(got.is_none(), "cold-start must return None, not error"); + } + + #[tokio::test] + async fn test_record_idempotent_on_repeat() { + // Re-recording the same (bucket, key, cid) triple must not error, + // and the lookup must continue returning the same CID. This is + // load-bearing: the offline-fallback wrapper records on every + // master-up read, so the same object will be re-recorded on each + // refetch. 
+ let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + + let cid = test_cid(7); + for _ in 0..5 { + cache.record_key_cid("docs", "tax/2024.pdf", &cid).expect("record"); + } + let got = cache.lookup_cid("docs", "tax/2024.pdf").expect("lookup").expect("hit"); + assert_eq!(got, cid); + } + + #[tokio::test] + async fn test_record_overwrites_when_cid_changes() { + // After an object is updated on master, the etag (= CID) + // changes. The next master-up read records the NEW CID under + // the same `(bucket, key)` — and the old CID entry is replaced. + // Otherwise offline reads would serve a stale block forever. + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + + let cid_v1 = test_cid(1); + let cid_v2 = test_cid(2); + + cache.record_key_cid("photos", "live.jpg", &cid_v1).expect("v1"); + cache.record_key_cid("photos", "live.jpg", &cid_v2).expect("v2"); + + let got = cache.lookup_cid("photos", "live.jpg").expect("lookup").expect("hit"); + assert_eq!(got, cid_v2, "must reflect the latest recorded CID"); + assert_ne!(got, cid_v1); + } + + #[tokio::test] + async fn test_distinct_buckets_dont_collide() { + // Same key in different buckets must map to distinct CIDs. The + // BLAKE3 domain-separated lookup-key derivation guarantees this; + // a regression here would mean two users seeing each other's data + // via the offline path. 
+ let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + + let cid_a = test_cid(10); + let cid_b = test_cid(11); + + cache.record_key_cid("alice-bucket", "shared.txt", &cid_a).expect("a"); + cache.record_key_cid("bob-bucket", "shared.txt", &cid_b).expect("b"); + + let got_a = cache + .lookup_cid("alice-bucket", "shared.txt") + .expect("lookup") + .expect("hit"); + let got_b = cache + .lookup_cid("bob-bucket", "shared.txt") + .expect("lookup") + .expect("hit"); + assert_eq!(got_a, cid_a); + assert_eq!(got_b, cid_b); + assert_ne!(got_a, got_b, "isolation between buckets is mandatory"); + } + + #[tokio::test] + async fn test_key_to_cid_survives_restart() { + // Same persistence contract as the BLOCKS table: lookups must + // survive SDK process restart. Without this, every SDK launch + // would degrade to "cold start until the cache repopulates", + // which defeats the warm-device offline guarantee. + let dir = TempDir::new().unwrap(); + let cid = test_cid(99); + + { + let cache = open_cache(&dir, 1024 * 1024); + cache + .record_key_cid("persist-bucket", "important.bin", &cid) + .expect("record"); + } + { + let cache = open_cache(&dir, 1024 * 1024); + let got = cache + .lookup_cid("persist-bucket", "important.bin") + .expect("lookup") + .expect("hit after restart"); + assert_eq!(got, cid); + } + } + + #[test] + fn test_derive_key_cid_lookup_is_deterministic() { + // Same inputs → same hash. Required for repeated record/lookup + // to land in the same redb key. + let h1 = derive_key_cid_lookup("foo", "bar"); + let h2 = derive_key_cid_lookup("foo", "bar"); + assert_eq!(h1, h2); + } + + #[test] + fn test_derive_key_cid_lookup_separator_prevents_concat_collision() { + // The 0x00 byte between bucket and key is load-bearing. + // Without it, ("foo/bar", "") and ("foo", "/bar") would collide. + // With it, they hash differently because the null byte is + // disambiguating. 
+ let h1 = derive_key_cid_lookup("foo/bar", ""); + let h2 = derive_key_cid_lookup("foo", "/bar"); + assert_ne!(h1, h2, "domain separator must prevent concat-collision"); + } + + #[test] + fn test_derive_key_cid_lookup_outputs_32_bytes() { + let h = derive_key_cid_lookup("any-bucket", "any-key"); + assert_eq!(h.len(), 32, "BLAKE3 output is exactly 32 bytes"); + } + + // ============================================================ + // Phase 3.3.5 — METADATA table tests + // ============================================================ + + #[tokio::test] + async fn test_load_users_index_state_returns_none_on_fresh_cache() { + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + let got = cache.load_users_index_state().expect("load"); + assert!( + got.is_none(), + "fresh cache must have no resolver state — full IPNS+chain resolve required on first run" + ); + } + + #[tokio::test] + async fn test_store_and_load_users_index_state_roundtrip() { + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + let cid = test_cid(0xab); + + cache + .store_users_index_state(&cid, 42, 1_700_000_000) + .expect("store"); + + let (got_cid, got_seq, got_observed) = cache + .load_users_index_state() + .expect("load") + .expect("present"); + assert_eq!(got_cid, cid); + assert_eq!(got_seq, 42); + assert_eq!(got_observed, 1_700_000_000); + } + + #[tokio::test] + async fn test_users_index_state_survives_restart() { + // Replay-defense critical: the `(cid, sequence)` floor MUST + // persist across SDK restarts so a malicious gateway can't + // serve a stale-but-valid payload to a fresh process. 
+ let dir = TempDir::new().unwrap(); + let cid = test_cid(0xee); + + { + let cache = open_cache(&dir, 1024 * 1024); + cache + .store_users_index_state(&cid, 99, 1_700_000_999) + .expect("store"); + } + { + let cache = open_cache(&dir, 1024 * 1024); + let (got_cid, got_seq, got_obs) = cache + .load_users_index_state() + .expect("load") + .expect("survived"); + assert_eq!(got_cid, cid); + assert_eq!(got_seq, 99); + assert_eq!(got_obs, 1_700_000_999); + } + } + + #[tokio::test] + async fn test_store_users_index_state_overwrites() { + // Each successful resolver run writes the latest `(cid, seq, ts)`. + // A subsequent write must overwrite the prior row, not stack. + let dir = TempDir::new().unwrap(); + let cache = open_cache(&dir, 1024 * 1024); + + let cid_v1 = test_cid(1); + cache.store_users_index_state(&cid_v1, 5, 100).expect("v1"); + + let cid_v2 = test_cid(2); + cache.store_users_index_state(&cid_v2, 10, 200).expect("v2"); + + let (got_cid, got_seq, got_obs) = cache + .load_users_index_state() + .expect("load") + .expect("hit"); + assert_eq!(got_cid, cid_v2); + assert_eq!(got_seq, 10); + assert_eq!(got_obs, 200); + } } diff --git a/crates/fula-client/src/client.rs b/crates/fula-client/src/client.rs index af49186..576b69c 100644 --- a/crates/fula-client/src/client.rs +++ b/crates/fula-client/src/client.rs @@ -10,6 +10,17 @@ use reqwest::{Client, Response, header}; use std::collections::HashMap; use std::sync::Arc; use tracing::{debug, instrument}; +// `warn` is only used by the native-only offline-fallback wrapper — +// gate the import so wasm builds don't emit `unused_imports`. +#[cfg(not(target_arch = "wasm32"))] +use tracing::warn; + +#[cfg(not(target_arch = "wasm32"))] +use crate::{ + block_cache::BlockCache, + gateway_fetch::GatewayPool, + registry_resolver::{ResolverConfig, UsersIndexResolver}, +}; /// Fula storage client #[derive(Clone)] @@ -22,6 +33,31 @@ pub struct FulaClient { /// rest. 
`None` when the feature is off — request path then runs /// exactly as before (backward-compat). health_gate: Option>, + + /// Phase 2.2 / 2.4. `Some` when `Config::block_cache_enabled = true` + /// AND the configured path opens successfully. Native-only — wasm + /// builds compile without this field. Used by the offline-fallback + /// wrapper to record `(bucket, key) → cid` and to short-circuit + /// repeated reads of the same content via the BLOCKS table. + #[cfg(not(target_arch = "wasm32"))] + block_cache: Option>, + + /// Phase 2.3 / 2.4. `Some` when `Config::gateway_fallback_enabled + /// = true` AND `block_cache_enabled = true` (the cache is a + /// prerequisite — without it the fallback has no CID to fetch). + /// Native-only. + #[cfg(not(target_arch = "wasm32"))] + gateway_pool: Option>, + + /// Phase 3.3. `Some` when `Config::users_index_resolver_enabled + /// = true` AND all four resolver fields (chain_rpc_url, + /// anchor_address, ipns_name, user_key) are populated. The + /// EncryptedClient cold-start path uses this to discover the + /// per-user `bucketsIndexCid` when KEY_TO_CID misses. + /// Native-only — cold-start is a no-op on wasm until a + /// browser-friendly resolver lands. + #[cfg(not(target_arch = "wasm32"))] + users_index_resolver: Option>, } impl FulaClient { @@ -43,12 +79,147 @@ impl FulaClient { let http = builder.build().map_err(ClientError::Http)?; let health_gate = if config.health_gate_enabled { - Some(Arc::new(HealthGate::new(config.health_gate_ttl))) + // Phase 19 — wire the optional health callback into the + // gate. With_callback fires `Online` / `OfflineFallbackActive` + // on Up↔Down transitions; without one the gate behaves + // identically to pre-Phase-19 builds (silent). 
+ let gate = match config.health_callback.as_ref() { + Some(cb) => HealthGate::with_callback(config.health_gate_ttl, Arc::clone(cb)), + None => HealthGate::new(config.health_gate_ttl), + }; + Some(Arc::new(gate)) + } else { + None + }; + + // Phase 2.2 / 2.4 — block cache + gateway pool. Native-only. + // Construction failures degrade gracefully to "no cache / + // no fallback" rather than failing SDK init outright; the + // operator's other workflows (master-up reads) keep working. + #[cfg(not(target_arch = "wasm32"))] + let block_cache = if config.block_cache_enabled { + match build_block_cache(&config) { + Ok(cache) => Some(Arc::new(cache)), + Err(e) => { + warn!( + error = %e, + "block_cache: failed to open; offline fallback disabled for this session" + ); + None + } + } + } else { + None + }; + + // GatewayPool requires block_cache as a hard prereq: without + // a cached `(bucket, key) → cid` mapping the fallback path has + // no CID to fetch. If the cache failed to open we silently + // disable gateway fallback too. + #[cfg(not(target_arch = "wasm32"))] + let gateway_pool = if config.gateway_fallback_enabled && block_cache.is_some() { + let pool = if config.gateway_fallback_urls.is_empty() { + GatewayPool::default_pool() + } else { + GatewayPool::with_gateways( + config.gateway_fallback_urls.clone(), + config.gateway_race_concurrency.max(1), + ) + }; + Some(Arc::new(pool)) + } else { + None + }; + + // Phase 3.3 — cold-start hybrid resolver. Configured iff + // ALL four required fields are populated (no separate + // `enabled` bool — field presence is the single source of + // truth, per the audit-driven simplification documented on + // Config). Fails closed: any missing field → resolver stays + // None and cold-start surfaces UsersIndexResolutionFailed + // at the call site rather than imploding SDK init. 
+ #[cfg(not(target_arch = "wasm32"))] + let users_index_resolver = if !config.users_index_chain_rpc_url.is_empty() + && !config.users_index_anchor_address.is_empty() + && !config.users_index_ipns_name.is_empty() + && config.users_index_user_key.is_some() + { + let mut resolver_cfg = ResolverConfig::new( + config.users_index_chain_rpc_url.clone(), + config.users_index_anchor_address.clone(), + config.users_index_ipns_name.clone(), + ); + // Phase 3.3 gateway overrides — empty Vec = use defaults. + // Operators (and tests) can pin custom gateways here. + if !config.users_index_ipns_gateway_urls.is_empty() { + resolver_cfg.ipns_gateways = config.users_index_ipns_gateway_urls.clone(); + } + if !config.users_index_ipfs_gateway_urls.is_empty() { + resolver_cfg.ipfs_gateways = config.users_index_ipfs_gateway_urls.clone(); + } + // Phase 3.3.5 — wire the BlockCache into the resolver + // when both are configured. The cache enables hot-start + // (replay-defense floor seeded across restarts; full + // network round-trip skipped within `soft_ttl`). When + // BlockCache is disabled, the resolver still works — + // just without the on-disk persistence layer. + let resolver_result = match block_cache.as_ref() { + Some(cache) => UsersIndexResolver::new_with_cache(resolver_cfg, Arc::clone(cache)), + None => UsersIndexResolver::new(resolver_cfg), + }; + match resolver_result { + Ok(r) => Some(Arc::new(r)), + Err(e) => { + warn!( + error = %e, + "users_index_resolver: construction failed; cold-start unavailable for this session" + ); + None + } + } } else { None }; - Ok(Self { config, http, health_gate }) + Ok(Self { + config, + http, + health_gate, + #[cfg(not(target_arch = "wasm32"))] + block_cache, + #[cfg(not(target_arch = "wasm32"))] + gateway_pool, + #[cfg(not(target_arch = "wasm32"))] + users_index_resolver, + }) + } + + /// Phase 3.3 — accessor for the cold-start hybrid resolver. 
/// Returns `Some` only when all four resolver config fields
+    /// (`chain_rpc_url`, `anchor_address`, `ipns_name`, `user_key`)
+    /// are populated — there is no separate enabled flag — AND
+    /// construction succeeded. Native-only.
+    #[cfg(not(target_arch = "wasm32"))]
+    pub fn users_index_resolver(&self) -> Option<&Arc<UsersIndexResolver>> {
+ #[cfg(not(target_arch = "wasm32"))] + pub(crate) fn fire_health_event(&self, event: crate::health_gate::MasterHealthEvent) { + if let Some(cb) = self.config.health_callback.as_ref() { + let cb = Arc::clone(cb); + let event_clone = event.clone(); + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(move || { + cb(event_clone); + })); + if result.is_err() { + tracing::warn!( + event = ?event, + "health_callback panicked (cold-start path); SDK proceeding" + ); + } + } + } + /// Access the pooled HTTP client for internal modules (e.g. multipart /// helpers) that need to issue raw requests. Exposing this keeps /// connection pooling and configured timeouts intact instead of minting @@ -344,19 +545,19 @@ impl FulaClient { ) -> Result { let path = format!("/{}/{}", bucket, key); let response = self.request("GET", &path, None, None, None).await?; - + let headers = response.headers(); let etag = headers .get("ETag") .and_then(|v| v.to_str().ok()) .map(|s| s.trim_matches('"').to_string()) .unwrap_or_default(); - + let content_type = headers .get("Content-Type") .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); - + let content_length = headers .get("Content-Length") .and_then(|v| v.to_str().ok()) @@ -384,6 +585,252 @@ impl FulaClient { }) } + /// Phase 2.4 — `get_object` with offline fallback to public IPFS + /// gateways when master is unreachable. + /// + /// Behavior matrix: + /// + /// | State | Behavior | source | freshness | + /// |----------------------------------------|-------------------------------------------------------------|------------------------------|-------------------------| + /// | flags off | Identical to `get_object_with_metadata` (backward-compat). | `Master` | `Live` | + /// | flags on, master up, master responds | Serve master bytes; populate KEY_TO_CID + BLOCKS. | `Master` | `Live` | + /// | flags on, master down, KEY_TO_CID hit | Race the gateway pool for the cached CID; verify; populate. 
| `Gateway(url)` or `LocalCache` | `Cached { observed_at }` | + /// | flags on, master down, KEY_TO_CID miss | Return `MasterUnreachable` (cold-start; Phase 3.3 territory).| n/a | n/a | + /// | wasm32 target | Always delegates to `get_object_with_metadata` (no cache / | `Master` | `Live` | + /// | | gateway race plumbing on web). | | | + /// + /// Etag rewrite: when the bytes come from the gateway race the + /// returned `OfflineGetResult.inner.etag` is set to `cid.to_string()` + /// so downstream callers (e.g., `load_forest_internal`) see the + /// same ETag-as-CID convention master uses on the fast path. + /// + /// **Known offline-path difference (Phase 2.4 v1):** when bytes + /// come from the gateway race or BLOCKS cache, the returned + /// `inner.metadata` is **empty** and `content_type` is `None`. + /// Master-up responses still surface `x-amz-meta-*` headers in + /// `metadata`. Encrypted-SDK callers never read user-metadata, so + /// this is invisible to them; app-level callers that depend on + /// user-metadata should treat the offline path as metadata-stripped. + /// + /// **Phase 19 — return type changed to `OfflineGetResult`.** The + /// extra fields `source: ReadSource` and `freshness: ReadFreshness` + /// let apps surface "you're offline; reading from cache" UI without + /// observing internal state. Existing callers extract `.inner.data` + /// / `.inner.etag` to access the bytes (one-line change). + /// + /// **Breaking change vs. Phase 2.4:** the previous `Result` + /// signature is gone. Audit (2026-05-02) established no external + /// SDK consumers — Phase 2.4 GET-path wiring (task #15) is still + /// pending — so today's blast radius is zero. Document this in + /// the next release note so the Phase 2.4 wiring lands with the + /// new signature and doesn't accidentally inherit a backward-compat + /// expectation. 
Internal callers (S3BlobBackend, encrypted + /// cold-start) are already updated; their bytes are accessed via + /// `result.inner.data`. + #[cfg(not(target_arch = "wasm32"))] + #[instrument(skip(self))] + pub async fn get_object_with_offline_fallback( + &self, + bucket: &str, + key: &str, + ) -> Result { + // Fast path — if neither flag is on, this is byte-identical + // to the existing call. The new method costs nothing in + // existing deployments. + if self.block_cache.is_none() && self.gateway_pool.is_none() { + let inner = self.get_object_with_metadata(bucket, key).await?; + return Ok(OfflineGetResult { + inner, + source: ReadSource::Master, + freshness: ReadFreshness::Live, + }); + } + + let cache = self.block_cache.clone(); + + // Master attempt. If health gate already says Down, request() + // short-circuits before touching the network. Otherwise we + // hit master normally. + match self.get_object_with_metadata(bucket, key).await { + Ok(result) => { + // Master-up success path: record the CID side-effect. + // Skip if etag is empty (defensive: every master + // response should have one, but a future endpoint + // change shouldn't break the wrapper). + if let Some(cache) = &cache { + if !result.etag.is_empty() { + if let Ok(cid) = result.etag.parse::() { + // Both writes are best-effort: a redb error + // logs and proceeds (the master read already + // succeeded, so the user gets their bytes). + if let Err(e) = cache.record_key_cid(bucket, key, &cid) { + debug!( + error = %e, + "block_cache: record_key_cid failed (best-effort; master fetch already succeeded)" + ); + } + // Cache the bytes themselves so a subsequent + // master-down read can serve them without + // any network round-trip at all. + if let Err(e) = cache.put(&cid, &result.data).await { + // BlockTooLarge is expected for huge + // objects (>cache budget); not a bug. 
+ debug!( + error = %e, + "block_cache: put failed (best-effort)" + ); + } + } + } + } + Ok(OfflineGetResult { + inner: result, + source: ReadSource::Master, + freshness: ReadFreshness::Live, + }) + } + Err(e) if is_master_unreachable_error(&e) => { + // Master-down: try the offline path. Requires the + // cache + pool to be set AND a prior master-up read + // for this `(bucket, key)` to have populated KEY_TO_CID. + self.try_offline_fallback(bucket, key, e).await + } + // Non-master-down errors (4xx, auth failures, etc.) + // propagate without any fallback attempt — they're not + // about availability. + Err(e) => Err(e), + } + } + + /// Wasm version: no offline fallback infrastructure exists on + /// browsers (block_cache + gateway_fetch are gated out). Delegate + /// to the regular method so call sites can use one name across + /// targets without additional `cfg` gates of their own. + #[cfg(target_arch = "wasm32")] + pub async fn get_object_with_offline_fallback( + &self, + bucket: &str, + key: &str, + ) -> Result { + let inner = self.get_object_with_metadata(bucket, key).await?; + Ok(OfflineGetResult { + inner, + source: ReadSource::Master, + freshness: ReadFreshness::Live, + }) + } + + /// Phase 2.4 fallback step. Looks up the cached CID for the + /// requested `(bucket, key)`; if absent, returns the original + /// `MasterUnreachable` error (cold-start case — Phase 3.3 catches + /// it). If present, races the gateway pool for that CID; on + /// verification success, populates BLOCKS and returns a synthesized + /// `OfflineGetResult` with `source = LocalCache` (BLOCKS hit) or + /// `source = Gateway(url_template)` (gateway race), and + /// `freshness = Cached { observed_at }`. On any gateway-side + /// failure, propagates the original master-down error so the + /// caller sees a stable error type regardless of which channel + /// ultimately failed. 
+ #[cfg(not(target_arch = "wasm32"))] + async fn try_offline_fallback( + &self, + bucket: &str, + key: &str, + master_error: ClientError, + ) -> Result { + let (cache, pool) = match (&self.block_cache, &self.gateway_pool) { + (Some(c), Some(p)) => (c.clone(), p.clone()), + _ => return Err(master_error), + }; + + // Step 1 — translate (bucket, key) → CID via the warm-cache + // table populated during prior master-up reads. Cold-start + // misses return MasterUnreachable so the app can show + // "offline mode unavailable for this object yet". + let cid = match cache.lookup_cid(bucket, key) { + Ok(Some(cid)) => cid, + Ok(None) => { + debug!( + bucket = %bucket, key = %key, + "offline fallback: no cached CID for this object (cold-start; needs Phase 3.3)" + ); + return Err(master_error); + } + Err(e) => { + warn!(error = %e, "offline fallback: lookup_cid failed"); + return Err(master_error); + } + }; + + // Step 2 — BLOCKS hit short-circuits the network entirely. + // Cheap: a single redb read. + if let Ok(Some(bytes)) = cache.get(&cid) { + debug!(cid = %cid, "offline fallback: BLOCKS hit"); + let observed_at = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + return Ok(OfflineGetResult { + inner: GetObjectResult { + content_length: bytes.len() as u64, + data: bytes, + etag: cid.to_string(), + content_type: None, + last_modified: None, + metadata: HashMap::new(), + }, + source: ReadSource::LocalCache, + freshness: ReadFreshness::Cached { observed_at }, + }); + } + + // Step 3 — race the gateway pool. fetch_verified handles the + // CID verification (verify_cid_against_bytes) internally; + // bytes returned here are guaranteed to content-address to + // the requested CID. The accompanying URL template records + // which gateway won the race for transparency surfacing. 
+ match pool.fetch_verified_with_source(&cid, &self.http).await { + Ok((bytes, gateway_url)) => { + debug!(cid = %cid, gateway = %gateway_url, "offline fallback: gateway race succeeded"); + // Populate BLOCKS so the next read of this object + // serves entirely locally. BlockTooLarge is the only + // expected failure (huge objects); fall through and + // still return the bytes to the caller. + if let Err(e) = cache.put(&cid, &bytes).await { + debug!(error = %e, "offline fallback: BLOCKS put failed (best-effort)"); + } + let observed_at = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + Ok(OfflineGetResult { + inner: GetObjectResult { + content_length: bytes.len() as u64, + data: bytes, + etag: cid.to_string(), + content_type: None, + last_modified: None, + metadata: HashMap::new(), + }, + source: ReadSource::Gateway(gateway_url), + freshness: ReadFreshness::Cached { observed_at }, + }) + } + Err(e) => { + warn!( + cid = %cid, + error = %e, + "offline fallback: gateway race failed" + ); + // Propagate the original master error rather than the + // gateway error — callers expect a single failure type + // for "the object is unreachable" and the gateway-race + // error is a secondary signal. + Err(master_error) + } + } + } + /// Check if an object exists #[instrument(skip(self))] pub async fn object_exists(&self, bucket: &str, key: &str) -> Result { @@ -672,6 +1119,89 @@ impl FulaClient { } } +/// Phase 2.4 — classify which error variants represent "master is +/// unreachable" for the purpose of triggering the gateway-race +/// fallback. Tightly scoped to: +/// - explicit `MasterUnreachable` from the health gate short-circuit, +/// - connection-level `Http` errors (DNS, RST, refused, timeout — +/// reqwest::Error wraps these), +/// - 5xx server errors (master is up but failing). +/// +/// 4xx (auth, not-found, precondition-failed, etc.) 
do NOT count: the +/// server responded correctly, the request was just refused. Falling +/// back to gateway race in those cases would mask real bugs. +/// +/// Native-only because the only caller (`try_offline_fallback`) is +/// gated to `cfg(not(target_arch = "wasm32"))`. Defining it here +/// without gates would yield a dead-code warning on wasm builds. +#[cfg(not(target_arch = "wasm32"))] +fn is_master_unreachable_error(e: &ClientError) -> bool { + match e { + ClientError::MasterUnreachable { .. } => true, + // reqwest::Error: cover the common transport failures. We + // can't easily distinguish "DNS down" from "connection RST" + // without inspecting the inner — for our purposes both are + // "master is unreachable". + ClientError::Http(re) => { + // `is_connect()` exists on native reqwest but not on the + // wasm32 build — guard it. On wasm the offline path is a + // no-op anyway (gated out at the call site), so the + // narrower native-only classification suffices. + // + // We DELIBERATELY do NOT include `is_request()` (audit + // follow-up): that variant covers body-build errors, + // redirect-loops, URL parsing — half are app bugs (bad + // bucket name, malformed header) that the fallback would + // mask. Limit to connect/timeout/5xx — the trio that + // genuinely means "master is unreachable right now". + #[cfg(not(target_arch = "wasm32"))] + let is_connect = re.is_connect(); + #[cfg(target_arch = "wasm32")] + let is_connect = false; + + is_connect + || re.is_timeout() + || matches!(re.status(), Some(s) if s.is_server_error()) + } + ClientError::S3Error { code, .. } => { + // 5xx surfaces as S3Error with a status-derived code. + code.starts_with("HTTP5") || code == "InternalError" || code == "ServiceUnavailable" + || code == "SlowDown" + } + _ => false, + } +} + +// ==================== Phase 2.4 helpers ==================== + +/// Resolve the on-disk path for the block cache. 
Honors
+/// `Config::block_cache_path` if set; otherwise falls back to the
+/// platform's local data directory under `fula/cache/blocks.redb`.
+/// Native-only; the function is not compiled into the wasm target
+/// because BlockCache itself isn't.
+#[cfg(not(target_arch = "wasm32"))]
+fn resolve_block_cache_path(config: &Config) -> std::path::PathBuf {
+    if let Some(p) = &config.block_cache_path {
+        return p.clone();
+    }
+    // dirs::data_local_dir() returns the platform-conventional data
+    // root: ~/.local/share on Linux, ~/Library/Application Support on
+    // macOS, %LOCALAPPDATA% on Windows. Falls back to the relative
+    // ./fula/cache/blocks.redb if dirs cannot resolve a home directory
+    // (extremely rare; common in CI containers without HOME set).
+    let base = dirs::data_local_dir().unwrap_or_else(|| std::path::PathBuf::from("."));
+    base.join("fula").join("cache").join("blocks.redb")
+}
+
+/// Open the BlockCache for `config`. Returns the typed
+/// BlockCacheError on any failure so the caller can decide whether
+/// to disable the offline path or surface it.
+#[cfg(not(target_arch = "wasm32"))]
+fn build_block_cache(config: &Config) -> std::result::Result<BlockCache, BlockCacheError> {
+    let path = resolve_block_cache_path(config);
+    BlockCache::open(path, config.block_cache_max_bytes)
+}
+
 // ==================== Response Parsers ====================
 
 fn parse_list_buckets_response(xml: &str) -> Result {
@@ -822,4 +1352,513 @@ mod tests {
         assert_eq!(result.buckets.len(), 1);
         assert_eq!(result.buckets[0].name, "bucket1");
     }
+
+    // ============================================================
+    // Phase 2.4 — offline-fallback wrapper helper-fn tests
+    // ============================================================
+    //
+    // The integration tests for the full wrapper (master + gateway
+    // wiremock combo) live in `phase_2_4_offline_tests` below. These
+    // smaller unit tests cover the classification helper that decides
+    // when to attempt the offline path, without spinning up a server.
+ + #[test] + fn test_master_unreachable_classifier_explicit_variant() { + let e = ClientError::MasterUnreachable { down_for_secs: 5 }; + assert!(is_master_unreachable_error(&e)); + } + + #[test] + fn test_master_unreachable_classifier_5xx_s3_codes() { + // 5xx surfaces as S3Error with a code derived from the body + // or the status line. All these forms must be classified as + // master-unreachable so the offline path triggers. + for code in &["HTTP500", "HTTP502", "HTTP503", "InternalError", "ServiceUnavailable", "SlowDown"] { + let e = ClientError::S3Error { + code: (*code).into(), + message: "x".into(), + request_id: None, + }; + assert!(is_master_unreachable_error(&e), "code={} should classify as master-unreachable", code); + } + } + + #[test] + fn test_master_unreachable_classifier_excludes_4xx() { + // 4xx must NOT trigger fallback — server responded, request + // was simply refused. Falling back here would mask real auth + // / not-found issues. + for code in &["NoSuchKey", "NoSuchBucket", "AccessDenied", "PreconditionFailed", "HTTP404", "HTTP403"] { + let e = ClientError::S3Error { + code: (*code).into(), + message: "x".into(), + request_id: None, + }; + assert!(!is_master_unreachable_error(&e), "code={} must NOT classify as master-unreachable", code); + } + } + + #[test] + fn test_master_unreachable_classifier_excludes_other_variants() { + // Encryption / config / NotFound / etc. are not master-down. + let e = ClientError::Config("bad".into()); + assert!(!is_master_unreachable_error(&e)); + let e = ClientError::BucketNotFound("b".into()); + assert!(!is_master_unreachable_error(&e)); + let e = ClientError::ConcurrentModification("etag mismatch".into()); + assert!(!is_master_unreachable_error(&e)); + } + + /// Audit follow-up: request-build errors (URL parsing, malformed + /// headers, body-encoding) must NOT classify as master-unreachable. 
+ /// Including `re.is_request()` would mask "I gave the SDK a bad + /// bucket name" by silently falling back to the gateway race. + /// Construct a request-build error by passing an invalid URL. + #[tokio::test] + async fn test_master_unreachable_classifier_excludes_request_build_errors() { + let http = reqwest::Client::new(); + // Building a request to a malformed URL fails at request-build + // time, before any network I/O. reqwest classifies this as + // is_builder() / is_request() — NOT is_connect() / is_timeout(). + let result = http.get("ht!tp://bad").build(); + let req_err = match result { + Err(e) => e, + Ok(_) => { + // If reqwest happened to accept this URL, try sending + // it; the send will fail with a different request error. + http.get("ht!tp://bad").send().await.unwrap_err() + } + }; + let wrapped = ClientError::Http(req_err); + assert!( + !is_master_unreachable_error(&wrapped), + "request-build / URL-parse errors must NOT classify as master-unreachable" + ); + } + + // Resolve-block-cache-path is native-only (uses dirs crate). + #[cfg(not(target_arch = "wasm32"))] + #[test] + fn test_resolve_block_cache_path_uses_explicit_when_set() { + let mut config = Config::default(); + config.block_cache_path = Some(std::path::PathBuf::from("/tmp/explicit/blocks.redb")); + let p = resolve_block_cache_path(&config); + assert_eq!(p, std::path::PathBuf::from("/tmp/explicit/blocks.redb")); + } + + #[cfg(not(target_arch = "wasm32"))] + #[test] + fn test_resolve_block_cache_path_uses_platform_default_when_unset() { + let config = Config::default(); + let p = resolve_block_cache_path(&config); + // The exact path depends on the host OS, but it must end in + // the documented "fula/cache/blocks.redb" suffix. 
+ let s = p.to_string_lossy().replace('\\', "/"); + assert!( + s.ends_with("fula/cache/blocks.redb"), + "expected platform default to end with 'fula/cache/blocks.redb', got: {}", + s + ); + } +} + +// ============================================================ +// Phase 2.4 — offline-fallback integration tests (wiremock) +// ============================================================ +// +// These tests spin up: +// 1. A wiremock master at 127.0.0.1: +// 2. A wiremock gateway at 127.0.0.1:/ipfs/{cid} +// +// They exercise the wrapper end-to-end: +// - flags off → no cache, no fallback, byte-identical to old behavior +// - master up → cache populated (KEY_TO_CID + BLOCKS) +// - master down + cache hit → gateway race serves bytes +// - master down + cache miss → MasterUnreachable surfaces +// - master 5xx → fallback triggers +// - master 4xx → fallback does NOT trigger (auth/not-found preserved) +// +// Native-only: wiremock + block_cache aren't compiled into wasm builds. + +#[cfg(not(target_arch = "wasm32"))] +#[cfg(test)] +mod phase_2_4_offline_tests { + use super::*; + use crate::block_cache::BlockCache; + use cid::Cid; + use cid::multihash::Multihash; + use sha2::Digest; + use std::sync::Arc; + use std::time::Duration; + use tempfile::TempDir; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + /// Compute the CID master would set as ETag for the given + /// payload. Master uses CIDv1 + raw codec + sha2-256 multihash + /// for direct S3-PUT objects (per `object.rs:103-105` and + /// `cid_utils::create_cid`). For tests we mirror that exactly so + /// `verify_cid_against_bytes` will pass. + fn cid_for_bytes(data: &[u8]) -> Cid { + let digest = sha2::Sha256::digest(data); + let mh = Multihash::<64>::wrap(0x12 /* sha2-256 */, &digest).unwrap(); + Cid::new_v1(0x55 /* raw */, mh) + } + + /// Helper: build a FulaClient pointed at `master_url` with + /// `gateway_url` in its fallback list. Cache lives in `cache_path`. 
+ fn build_client( + master_url: &str, + cache_path: &std::path::Path, + gateway_url_template: &str, + ) -> FulaClient { + let mut config = Config::new(master_url); + config.timeout = Duration::from_secs(2); + config.block_cache_enabled = true; + config.block_cache_path = Some(cache_path.to_path_buf()); + config.block_cache_max_bytes = 1024 * 1024; + config.gateway_fallback_enabled = true; + config.gateway_fallback_urls = vec![gateway_url_template.to_string()]; + config.gateway_race_concurrency = 1; + // Health gate off — these tests construct the master-down + // signal via 5xx responses or a stopped wiremock; gate + // semantics are exercised separately in health_gate.rs. + config.health_gate_enabled = false; + FulaClient::new(config).expect("client") + } + + #[tokio::test] + async fn test_flags_off_byte_identical_to_get_object_with_metadata() { + // Backward-compat: if neither flag is set, the wrapper must + // delegate to get_object_with_metadata with no observable + // difference (no extra cache writes, no extra network calls). + let master = MockServer::start().await; + let body = b"some bytes"; + let cid = cid_for_bytes(body); + Mock::given(method("GET")) + .and(path("/bucket/key.txt")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("ETag", format!("\"{}\"", cid)) + .set_body_bytes(body.as_slice()), + ) + .expect(1) + .mount(&master) + .await; + + let mut config = Config::new(master.uri()); + // Both flags OFF — backward-compat scenario. + config.block_cache_enabled = false; + config.gateway_fallback_enabled = false; + let client = FulaClient::new(config).expect("client"); + + let r = client + .get_object_with_offline_fallback("bucket", "key.txt") + .await + .expect("get"); + // Phase 19: result is OfflineGetResult; bytes/etag on .inner. 
+ assert_eq!(r.inner.data.as_ref(), body); + assert_eq!(r.inner.etag, cid.to_string()); + assert_eq!(r.source, ReadSource::Master); + assert_eq!(r.freshness, ReadFreshness::Live); + } + + #[tokio::test] + async fn test_master_up_populates_key_to_cid_and_blocks() { + let master = MockServer::start().await; + let body = b"payload bytes for cache"; + let cid = cid_for_bytes(body); + Mock::given(method("GET")) + .and(path("/bucket/file.bin")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("ETag", format!("\"{}\"", cid)) + .set_body_bytes(body.as_slice()), + ) + .mount(&master) + .await; + + let dir = TempDir::new().unwrap(); + let cache_path = dir.path().join("cache.redb"); + let client = build_client( + &master.uri(), + &cache_path, + "http://unused.invalid/ipfs/{cid}", + ); + + let r = client + .get_object_with_offline_fallback("bucket", "file.bin") + .await + .expect("get"); + assert_eq!(r.inner.data.as_ref(), body); + assert_eq!(r.source, ReadSource::Master); + assert_eq!(r.freshness, ReadFreshness::Live); + + // Drop the client (and its BlockCache Arc) so we can re-open + // the on-disk file for inspection. redb holds an exclusive + // file lock; AlreadyOpen otherwise. + drop(client); + + // Cache must have been populated as a side-effect. + let cache = BlockCache::open(&cache_path, 1024 * 1024).expect("re-open cache"); + let looked_up = cache.lookup_cid("bucket", "file.bin").expect("lookup").expect("hit"); + assert_eq!(looked_up, cid, "KEY_TO_CID must record the master's etag"); + let bytes = cache.get(&cid).expect("get").expect("BLOCKS hit"); + assert_eq!(bytes.as_ref(), body, "BLOCKS table must hold the payload"); + } + + #[tokio::test] + async fn test_master_down_with_cached_cid_falls_back_to_gateway() { + // Phase: warm-up against master, then simulate master-down + // and verify the gateway race fills in. 
+ let master = MockServer::start().await; + let gateway = MockServer::start().await; + let body = b"served by gateway after master goes dark"; + let cid = cid_for_bytes(body); + + // Master serves the file ONCE, populating the cache. + Mock::given(method("GET")) + .and(path("/bucket/file.txt")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("ETag", format!("\"{}\"", cid)) + .set_body_bytes(body.as_slice()), + ) + .up_to_n_times(1) + .mount(&master) + .await; + // Subsequent master requests fail with 503. + Mock::given(method("GET")) + .and(path("/bucket/file.txt")) + .respond_with(ResponseTemplate::new(503)) + .mount(&master) + .await; + + // Gateway always serves the same bytes. + let gateway_path = format!("/ipfs/{}", cid); + Mock::given(method("GET")) + .and(path(gateway_path.clone())) + .respond_with(ResponseTemplate::new(200).set_body_bytes(body.as_slice())) + .mount(&gateway) + .await; + + let dir = TempDir::new().unwrap(); + let cache_path = dir.path().join("cache.redb"); + let gateway_template = format!("{}/ipfs/{{cid}}", gateway.uri()); + let client = build_client(&master.uri(), &cache_path, &gateway_template); + + // Read 1: master up — populates cache. + let r1 = client + .get_object_with_offline_fallback("bucket", "file.txt") + .await + .expect("master read"); + assert_eq!(r1.inner.data.as_ref(), body); + assert_eq!(r1.source, ReadSource::Master); + + // Drop the in-process BLOCKS entry to force the gateway race + // (otherwise step 2 would short-circuit on a BLOCKS hit and + // we wouldn't be testing the fallback path). + // We do this by opening a fresh client without the populated + // cache — but actually keeping the same on-disk cache is what + // we want; just clear BLOCKS while keeping KEY_TO_CID. + // Simpler: we test against a SECOND client that re-uses the + // same cache file; since BLOCKS is populated by step 1, we'd + // expect a BLOCKS hit on read 2. 
So we'll first open a client + // with a different cache path (no warm-up), then manually + // call record_key_cid → that simulates "warm KEY_TO_CID, cold + // BLOCKS" which is the realistic scenario after a long enough + // outage. + let dir2 = TempDir::new().unwrap(); + let cache_path2 = dir2.path().join("cache2.redb"); + let cache2 = BlockCache::open(&cache_path2, 1024 * 1024).expect("open"); + cache2.record_key_cid("bucket", "file.txt", &cid).expect("seed mapping"); + drop(cache2); + + let client2 = build_client(&master.uri(), &cache_path2, &gateway_template); + let r2 = client2 + .get_object_with_offline_fallback("bucket", "file.txt") + .await + .expect("offline path read"); + assert_eq!(r2.inner.data.as_ref(), body, "gateway must have served the bytes"); + assert_eq!(r2.inner.etag, cid.to_string(), "synthesized etag = cid"); + // Phase 19: gateway-served bytes get a Gateway(url) source + + // Cached freshness. The URL template should match the + // configured gateway template (NOT the per-CID-substituted URL). + match &r2.source { + ReadSource::Gateway(url) => { + assert_eq!(url, &gateway_template, "source URL = configured gateway template"); + } + other => panic!("expected ReadSource::Gateway, got {:?}", other), + } + match r2.freshness { + ReadFreshness::Cached { .. } => { /* ok */ } + other => panic!("expected ReadFreshness::Cached, got {:?}", other), + } + } + + #[tokio::test] + async fn test_master_down_no_cached_cid_returns_master_unreachable() { + // Cold-start case: SDK has never read this object before, so + // KEY_TO_CID has no entry. Wrapper must surface the original + // master-down error rather than swallow it — Phase 3.3 will + // pick it up later. 
+ let master = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/bucket/never-read.txt")) + .respond_with(ResponseTemplate::new(503)) + .mount(&master) + .await; + + let dir = TempDir::new().unwrap(); + let cache_path = dir.path().join("cache.redb"); + let client = build_client( + &master.uri(), + &cache_path, + "http://unused.invalid/ipfs/{cid}", + ); + + let result = client + .get_object_with_offline_fallback("bucket", "never-read.txt") + .await; + assert!(result.is_err(), "no cached CID → must propagate master-down"); + let err = result.unwrap_err(); + // Either the explicit MasterUnreachable variant (if health + // gate were involved) or an S3Error with HTTP503 code is + // acceptable here. The point is: NOT Ok, and NOT silently + // swallowed. + assert!( + is_master_unreachable_error(&err), + "error must classify as master-unreachable: {:?}", + err + ); + } + + #[tokio::test] + async fn test_master_4xx_does_not_trigger_fallback() { + // 4xx (auth, not-found) surfaces as S3Error and MUST propagate + // unchanged. The fallback path would mask real bugs (e.g., + // a typo in the bucket name yielding NoSuchBucket). 
+        let master = MockServer::start().await;
+        Mock::given(method("GET"))
+            .and(path("/bucket/missing.txt"))
+            .respond_with(
+                ResponseTemplate::new(404)
+                    .set_body_string(r#"<Error><Code>NoSuchKey</Code><Message>not here</Message></Error>"#),
+            )
+            .mount(&master)
+            .await;
+
+        let dir = TempDir::new().unwrap();
+        let cache_path = dir.path().join("cache.redb");
+        let client = build_client(
+            &master.uri(),
+            &cache_path,
+            "http://unused.invalid/ipfs/{cid}",
+        );
+
+        let err = client
+            .get_object_with_offline_fallback("bucket", "missing.txt")
+            .await
+            .expect_err("404 propagates");
+        assert!(err.is_not_found(), "expected NotFound, got: {:?}", err);
+        assert!(
+            !is_master_unreachable_error(&err),
+            "4xx must NOT classify as master-unreachable"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_master_down_gateway_failure_propagates_original_error() {
+        // If the offline path tries to fetch via the gateway race AND
+        // the race exhausts (all gateways down), the wrapper must
+        // surface the ORIGINAL master-down error so callers see a
+        // single failure type. The gateway-side error is already
+        // logged at warn level (operators can debug from logs).
+        let master = MockServer::start().await;
+        Mock::given(method("GET"))
+            .and(path("/bucket/x.txt"))
+            .respond_with(ResponseTemplate::new(503))
+            .mount(&master)
+            .await;
+
+        // Gateway always 500s — race will exhaust.
+ let gateway = MockServer::start().await; + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(500)) + .mount(&gateway) + .await; + + let dir = TempDir::new().unwrap(); + let cache_path = dir.path().join("cache.redb"); + let body = b"would have been served"; + let cid = cid_for_bytes(body); + let cache = BlockCache::open(&cache_path, 1024 * 1024).expect("open"); + cache.record_key_cid("bucket", "x.txt", &cid).expect("seed"); + drop(cache); + + let gateway_template = format!("{}/ipfs/{{cid}}", gateway.uri()); + let client = build_client(&master.uri(), &cache_path, &gateway_template); + + let err = client + .get_object_with_offline_fallback("bucket", "x.txt") + .await + .expect_err("both channels failed"); + assert!( + is_master_unreachable_error(&err), + "must surface master-unreachable, not a gateway-specific error" + ); + } + + // ============================================================ + // Phase 19 — transparency surfaces on the offline path + // ============================================================ + + #[tokio::test] + async fn test_phase19_blocks_hit_carries_local_cache_source() { + // Advisor-mandated test #3: when BLOCKS already holds the + // bytes (e.g., from a prior master-up read), the offline path + // serves them from local cache and the result carries + // `ReadSource::LocalCache` + `ReadFreshness::Cached`. No + // network round-trip happens at all. + let master = MockServer::start().await; + // Master is unreachable (every request 503s). + Mock::given(method("GET")) + .and(path("/bucket/cached.txt")) + .respond_with(ResponseTemplate::new(503)) + .mount(&master) + .await; + + let body = b"already cached locally"; + let cid = cid_for_bytes(body); + + // Pre-populate BOTH KEY_TO_CID and BLOCKS so the offline + // fallback's BLOCKS hit short-circuits before any gateway + // race attempt. 
+ let dir = TempDir::new().unwrap(); + let cache_path = dir.path().join("cache.redb"); + let cache = BlockCache::open(&cache_path, 1024 * 1024).expect("open cache"); + cache.record_key_cid("bucket", "cached.txt", &cid).expect("seed key→cid"); + cache.put(&cid, body).await.expect("seed BLOCKS"); + drop(cache); + + // Use a gateway URL that would FAIL if the gateway race were + // even attempted — proves the BLOCKS hit short-circuited. + let gateway_template = "http://gateway-must-not-be-called.invalid/ipfs/{cid}"; + let client = build_client(&master.uri(), &cache_path, gateway_template); + + let r = client + .get_object_with_offline_fallback("bucket", "cached.txt") + .await + .expect("BLOCKS hit serves bytes"); + + assert_eq!(r.inner.data.as_ref(), body); + assert_eq!(r.inner.etag, cid.to_string(), "synthesized etag = cid"); + assert_eq!(r.source, ReadSource::LocalCache, "BLOCKS hit → LocalCache"); + match r.freshness { + ReadFreshness::Cached { observed_at } => { + assert!(observed_at > 0, "Cached.observed_at must be set"); + } + other => panic!("expected ReadFreshness::Cached, got {:?}", other), + } + } } diff --git a/crates/fula-client/src/config.rs b/crates/fula-client/src/config.rs index b9187cd..cbbb5b1 100644 --- a/crates/fula-client/src/config.rs +++ b/crates/fula-client/src/config.rs @@ -1,9 +1,17 @@ //! Client configuration +use std::path::PathBuf; use std::time::Duration; +use crate::health_gate::HealthCallback; + /// Client configuration -#[derive(Clone, Debug)] +/// +/// Note: `Config` is `Clone` but the `health_callback` shares the +/// underlying `Arc` across clones — there's exactly one +/// callback closure per logical SDK construction, fired by every +/// `FulaClient` clone derived from this config. 
+#[derive(Clone)] pub struct Config { /// Gateway endpoint URL pub endpoint: String, @@ -54,6 +62,150 @@ pub struct Config { /// this duration elapses, the next request is allowed through as a /// probe (without resetting state — only an observed success resets). pub health_gate_ttl: Duration, + + /// Phase 2.2 of master-independent reads: enable the on-disk LRU + /// block cache. Off by default. Native-only — `wasm32` ignores + /// this flag (the redb-backed cache cannot open in browsers). + /// When enabled, master-up reads observe and persist the + /// `(bucket, key) → cid` mapping the offline path needs. + pub block_cache_enabled: bool, + + /// Filesystem path for the block-cache redb database. `None` means + /// "use the platform default" (resolved at SDK init via the + /// `dirs` crate's `data_local_dir()`). Operators can override + /// this for tests or non-standard deployments. Native-only. + pub block_cache_path: Option, + + /// Maximum on-disk bytes for the block cache. Defaults to 256 MiB + /// per plan §2.2. The cache evicts to 80 % of this watermark when + /// `put` would push it past `max_bytes`. Native-only. + pub block_cache_max_bytes: u64, + + /// Phase 2.4 of master-independent reads: enable falling back to + /// public IPFS gateways when master is unreachable AND the SDK has + /// already cached the requested object's CID via Phase 2.2's + /// `(bucket, key) → cid` table. Off by default; flip on AFTER + /// Phase 2.2 has had time to populate the cache during master-up + /// reads. Native-only — `wasm32` returns `MasterUnreachable` + /// instead of falling back (no gateway-race plumbing in the + /// browser target). + pub gateway_fallback_enabled: bool, + + /// Custom gateway URL templates. Each must contain the literal + /// `{cid}` token, which the SDK substitutes per fetch. Empty = + /// use the SDK-shipped default list of six gateways + /// (`gateway_fetch::default_gateway_urls()`). Native-only. 
+ pub gateway_fallback_urls: Vec, + + /// Number of gateways the SDK races in parallel for any single + /// CID. Default 3 per plan §2.3 (cancels in-flight losers via + /// `Drop` of the spawned futures). Capped at the gateway-pool + /// length. Native-only. + pub gateway_race_concurrency: usize, + + // ============================================================ + // Phase 3.3 — cold-start hybrid resolver + // ============================================================ + // + // The resolver is "configured" iff ALL of: + // - `users_index_chain_rpc_url` is non-empty + // - `users_index_anchor_address` is non-empty + // - `users_index_ipns_name` is non-empty + // - `users_index_user_key` is `Some` + // + // are populated. Field presence is the single source of truth — + // there is no separate `enabled` bool. To disable cold-start an + // operator clears any one of the four fields; the SDK degrades + // to "warm-cache only" automatically. This eliminates the + // surprise of "I flipped the master switch but it's still off + // because I forgot field N" — an audit-driven simplification. + + /// JSON-RPC URL for the chain anchor (Base or SKALE). One of + /// the four required fields for the cold-start resolver. + pub users_index_chain_rpc_url: String, + + /// `FulaUsersIndexAnchor.sol` proxy address (20 bytes hex, + /// optionally `0x`-prefixed). Required when the resolver is + /// enabled. + pub users_index_anchor_address: String, + + /// IPNS NAME (libp2p public-key hash, e.g. `k51qzi5...`) under + /// which the master publishes the users-index. Required when + /// the resolver is enabled. + pub users_index_ipns_name: String, + + /// 32-hex-char `userKey` (= `BLAKE3("fula:user_id:" || sha256(lower(email)))[..16]`). + /// Computed once at sign-in via `registry_resolver::derive_user_key_from_email` + /// and passed in here; the SDK does not store the raw email. Required when + /// the resolver is enabled. 
+ pub users_index_user_key: Option, + + /// IPNS-aware gateway URL templates the resolver races against + /// (each must contain `{name}`). Empty Vec = use the SDK-shipped + /// defaults (Cloudflare, dweb.link, ipfs.io, 4everland, Pinata — + /// `trustless-gateway.link` is excluded since it serves only + /// `/ipfs/`). Operators can override e.g. for staging tests + /// against wiremock or to add a private IPNS-aware gateway. + pub users_index_ipns_gateway_urls: Vec, + + /// `/ipfs/{cid}` gateway URL templates the resolver uses for + /// fetching the chain-anchored CID's bytes AND the cold-start + /// path uses for fetching the per-user `bucketsIndex` and forest + /// manifest CBORs. Empty Vec = use the SDK-shipped six-gateway + /// default. Independent of `gateway_fallback_urls` (which serves + /// the warm-device offline path) so cold-start works without + /// Phase 2.2/2.4 enabled. + pub users_index_ipfs_gateway_urls: Vec, + + /// Phase 19 — optional health-status callback. When set, the SDK + /// invokes this closure on every Up↔Down transition of the + /// master health gate (`MasterHealthEvent::Online` / + /// `OfflineFallbackActive`) plus on cold-start failure + /// (`SeverelyDegraded`). Apps wire this to surface offline UI + /// affordances. Default `None` = silent (gate works, just no + /// transparency callback). Native-only — `Arc` doesn't + /// cross FRB / wasm-bindgen cleanly, so wasm/Flutter surface + /// these via the typed error variants instead. + pub health_callback: Option, +} + +// `Config` derives `Clone` but not `Debug` because `HealthCallback` +// is `Arc` which has no `Debug`. Hand-roll a `Debug` impl +// that omits the callback (printing "Some()" or "None"), +// preserving the Phase 1.x behavior where Config could be logged. 
+impl std::fmt::Debug for Config {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Config")
+            .field("endpoint", &self.endpoint)
+            .field("access_token", &self.access_token.as_deref().map(|_| "<redacted>"))
+            .field("timeout", &self.timeout)
+            .field("encryption_enabled", &self.encryption_enabled)
+            .field("user_agent", &self.user_agent)
+            .field("max_retries", &self.max_retries)
+            .field("multipart_threshold", &self.multipart_threshold)
+            .field("multipart_chunk_size", &self.multipart_chunk_size)
+            .field("per_chunk_download_timeout", &self.per_chunk_download_timeout)
+            .field("buffered_download_max_bytes", &self.buffered_download_max_bytes)
+            .field("health_gate_enabled", &self.health_gate_enabled)
+            .field("health_gate_ttl", &self.health_gate_ttl)
+            .field("block_cache_enabled", &self.block_cache_enabled)
+            .field("block_cache_path", &self.block_cache_path)
+            .field("block_cache_max_bytes", &self.block_cache_max_bytes)
+            .field("gateway_fallback_enabled", &self.gateway_fallback_enabled)
+            .field("gateway_fallback_urls", &self.gateway_fallback_urls)
+            .field("gateway_race_concurrency", &self.gateway_race_concurrency)
+            .field("users_index_chain_rpc_url", &self.users_index_chain_rpc_url)
+            .field("users_index_anchor_address", &self.users_index_anchor_address)
+            .field("users_index_ipns_name", &self.users_index_ipns_name)
+            .field("users_index_user_key", &self.users_index_user_key)
+            .field("users_index_ipns_gateway_urls", &self.users_index_ipns_gateway_urls)
+            .field("users_index_ipfs_gateway_urls", &self.users_index_ipfs_gateway_urls)
+            .field(
+                "health_callback",
+                &self.health_callback.as_ref().map(|_| "<callback>"),
+            )
+            .finish()
+    }
+}
 
 impl Default for Config {
@@ -71,6 +223,26 @@ impl Default for Config {
         buffered_download_max_bytes: 256 * 1024 * 1024, // 256 MB
         health_gate_enabled: false, // backward-compat: off by default
         health_gate_ttl: Duration::from_secs(30),
+ // SDK consumers must opt in explicitly; existing apps see + // byte-identical behavior to pre-Phase-2 builds. + block_cache_enabled: false, + block_cache_path: None, + block_cache_max_bytes: 256 * 1024 * 1024, // 256 MiB + gateway_fallback_enabled: false, + gateway_fallback_urls: Vec::new(), + gateway_race_concurrency: 3, + // Phase 3.3 — resolver disabled by default (every required + // field is empty/None; field-presence is the single + // source of truth — see config-block doc above). + users_index_chain_rpc_url: String::new(), + users_index_anchor_address: String::new(), + users_index_ipns_name: String::new(), + users_index_user_key: None, + users_index_ipns_gateway_urls: Vec::new(), + users_index_ipfs_gateway_urls: Vec::new(), + // Phase 19 — no callback by default (silent gate). + health_callback: None, } } } @@ -102,6 +274,15 @@ impl Config { self } + /// Phase 19 — set the health-status callback. The closure is shared + /// across `Config` clones via `Arc`; constructing once and + /// cloning the config gives every derived `FulaClient` the same + /// callback wiring. + pub fn with_health_callback(mut self, callback: HealthCallback) -> Self { + self.health_callback = Some(callback); + self + } + /// Build the base URL for API requests pub fn base_url(&self) -> &str { &self.endpoint diff --git a/crates/fula-client/src/encryption.rs b/crates/fula-client/src/encryption.rs index dc79cfb..2968ddc 100644 --- a/crates/fula-client/src/encryption.rs +++ b/crates/fula-client/src/encryption.rs @@ -342,12 +342,28 @@ impl BlobBackend for S3BlobBackend { /// 429/500/502/503/504, S3 `SlowDown`/`InternalError`/`ServiceUnavailable`) /// with a fixed 300 ms + 0-100 ms jitter delay, up to 4 attempts total. /// Non-transient errors (auth failure, NotFound, etc.) short-circuit. 
+ /// + /// Phase 2.4: when the SDK has Phase 2.2/2.3 enabled, this dispatches + /// through `get_object_with_offline_fallback` so a master-down read + /// can transparently fall through to the public-gateway race using + /// the cached `(bucket, key) → cid` mapping. When the flags are off + /// behavior is byte-identical to pre-Phase-2.4 (single inner call, + /// same retry policy). async fn get(&self, path: &str) -> fula_crypto::Result> { let mut attempt: u32 = 0; loop { attempt += 1; - match self.inner.get_object(&self.bucket, path).await { - Ok(bytes) => return Ok(bytes.to_vec()), + match self + .inner + .get_object_with_offline_fallback(&self.bucket, path) + .await + { + // Phase 19: get_object_with_offline_fallback now returns + // OfflineGetResult; the bytes live on `.inner.data`. The + // `source` / `freshness` fields are dropped here — the + // crypto blob backend has no plumbing to surface them + // and isn't a transparency consumer. + Ok(result) => return Ok(result.inner.data.to_vec()), Err(e) if attempt < BLOB_BACKEND_MAX_ATTEMPTS && crate::multipart::is_transient(&e) => @@ -409,12 +425,16 @@ impl BlobBackend for S3BlobBackend { #[async_trait::async_trait(?Send)] impl BlobBackend for S3BlobBackend { async fn get(&self, path: &str) -> fula_crypto::Result> { - let bytes = self + // wasm32 has no offline fallback infrastructure (block_cache + + // gateway_fetch are gated out). The wrapper is a thin delegate + // here so the call site stays identical across targets. + let result = self .inner - .get_object(&self.bucket, path) + .get_object_with_offline_fallback(&self.bucket, path) .await .map_err(client_err_to_crypto)?; - Ok(bytes.to_vec()) + // Phase 19: result is an OfflineGetResult; bytes are on .inner.data. 
+ Ok(result.inner.data.to_vec()) } async fn put(&self, path: &str, bytes: Vec) -> fula_crypto::Result<()> { @@ -2230,8 +2250,23 @@ impl EncryptedClient { let forest_dek = self.encryption.key_manager.derive_path_key(&format!("forest:{}", bucket)); let index_key = derive_index_key(&forest_dek, bucket); - // Try to load from storage - match self.inner.get_object_with_metadata(bucket, &index_key).await { + // Try to load from storage. Phase 2.4: route through the + // offline-fallback wrapper so a master-down read can transparently + // fall through to the gateway race using the cached + // `(bucket, index_key) → cid` mapping. Phase 3.3 layers cold-start + // escalation on top: when the offline-fallback returns + // `MasterUnreachable` (master down AND KEY_TO_CID miss for a + // fresh device that's never read this manifest before) AND the + // resolver is configured, escalate to the IPNS+chain hybrid + // resolver to fetch the manifest CID and its bytes via the + // public network. Wrapper synthesizes `etag = cid.to_string()` + // on the gateway-fetched / cold-start paths so the existing + // forest-format detector + sequence-replay guard handle the + // result identically (master also uses cid.to_string() as ETag). + match self + .fetch_manifest_with_cold_start_escalation(bucket, &index_key) + .await + { Ok(result) => { let observed_etag = if result.etag.is_empty() { None } else { Some(result.etag.clone()) }; // Capture cache generation before dispatch so we can detect cross-format @@ -2922,6 +2957,276 @@ impl EncryptedClient { hex::encode(&hash.as_bytes()[..16]) } + /// Phase 3.3 escalation seam — fetch a manifest via the + /// offline-fallback wrapper, escalating to cold-start on + /// `MasterUnreachable` when the resolver is configured. 
+ /// + /// Behavior: + /// + /// | State | Result | + /// |------------------------------------------------------------------------|---------------------------------------------------| + /// | Master up | normal path through `get_object_with_offline_fallback` | + /// | Master down + KEY_TO_CID hit (warm device) | gateway race serves bytes (Phase 2.4) | + /// | Master down + KEY_TO_CID miss + resolver enabled (cold device) | escalates to `cold_start_resolve_manifest`; populates KEY_TO_CID for next warm-cache read | + /// | Master down + KEY_TO_CID miss + resolver NOT enabled | propagates `MasterUnreachable` | + /// + /// On the cold-start path the synthesized result carries + /// `etag = manifest_cid.to_string()` so the existing forest- + /// format detector + sequence-replay guard handle the bytes + /// identically to a master-served fetch (master also uses + /// `cid.to_string()` as the ETag — see `fula-cli/src/handlers/object.rs:103-105`). + /// + /// Native-only: the cold-start resolver is gated to + /// `cfg(not(target_arch = "wasm32"))`. On wasm this method + /// degrades to the underlying `get_object_with_offline_fallback` + /// (which itself degrades to `get_object_with_metadata`). + #[cfg(not(target_arch = "wasm32"))] + async fn fetch_manifest_with_cold_start_escalation( + &self, + bucket: &str, + index_key: &str, + ) -> Result { + match self + .inner + .get_object_with_offline_fallback(bucket, index_key) + .await + { + // Happy path: master up OR warm-cache hit. Phase 19 wraps + // the result in OfflineGetResult; this internal cold-start + // path doesn't surface source/freshness to callers, so + // unwrap the inner GetObjectResult and propagate. + Ok(r) => Ok(r.inner), + + // Master-down + cache miss → try cold-start if resolver is + // configured. Identifying which "MasterUnreachable" case + // this is doesn't matter — both are "we don't know the + // CID locally, fetch from the public network". 
+ Err(e) if matches!(e, ClientError::MasterUnreachable { .. }) => { + // Resolver-enabled? If not, propagate the original. + if self.inner.users_index_resolver().is_none() { + return Err(e); + } + // Run the cold-start chain. + let (manifest_cid, manifest_bytes) = + self.cold_start_resolve_manifest(bucket).await?; + + // Best-effort: populate KEY_TO_CID so the next read + // of this manifest (which IS predictable — the + // index_key is deterministic from forest_dek) lands + // in the warm-device fast path. Failure is fine; we + // already have the bytes for THIS read. + if let Some(cache) = self.inner.block_cache() { + if let Err(e) = cache.record_key_cid(bucket, index_key, &manifest_cid) { + tracing::debug!( + error = %e, + "cold-start: KEY_TO_CID populate failed (best-effort)" + ); + } + // Also seed the BLOCKS cache with the manifest + // bytes — saves the gateway race on the next read + // of this same manifest. + if let Err(e) = cache.put(&manifest_cid, &manifest_bytes).await { + tracing::debug!( + error = %e, + "cold-start: BLOCKS put failed (best-effort)" + ); + } + } + + Ok(GetObjectResult { + content_length: manifest_bytes.len() as u64, + data: manifest_bytes, + etag: manifest_cid.to_string(), + content_type: None, + last_modified: None, + metadata: std::collections::HashMap::new(), + }) + } + + // Any other error (Http, S3 4xx, encryption, etc.) — + // not a master-down condition. Propagate unchanged. + Err(e) => Err(e), + } + } + + /// Wasm fallback: cold-start is native-only, so on wasm we just + /// delegate to the existing wrapper. The native and wasm signatures + /// are kept identical so call sites don't need cfg gates of their + /// own. + #[cfg(target_arch = "wasm32")] + async fn fetch_manifest_with_cold_start_escalation( + &self, + bucket: &str, + index_key: &str, + ) -> Result { + // Phase 19: extract `.inner` since get_object_with_offline_fallback + // now returns OfflineGetResult on every target. 
+ self.inner + .get_object_with_offline_fallback(bucket, index_key) + .await + .map(|r| r.inner) + } + + /// Phase 3.3 — cold-start resolution of a bucket's forest manifest + /// via the hybrid IPNS+chain resolver. + /// + /// Invoked from the offline-fallback path (see + /// `load_forest_internal`) when the local `KEY_TO_CID` cache + /// has no entry for the manifest's storage key AND the resolver + /// is configured. Walks the published chain: + /// + /// 1. Resolver returns the global `users` map (IPNS or chain). + /// 2. Look up the configured `userKey` → per-user + /// `bucketsIndexCid`. + /// 3. Fetch the bucketsIndex CBOR via gateway race + verify. + /// 4. Compute `bucketLookupH = BLAKE3(MetadataKey || bucket)`; + /// fall back to the legacy plaintext-name entry if the + /// blinded key is absent (Phase 1.2 transition path). + /// 5. Fetch the manifest's CBOR-pinned-bytes via gateway race + /// + verify. + /// + /// Returns `(manifest_cid, manifest_bytes)` so the caller writes + /// the bytes into the existing forest-format-detect / decrypt + /// pipeline without a second network round-trip — saves 5–30 s + /// on the first cold-start read. Caller is also responsible for + /// writing `(bucket, index_key) → manifest_cid` into KEY_TO_CID + /// so subsequent warm-device reads short-circuit. + /// + /// **Bounded semantics.** Phase 3.3 makes the *manifest* CID + /// reachable on a fresh device + master-down. It does **not** + /// fix chunk-level fetches in true cold-start (the chunk's + /// CID isn't derivable from its storage key without a master + /// ping). The user can read manifests, list directories, and + /// re-fetch any object whose chunks the warm-cache previously + /// observed; never-read-before objects still require master to + /// come back briefly. Phase 19+ may close that gap (e.g., by + /// embedding chunk CIDs in the forest manifest). 
+ #[cfg(not(target_arch = "wasm32"))] + pub async fn cold_start_resolve_manifest( + &self, + bucket: &str, + ) -> Result<(cid::Cid, bytes::Bytes)> { + // 1. Resolver must be configured + user_key set. Both are + // deferred to construction time, so absence here means + // the operator has the resolver enabled but missed one of + // the four required Config fields. + let resolver = self + .inner + .users_index_resolver() + .ok_or_else(|| ClientError::UsersIndexResolutionFailed { + reason: "cold-start resolver not configured (Config requires all four fields: \ + users_index_chain_rpc_url, users_index_anchor_address, \ + users_index_ipns_name, users_index_user_key)".into(), + })? + .clone(); + let user_key = self + .inner + .config() + .users_index_user_key + .clone() + .ok_or_else(|| ClientError::UsersIndexResolutionFailed { + reason: "users_index_user_key is not set; compute it via derive_user_key_from_email at sign-in".into(), + })?; + + // 2. Resolve the global users-index. Internal replay defense + // in the resolver bumps the seen-sequence floor. + // + // Phase 19: when both IPNS and chain paths fail, the + // resolver returns `UsersIndexResolutionFailed`. Fire + // `SeverelyDegraded` (master + cold-start network both + // unreachable) before propagating so apps can disable + // "open new bucket" / "first-read" UI affordances. This + // is the ONLY emission point for `SeverelyDegraded` — + // the health gate alone can't authoritatively detect + // "both down" without trying. + let resolved = match resolver.resolve().await { + Ok(r) => r, + Err(e) => { + if matches!(e, ClientError::UsersIndexResolutionFailed { .. }) { + self.inner.fire_health_event( + crate::health_gate::MasterHealthEvent::SeverelyDegraded { + reason: format!("cold-start resolver exhausted: {}", e), + }, + ); + } + return Err(e); + } + }; + + // 3. Look up our user_key in the global map. 
+ let buckets_index_cid_str = resolved + .payload + .users + .get(&user_key) + .cloned() + .ok_or_else(|| ClientError::UsersIndexResolutionFailed { + reason: format!( + "userKey {} not present in published global users-index (size={}); user has not written yet", + user_key, + resolved.payload.users.len(), + ), + })?; + let buckets_index_cid = buckets_index_cid_str.parse::().map_err(|e| { + ClientError::UsersIndexResolutionFailed { + reason: format!("invalid bucketsIndex CID '{}': {}", buckets_index_cid_str, e), + } + })?; + + // 4. Fetch + verify + parse bucketsIndex CBOR. + let gateways = resolver.ipfs_gateways(); + let bi_bytes = crate::registry_resolver::fetch_cid_via_gateways( + &buckets_index_cid, + &gateways, + resolver.http_client(), + resolver.per_request_timeout(), + ) + .await?; + let buckets_index = crate::registry_resolver::decode_user_buckets_index(&bi_bytes)?; + + // 5. Resolve the requested bucket. Try the blinded key + // first (Phase 1.2 migrated state); fall back to the + // plaintext bucket name for legacy entries (the user + // hasn't yet uploaded with a Phase-1.2-aware client + // since the field landed). The legacy fallback only + // accepts entries explicitly marked `legacy = true`, + // closing the loophole where a malicious gateway could + // plant a stronger-looking plaintext-name entry next to + // a real blinded one. 
+ let blinded = self.compute_bucket_lookup_h_hex(bucket); + let entry = if let Some(e) = buckets_index.buckets.get(&blinded) { + e.clone() + } else if let Some(e) = buckets_index.buckets.get(bucket) { + if !e.legacy { + return Err(ClientError::UsersIndexResolutionFailed { + reason: format!( + "bucket {:?} present at plaintext key but legacy=false; refusing as ambiguous", + bucket + ), + }); + } + e.clone() + } else { + return Err(ClientError::BucketNotFound(bucket.to_string())); + }; + + let manifest_cid = entry.manifest.parse::().map_err(|e| { + ClientError::UsersIndexResolutionFailed { + reason: format!("invalid manifest CID '{}' for bucket {}: {}", entry.manifest, bucket, e), + } + })?; + + // 6. Fetch + verify manifest bytes. + let manifest_bytes = crate::registry_resolver::fetch_cid_via_gateways( + &manifest_cid, + &gateways, + resolver.http_client(), + resolver.per_request_timeout(), + ) + .await?; + + Ok((manifest_cid, manifest_bytes)) + } + /// Save the private forest index for a bucket (monolithic v4 format with AAD+sequence) pub async fn save_forest(&self, bucket: &str, forest: &PrivateForest) -> Result<()> { let forest_dek = self.encryption.key_manager.derive_path_key(&format!("forest:{}", bucket)); @@ -7976,4 +8281,566 @@ mod tests { "writer must not be touched when the size guard rejects the manifest" ); } + + // ============================================================ + // Phase 3.3 — cold-start integration tests + // ============================================================ + + #[cfg(not(target_arch = "wasm32"))] + mod cold_start_phase_3_3 { + use super::*; + use crate::registry_resolver::{ + derive_user_key_from_email, BucketEntry, GlobalUsersIndex, UserBucketsIndex, + }; + use sha2::{Digest, Sha256}; + use std::collections::BTreeMap; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + /// Compute a CIDv1 (codec=dag-cbor 0x71, multihash=sha2-256 + /// 0x12) from arbitrary bytes — the format 
master uses for + /// dag-cbor IPLD objects (production master's + /// `serde_ipld_dagcbor::to_vec → kubo /api/v0/dag/put` + /// produces this shape). + fn cid_for_dag_cbor_bytes(data: &[u8]) -> cid::Cid { + let digest = Sha256::digest(data); + let mh = cid::multihash::Multihash::<64>::wrap(0x12, &digest).unwrap(); + cid::Cid::new_v1(0x71, mh) + } + + /// Cold-start happy path against fully-mocked IPNS + IPFS + /// gateways. Asserts: + /// - resolver fetches global users-index via IPNS gateway + /// - cold-start looks up our `userKey` → bucketsIndexCid + /// - bucketsIndex CBOR is fetched + verified + /// - blinded `bucketLookupH` lookup succeeds + /// - manifest bytes are returned + /// - returned `Cid` matches what the gateway served + /// - returned `Bytes` are byte-identical to the staged + /// manifest payload + /// - resolver advanced its highest-seen-sequence floor + #[tokio::test] + async fn cold_start_resolve_manifest_happy_path_via_ipns() { + let ipns = MockServer::start().await; + let ipfs = MockServer::start().await; + let chain_rpc = MockServer::start().await; + + let email = "alice@example.com"; + let user_key = derive_user_key_from_email(email); + + let bucket = "photos"; + let manifest_payload = + b"placeholder forest-manifest bytes for the cold-start test".to_vec(); + let manifest_cid = cid_for_dag_cbor_bytes(&manifest_payload); + + let secret = fula_crypto::SecretKey::generate(); + let enc_cfg = EncryptionConfig::from_secret_key(secret); + let metadata_key = enc_cfg.key_manager.derive_path_key("fula-metadata-v1"); + let mut h_input = metadata_key.as_bytes().to_vec(); + h_input.extend_from_slice(bucket.as_bytes()); + let blinded_hex = hex::encode(&blake3::hash(&h_input).as_bytes()[..16]); + + let mut buckets = BTreeMap::new(); + buckets.insert( + blinded_hex, + BucketEntry { + manifest: manifest_cid.to_string(), + legacy: false, + }, + ); + let user_buckets = UserBucketsIndex { + v: 2, + buckets, + updated_at_unix: 1_700_000_000, + }; + let 
user_buckets_cbor = serde_ipld_dagcbor::to_vec(&user_buckets).expect("ubi"); + let buckets_index_cid = cid_for_dag_cbor_bytes(&user_buckets_cbor); + + let mut users_map = BTreeMap::new(); + users_map.insert(user_key.clone(), buckets_index_cid.to_string()); + let global = GlobalUsersIndex { + v: 1, + sequence: 42, + updated_at_unix: 1_700_000_001, + users: users_map, + }; + let global_cbor = serde_ipld_dagcbor::to_vec(&global).expect("global"); + + let ipns_name = "k51qzi5uqu5dh-cold-start-test".to_string(); + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", ipns_name))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(global_cbor)) + .mount(&ipns) + .await; + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", buckets_index_cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(user_buckets_cbor)) + .mount(&ipfs) + .await; + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", manifest_cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(manifest_payload.clone())) + .mount(&ipfs) + .await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(503)) + .mount(&chain_rpc) + .await; + + let mut client_cfg = Config::new("http://master.unreachable.invalid"); + client_cfg.timeout = std::time::Duration::from_secs(2); + client_cfg.users_index_chain_rpc_url = chain_rpc.uri(); + client_cfg.users_index_anchor_address = + "0x0000000000000000000000000000000000000001".into(); + client_cfg.users_index_ipns_name = ipns_name; + client_cfg.users_index_user_key = Some(user_key); + client_cfg.users_index_ipns_gateway_urls = + vec![format!("{}/ipns/{{name}}", ipns.uri())]; + client_cfg.users_index_ipfs_gateway_urls = + vec![format!("{}/ipfs/{{cid}}", ipfs.uri())]; + client_cfg.block_cache_enabled = false; + + let client = EncryptedClient::new(client_cfg, enc_cfg).expect("client"); + let (got_cid, got_bytes) = client + .cold_start_resolve_manifest(bucket) + .await + .expect("cold-start resolves"); + + assert_eq!(got_cid, 
manifest_cid, "returned CID matches manifest"); + assert_eq!( + got_bytes.as_ref(), + manifest_payload.as_slice(), + "returned bytes match staged manifest" + ); + + let resolver = client + .inner + .users_index_resolver() + .expect("resolver configured") + .clone(); + assert_eq!( + resolver.highest_seen_sequence(), + 42, + "resolver bumped sequence floor on success" + ); + } + + /// Typed error when the configured `userKey` isn't present + /// in the resolved global users-index. + #[tokio::test] + async fn cold_start_user_absent_in_global_returns_typed_error() { + let ipns = MockServer::start().await; + let chain_rpc = MockServer::start().await; + + let our_user_key = derive_user_key_from_email("alice@example.com"); + let other_user_key = derive_user_key_from_email("bob@example.com"); + + let mut users_map = BTreeMap::new(); + users_map.insert(other_user_key, "bafyabcdef".to_string()); + let global = GlobalUsersIndex { + v: 1, + sequence: 5, + updated_at_unix: 1_700_000_000, + users: users_map, + }; + let global_cbor = serde_ipld_dagcbor::to_vec(&global).expect("global"); + + let ipns_name = "k51qzi5uqu5dh-no-alice".to_string(); + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", ipns_name))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(global_cbor)) + .mount(&ipns) + .await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(503)) + .mount(&chain_rpc) + .await; + + let secret = fula_crypto::SecretKey::generate(); + let enc_cfg = EncryptionConfig::from_secret_key(secret); + + let mut client_cfg = Config::new("http://master.unreachable.invalid"); + client_cfg.timeout = std::time::Duration::from_secs(2); + client_cfg.users_index_chain_rpc_url = chain_rpc.uri(); + client_cfg.users_index_anchor_address = + "0x0000000000000000000000000000000000000001".into(); + client_cfg.users_index_ipns_name = ipns_name; + client_cfg.users_index_user_key = Some(our_user_key.clone()); + client_cfg.users_index_ipns_gateway_urls = + 
vec![format!("{}/ipns/{{name}}", ipns.uri())]; + + let client = EncryptedClient::new(client_cfg, enc_cfg).expect("client"); + let err = client + .cold_start_resolve_manifest("photos") + .await + .expect_err("user absent"); + match err { + ClientError::UsersIndexResolutionFailed { reason } => { + assert!( + reason.contains(&our_user_key), + "expected reason to reference missing userKey, got: {}", + reason + ); + } + other => panic!("expected UsersIndexResolutionFailed, got: {:?}", other), + } + } + + /// Phase 1.2 lazy-migration: legacy plaintext-keyed entry + /// with `legacy = true` is the fallback when the blinded + /// entry is absent. SDK accepts it. + #[tokio::test] + async fn cold_start_legacy_plaintext_fallback() { + let ipns = MockServer::start().await; + let ipfs = MockServer::start().await; + let chain_rpc = MockServer::start().await; + + let user_key = derive_user_key_from_email("legacy@example.com"); + let bucket = "old-photos"; + let manifest_payload = b"legacy manifest".to_vec(); + let manifest_cid = cid_for_dag_cbor_bytes(&manifest_payload); + + let mut buckets = BTreeMap::new(); + buckets.insert( + bucket.to_string(), + BucketEntry { + manifest: manifest_cid.to_string(), + legacy: true, + }, + ); + let user_buckets = UserBucketsIndex { + v: 2, + buckets, + updated_at_unix: 0, + }; + let user_buckets_cbor = serde_ipld_dagcbor::to_vec(&user_buckets).expect("ubi"); + let buckets_index_cid = cid_for_dag_cbor_bytes(&user_buckets_cbor); + + let mut users_map = BTreeMap::new(); + users_map.insert(user_key.clone(), buckets_index_cid.to_string()); + let global = GlobalUsersIndex { + v: 1, + sequence: 1, + updated_at_unix: 0, + users: users_map, + }; + let global_cbor = serde_ipld_dagcbor::to_vec(&global).expect("global"); + + let ipns_name = "k51qzi5uqu5dh-legacy".to_string(); + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", ipns_name))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(global_cbor)) + .mount(&ipns) + .await; + 
Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", buckets_index_cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(user_buckets_cbor)) + .mount(&ipfs) + .await; + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", manifest_cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(manifest_payload.clone())) + .mount(&ipfs) + .await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(503)) + .mount(&chain_rpc) + .await; + + let secret = fula_crypto::SecretKey::generate(); + let enc_cfg = EncryptionConfig::from_secret_key(secret); + + let mut client_cfg = Config::new("http://master.unreachable.invalid"); + client_cfg.timeout = std::time::Duration::from_secs(2); + client_cfg.users_index_chain_rpc_url = chain_rpc.uri(); + client_cfg.users_index_anchor_address = + "0x0000000000000000000000000000000000000001".into(); + client_cfg.users_index_ipns_name = ipns_name; + client_cfg.users_index_user_key = Some(user_key); + client_cfg.users_index_ipns_gateway_urls = + vec![format!("{}/ipns/{{name}}", ipns.uri())]; + client_cfg.users_index_ipfs_gateway_urls = + vec![format!("{}/ipfs/{{cid}}", ipfs.uri())]; + + let client = EncryptedClient::new(client_cfg, enc_cfg).expect("client"); + let (got_cid, got_bytes) = client + .cold_start_resolve_manifest(bucket) + .await + .expect("legacy fallback resolves"); + assert_eq!(got_cid, manifest_cid); + assert_eq!(got_bytes.as_ref(), manifest_payload.as_slice()); + } + + /// Defense: a plaintext-keyed entry without `legacy = true` + /// is rejected. Closes the loophole where a malicious + /// gateway plants a stronger-looking plaintext-named entry + /// next to the real blinded one to trick the SDK. 
+ #[tokio::test] + async fn cold_start_rejects_plaintext_entry_without_legacy_flag() { + let ipns = MockServer::start().await; + let ipfs = MockServer::start().await; + let chain_rpc = MockServer::start().await; + + let user_key = derive_user_key_from_email("strict@example.com"); + let bucket = "test"; + + let bogus_cid = cid_for_dag_cbor_bytes(b"forged manifest"); + let mut buckets = BTreeMap::new(); + buckets.insert( + bucket.to_string(), + BucketEntry { + manifest: bogus_cid.to_string(), + legacy: false, + }, + ); + let user_buckets = UserBucketsIndex { + v: 2, + buckets, + updated_at_unix: 0, + }; + let user_buckets_cbor = serde_ipld_dagcbor::to_vec(&user_buckets).expect("ubi"); + let buckets_index_cid = cid_for_dag_cbor_bytes(&user_buckets_cbor); + + let mut users_map = BTreeMap::new(); + users_map.insert(user_key.clone(), buckets_index_cid.to_string()); + let global = GlobalUsersIndex { + v: 1, + sequence: 1, + updated_at_unix: 0, + users: users_map, + }; + let global_cbor = serde_ipld_dagcbor::to_vec(&global).expect("global"); + + let ipns_name = "k51qzi5uqu5dh-strict".to_string(); + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", ipns_name))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(global_cbor)) + .mount(&ipns) + .await; + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", buckets_index_cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(user_buckets_cbor)) + .mount(&ipfs) + .await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(503)) + .mount(&chain_rpc) + .await; + + let secret = fula_crypto::SecretKey::generate(); + let enc_cfg = EncryptionConfig::from_secret_key(secret); + + let mut client_cfg = Config::new("http://master.unreachable.invalid"); + client_cfg.timeout = std::time::Duration::from_secs(2); + client_cfg.users_index_chain_rpc_url = chain_rpc.uri(); + client_cfg.users_index_anchor_address = + "0x0000000000000000000000000000000000000001".into(); + 
client_cfg.users_index_ipns_name = ipns_name; + client_cfg.users_index_user_key = Some(user_key); + client_cfg.users_index_ipns_gateway_urls = + vec![format!("{}/ipns/{{name}}", ipns.uri())]; + client_cfg.users_index_ipfs_gateway_urls = + vec![format!("{}/ipfs/{{cid}}", ipfs.uri())]; + + let client = EncryptedClient::new(client_cfg, enc_cfg).expect("client"); + let err = client + .cold_start_resolve_manifest(bucket) + .await + .expect_err("must reject"); + match err { + ClientError::UsersIndexResolutionFailed { reason } => { + assert!( + reason.contains("legacy=false"), + "expected legacy-flag rejection, got: {}", + reason + ); + } + other => panic!("expected UsersIndexResolutionFailed, got: {:?}", other), + } + } + + /// `BucketNotFound` (not a new variant) when bucket is + /// absent from the user's bucketsIndex. Reuses the + /// established error type per advisor's narrowing. + #[tokio::test] + async fn cold_start_returns_bucket_not_found_when_bucket_absent() { + let ipns = MockServer::start().await; + let ipfs = MockServer::start().await; + let chain_rpc = MockServer::start().await; + + let user_key = derive_user_key_from_email("user@example.com"); + let manifest_cid = cid_for_dag_cbor_bytes(b"some manifest"); + let mut buckets = BTreeMap::new(); + buckets.insert( + "videos".to_string(), + BucketEntry { + manifest: manifest_cid.to_string(), + legacy: true, + }, + ); + let user_buckets = UserBucketsIndex { + v: 2, + buckets, + updated_at_unix: 0, + }; + let user_buckets_cbor = serde_ipld_dagcbor::to_vec(&user_buckets).expect("ubi"); + let buckets_index_cid = cid_for_dag_cbor_bytes(&user_buckets_cbor); + + let mut users_map = BTreeMap::new(); + users_map.insert(user_key.clone(), buckets_index_cid.to_string()); + let global = GlobalUsersIndex { + v: 1, + sequence: 1, + updated_at_unix: 0, + users: users_map, + }; + let global_cbor = serde_ipld_dagcbor::to_vec(&global).expect("global"); + + let ipns_name = "k51qzi5uqu5dh-only-videos".to_string(); + 
Mock::given(method("GET")) + .and(path(format!("/ipns/{}", ipns_name))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(global_cbor)) + .mount(&ipns) + .await; + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", buckets_index_cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(user_buckets_cbor)) + .mount(&ipfs) + .await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(503)) + .mount(&chain_rpc) + .await; + + let secret = fula_crypto::SecretKey::generate(); + let enc_cfg = EncryptionConfig::from_secret_key(secret); + + let mut client_cfg = Config::new("http://master.unreachable.invalid"); + client_cfg.timeout = std::time::Duration::from_secs(2); + client_cfg.users_index_chain_rpc_url = chain_rpc.uri(); + client_cfg.users_index_anchor_address = + "0x0000000000000000000000000000000000000001".into(); + client_cfg.users_index_ipns_name = ipns_name; + client_cfg.users_index_user_key = Some(user_key); + client_cfg.users_index_ipns_gateway_urls = + vec![format!("{}/ipns/{{name}}", ipns.uri())]; + client_cfg.users_index_ipfs_gateway_urls = + vec![format!("{}/ipfs/{{cid}}", ipfs.uri())]; + + let client = EncryptedClient::new(client_cfg, enc_cfg).expect("client"); + let err = client + .cold_start_resolve_manifest("photos") + .await + .expect_err("bucket missing"); + match err { + ClientError::BucketNotFound(name) => assert_eq!(name, "photos"), + other => panic!("expected BucketNotFound, got: {:?}", other), + } + } + + /// Fail-closed when the resolver isn't configured. + /// `UsersIndexResolutionFailed` distinguishes "operator + /// misconfig" from "everything is down". 
+ #[tokio::test] + async fn cold_start_without_resolver_returns_resolution_failed() { + let secret = fula_crypto::SecretKey::generate(); + let enc_cfg = EncryptionConfig::from_secret_key(secret); + + let mut client_cfg = Config::new("http://master.unreachable.invalid"); + client_cfg.timeout = std::time::Duration::from_secs(2); + // No resolver fields populated → resolver stays None + // (field-presence model). Same effect as the old `= + // false` flag. + let client = EncryptedClient::new(client_cfg, enc_cfg).expect("client"); + let err = client + .cold_start_resolve_manifest("any") + .await + .expect_err("not configured"); + assert!( + matches!(err, ClientError::UsersIndexResolutionFailed { .. }), + "expected UsersIndexResolutionFailed, got: {:?}", + err + ); + } + + /// Phase 19 — when both IPNS and chain channels fail, the + /// resolver returns `UsersIndexResolutionFailed`. The + /// cold-start path MUST fire `MasterHealthEvent::SeverelyDegraded` + /// through the configured callback so apps can disable + /// "first-read" UI affordances. + #[tokio::test] + async fn cold_start_fires_severely_degraded_when_both_channels_fail() { + use crate::health_gate::{HealthCallback, MasterHealthEvent}; + + // IPNS: 503 on every request → resolver IPNS path fails. + let ipns = MockServer::start().await; + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(503)) + .mount(&ipns) + .await; + // Chain RPC: 503 on every request → resolver chain path + // fails too. Both channels exhausted → resolver surfaces + // UsersIndexResolutionFailed → cold_start fires SeverelyDegraded. + let chain_rpc = MockServer::start().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(503)) + .mount(&chain_rpc) + .await; + + // Capturing callback. 
+        let captured: std::sync::Arc<std::sync::Mutex<Vec<MasterHealthEvent>>> =
+            std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let captured_for_cb = std::sync::Arc::clone(&captured);
+        let cb: HealthCallback = std::sync::Arc::new(move |ev| {
+            captured_for_cb.lock().unwrap().push(ev);
+        });
+
+        let secret = fula_crypto::SecretKey::generate();
+        let enc_cfg = EncryptionConfig::from_secret_key(secret);
+
+        let mut client_cfg = Config::new("http://master.unreachable.invalid");
+        client_cfg.timeout = std::time::Duration::from_secs(2);
+        client_cfg.users_index_chain_rpc_url = chain_rpc.uri();
+        client_cfg.users_index_anchor_address =
+            "0x0000000000000000000000000000000000000001".into();
+        client_cfg.users_index_ipns_name = "k51qzi5uqu5dh-test".to_string();
+        client_cfg.users_index_user_key =
+            Some(derive_user_key_from_email("alice@example.com"));
+        client_cfg.users_index_ipns_gateway_urls =
+            vec![format!("{}/ipns/{{name}}", ipns.uri())];
+        client_cfg.health_callback = Some(cb);
+
+        let client = EncryptedClient::new(client_cfg, enc_cfg).expect("client");
+        let err = client
+            .cold_start_resolve_manifest("any-bucket")
+            .await
+            .expect_err("both channels exhausted");
+
+        // Error must be UsersIndexResolutionFailed (the resolver's
+        // signal that both paths failed).
+        assert!(
+            matches!(err, ClientError::UsersIndexResolutionFailed { .. }),
+            "expected UsersIndexResolutionFailed, got: {:?}",
+            err
+        );
+
+        // And the callback must have observed exactly one
+        // SeverelyDegraded event.
+        let events = captured.lock().unwrap().clone();
+        assert_eq!(
+            events.len(),
+            1,
+            "expected exactly one SeverelyDegraded event, got: {:?}",
+            events
+        );
+        assert!(
+            matches!(
+                events[0],
+                MasterHealthEvent::SeverelyDegraded { ..
} + ), + "expected SeverelyDegraded, got: {:?}", + events[0] + ); + } + } } diff --git a/crates/fula-client/src/error.rs b/crates/fula-client/src/error.rs index 39920a8..8825245 100644 --- a/crates/fula-client/src/error.rs +++ b/crates/fula-client/src/error.rs @@ -106,6 +106,85 @@ pub enum ClientError { /// timeout" into "fast-fail with a clear signal." #[error("Master unreachable (health gate; down for ~{down_for_secs}s)")] MasterUnreachable { down_for_secs: u64 }, + + /// Phase 2.2 of master-independent reads: a single block exceeds the + /// configured `block_cache_max_bytes` budget and cannot be cached. + /// + /// **Native-only signal in practice.** `BlockCache` itself is + /// compiled out on `wasm32`; this variant is defined unconditionally + /// so the enum shape stays stable across native and web builds, and + /// so consumers (fula-flutter, app integrators) can write a single + /// exhaustive match arm without `#[cfg]` gates of their own. + /// Triggering it on wasm would require a manual construction — + /// the SDK never raises it there. + /// + /// Apps should surface this to the user with guidance to raise the + /// `block_cache_max_bytes` config or skip the cache for this object. + #[error("Block exceeds cache budget: size={size}, budget={budget}")] + BlockTooLarge { size: u64, budget: u64 }, + + /// Phase 2.2 of master-independent reads: catch-all for the + /// persistent block cache's I/O / storage / commit errors. + /// + /// Stringified at the SDK boundary so app code doesn't need to depend + /// on `redb` or its concrete error type. Native-only in practice + /// (same reasoning as `BlockTooLarge` above); kept unconditional for + /// enum-shape stability. 
+ #[error("Block cache error: {0}")] + BlockCache(String), + + /// Phase 3.3 of master-independent reads: cold-start hybrid + /// resolver could not resolve the master-published global + /// users-index CID through any channel (IPNS exhausted AND + /// chain failed / was unreachable / had no entry / sequence- + /// regressed). Fresh-device cold-start is unrecoverable until + /// at least one channel returns; the app should surface + /// "offline mode unavailable for this device yet". + /// + /// Defined unconditionally so the enum shape stays stable + /// across native and wasm. The native resolver lives in + /// `registry_resolver.rs`; the wasm cold-start path always + /// raises this variant until a browser-friendly resolver lands. + #[error("users-index resolution failed: {reason}")] + UsersIndexResolutionFailed { reason: String }, + + /// Phase 3.3 replay defense: the resolver observed a payload + /// whose embedded `sequence` is strictly less than what the SDK + /// has previously seen and persisted. A compromised gateway, + /// RPC node, or operator could try to serve a stale (but + /// otherwise valid-looking) payload to roll back the user's + /// view; this variant is the SDK's refusal to honor that. + /// + /// Apps should NOT retry — every retry from the same source + /// would fail identically. Surface as "your master appears to + /// be serving stale state; contact support" or equivalent. + /// `channel` is a free-form label identifying which path + /// observed the regression (e.g. `"chain.latest()"`, + /// `"Ipns"`, `"Chain"`). Named `channel` rather than `source` + /// because thiserror gives the latter special meaning + /// (it expects an `std::error::Error` impl). 
+ #[error("sequence regression in {channel}: observed={observed}, highest seen={highest_seen}")] + SequenceRegression { + observed: u64, + highest_seen: u64, + channel: String, + }, +} + +#[cfg(not(target_arch = "wasm32"))] +impl From for ClientError { + fn from(err: crate::block_cache::BlockCacheError) -> Self { + use crate::block_cache::BlockCacheError; + match err { + BlockCacheError::BlockTooLarge { size, budget } => { + ClientError::BlockTooLarge { size, budget } + } + // Catch-all: stringify the rest so app code doesn't have to + // pattern-match on redb internals. Adds zero deps to the + // public SDK surface. + other => ClientError::BlockCache(other.to_string()), + } + } } impl ClientError { @@ -143,6 +222,13 @@ impl ClientError { || matches!(self, Self::S3Error { code, .. } if code == "PreconditionFailed" || code == "HTTP412" || code == "412") } + + /// Check if this is a block-cache error (budget exceeded or storage + /// failure). Useful for app integrators that want to retry without + /// the cache (e.g., directly via the gateway-race path). + pub fn is_cache_error(&self) -> bool { + matches!(self, Self::BlockTooLarge { .. } | Self::BlockCache(_)) + } } fn extract_xml_element(xml: &str, element: &str) -> Option { diff --git a/crates/fula-client/src/gateway_fetch.rs b/crates/fula-client/src/gateway_fetch.rs index 5c20ff4..52aeec9 100644 --- a/crates/fula-client/src/gateway_fetch.rs +++ b/crates/fula-client/src/gateway_fetch.rs @@ -395,13 +395,21 @@ impl GatewayPool { } } + // `len` and `is_empty` are public monitoring API for app + // integrators that want to surface "configured N gateways" or + // detect a misconfigured empty pool before issuing requests. + // The crate itself doesn't call them internally — silence the + // workspace warning while keeping the surface stable for apps. + /// Number of gateways in the pool. 
+    #[allow(dead_code)]
     pub fn len(&self) -> usize {
         self.gateways.len()
     }
 
     /// True if no gateways are configured (effectively disables
     /// gateway-race fallback).
+    #[allow(dead_code)]
     pub fn is_empty(&self) -> bool {
         self.gateways.is_empty()
     }
@@ -457,6 +465,27 @@ impl GatewayPool {
         cid: &Cid,
         http: &reqwest::Client,
     ) -> Result<Bytes, GatewayPoolError> {
+        self.fetch_verified_with_source(cid, http)
+            .await
+            .map(|(b, _url)| b)
+    }
+
+    /// Phase 19 — like `fetch_verified` but also returns which gateway
+    /// URL template won the race. Used by the offline-fallback path
+    /// (Phase 2.4) to populate `OfflineGetResult.source =
+    /// ReadSource::Gateway(url)` for transparency surfacing. The URL
+    /// is the configured template (e.g. `https://ipfs.io/ipfs/{cid}`),
+    /// NOT the per-CID-substituted URL — apps display "served by
+    /// ipfs.io" without the per-fetch CID noise.
+    ///
+    /// Crate-private: this is an internal seam consumed only by
+    /// `try_offline_fallback`. Apps should call `fetch_verified` (which
+    /// is `pub` and forwards to this) when they need the bytes alone.
+    pub(crate) async fn fetch_verified_with_source(
+        &self,
+        cid: &Cid,
+        http: &reqwest::Client,
+    ) -> Result<(Bytes, String), GatewayPoolError> {
         use futures::stream::FuturesUnordered;
         use futures::StreamExt;
@@ -487,9 +516,10 @@ impl GatewayPool {
             match result {
                 Ok(body) => {
                     g.record_success();
+                    let url = g.url_template.clone();
                     // Drop in_flight to cancel remaining racers.
                     drop(in_flight);
-                    return Ok(body);
+                    return Ok((body, url));
                 }
                 Err(FetchError::Transient(msg)) => {
                     g.record_transient_failure();
diff --git a/crates/fula-client/src/health_gate.rs b/crates/fula-client/src/health_gate.rs
index a5e2024..0942743 100644
--- a/crates/fula-client/src/health_gate.rs
+++ b/crates/fula-client/src/health_gate.rs
@@ -29,8 +29,45 @@
 //! read" into "fast-fail with `MasterUnreachable`" when Down.
 use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
+use std::sync::Arc;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
+/// Phase 19 transparency surface — events the SDK emits when its
+/// view of master-server reachability changes. Apps wire a
+/// [`HealthCallback`] via [`Config::health_callback`] and surface
+/// the transitions to users (e.g., "you're offline; reading from
+/// IPFS gateway"). The default behavior with no callback set is
+/// byte-identical to pre-Phase-19 builds — the gate still works,
+/// just silently.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum MasterHealthEvent {
+    /// Master S3 is reachable; reads use the fast path.
+    Online,
+
+    /// Master S3 is unreachable; SDK is falling back to IPFS
+    /// gateways (Phase 2.4) or cold-start resolver (Phase 3.3).
+    /// `reason` is human-readable for logging — not for end-user
+    /// display (use a localized string from your UI layer).
+    OfflineFallbackActive { reason: String },
+
+    /// Both master S3 AND the chain RPC are unreachable. Cold-
+    /// start reads will fail; warm reads (via cached `(bucket,
+    /// key) → cid`) still work via gateways. Apps should disable
+    /// "open new bucket" / "first-read" UI affordances when this
+    /// fires. **Emitted only from the cold-start failure path**
+    /// (the resolver), NOT from periodic health-gate observation —
+    /// the SDK can't authoritatively detect "both down" without
+    /// trying.
+    SeverelyDegraded { reason: String },
+}
+
+/// A callback the SDK invokes on every `MasterHealthEvent`
+/// transition. `Arc<dyn Fn(MasterHealthEvent) + Send + Sync>` so the closure can be
+/// shared across all clones of `FulaClient` and called from any
+/// task. Transitions are deduplicated — a single Down→Up flip fires
+/// exactly one `Online` event, not one per request.
+pub type HealthCallback = Arc<dyn Fn(MasterHealthEvent) + Send + Sync>;
+
 /// Threshold for flipping from `Up` to `Down`. One transient 5xx on a single
 /// bucket isn't the same as "master is unreachable" — only two consecutive
 /// signals trip the gate.
@@ -47,15 +84,35 @@ pub struct HealthGate {
     state_ms: AtomicU64,
     consecutive_failures: AtomicU32,
     ttl: Duration,
+    /// Phase 19 — optional transparency callback. `Some` when
+    /// `Config::health_callback` was set on `FulaClient::new`.
+    /// Fires `Online` / `OfflineFallbackActive` on Up↔Down state
+    /// transitions, with deduplication so back-to-back events
+    /// don't double-fire.
+    callback: Option<HealthCallback>,
 }
 
 impl HealthGate {
     /// Create a new gate with the given TTL. Starts in the `Up` state.
+    /// No callback registered.
     pub fn new(ttl: Duration) -> Self {
         Self {
             state_ms: AtomicU64::new(0),
             consecutive_failures: AtomicU32::new(0),
             ttl,
+            callback: None,
         }
     }
+
+    /// Phase 19 — construct a gate with a transparency callback.
+    /// The callback fires once on each Up↔Down transition; consecutive
+    /// failures within an already-Down state do NOT re-fire.
+    pub fn with_callback(ttl: Duration, callback: HealthCallback) -> Self {
+        Self {
+            state_ms: AtomicU64::new(0),
+            consecutive_failures: AtomicU32::new(0),
+            ttl,
+            callback: Some(callback),
+        }
+    }
@@ -86,9 +143,15 @@
     /// Record a successful master interaction. Resets the failure counter
     /// and clears the `Down` timestamp (gate returns to `Up`).
+    ///
+    /// Phase 19: fires `MasterHealthEvent::Online` exactly when the gate
+    /// flips from Down→Up. A success while already Up is a no-op.
     pub fn record_success(&self) {
         self.consecutive_failures.store(0, Ordering::Release);
-        self.state_ms.store(0, Ordering::Release);
+        let was_down = self.state_ms.swap(0, Ordering::AcqRel) != 0;
+        if was_down {
+            self.fire_event(MasterHealthEvent::Online);
+        }
     }
 
     /// Record a master-side failure (connection refused / RST / 5xx /
@@ -97,18 +160,51 @@
     ///
     /// 4xx responses are NOT failures for gate purposes — they're
     /// request-level issues, not master-down signals.
+    ///
+    /// Phase 19: fires `MasterHealthEvent::OfflineFallbackActive` exactly
Subsequent failures while already + /// Down do NOT re-fire (the `compare_exchange` filters duplicates). pub fn record_failure(&self) { let prior = self.consecutive_failures.fetch_add(1, Ordering::AcqRel); if prior + 1 >= CONSECUTIVE_FAILURE_THRESHOLD { // Threshold crossed (or exceeded). Flip to `Down` if not already. // Only update timestamp on the first transition this window so // that repeated failures don't keep extending the TTL. - let _ = self.state_ms.compare_exchange( - 0, - now_ms(), - Ordering::AcqRel, - Ordering::Acquire, - ); + let now = now_ms(); + let prev = self + .state_ms + .compare_exchange(0, now, Ordering::AcqRel, Ordering::Acquire); + // `Ok(_)` means we successfully transitioned Up→Down — fire + // the event once. `Err(_)` means already Down (timestamp + // non-zero), no transition. + if prev.is_ok() { + self.fire_event(MasterHealthEvent::OfflineFallbackActive { + reason: format!( + "{} consecutive master failures observed", + prior + 1 + ), + }); + } + } + } + + /// Phase 19 helper — invoke the registered callback if present. + /// Swallows panics inside the callback so a buggy app handler + /// can't crash the SDK request path. + fn fire_event(&self, event: MasterHealthEvent) { + if let Some(cb) = self.callback.as_ref() { + let cb = Arc::clone(cb); + // Clone the event for the closure; original is dropped after. + let event_clone = event.clone(); + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(move || { + cb(event_clone); + })); + if result.is_err() { + tracing::warn!( + event = ?event, + "health_callback panicked; SDK proceeding (callback panics are swallowed by design)" + ); + } } } } @@ -237,4 +333,129 @@ mod tests { // 8 failures > threshold(2), so gate must be Down. assert!(matches!(gate.decide(), GateDecision::ShortCircuit { .. 
}));
     }
+
+    // ============================================================
+    // Phase 19 — transparency callback wiring
+    // ============================================================
+
+    /// Helper: build a callback that pushes events into a Mutex.
+    /// Returns the callback Arc + a clone of the same Vec for assertions.
+    fn capturing_callback() -> (
+        HealthCallback,
+        std::sync::Arc<std::sync::Mutex<Vec<MasterHealthEvent>>>,
+    ) {
+        let captured: std::sync::Arc<std::sync::Mutex<Vec<MasterHealthEvent>>> =
+            std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let captured_for_cb = std::sync::Arc::clone(&captured);
+        let cb: HealthCallback = std::sync::Arc::new(move |ev| {
+            captured_for_cb.lock().unwrap().push(ev);
+        });
+        (cb, captured)
+    }
+
+    #[test]
+    fn test_phase19_two_failures_fire_offline_event_single_failure_silent() {
+        // Advisor-mandated test #1: a single failure must NOT fire the
+        // callback (the gate stays Up). The second failure that crosses
+        // the threshold fires `OfflineFallbackActive` exactly once.
+        let (cb, captured) = capturing_callback();
+        let gate = HealthGate::with_callback(Duration::from_secs(30), cb);
+
+        gate.record_failure();
+        // After one failure: gate still Up, no callback fired.
+        assert_eq!(
+            captured.lock().unwrap().len(),
+            0,
+            "single failure must not fire callback"
+        );
+
+        gate.record_failure();
+        // After two failures: gate flipped Down, exactly one event fired.
+        let events = captured.lock().unwrap().clone();
+        assert_eq!(events.len(), 1, "expected exactly one event, got: {:?}", events);
+        match &events[0] {
+            MasterHealthEvent::OfflineFallbackActive { reason } => {
+                assert!(
+                    reason.contains("2 consecutive"),
+                    "reason should mention failure count: {}",
+                    reason
+                );
+            }
+            other => panic!("expected OfflineFallbackActive, got {:?}", other),
+        }
+
+        // Further failures while already Down must NOT re-fire the event
+        // (compare_exchange filters the no-transition case).
+ gate.record_failure(); + gate.record_failure(); + assert_eq!( + captured.lock().unwrap().len(), + 1, + "additional failures while Down must not re-fire OfflineFallbackActive" + ); + } + + #[test] + fn test_phase19_success_after_down_fires_online() { + // Advisor-mandated test #2: when the gate is Down and a probe + // succeeds, the callback observes `Online` exactly once. + let (cb, captured) = capturing_callback(); + let gate = HealthGate::with_callback(Duration::from_secs(30), cb); + + // Trip the gate. + gate.record_failure(); + gate.record_failure(); + // One OfflineFallbackActive event so far. + assert_eq!(captured.lock().unwrap().len(), 1); + + // Success — flips Down→Up; fires Online. + gate.record_success(); + let events = captured.lock().unwrap().clone(); + assert_eq!(events.len(), 2, "expected OfflineFallbackActive + Online"); + assert!(matches!(events[1], MasterHealthEvent::Online)); + + // A second success while already Up must NOT re-fire Online. + gate.record_success(); + assert_eq!( + captured.lock().unwrap().len(), + 2, + "redundant success while Up must not re-fire Online" + ); + } + + #[test] + fn test_phase19_callback_panic_does_not_crash_caller() { + // A buggy app callback that panics must NOT crash the SDK. + // `fire_event` wraps the call in `catch_unwind` and proceeds. + let cb: HealthCallback = std::sync::Arc::new(|_ev| { + panic!("simulated app-level panic"); + }); + let gate = HealthGate::with_callback(Duration::from_secs(30), cb); + + // These calls would propagate the panic if catch_unwind weren't + // wrapping the callback. The test passes by NOT panicking. + gate.record_failure(); + gate.record_failure(); + gate.record_success(); + + // And the gate state itself remains correct: a success after a + // Down state returns to Up. 
+ assert_eq!(gate.decide(), GateDecision::Allow); + } + + #[test] + fn test_phase19_no_callback_means_silent() { + // A gate constructed via `new` (no callback) must work + // identically to pre-Phase-19 builds: state machine works, + // no events are produced anywhere. + let gate = HealthGate::new(Duration::from_secs(30)); + gate.record_failure(); + gate.record_failure(); + gate.record_success(); + // No assertion on event capture — there's no captured Vec. + // The fact that we constructed the gate with `new` (no + // callback wiring) and reached this line proves the silent + // path works. Verify final state is sane. + assert_eq!(gate.decide(), GateDecision::Allow); + } } diff --git a/crates/fula-client/src/lib.rs b/crates/fula-client/src/lib.rs index 3a233e1..53e8240 100644 --- a/crates/fula-client/src/lib.rs +++ b/crates/fula-client/src/lib.rs @@ -47,6 +47,8 @@ mod error; mod gateway_fetch; mod health_gate; mod multipart; +#[cfg(not(target_arch = "wasm32"))] +mod registry_resolver; mod types; #[cfg(not(target_arch = "wasm32"))] mod orphan_queue; @@ -77,6 +79,25 @@ pub use error::{ClientError, Result}; pub use multipart::{MultipartUpload, UploadProgress, ProgressCallback, upload_large_file, MultipartAbortGuard}; pub use types::*; +/// Phase 19 — transparency surfaces. `HealthCallback` is the closure +/// type apps wire via `Config::with_health_callback` to observe master +/// reachability transitions. `MasterHealthEvent` is the variant the +/// callback receives. Re-exported here so app-level code can construct +/// callbacks without depending on internal module paths. +pub use health_gate::{HealthCallback, MasterHealthEvent}; + +/// Phase 3.3 — cold-start hybrid resolver public API. Native-only; +/// the resolver itself is gated to `cfg(not(target_arch = "wasm32"))`. +/// The free helper `derive_user_key_from_email` is also re-exported +/// so JS / Flutter bindings can compute the user_key without holding +/// a client. 
+#[cfg(not(target_arch = "wasm32"))] +pub use registry_resolver::{ + decode_user_buckets_index, default_ipfs_gateway_urls, default_ipns_gateway_urls, + derive_user_key_from_email, fetch_cid_via_gateways, BucketEntry, GlobalUsersIndex, + ResolutionSource, ResolvedUsersIndex, ResolverConfig, UserBucketsIndex, UsersIndexResolver, +}; + /// Process-wide count of WAL append failures (F11). /// /// The WAL is the crash-recovery log for in-memory forest upserts. When diff --git a/crates/fula-client/src/registry_resolver.rs b/crates/fula-client/src/registry_resolver.rs new file mode 100644 index 0000000..f602eb1 --- /dev/null +++ b/crates/fula-client/src/registry_resolver.rs @@ -0,0 +1,1785 @@ +//! Phase 3.3 — hybrid IPNS-primary + chain-fallback resolver for +//! the master-published global users-index CID. +//! +//! Cold-start flow (per plan §3.3 step 5): +//! +//! 1. **IPNS path (primary).** Race a small fan-out of IPNS-aware +//! public gateways for `/ipns/`. Each gateway +//! resolves the IPNS NAME server-side and returns the underlying +//! dag-cbor bytes. We parse those bytes as +//! [`GlobalUsersIndex`], read the in-payload `sequence`, and +//! accept the first response whose sequence is ≥ the SDK's +//! process-wide `highest_seen_sequence` (replay defense). +//! Budget: 10 s, sequential; no per-gateway dynamic-priority +//! state (the cold-start path is one-shot — the warm-device +//! pool's state machine isn't a fit). +//! +//! 2. **Chain path (fallback).** If the IPNS path fails or times +//! out, fire one `eth_call` against the configured RPC URL for +//! `FulaUsersIndexAnchor.latest()`. The 96-byte ABI response is +//! `(bytes32 cid_digest, uint64 sequence, uint64 timestamp)`. +//! Reconstruct a CIDv1 (codec=dag-cbor 0x71, multihash=sha2-256 +//! 0x12 + the digest bytes), then iterate the same gateway list +//! fetching `/ipfs/` until one body content-addresses to +//! that CID via [`verify_cid_against_bytes`]. Parse the body as +//! 
[`GlobalUsersIndex`]; verify the in-payload `sequence` +//! matches the on-chain `sequence` and is ≥ `highest_seen_sequence`. +//! +//! 3. **Single sequence stream.** There is one monotonic `sequence` +//! field, embedded inside the CBOR payload itself. Both IPNS and +//! chain paths read it from the bytes — never from IPNS DHT +//! metadata or the chain-call return — so a compromised gateway +//! (or RPC node, or operator) can publish a fresh-but-malicious +//! *higher* sequence (closing that requires user wallets and is +//! out of scope), but **cannot regress** to a stale one. +//! +//! ## Native-only +//! +//! The resolver is gated to `cfg(not(target_arch = "wasm32"))` for +//! the same reason as `block_cache.rs` and `gateway_fetch.rs`: it +//! depends on `tokio::time::timeout`, on the `parking_lot::Mutex` +//! used internally by gateway-side code, and on +//! `verify_cid_against_bytes` (which itself is native-only because +//! it lives in `gateway_fetch.rs`). Cold-start on browser/wasm +//! surfaces [`ClientError::UsersIndexResolutionFailed`] until a +//! browser-friendly resolver lands as a follow-up. + +#![cfg(not(target_arch = "wasm32"))] + +use crate::error::ClientError; +use crate::gateway_fetch::verify_cid_against_bytes; +use bytes::Bytes; +use cid::multihash::Multihash; +use cid::Cid; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +// ============================================================ +// Public types +// ============================================================ + +/// Master's published global users-index CBOR payload. Mirrors the +/// `GlobalUsersIndex` struct in `fula-cli`'s +/// `handlers::users_index_publisher`. The two definitions must stay +/// in lockstep — see plan §3.2.a for the producer side. 
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
+pub struct GlobalUsersIndex {
+    pub v: u32,
+    pub sequence: u64,
+    pub updated_at_unix: u64,
+    /// `userKey_hex` (32 hex chars) → bucketsIndexCid (string).
+    /// The SDK looks up its own `userKey` here on cold-start.
+    pub users: BTreeMap<String, String>,
+}
+
+/// Master's per-user `bucketsIndex` CBOR — one per user per snapshot
+/// when their state changed. Mirrors the `UserBucketsIndex` struct
+/// in `fula-cli`'s `handlers::users_index_publisher` (the producer
+/// side; see plan §3.2.a). The two definitions must stay in lockstep.
+///
+/// Map keys are either:
+/// - 32-hex BLAKE3-derived `bucketLookupH` (Phase 1.2 blinded form)
+/// - plaintext bucket name (Phase 1.2 lazy-migration legacy form)
+///
+/// `legacy=true` distinguishes the latter so the cold-start dispatch
+/// can fall back from `index[blinded_hex]` to `index[bucket_name]`
+/// for users who haven't yet uploaded with a Phase-1.2-aware client.
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
+pub struct UserBucketsIndex {
+    pub v: u32,
+    pub buckets: BTreeMap<String, BucketEntry>,
+    pub updated_at_unix: u64,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
+pub struct BucketEntry {
+    /// CIDv1 string of the user's per-bucket forest manifest.
+    pub manifest: String,
+    /// `true` ⇔ map key is the plaintext `bucket_name` (legacy
+    /// fallback). The cold-start lookup tries blinded first; on
+    /// miss it tries the plaintext name and accepts only entries
+    /// where `legacy = true`.
+    pub legacy: bool,
+}
+
+/// Result of a successful [`UsersIndexResolver::resolve`].
+#[derive(Clone, Debug)]
+pub struct ResolvedUsersIndex {
+    /// Which channel actually served the payload. Surfaced to apps
+    /// (and to Phase 19's `ReadFreshness`) so users can be told
+    /// "served from chain backup; expected staleness ≤ 12h".
+    pub source: ResolutionSource,
+    /// CID of the parsed payload.
For the chain path this is the + /// reconstructed-and-verified CID. For the IPNS path it is + /// `Cid::new_v1(0x71, sha2-256(bytes))` — synthesized from the + /// returned bytes (the IPNS path has no externally-asserted CID + /// to verify against; the gateway does the IPNS-record + /// resolution upstream). + pub cid: Cid, + /// Decoded payload. Apps walk `payload.users` to find their own + /// `userKey` → bucketsIndexCid. + pub payload: GlobalUsersIndex, + /// Raw CBOR bytes — kept so callers can persist them (Phase + /// 3.3.5 hot-start cache) without re-fetching. + pub bytes: Bytes, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ResolutionSource { + Ipns, + Chain, + /// Phase 3.3.5 — served from the on-disk hot-start cache (the + /// resolver short-circuited IPNS + chain because the cached + /// `(cid, sequence)` was within `soft_ttl`). Apps/Phase 19's + /// `ReadFreshness` can surface this as "served from a recent + /// snapshot — last refreshed N seconds ago". + HotStartCache, +} + +/// Resolver configuration. Construct via [`UsersIndexResolver::new`]. +#[derive(Clone, Debug)] +pub struct ResolverConfig { + /// IPNS-aware gateway URL templates (each must contain `{name}`). + /// Empty = use the SDK-shipped default subset + /// ([`default_ipns_gateway_urls`]). + pub ipns_gateways: Vec, + /// `/ipfs/{cid}` gateway URL templates for the chain path's + /// CID-fetch step. Empty = use the SDK-shipped default six. + pub ipfs_gateways: Vec, + /// JSON-RPC URL for the chain anchor. Required. + pub chain_rpc_url: String, + /// `FulaUsersIndexAnchor.sol` proxy address (20 bytes hex, + /// optionally `0x`-prefixed). Required. + pub anchor_address: String, + /// IPNS NAME (libp2p public-key hash, e.g. `k51qzi5...`). + /// Required. + pub ipns_name: String, + /// Hard ceiling on the IPNS race; fall through to chain after. + /// Default 10 s per plan §3.3 step 5a. 
+ pub ipns_race_timeout: Duration, + /// Per-gateway timeout for individual fetches (both IPNS and the + /// chain path's CID-fetch step). + pub per_request_timeout: Duration, + + /// Phase 3.3.5 — soft TTL for the on-disk hot-start cache. + /// When the resolver was successfully run within this window + /// (per the cached `observed_at_unix`), `resolve()` returns the + /// cached state directly without touching IPNS or chain. + /// Beyond this, the resolver opportunistically re-runs. + /// Default: 5 minutes per plan §3.3.5 — matches the expected + /// IPNS publish cadence. + pub soft_ttl: Duration, +} + +impl ResolverConfig { + /// Default config for a given chain RPC URL, IPNS NAME, and + /// anchor address. All other fields take audit-recommended + /// defaults. + pub fn new( + chain_rpc_url: impl Into, + anchor_address: impl Into, + ipns_name: impl Into, + ) -> Self { + Self { + ipns_gateways: Vec::new(), + ipfs_gateways: Vec::new(), + chain_rpc_url: chain_rpc_url.into(), + anchor_address: anchor_address.into(), + ipns_name: ipns_name.into(), + ipns_race_timeout: Duration::from_secs(10), + per_request_timeout: Duration::from_secs(8), + soft_ttl: Duration::from_secs(300), // 5 min, matches IPNS publish cadence + } + } +} + +/// Derive the SDK-side `userKey` from a user's email address. +/// +/// Replicates the master-side identity derivation chain in +/// `fula-cli/src/state.rs::hash_user_id`: +/// +/// 1. `userId = sha256(lower(email))` — 32 bytes +/// 2. `userIdHex = hex::encode(userId)` — 64 ASCII hex chars +/// 3. `userKey = BLAKE3("fula:user_id:" || userIdHex)[..16]` — 16 bytes +/// 4. Return `hex::encode(userKey)` — 32 ASCII hex chars +/// +/// The 32-hex output matches `BucketMetadata.owner_id` on master +/// (see `fula-cli/src/state.rs:15-22`). The SDK passes this string +/// in `Config::users_index_user_key` so the cold-start path can +/// look itself up in the published `GlobalUsersIndex.users` map. 
+///
+/// This is a **free function**, not a method, so JS / Flutter
+/// bindings can compute the user_key without holding a client.
+/// Domain separator + double hashing + lowercase normalization MUST
+/// stay in lockstep with the master's `hash_user_id`; the
+/// `derive_user_key_matches_master_state_rs_algorithm` test below
+/// reproduces the master algorithm step-by-step and asserts equality.
+pub fn derive_user_key_from_email(email: &str) -> String {
+    use sha2::{Digest, Sha256};
+    let user_id_digest = Sha256::digest(email.to_lowercase().as_bytes());
+    let user_id_hex = hex::encode(user_id_digest);
+    let mut hasher = blake3::Hasher::new();
+    hasher.update(b"fula:user_id:");
+    hasher.update(user_id_hex.as_bytes());
+    hex::encode(&hasher.finalize().as_bytes()[..16])
+}
+
+/// Default IPNS-aware gateway list. Excludes
+/// `trustless-gateway.link` (only serves `/ipfs/`, not `/ipns/`).
+pub fn default_ipns_gateway_urls() -> Vec<String> {
+    vec![
+        "https://cloudflare-ipfs.com/ipns/{name}".into(),
+        "https://dweb.link/ipns/{name}".into(),
+        "https://ipfs.io/ipns/{name}".into(),
+        "https://4everland.io/ipns/{name}".into(),
+        "https://gateway.pinata.cloud/ipns/{name}".into(),
+    ]
+}
+
+/// Fetch a CID's bytes via simple sequential iteration over the
+/// configured IPFS-gateway list, verifying content-addressing on
+/// each successful response. Returns the first body whose
+/// `verify_cid_against_bytes` passes; surfaces
+/// `UsersIndexResolutionFailed` if all gateways exhaust.
+///
+/// Intentionally simpler than `GatewayPool::fetch_verified` (Phase
+/// 2.3's dynamic-priority race orchestrator). Cold-start is one-shot
+/// — the per-gateway state machine pays no benefit here, and keeping
+/// the resolver self-contained means cold-start doesn't require
+/// Phase 2.2/2.4 to be enabled.
+pub async fn fetch_cid_via_gateways(
+    cid: &Cid,
+    gateways: &[String],
+    http: &reqwest::Client,
+    per_request_timeout: Duration,
+) -> Result<Bytes, ClientError> {
+    if gateways.is_empty() {
+        return Err(ClientError::UsersIndexResolutionFailed {
+            reason: format!("no IPFS gateways configured to fetch {}", cid),
+        });
+    }
+    let cid_str = cid.to_string();
+    let mut last_err: Option<String> = None;
+    for tmpl in gateways {
+        let url = tmpl.replace("{cid}", &cid_str);
+        let resp = match tokio::time::timeout(per_request_timeout, http.get(&url).send()).await {
+            Ok(Ok(r)) => r,
+            Ok(Err(e)) => {
+                last_err = Some(format!("{} transport: {}", url, e));
+                continue;
+            }
+            Err(_) => {
+                last_err = Some(format!("{} timeout", url));
+                continue;
+            }
+        };
+        if !resp.status().is_success() {
+            last_err = Some(format!("{} HTTP {}", url, resp.status()));
+            continue;
+        }
+        let bytes = match resp.bytes().await {
+            Ok(b) => b,
+            Err(e) => {
+                last_err = Some(format!("{} body: {}", url, e));
+                continue;
+            }
+        };
+        if let Err(e) = verify_cid_against_bytes(cid, &bytes) {
+            last_err = Some(format!("{} verify: {}", url, e));
+            continue;
+        }
+        return Ok(bytes);
+    }
+    Err(ClientError::UsersIndexResolutionFailed {
+        reason: format!(
+            "CID {} unreachable across {} gateways: {}",
+            cid,
+            gateways.len(),
+            last_err.unwrap_or_else(|| "no gateways tried".into())
+        ),
+    })
+}
+
+/// Decode dag-cbor bytes as a per-user `UserBucketsIndex`. Wraps the
+/// dagcbor crate's error so callers see a single ClientError shape.
+pub fn decode_user_buckets_index(bytes: &[u8]) -> Result<UserBucketsIndex, ClientError> {
+    serde_ipld_dagcbor::from_slice(bytes).map_err(|e| {
+        ClientError::UsersIndexResolutionFailed {
+            reason: format!("UserBucketsIndex CBOR decode: {}", e),
+        }
+    })
+}
+
+/// Default `/ipfs/{cid}` gateway list — same six as the warm-device
+/// pool ships in `gateway_fetch::default_gateway_urls`. Re-declared
+/// here so the resolver's chain path doesn't need to depend on the
+/// pool's state machine.
+pub fn default_ipfs_gateway_urls() -> Vec { + vec![ + "https://cloudflare-ipfs.com/ipfs/{cid}".into(), + "https://dweb.link/ipfs/{cid}".into(), + "https://ipfs.io/ipfs/{cid}".into(), + "https://trustless-gateway.link/ipfs/{cid}".into(), + "https://4everland.io/ipfs/{cid}".into(), + "https://gateway.pinata.cloud/ipfs/{cid}".into(), + ] +} + +// ============================================================ +// Resolver +// ============================================================ + +#[derive(Debug)] +pub struct UsersIndexResolver { + config: ResolverConfig, + http: reqwest::Client, + /// Process-wide replay defense — only ever increases. SDK callers + /// can seed it from a persisted hot-start cache (Phase 3.3.5) at + /// construction time via [`UsersIndexResolver::new_with_cache`]; + /// every successful `resolve` then bumps it. + highest_seen_sequence: AtomicU64, + /// Pre-validated 20-byte anchor address. Cached so each `resolve` + /// doesn't re-parse the hex. + anchor_address_bytes: [u8; 20], + /// Phase 3.3.5 — optional hot-start persistence layer. When set, + /// `resolve()` reads cached `(cid, sequence, observed_at_unix)` + /// from the cache's METADATA table on the first call AND writes + /// the freshly-resolved state on every successful resolve. This + /// makes the replay-defense floor survive SDK restarts AND lets + /// the resolver short-circuit IPNS+chain when within `soft_ttl`. + cache: Option>, +} + +impl UsersIndexResolver { + /// Build a resolver. Validates `anchor_address` is 20 bytes hex + /// up-front so misconfiguration fails at construction time, not + /// on the first cold-start. 
+ pub fn new(config: ResolverConfig) -> Result { + if config.chain_rpc_url.is_empty() { + return Err(ClientError::Config( + "registry resolver: chain_rpc_url is empty".into(), + )); + } + if config.ipns_name.is_empty() { + return Err(ClientError::Config( + "registry resolver: ipns_name is empty".into(), + )); + } + let anchor_address_bytes = parse_anchor_address(&config.anchor_address)?; + Ok(Self { + config, + http: reqwest::Client::new(), + highest_seen_sequence: AtomicU64::new(0), + anchor_address_bytes, + cache: None, + }) + } + + /// Phase 3.3.5 — construct a resolver wired to a persistent + /// hot-start cache. On construction the resolver: + /// 1. Reads `(cid, sequence, observed_at_unix)` from the + /// cache's METADATA table. + /// 2. Seeds the replay-defense floor from the cached + /// sequence — a malicious gateway cannot regress to a + /// stale payload across SDK restarts. + /// + /// On every successful `resolve` the resolver: + /// 1. Writes the new `(cid, sequence, now)` to METADATA. + /// 2. Inserts the bytes into BLOCKS (so a future hot-start + /// can serve the payload entirely from disk). + /// + /// The cache load/store paths are **best-effort**: failures + /// log at `warn!` and don't propagate, so a corrupted or + /// unwriteable cache never blocks SDK functionality. (The + /// resolver still works, just without hot-start.) + pub fn new_with_cache( + config: ResolverConfig, + cache: Arc, + ) -> Result { + let mut resolver = Self::new(config)?; + // Seed the floor from cached state, if any. Best-effort — + // a corrupt or empty cache gives us the default floor (0). 
+ match cache.load_users_index_state() { + Ok(Some((_cid, sequence, _observed))) => { + resolver.bump_seen_sequence(sequence); + tracing::debug!( + seeded_sequence = sequence, + "registry_resolver: hot-start floor seeded from cache" + ); + } + Ok(None) => { + tracing::debug!("registry_resolver: no hot-start state cached (fresh)"); + } + Err(e) => { + tracing::warn!( + error = %e, + "registry_resolver: hot-start cache load failed; floor stays at 0 (best-effort)" + ); + } + } + resolver.cache = Some(cache); + Ok(resolver) + } + + /// Test/integration hook — production callers update via + /// `resolve()`'s side-effect of calling `bump_seen_sequence`. + /// Marked `pub(crate)` so tests can seed the floor without a + /// stable public API. + #[cfg(test)] + pub(crate) fn set_highest_seen_sequence(&self, seq: u64) { + self.bump_seen_sequence(seq); + } + + /// Read the current replay-defense floor. + pub fn highest_seen_sequence(&self) -> u64 { + self.highest_seen_sequence.load(Ordering::Acquire) + } + + /// Read-only access to the resolver's HTTP client. The cold-start + /// path on `EncryptedClient` reuses this client for the + /// bucketsIndex + manifest fetches so connection pooling stays + /// intact across all of the cold-start request burst. + pub fn http_client(&self) -> &reqwest::Client { + &self.http + } + + /// Read-only access to the resolver's per-request timeout — + /// reused by the cold-start path's gateway fetches for the + /// bucketsIndex CBOR and the forest manifest, so a single config + /// knob governs all of cold-start. + pub fn per_request_timeout(&self) -> Duration { + self.config.per_request_timeout + } + + /// Read-only access to the IPFS gateway list. Cold-start uses + /// this same list (rather than the warm-device pool's) so it + /// stays self-contained and works without Phase 2.2/2.4 enabled. 
+ pub fn ipfs_gateways(&self) -> Vec { + if self.config.ipfs_gateways.is_empty() { + default_ipfs_gateway_urls() + } else { + self.config.ipfs_gateways.clone() + } + } + + /// Atomic monotonic-max — only ever increases. Lock-free CAS loop. + fn bump_seen_sequence(&self, seq: u64) { + let mut current = self.highest_seen_sequence.load(Ordering::Acquire); + while seq > current { + match self.highest_seen_sequence.compare_exchange_weak( + current, + seq, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => break, + Err(observed) => current = observed, + } + } + } + + /// Hybrid resolve. + /// + /// Order of operations: + /// 0. **Hot-start short-circuit (Phase 3.3.5).** If a cache is + /// configured AND has a `(cid, sequence, observed_at)` row + /// AND `now - observed_at < soft_ttl`, return the cached + /// state directly. Bytes come from BLOCKS if cached, + /// otherwise via gateway race for the cached cid. Sequence + /// is re-checked against the in-memory floor for defense. + /// 1. Try IPNS for `ipns_race_timeout`. + /// 2. Fall through to chain on timeout / all-gateway failure + /// / replay-rejection. + /// 3. On success (any path), write `(cid, sequence, now)` to + /// METADATA and the bytes to BLOCKS — best-effort, so a + /// cache write failure never aborts the resolve. + pub async fn resolve(&self) -> Result { + // Step 0 — hot-start short-circuit. + if let Some(resolved) = self.try_hot_start().await { + return Ok(resolved); + } + + // Steps 1-2 — IPNS-then-chain. + let resolved = self.resolve_via_network().await?; + + // Step 3 — write-back. Best-effort, synchronous-from-async + // so the next call observes the freshly-written cache without + // racing a spawned background task. Cold-start is a once- + // per-session event; the few hundred microseconds for the + // redb txns are negligible vs. the IPNS+chain budget we just + // paid. 
+ self.persist_to_cache(&resolved).await; + + Ok(resolved) + } + + /// Phase 3.3.5 — try to serve from the persistent cache without + /// touching the network. Returns `Some(ResolvedUsersIndex)` when + /// a fresh-enough cached state exists AND the bytes are + /// available (BLOCKS hit OR a fast gateway-race fetch for the + /// cached cid succeeds). Returns `None` to indicate "fall + /// through to full IPNS+chain resolve." + /// + /// A `None` return is silent — the network path takes over. + async fn try_hot_start(&self) -> Option { + let cache = self.cache.as_ref()?; + let (cached_cid, cached_seq, observed_at) = match cache.load_users_index_state() { + Ok(Some(triple)) => triple, + Ok(None) => return None, + Err(e) => { + tracing::warn!(error = %e, "hot-start: cache load failed"); + return None; + } + }; + + // TTL check. Use wall-clock (matches what the writer used). + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + if now.saturating_sub(observed_at) >= self.config.soft_ttl.as_secs() { + tracing::debug!( + age_secs = now.saturating_sub(observed_at), + ttl_secs = self.config.soft_ttl.as_secs(), + "hot-start: cache entry beyond TTL; re-resolving" + ); + return None; + } + + // Replay-defense check on the cached sequence itself — + // defends against a corrupt/tampered METADATA row. + let seen = self.highest_seen_sequence(); + if cached_seq < seen { + tracing::warn!( + cached = cached_seq, + seen, + "hot-start: cached sequence < in-memory floor; ignoring (corrupt or rolled-back cache)" + ); + return None; + } + + // Fetch bytes — BLOCKS first, then gateway race for the + // cached cid as a network fallback. 
+ let bytes = match cache.get(&cached_cid) { + Ok(Some(b)) => { + tracing::debug!(cid = %cached_cid, "hot-start: BLOCKS hit"); + b + } + Ok(None) => { + // BLOCKS miss: cached metadata says "we know the + // CID" but we don't have the bytes (LRU evicted, or + // the prior resolve failed mid-write). Fetch via + // gateway race for the cached cid; cheaper than the + // full IPNS dance because we skip the DHT lookup. + let gateways = self.ipfs_gateways(); + match fetch_cid_via_gateways( + &cached_cid, + &gateways, + &self.http, + self.config.per_request_timeout, + ) + .await + { + Ok(b) => { + tracing::debug!(cid = %cached_cid, "hot-start: BLOCKS miss → gateway race"); + // Repopulate BLOCKS for the next read. + if let Err(e) = cache.put(&cached_cid, &b).await { + tracing::debug!(error = %e, "hot-start: BLOCKS put failed (best-effort)"); + } + b + } + Err(e) => { + tracing::debug!( + error = %e, + "hot-start: BLOCKS miss AND gateway fetch failed; falling through" + ); + return None; + } + } + } + Err(e) => { + tracing::warn!(error = %e, "hot-start: BLOCKS lookup failed"); + return None; + } + }; + + // Decode + cross-check sequence. The bytes content-address + // to `cached_cid` (BLOCKS hit) or were verified by the + // gateway-fetch (CID match guaranteed by + // `verify_cid_against_bytes`). Decode failure here is + // silent — fall through to network path so a fresh resolve + // can heal a poisoned cache. + let payload = match decode_users_index_cbor(&bytes) { + Ok(p) => p, + Err(e) => { + tracing::warn!(error = %e, "hot-start: cached CBOR parse failed; re-resolving"); + return None; + } + }; + if payload.sequence != cached_seq { + tracing::warn!( + payload_seq = payload.sequence, + metadata_seq = cached_seq, + "hot-start: payload sequence != metadata sequence; cache inconsistent, re-resolving" + ); + return None; + } + + // All checks passed. Bump the in-memory floor to match + // (no-op if already >= cached_seq) and return. 
+ self.bump_seen_sequence(payload.sequence); + Some(ResolvedUsersIndex { + source: ResolutionSource::HotStartCache, + cid: cached_cid, + payload, + bytes, + }) + } + + /// Network resolve path (IPNS-then-chain). Extracted from the + /// old `resolve()` body so the hot-start short-circuit can fall + /// through to it cleanly. + async fn resolve_via_network(&self) -> Result { + let ipns_outcome = tokio::time::timeout( + self.config.ipns_race_timeout, + self.try_ipns(), + ) + .await; + + match ipns_outcome { + Ok(Ok(resolved)) => { + self.bump_seen_sequence(resolved.payload.sequence); + return Ok(resolved); + } + Ok(Err(e)) => { + tracing::debug!( + error = %e, + "registry_resolver: IPNS path exhausted; falling back to chain" + ); + } + Err(_) => { + tracing::debug!( + timeout_secs = self.config.ipns_race_timeout.as_secs(), + "registry_resolver: IPNS timed out; falling back to chain" + ); + } + } + + match self.try_chain().await { + Ok(resolved) => { + self.bump_seen_sequence(resolved.payload.sequence); + Ok(resolved) + } + Err(e) => Err(ClientError::UsersIndexResolutionFailed { + reason: format!("IPNS exhausted; chain: {}", e), + }), + } + } + + /// Phase 3.3.5 — best-effort write of the just-resolved state to + /// the METADATA table + BLOCKS. Failures log and proceed; the + /// caller already has the resolved value, so cache hiccups never + /// block SDK functionality. + /// + /// Synchronous-from-async (no `tokio::spawn`) so the next + /// `resolve()` call observes the freshly-written cache without + /// racing a background task — important because tests using + /// `Mock::expect(N)` would otherwise be flaky on slow CI hosts. + /// Cost is hundreds of microseconds for the two redb txns; + /// negligible vs. the network budget the caller just spent. 
+ async fn persist_to_cache(&self, resolved: &ResolvedUsersIndex) { + let Some(cache) = self.cache.as_ref() else { + return; + }; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + if let Err(e) = cache.store_users_index_state(&resolved.cid, resolved.payload.sequence, now) + { + tracing::warn!( + error = %e, + "registry_resolver: hot-start metadata write failed (best-effort)" + ); + } + if let Err(e) = cache.put(&resolved.cid, &resolved.bytes).await { + // BlockTooLarge is the expected failure for huge global + // CBORs (>cache budget); log at debug, not warn. + tracing::debug!( + error = %e, + "registry_resolver: hot-start BLOCKS put failed (best-effort)" + ); + } + } + + /// IPNS leg — sequential per-gateway fan-out. The first gateway + /// whose body parses + sequence-passes wins. We don't run them + /// in parallel because: + /// - cold-start is rare (once per fresh-device sign-in), + /// - five HEAD-of-line requests waste the user's bandwidth, + /// - the outer 10-s budget bounds the worst case anyway. + async fn try_ipns(&self) -> Result { + let gateways: Vec = if self.config.ipns_gateways.is_empty() { + default_ipns_gateway_urls() + } else { + self.config.ipns_gateways.clone() + }; + + let mut last_err: Option = None; + for tmpl in &gateways { + let url = tmpl.replace("{name}", &self.config.ipns_name); + match self.fetch_with_timeout(&url).await { + Ok(bytes) => match self.parse_and_validate(bytes, ResolutionSource::Ipns) { + Ok(resolved) => return Ok(resolved), + Err(e) => { + // Replay-rejected or parse-failed bodies are + // not a fatal error; another gateway might + // serve a fresher record. 
+ tracing::debug!( + url = %url, error = %e, + "registry_resolver: IPNS body rejected; trying next gateway" + ); + last_err = Some(e.to_string()); + } + }, + Err(e) => { + last_err = Some(e.to_string()); + tracing::debug!(url = %url, error = %e, "registry_resolver: IPNS fetch failed"); + } + } + } + Err(ClientError::UsersIndexResolutionFailed { + reason: format!( + "IPNS exhausted across {} gateways: {}", + gateways.len(), + last_err.unwrap_or_else(|| "no gateways tried".into()) + ), + }) + } + + /// Chain leg — single eth_call to `latest()`, then iterate IPFS + /// gateways for the resulting CID. + async fn try_chain(&self) -> Result { + // Step 1 — eth_call. + let (cid_digest, on_chain_seq) = self.eth_call_latest().await?; + if on_chain_seq < self.highest_seen_sequence() { + return Err(ClientError::SequenceRegression { + observed: on_chain_seq, + highest_seen: self.highest_seen_sequence(), + channel: "chain.latest()".into(), + }); + } + + // Step 2 — reconstruct CID. dag-cbor codec (0x71) + + // sha2-256 multihash (0x12) + the on-chain digest. + let mh = Multihash::<64>::wrap(MULTIHASH_SHA2_256, &cid_digest).map_err(|e| { + ClientError::UsersIndexResolutionFailed { + reason: format!("invalid chain CID digest: {}", e), + } + })?; + let cid = Cid::new_v1(CODEC_DAG_CBOR, mh); + + // Step 3 — iterate IPFS gateways until one body + // content-addresses to `cid`. + let gateways: Vec = if self.config.ipfs_gateways.is_empty() { + default_ipfs_gateway_urls() + } else { + self.config.ipfs_gateways.clone() + }; + let mut last_err: Option = None; + for tmpl in &gateways { + let url = tmpl.replace("{cid}", &cid.to_string()); + let bytes = match self.fetch_with_timeout(&url).await { + Ok(b) => b, + Err(e) => { + last_err = Some(e.to_string()); + continue; + } + }; + if let Err(e) = verify_cid_against_bytes(&cid, &bytes) { + last_err = Some(format!("verify failed at {}: {}", url, e)); + continue; + } + // Step 4 — parse + cross-validate sequence. 
+ let payload = decode_users_index_cbor(&bytes).map_err(|e| { + ClientError::UsersIndexResolutionFailed { + reason: format!("chain-fetched payload parse: {}", e), + } + })?; + if payload.sequence != on_chain_seq { + return Err(ClientError::UsersIndexResolutionFailed { + reason: format!( + "in-CBOR sequence {} != on-chain sequence {} (anomaly: tamper or RPC inconsistency)", + payload.sequence, on_chain_seq + ), + }); + } + return Ok(ResolvedUsersIndex { + source: ResolutionSource::Chain, + cid, + payload, + bytes, + }); + } + Err(ClientError::UsersIndexResolutionFailed { + reason: format!( + "chain CID {} unreachable across {} gateways: {}", + cid, + gateways.len(), + last_err.unwrap_or_else(|| "no gateways tried".into()) + ), + }) + } + + /// Issue the `latest()` eth_call and parse the 96-byte response. + /// Self-contained: assembles the JSON-RPC envelope manually, no + /// dependency on a full ethers-rs client. + async fn eth_call_latest(&self) -> Result<([u8; 32], u64), ClientError> { + let calldata = format!("0x{}", hex::encode(SELECTOR_LATEST)); + let to_addr = format!("0x{}", hex::encode(self.anchor_address_bytes)); + let body = serde_json::json!({ + "jsonrpc": "2.0", + "method": "eth_call", + "params": [{ "to": to_addr, "data": calldata }, "latest"], + "id": 1, + }); + + let resp = tokio::time::timeout( + self.config.per_request_timeout, + self.http + .post(&self.config.chain_rpc_url) + .json(&body) + .send(), + ) + .await + .map_err(|_| ClientError::UsersIndexResolutionFailed { + reason: format!( + "chain RPC timeout after {}s", + self.config.per_request_timeout.as_secs() + ), + })? 
+ .map_err(|e| ClientError::UsersIndexResolutionFailed { + reason: format!("chain RPC transport: {}", e), + })?; + + if !resp.status().is_success() { + return Err(ClientError::UsersIndexResolutionFailed { + reason: format!("chain RPC HTTP {}", resp.status()), + }); + } + let json: serde_json::Value = + resp.json().await.map_err(|e| ClientError::UsersIndexResolutionFailed { + reason: format!("chain RPC response parse: {}", e), + })?; + if let Some(err) = json.get("error") { + return Err(ClientError::UsersIndexResolutionFailed { + reason: format!("chain RPC error: {}", err), + }); + } + let result_hex = json + .get("result") + .and_then(|v| v.as_str()) + .ok_or_else(|| ClientError::UsersIndexResolutionFailed { + reason: "chain RPC: missing result".into(), + })?; + let result_hex = result_hex.strip_prefix("0x").unwrap_or(result_hex); + let raw = + hex::decode(result_hex).map_err(|e| ClientError::UsersIndexResolutionFailed { + reason: format!("chain RPC: hex decode result: {}", e), + })?; + parse_latest_response(&raw) + } + + /// Single-gateway HTTP GET with `per_request_timeout`. Returns + /// raw body on 2xx, error otherwise. Doesn't touch the gateway- + /// pool's dynamic-priority state machine — this is one-shot + /// cold-start, not the ongoing warm-device hot path. + async fn fetch_with_timeout(&self, url: &str) -> Result { + let resp = tokio::time::timeout( + self.config.per_request_timeout, + self.http.get(url).send(), + ) + .await + .map_err(|_| ClientError::UsersIndexResolutionFailed { + reason: format!("HTTP timeout: {}", url), + })? 
+ .map_err(|e| ClientError::UsersIndexResolutionFailed { + reason: format!("HTTP transport ({}): {}", url, e), + })?; + if !resp.status().is_success() { + return Err(ClientError::UsersIndexResolutionFailed { + reason: format!("HTTP {} from {}", resp.status(), url), + }); + } + resp.bytes() + .await + .map_err(|e| ClientError::UsersIndexResolutionFailed { + reason: format!("HTTP body read ({}): {}", url, e), + }) + } + + /// Parse + validate IPNS-fetched bytes. Synthesizes the CID + /// from the bytes (no external CID to verify against on the + /// IPNS path; the gateway did the IPNS-record resolution + /// upstream — the security boundary here is the in-CBOR + /// `sequence` field, not the bytes-to-CID hash). + fn parse_and_validate( + &self, + bytes: Bytes, + source: ResolutionSource, + ) -> Result { + let payload = decode_users_index_cbor(&bytes).map_err(|e| { + ClientError::UsersIndexResolutionFailed { + reason: format!("CBOR decode: {}", e), + } + })?; + let seen = self.highest_seen_sequence(); + if payload.sequence < seen { + return Err(ClientError::SequenceRegression { + observed: payload.sequence, + highest_seen: seen, + channel: format!("{:?}", source), + }); + } + let cid = synthesize_cid_from_bytes(&bytes); + Ok(ResolvedUsersIndex { + source, + cid, + payload, + bytes, + }) + } +} + +// ============================================================ +// Helpers +// ============================================================ + +/// Multihash code for sha2-256 (0x12). +const MULTIHASH_SHA2_256: u64 = 0x12; +/// IPLD codec for dag-cbor (0x71). +const CODEC_DAG_CBOR: u64 = 0x71; + +/// `keccak256("latest()")[..4]`. Hardcoded so the production build +/// has zero crypto dependency for this constant. +/// +/// MUST stay in sync with `tests::abi_selector_latest_matches_keccak256` +/// — that test is the **source of truth**, this constant is just the +/// cache. 
Do not delete the test "because it's redundant"; without it, +/// a typo here goes unnoticed until the SDK silently calls the wrong +/// 4-byte selector on the deployed `FulaUsersIndexAnchor`. +const SELECTOR_LATEST: [u8; 4] = [0x52, 0xbf, 0xe7, 0x89]; + +/// Parse a 0x-prefixed-or-not 40-char hex address into 20 bytes. +fn parse_anchor_address(s: &str) -> Result<[u8; 20], ClientError> { + let s = s.strip_prefix("0x").unwrap_or(s); + let bytes = hex::decode(s).map_err(|e| { + ClientError::Config(format!("registry resolver: invalid anchor_address hex: {}", e)) + })?; + if bytes.len() != 20 { + return Err(ClientError::Config(format!( + "registry resolver: anchor_address must be 20 bytes, got {}", + bytes.len() + ))); + } + let mut out = [0u8; 20]; + out.copy_from_slice(&bytes); + Ok(out) +} + +/// Parse the 96-byte ABI-encoded return of `latest()`. +/// Layout (Solidity packs `uint64` right-aligned within a 32-byte slot): +/// bytes[0..32] = cid_digest (full 32 bytes) +/// bytes[32..64] = sequence (u64 BE in last 8 bytes) +/// bytes[64..96] = updatedAt (u64 BE in last 8 bytes) — **dropped** +/// +/// We deliberately drop `updatedAt` here: nothing in the SDK's +/// security model depends on it (sequence is the security boundary, +/// and `block.timestamp` is miner-influenceable on EVM chains anyway). +/// Returning a richer tuple would invite callers to make decisions on +/// it; keeping the parser narrow forces the right shape. +fn parse_latest_response(raw: &[u8]) -> Result<([u8; 32], u64), ClientError> { + if raw.len() < 96 { + return Err(ClientError::UsersIndexResolutionFailed { + reason: format!( + "chain `latest()` returned {} bytes (expected ≥ 96)", + raw.len() + ), + }); + } + let mut cid_digest = [0u8; 32]; + cid_digest.copy_from_slice(&raw[0..32]); + // u64 lives in the last 8 bytes of the 32-byte slot. 
+ let mut seq_be = [0u8; 8]; + seq_be.copy_from_slice(&raw[32 + 24..32 + 32]); + let sequence = u64::from_be_bytes(seq_be); + Ok((cid_digest, sequence)) +} + +/// Decode dag-cbor bytes as a `GlobalUsersIndex`. Wraps the +/// ipld-dagcbor crate's error in our own typed error. +fn decode_users_index_cbor(bytes: &[u8]) -> Result { + serde_ipld_dagcbor::from_slice(bytes).map_err(|e| e.to_string()) +} + +/// Synthesize a CIDv1 (dag-cbor + sha2-256) from a body. Used for +/// the IPNS path's reported `ResolvedUsersIndex.cid` so callers can +/// use it as a cache key. NOT a security claim — IPNS bytes are +/// trusted via the in-payload `sequence`, not via this hash. +fn synthesize_cid_from_bytes(bytes: &[u8]) -> Cid { + use sha2::{Digest, Sha256}; + let mut hasher = Sha256::new(); + hasher.update(bytes); + let digest = hasher.finalize(); + // wrap() can only fail if digest is wrong size; sha2-256 is + // always exactly 32 bytes so unwrap is safe. + let mh = Multihash::<64>::wrap(MULTIHASH_SHA2_256, &digest).expect("32-byte sha2 digest"); + Cid::new_v1(CODEC_DAG_CBOR, mh) +} + +// ============================================================ +// Tests +// ============================================================ + +#[cfg(test)] +mod tests { + use super::*; + use sha3::{Digest, Keccak256}; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + /// The hardcoded `SELECTOR_LATEST` MUST equal the canonical + /// `keccak256("latest()")[..4]`. If a future refactor renames the + /// solidity function and someone forgets to update the constant, + /// this test catches it before the SDK silently calls the wrong + /// selector against the deployed contract. 
+ #[test] + fn abi_selector_latest_matches_keccak256() { + let mut hasher = Keccak256::new(); + hasher.update(b"latest()"); + let full = hasher.finalize(); + let expected: [u8; 4] = [full[0], full[1], full[2], full[3]]; + assert_eq!( + SELECTOR_LATEST, expected, + "SELECTOR_LATEST drifted from keccak256(\"latest()\")[..4]: \ + expected 0x{:02x}{:02x}{:02x}{:02x}, got 0x{:02x}{:02x}{:02x}{:02x}", + expected[0], expected[1], expected[2], expected[3], + SELECTOR_LATEST[0], SELECTOR_LATEST[1], SELECTOR_LATEST[2], SELECTOR_LATEST[3] + ); + } + + /// Build a syntactically-valid CBOR-encoded payload for tests. + fn make_payload_cbor(sequence: u64) -> (Bytes, GlobalUsersIndex) { + let payload = GlobalUsersIndex { + v: 1, + sequence, + updated_at_unix: 1_700_000_000, + users: BTreeMap::new(), + }; + let bytes = serde_ipld_dagcbor::to_vec(&payload).expect("encode"); + (Bytes::from(bytes), payload) + } + + fn fixture_address() -> String { + // 20-byte zero address with 0x prefix. parse_anchor_address + // accepts both forms. + "0x0000000000000000000000000000000000000001".to_string() + } + + fn fixture_ipns_name() -> String { + // Real-shape libp2p public key hash (b58btc-encoded ed25519); + // resolver doesn't validate the key format, just substitutes. 
+ "k51qzi5uqu5dh-test".to_string() + } + + #[test] + fn parse_anchor_address_accepts_with_or_without_0x() { + let with_prefix = parse_anchor_address("0x0000000000000000000000000000000000000001") + .expect("with 0x"); + let without = parse_anchor_address("0000000000000000000000000000000000000001") + .expect("without 0x"); + assert_eq!(with_prefix, without); + assert_eq!(with_prefix[19], 1); + for &b in &with_prefix[..19] { + assert_eq!(b, 0); + } + } + + #[test] + fn parse_anchor_address_rejects_wrong_length() { + assert!(parse_anchor_address("0xdeadbeef").is_err()); + assert!(parse_anchor_address("0x").is_err()); + assert!(parse_anchor_address("not-hex").is_err()); + } + + #[test] + fn parse_latest_response_extracts_correct_fields() { + // Build a 96-byte response: digest = 0xff*32, sequence = 42, ts = 100. + let mut raw = vec![0u8; 96]; + for i in 0..32 { + raw[i] = 0xff; + } + raw[32 + 24..32 + 32].copy_from_slice(&42u64.to_be_bytes()); + raw[64 + 24..64 + 32].copy_from_slice(&100u64.to_be_bytes()); + + let (digest, seq) = parse_latest_response(&raw).expect("parse"); + assert_eq!(digest, [0xff; 32]); + assert_eq!(seq, 42); + } + + #[test] + fn parse_latest_response_rejects_short_input() { + let short = vec![0u8; 95]; + assert!(parse_latest_response(&short).is_err()); + } + + #[test] + fn synthesize_cid_is_deterministic_and_dagcbor_sha256() { + let bytes = b"some payload bytes"; + let c1 = synthesize_cid_from_bytes(bytes); + let c2 = synthesize_cid_from_bytes(bytes); + assert_eq!(c1, c2, "synthesis is deterministic"); + assert_eq!(c1.codec(), CODEC_DAG_CBOR); + assert_eq!(c1.hash().code(), MULTIHASH_SHA2_256); + assert_eq!(c1.hash().digest().len(), 32); + } + + #[test] + fn resolver_new_rejects_empty_rpc_url() { + let mut cfg = ResolverConfig::new("", fixture_address(), fixture_ipns_name()); + let err = UsersIndexResolver::new(cfg.clone()).unwrap_err(); + assert!(matches!(err, ClientError::Config(_))); + cfg.chain_rpc_url = "https://rpc.example".into(); + 
cfg.ipns_name = "".into(); + let err = UsersIndexResolver::new(cfg).unwrap_err(); + assert!(matches!(err, ClientError::Config(_))); + } + + #[test] + fn resolver_new_rejects_bad_anchor_address() { + let cfg = ResolverConfig::new( + "https://rpc.example", + "0xdeadbeef", // too short + fixture_ipns_name(), + ); + let err = UsersIndexResolver::new(cfg).unwrap_err(); + assert!(matches!(err, ClientError::Config(_))); + } + + #[tokio::test] + async fn resolve_via_ipns_succeeds_when_first_gateway_serves_valid_payload() { + let (cbor, _) = make_payload_cbor(7); + let mock = MockServer::start().await; + let url_path = format!("/ipns/{}", fixture_ipns_name()); + Mock::given(method("GET")) + .and(path(url_path)) + .respond_with(ResponseTemplate::new(200).set_body_bytes(cbor.as_ref())) + .mount(&mock) + .await; + + let mut cfg = ResolverConfig::new( + "https://chain.example/rpc", // never called on success + fixture_address(), + fixture_ipns_name(), + ); + cfg.ipns_gateways = vec![format!("{}/ipns/{{name}}", mock.uri())]; + cfg.ipns_race_timeout = Duration::from_secs(5); + cfg.per_request_timeout = Duration::from_secs(2); + + let resolver = UsersIndexResolver::new(cfg).expect("new"); + let r = resolver.resolve().await.expect("resolve"); + assert_eq!(r.source, ResolutionSource::Ipns); + assert_eq!(r.payload.sequence, 7); + assert_eq!(resolver.highest_seen_sequence(), 7); + } + + #[tokio::test] + async fn resolve_falls_through_to_chain_when_ipns_rejected_for_sequence_regression() { + // Setup: IPNS returns seq=3, but the resolver's floor is + // already at 5 (apps seeded it from a hot-start cache). The + // IPNS payload is replay-rejected. Chain returns seq=10, + // which is accepted. Resolver returns the chain payload. 
+ let (ipns_cbor, _) = make_payload_cbor(3); + let (chain_cbor, _) = make_payload_cbor(10); + + let ipns = MockServer::start().await; + let chain_rpc = MockServer::start().await; + let chain_gw = MockServer::start().await; + + // IPNS gateway → seq=3 body (will be rejected as regression). + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", fixture_ipns_name()))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(ipns_cbor.as_ref())) + .mount(&ipns) + .await; + + // Compute the chain CID from the chain_cbor bytes so we can + // mock the gateway response correctly. The eth_call returns + // the digest; the gateway serves bytes that hash to it. + let chain_cid = synthesize_cid_from_bytes(&chain_cbor); + let chain_digest = chain_cid.hash().digest(); + + // Chain RPC mock — return the digest + seq=10 + ts=anything. + let mut raw = vec![0u8; 96]; + raw[0..32].copy_from_slice(chain_digest); + raw[32 + 24..32 + 32].copy_from_slice(&10u64.to_be_bytes()); + raw[64 + 24..64 + 32].copy_from_slice(&12345u64.to_be_bytes()); + let result_hex = format!("0x{}", hex::encode(&raw)); + Mock::given(method("POST")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "jsonrpc": "2.0", + "id": 1, + "result": result_hex, + })), + ) + .mount(&chain_rpc) + .await; + + // IPFS gateway for the chain CID → return chain_cbor bytes. 
+ let cid_str = chain_cid.to_string(); + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", cid_str))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(chain_cbor.as_ref())) + .mount(&chain_gw) + .await; + + let mut cfg = ResolverConfig::new( + chain_rpc.uri(), + fixture_address(), + fixture_ipns_name(), + ); + cfg.ipns_gateways = vec![format!("{}/ipns/{{name}}", ipns.uri())]; + cfg.ipfs_gateways = vec![format!("{}/ipfs/{{cid}}", chain_gw.uri())]; + cfg.ipns_race_timeout = Duration::from_secs(2); + cfg.per_request_timeout = Duration::from_secs(2); + + let resolver = UsersIndexResolver::new(cfg).expect("new"); + // Seed the floor to 5 so the IPNS seq=3 is rejected. + resolver.set_highest_seen_sequence(5); + + let r = resolver.resolve().await.expect("resolve"); + assert_eq!(r.source, ResolutionSource::Chain); + assert_eq!(r.payload.sequence, 10); + assert_eq!(resolver.highest_seen_sequence(), 10); + } + + #[tokio::test] + async fn resolve_returns_error_when_both_paths_fail() { + // IPNS gateway returns 503; chain RPC returns malformed JSON. + // Resolver surfaces UsersIndexResolutionFailed. + let ipns = MockServer::start().await; + let chain_rpc = MockServer::start().await; + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(503)) + .mount(&ipns) + .await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500).set_body_string("not json")) + .mount(&chain_rpc) + .await; + + let mut cfg = ResolverConfig::new( + chain_rpc.uri(), + fixture_address(), + fixture_ipns_name(), + ); + cfg.ipns_gateways = vec![format!("{}/ipns/{{name}}", ipns.uri())]; + cfg.ipns_race_timeout = Duration::from_secs(2); + cfg.per_request_timeout = Duration::from_secs(2); + + let resolver = UsersIndexResolver::new(cfg).expect("new"); + let err = resolver.resolve().await.expect_err("both fail"); + assert!( + matches!(err, ClientError::UsersIndexResolutionFailed { .. 
}), + "expected UsersIndexResolutionFailed, got {:?}", + err + ); + } + + #[tokio::test] + async fn resolve_chain_path_rejects_cid_digest_mismatch() { + // The chain returns digest D, but the gateway serves bytes + // whose sha2-256 != D. verify_cid_against_bytes fails and + // the resolver should NOT accept the payload — surfaces an + // UsersIndexResolutionFailed mentioning verify failure. + let (cbor_legit, _) = make_payload_cbor(10); + let cbor_tampered = Bytes::from_static(b"this is not the real CBOR payload"); + + let ipns = MockServer::start().await; + let chain_rpc = MockServer::start().await; + let chain_gw = MockServer::start().await; + + // IPNS gateway serves nothing useful → resolver must use chain. + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(404)) + .mount(&ipns) + .await; + + // Chain RPC says "real CID is X with seq=10". + let real_cid = synthesize_cid_from_bytes(&cbor_legit); + let real_digest = real_cid.hash().digest(); + let mut raw = vec![0u8; 96]; + raw[0..32].copy_from_slice(real_digest); + raw[32 + 24..32 + 32].copy_from_slice(&10u64.to_be_bytes()); + Mock::given(method("POST")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "jsonrpc": "2.0", + "id": 1, + "result": format!("0x{}", hex::encode(&raw)), + })), + ) + .mount(&chain_rpc) + .await; + + // Gateway serves DIFFERENT bytes — verify_cid_against_bytes + // must reject. 
+ Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", real_cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(cbor_tampered.as_ref())) + .mount(&chain_gw) + .await; + + let mut cfg = ResolverConfig::new( + chain_rpc.uri(), + fixture_address(), + fixture_ipns_name(), + ); + cfg.ipns_gateways = vec![format!("{}/ipns/{{name}}", ipns.uri())]; + cfg.ipfs_gateways = vec![format!("{}/ipfs/{{cid}}", chain_gw.uri())]; + cfg.ipns_race_timeout = Duration::from_secs(2); + cfg.per_request_timeout = Duration::from_secs(2); + + let resolver = UsersIndexResolver::new(cfg).expect("new"); + let err = resolver.resolve().await.expect_err("verify fails"); + assert!(matches!(err, ClientError::UsersIndexResolutionFailed { .. })); + } + + #[tokio::test] + async fn resolve_chain_path_rejects_in_cbor_seq_mismatch() { + // Chain says seq=10 but the bytes-fetched payload has seq=11. + // Defensive: resolver must surface this as a tamper / RPC- + // inconsistency anomaly, NOT silently use either side. + let (cbor_seq_11, _) = make_payload_cbor(11); + + let ipns = MockServer::start().await; + let chain_rpc = MockServer::start().await; + let chain_gw = MockServer::start().await; + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(404)) + .mount(&ipns) + .await; + + // Chain says seq=10, digest of cbor_seq_11. 
+ let cid = synthesize_cid_from_bytes(&cbor_seq_11); + let mut raw = vec![0u8; 96]; + raw[0..32].copy_from_slice(cid.hash().digest()); + raw[32 + 24..32 + 32].copy_from_slice(&10u64.to_be_bytes()); + Mock::given(method("POST")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "jsonrpc": "2.0", + "id": 1, + "result": format!("0x{}", hex::encode(&raw)), + })), + ) + .mount(&chain_rpc) + .await; + + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(cbor_seq_11.as_ref())) + .mount(&chain_gw) + .await; + + let mut cfg = ResolverConfig::new( + chain_rpc.uri(), + fixture_address(), + fixture_ipns_name(), + ); + cfg.ipns_gateways = vec![format!("{}/ipns/{{name}}", ipns.uri())]; + cfg.ipfs_gateways = vec![format!("{}/ipfs/{{cid}}", chain_gw.uri())]; + cfg.ipns_race_timeout = Duration::from_secs(2); + cfg.per_request_timeout = Duration::from_secs(2); + + let resolver = UsersIndexResolver::new(cfg).expect("new"); + let err = resolver.resolve().await.expect_err("seq mismatch"); + let msg = format!("{}", err); + assert!( + msg.contains("sequence") + || msg.contains("anomaly") + || matches!(err, ClientError::UsersIndexResolutionFailed { .. }), + "expected sequence-mismatch error, got: {}", + msg + ); + } + + #[tokio::test] + async fn replay_defense_rejects_chain_regression() { + // Floor is 100; chain returns seq=50. Resolver MUST reject + // even though the bytes verify and parse correctly. This + // is the chain-side replay-defense path. 
+ let (cbor, _) = make_payload_cbor(50); + let cid = synthesize_cid_from_bytes(&cbor); + + let ipns = MockServer::start().await; + let chain_rpc = MockServer::start().await; + let chain_gw = MockServer::start().await; + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(404)) + .mount(&ipns) + .await; + + let mut raw = vec![0u8; 96]; + raw[0..32].copy_from_slice(cid.hash().digest()); + raw[32 + 24..32 + 32].copy_from_slice(&50u64.to_be_bytes()); + Mock::given(method("POST")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "jsonrpc": "2.0", + "id": 1, + "result": format!("0x{}", hex::encode(&raw)), + })), + ) + .mount(&chain_rpc) + .await; + + Mock::given(method("GET")) + .and(path(format!("/ipfs/{}", cid))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(cbor.as_ref())) + .mount(&chain_gw) + .await; + + let mut cfg = ResolverConfig::new( + chain_rpc.uri(), + fixture_address(), + fixture_ipns_name(), + ); + cfg.ipns_gateways = vec![format!("{}/ipns/{{name}}", ipns.uri())]; + cfg.ipfs_gateways = vec![format!("{}/ipfs/{{cid}}", chain_gw.uri())]; + cfg.ipns_race_timeout = Duration::from_secs(2); + cfg.per_request_timeout = Duration::from_secs(2); + + let resolver = UsersIndexResolver::new(cfg).expect("new"); + resolver.set_highest_seen_sequence(100); + let err = resolver.resolve().await.expect_err("regression rejected"); + // Either UsersIndexResolutionFailed (wrapper) or + // SequenceRegression directly is acceptable; both signal + // "do not accept" to the caller. + match err { + ClientError::SequenceRegression { observed, highest_seen, channel } => { + assert_eq!(observed, 50); + assert_eq!(highest_seen, 100); + assert!(!channel.is_empty(), "channel label should be set"); + } + ClientError::UsersIndexResolutionFailed { .. 
} => { /* also fine */ } + other => panic!("unexpected error: {:?}", other), + } + } + + /// `derive_user_key_from_email` MUST produce a 32-hex-char output + /// matching what `fula-cli/src/state.rs::hash_user_id` would + /// produce against the same `userId` (= sha256-hex of + /// lower(email)). Reproduces the master algorithm step-by-step + /// here so the two stay in lockstep — without this test, a + /// future master-side refactor could silently desync the SDK + /// from the published global users-index keys. + #[test] + fn derive_user_key_matches_master_state_rs_algorithm() { + use sha2::{Digest, Sha256}; + + // Reference inputs. + let email = "User@Example.COM"; + let email_lower = "user@example.com"; + + // SDK derives directly from email. + let sdk_key = derive_user_key_from_email(email); + + // Reproduce master's chain: lower(email) → sha256 → hex → blake3 → first 16 bytes hex. + let user_id_digest = Sha256::digest(email_lower.as_bytes()); + let user_id_hex = hex::encode(user_id_digest); + // master state.rs: hash_user_id(user_id_str) = + // blake3::Hasher::new() + // .update(b"fula:user_id:") + // .update(user_id_str.as_bytes()) + // .finalize()[..16] hex + let mut hasher = blake3::Hasher::new(); + hasher.update(b"fula:user_id:"); + hasher.update(user_id_hex.as_bytes()); + let master_key = hex::encode(&hasher.finalize().as_bytes()[..16]); + + assert_eq!( + sdk_key, master_key, + "SDK derive_user_key_from_email diverged from master state.rs::hash_user_id; \ + email={}, sdk={}, master={}", + email, sdk_key, master_key + ); + assert_eq!(sdk_key.len(), 32, "userKey must be 32 hex chars (16 bytes)"); + } + + #[test] + fn derive_user_key_normalizes_email_case() { + // Email is case-insensitive (per RFC 5321 local-part is, in practice, + // a courtesy and master normalizes too). Same email different case + // MUST yield the same userKey, otherwise users would lose access + // when their app capitalizes differently than master. 
+ let a = derive_user_key_from_email("alice@example.com"); + let b = derive_user_key_from_email("ALICE@EXAMPLE.COM"); + let c = derive_user_key_from_email("Alice@Example.com"); + assert_eq!(a, b); + assert_eq!(a, c); + } + + #[test] + fn derive_user_key_distinguishes_different_users() { + let a = derive_user_key_from_email("alice@example.com"); + let b = derive_user_key_from_email("bob@example.com"); + assert_ne!(a, b); + } + + // ============================================================ + // Phase 3.3.5 — hot-start cache reuse tests (advisor-mandated 4) + // ============================================================ + // + // Each test constructs both a network-mock universe (wiremock) + // and a real on-disk BlockCache (TempDir + redb). The cache + // survives across resolver constructions (simulating SDK + // restart) so we can verify replay-defense persistence and the + // soft-TTL short-circuit behavior. + + use crate::block_cache::BlockCache; + use std::path::PathBuf; + use tempfile::TempDir; + + fn make_payload_with_seq(sequence: u64) -> (Bytes, GlobalUsersIndex) { + let payload = GlobalUsersIndex { + v: 1, + sequence, + updated_at_unix: 1_700_000_000, + users: BTreeMap::new(), + }; + let bytes = serde_ipld_dagcbor::to_vec(&payload).expect("encode"); + (Bytes::from(bytes), payload) + } + + fn fixture_resolver_config_with_ipns(ipns_url: &str) -> ResolverConfig { + let mut cfg = ResolverConfig::new( + "http://chain-rpc.unused/", // never called on hot-start path + fixture_address(), + fixture_ipns_name(), + ); + cfg.ipns_gateways = vec![format!("{}/ipns/{{name}}", ipns_url)]; + cfg.ipns_race_timeout = Duration::from_secs(2); + cfg.per_request_timeout = Duration::from_secs(2); + cfg.soft_ttl = Duration::from_secs(60); + cfg + } + + /// Test 1 — replay-defense floor survives SDK restart. + /// Round-trip through the cache: resolve seq=42 → drop resolver + /// → reopen against same cache → highest_seen_sequence == 42. 
+ #[tokio::test] + async fn hot_start_seeds_floor_across_restart() { + let dir = TempDir::new().unwrap(); + let cache_path: PathBuf = dir.path().join("cache.redb"); + + // Open cache, manually plant a (cid, seq) row — simulates + // a prior successful resolve. (Avoids the full wiremock + // setup since this test is about restart semantics, not + // resolve mechanics.) + { + let cache = BlockCache::open(&cache_path, 1024 * 1024).expect("open"); + let cid = synthesize_cid_from_bytes(b"some payload"); + cache + .store_users_index_state(&cid, 42, 1_700_000_000) + .expect("store"); + } // cache dropped → file lock released + + // Re-open cache + construct resolver via new_with_cache. + let cache = Arc::new(BlockCache::open(&cache_path, 1024 * 1024).expect("re-open")); + let cfg = ResolverConfig::new( + "http://rpc.unused/", + fixture_address(), + fixture_ipns_name(), + ); + let resolver = UsersIndexResolver::new_with_cache(cfg, cache).expect("new_with_cache"); + + assert_eq!( + resolver.highest_seen_sequence(), + 42, + "replay-defense floor MUST survive restart and seed from persisted state" + ); + } + + /// Test 2 — replay regression after restart is rejected. + /// Restart with floor=99; IPNS returns seq=50; resolver MUST + /// reject (not silently serve the stale payload). + #[tokio::test] + async fn hot_start_rejects_regression_after_restart() { + let dir = TempDir::new().unwrap(); + let cache_path: PathBuf = dir.path().join("cache.redb"); + + // Plant a high floor (seq=99). + { + let cache = BlockCache::open(&cache_path, 1024 * 1024).expect("open"); + let placeholder = synthesize_cid_from_bytes(b"placeholder"); + cache + .store_users_index_state(&placeholder, 99, 0) + .expect("plant"); + } + + // wiremock IPNS serves seq=50 (regression). 
+ let ipns = MockServer::start().await; + let (regress_bytes, _) = make_payload_with_seq(50); + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", fixture_ipns_name()))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(regress_bytes.as_ref())) + .mount(&ipns) + .await; + + let cache = Arc::new(BlockCache::open(&cache_path, 1024 * 1024).expect("re-open")); + // observed_at = 0 → way past TTL → hot-start short-circuit + // should NOT fire; resolver falls through to network. + let mut cfg = fixture_resolver_config_with_ipns(&ipns.uri()); + cfg.soft_ttl = Duration::from_secs(60); // bigger than 0-vs-now gap doesn't matter; observed_at=0 + let resolver = UsersIndexResolver::new_with_cache(cfg, cache).expect("new_with_cache"); + + assert_eq!(resolver.highest_seen_sequence(), 99, "floor seeded"); + + // resolve() → IPNS returns seq=50; replay-defense rejects. + // Falls through to chain (also fails since RPC URL is + // unused). Final error: UsersIndexResolutionFailed wrapping + // the IPNS exhaustion (the resolver internally rejected the + // regression and treated it as "IPNS failed"). + let err = resolver.resolve().await.expect_err("must reject"); + // The regression is observed inside try_ipns and surfaces + // as UsersIndexResolutionFailed — the chain leg also can't + // help (RPC URL unused), so the wrapper combines them. + assert!( + matches!(err, ClientError::UsersIndexResolutionFailed { .. }), + "expected resolution failure, got: {:?}", + err + ); + + // Floor unchanged — 99 still holds. + assert_eq!( + resolver.highest_seen_sequence(), + 99, + "regression payload must NOT advance the floor" + ); + } + + /// Test 3 — hot-start within TTL serves cached state without + /// touching the network. Uses `Mock::expect(1)` on the IPNS + /// mock: the second resolve() call MUST hit the cache. If the + /// short-circuit is broken, IPNS would be called twice, and + /// wiremock would panic in its `Drop` impl on test exit. 
+ #[tokio::test] + async fn hot_start_within_ttl_skips_network() { + let dir = TempDir::new().unwrap(); + let cache_path: PathBuf = dir.path().join("cache.redb"); + let cache = Arc::new(BlockCache::open(&cache_path, 1024 * 1024).expect("open")); + + let ipns = MockServer::start().await; + let (cbor, _) = make_payload_with_seq(7); + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", fixture_ipns_name()))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(cbor.as_ref())) + .expect(1) // IPNS hit at most ONCE; second resolve MUST be cached + .mount(&ipns) + .await; + + let cfg = fixture_resolver_config_with_ipns(&ipns.uri()); + let resolver = + UsersIndexResolver::new_with_cache(cfg, Arc::clone(&cache)).expect("new_with_cache"); + + // First resolve: hits IPNS, populates cache. `persist_to_cache` + // is synchronous-from-async (no spawned background task), so + // when `resolve` returns the METADATA + BLOCKS rows are + // already on disk. The second resolve will see them. + let r1 = resolver.resolve().await.expect("first resolve"); + assert_eq!(r1.source, ResolutionSource::Ipns); + assert_eq!(r1.payload.sequence, 7); + + // Second resolve: should be served from cache. wiremock + // panics on Drop if IPNS was called more than `expect(1)`. + let r2 = resolver.resolve().await.expect("second resolve"); + assert_eq!( + r2.source, + ResolutionSource::HotStartCache, + "second resolve must be served from hot-start cache (not the network)" + ); + assert_eq!(r2.payload.sequence, 7); + } + + /// Test 4 — hot-start beyond TTL re-resolves. Configure a + /// 1-second `soft_ttl`; resolve once; sleep 2 seconds; resolve + /// again. The second resolve MUST re-hit IPNS (so the mock is + /// expected to fire twice). 
+ #[tokio::test] + async fn hot_start_beyond_ttl_re_resolves() { + let dir = TempDir::new().unwrap(); + let cache_path: PathBuf = dir.path().join("cache.redb"); + let cache = Arc::new(BlockCache::open(&cache_path, 1024 * 1024).expect("open")); + + let ipns = MockServer::start().await; + let (cbor, _) = make_payload_with_seq(11); + Mock::given(method("GET")) + .and(path(format!("/ipns/{}", fixture_ipns_name()))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(cbor.as_ref())) + .expect(2) // both resolves must hit IPNS — TTL elapsed between them + .mount(&ipns) + .await; + + let mut cfg = fixture_resolver_config_with_ipns(&ipns.uri()); + cfg.soft_ttl = Duration::from_secs(1); // tight TTL for the test + let resolver = + UsersIndexResolver::new_with_cache(cfg, Arc::clone(&cache)).expect("new_with_cache"); + + let r1 = resolver.resolve().await.expect("first resolve"); + assert_eq!(r1.source, ResolutionSource::Ipns); + + // Wait past the TTL. + tokio::time::sleep(Duration::from_millis(1500)).await; + + let r2 = resolver.resolve().await.expect("second resolve"); + assert_eq!( + r2.source, + ResolutionSource::Ipns, + "after TTL elapse, resolver must re-fetch from IPNS rather than serve stale cache" + ); + } + + // ============================================================ + // Pre-existing tests below (Phase 3.3 sub-step A) + // ============================================================ + + #[test] + fn highest_seen_sequence_is_monotonic() { + let cfg = ResolverConfig::new( + "https://rpc.example", + fixture_address(), + fixture_ipns_name(), + ); + let resolver = UsersIndexResolver::new(cfg).expect("new"); + assert_eq!(resolver.highest_seen_sequence(), 0); + resolver.bump_seen_sequence(5); + assert_eq!(resolver.highest_seen_sequence(), 5); + // Lower value MUST NOT lower the floor. + resolver.bump_seen_sequence(3); + assert_eq!(resolver.highest_seen_sequence(), 5); + // Equal value is also a no-op. 
+ resolver.bump_seen_sequence(5); + assert_eq!(resolver.highest_seen_sequence(), 5); + // Higher value advances. + resolver.bump_seen_sequence(7); + assert_eq!(resolver.highest_seen_sequence(), 7); + } +} diff --git a/crates/fula-client/src/types.rs b/crates/fula-client/src/types.rs index 58319b0..063aadb 100644 --- a/crates/fula-client/src/types.rs +++ b/crates/fula-client/src/types.rs @@ -115,6 +115,82 @@ pub struct GetObjectResult { pub metadata: std::collections::HashMap, } +/// Phase 19 — origin of a successfully-served byte payload. +/// +/// Apps that surface offline indicators inspect this field to decide +/// what to show: `Master` is the fast path, `LocalCache` is a redb +/// BLOCKS hit (no network), and `Gateway(url)` records which IPFS +/// gateway the gateway-race elected. Defaulting to `Master` keeps +/// pre-Phase-19 callers byte-identical (they ignore the field). +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ReadSource { + /// Master S3 served the request directly. + Master, + /// On-disk redb BLOCKS table served the bytes — no network round-trip. + LocalCache, + /// Public IPFS gateway served the bytes (master-down fallback path). + /// The string is the URL template (e.g. `https://ipfs.io/ipfs/{cid}`) + /// used at fetch time, useful for diagnostics or "served by Cloudflare" + /// surfacing in operator dashboards. + Gateway(String), +} + +/// Phase 19 — freshness signal for a successfully-served byte payload. +/// +/// `Live` is the master-served fast path. `Cached { observed_at }` is +/// returned when bytes came from local redb (BLOCKS hit) — apps may +/// choose to surface "viewing a saved copy" UI based on age. The +/// `StaleByDesign` / `StaleByOutage` variants are reserved for Phase +/// 3.3 cold-start where the SDK can attribute snapshot age to the +/// publisher cadence vs. an actual master outage; today the master-down +/// fallback path emits `Cached`. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ReadFreshness { + /// Master-served bytes (fresh). + Live, + /// Served from on-disk redb cache; `observed_at` is the unix-millis + /// when the entry was first written. Apps display age relative to + /// this if they care to show staleness. + Cached { observed_at: u64 }, + /// Cold-start cross-device read; snapshot age within the configured + /// publisher cadence (≤ `USERS_INDEX_FLUSH_INTERVAL`). Apps may + /// surface "synced N min ago". + /// + /// **Phase 3.3 scaffolding — not emitted by Phase 19.** Wired in + /// when the cold-start resolver lands (task #18); resolver computes + /// `snapshot_age_secs = now - resolved.payload.updated_at_unix` + /// and selects this vs. `StaleByOutage` based on whether age is + /// inside the publisher cadence. + StaleByDesign { snapshot_age_secs: u64 }, + /// Cold-start cross-device read; snapshot age exceeds the + /// publisher cadence — likely indicates an actual master outage. + /// + /// **Phase 3.3 scaffolding — not emitted by Phase 19.** See + /// `StaleByDesign` doc above. + StaleByOutage { snapshot_age_secs: u64 }, +} + +/// Phase 19 — wrapper around `GetObjectResult` carrying transparency +/// fields (`source`, `freshness`). +/// +/// **Why a wrapper instead of fields on `GetObjectResult`:** the +/// existing struct is part of the SDK's public API consumed by callers +/// that pattern-match it exhaustively. Adding fields breaks them. A +/// new wrapper type lets callers opt in to the transparency surface +/// while existing consumers (including encrypted-SDK internals that +/// read `.data` / `.etag`) keep using `GetObjectResult` unchanged. +#[derive(Clone, Debug)] +pub struct OfflineGetResult { + /// The underlying `GetObjectResult` — `data`, `etag`, etc., are on + /// `inner`. Callers that don't care about transparency just read + /// `result.inner.data`. + pub inner: GetObjectResult, + /// Where the bytes ultimately came from. See `ReadSource` for variants. 
+ pub source: ReadSource, + /// How fresh the bytes are. See `ReadFreshness` for variants. + pub freshness: ReadFreshness, +} + /// Head object result #[derive(Clone, Debug)] pub struct HeadObjectResult { diff --git a/crates/fula-flutter/Cargo.toml b/crates/fula-flutter/Cargo.toml index fdaf48e..55b4a72 100644 --- a/crates/fula-flutter/Cargo.toml +++ b/crates/fula-flutter/Cargo.toml @@ -57,6 +57,11 @@ wasm-bindgen-test = "0.3" # time APIs aren't re-introduced into the migration path. web-time = "1" +# Native-only dev-deps (Phase 2.x config plumbing tests construct +# tempdirs to verify block_cache_path round-trips through the bridge). +[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] +tempfile = { workspace = true } + [features] default = [] diff --git a/crates/fula-flutter/src/api/client.rs b/crates/fula-flutter/src/api/client.rs index 8154543..84786b0 100644 --- a/crates/fula-flutter/src/api/client.rs +++ b/crates/fula-flutter/src/api/client.rs @@ -15,24 +15,55 @@ use async_lock::RwLock; use crate::api::types::*; +/// Build the underlying `fula_client::Config` from the Dart-facing +/// `FulaConfig`, plumbing every Phase 1.2 / 2.x field through. Used by +/// `create_client`, `create_encrypted_client`, and +/// `create_encrypted_client_with_pinning` to keep the three constructors +/// in lockstep — adding a new field to FulaConfig only requires a +/// change here. +fn build_inner_config(config: &FulaConfig) -> fula_client::Config { + let mut inner = fula_client::Config::new(&config.endpoint) + .with_timeout(Duration::from_secs(config.timeout_seconds)); + + // Existing F8/F10 fields. + inner.per_chunk_download_timeout = + Duration::from_secs(config.per_chunk_download_timeout_seconds); + inner.buffered_download_max_bytes = config.buffered_download_max_bytes; + + // Phase 2.1 — health gate. + inner.health_gate_enabled = config.health_gate_enabled; + inner.health_gate_ttl = Duration::from_secs(config.health_gate_ttl_seconds); + + // Phase 2.2 — block cache. 
The path-string conversion treats + // empty string as `None` so the SDK's `dirs`-based platform + // default kicks in. + inner.block_cache_enabled = config.block_cache_enabled; + inner.block_cache_path = if config.block_cache_path.is_empty() { + None + } else { + Some(std::path::PathBuf::from(&config.block_cache_path)) + }; + inner.block_cache_max_bytes = config.block_cache_max_bytes; + + // Phase 2.3 / 2.4 — gateway race + offline fallback. + inner.gateway_fallback_enabled = config.gateway_fallback_enabled; + inner.gateway_fallback_urls = config.gateway_fallback_urls.clone(); + inner.gateway_race_concurrency = config.gateway_race_concurrency as usize; + + if let Some(token) = &config.access_token { + inner = inner.with_token(token.clone()); + } + + inner +} + // ============================================================================ // Client Creation // ============================================================================ /// Create a new Fula client with the given configuration pub fn create_client(config: FulaConfig) -> anyhow::Result { - let mut inner_config = fula_client::Config::new(&config.endpoint) - .with_timeout(Duration::from_secs(config.timeout_seconds)); - inner_config.per_chunk_download_timeout = - Duration::from_secs(config.per_chunk_download_timeout_seconds); - inner_config.buffered_download_max_bytes = config.buffered_download_max_bytes; - - let inner_config = if let Some(token) = config.access_token { - inner_config.with_token(token) - } else { - inner_config - }; - + let inner_config = build_inner_config(&config); let client = fula_client::FulaClient::new(inner_config)?; Ok(FulaClientHandle { @@ -45,17 +76,7 @@ pub fn create_encrypted_client( config: FulaConfig, encryption: EncryptionConfig, ) -> anyhow::Result { - let mut inner_config = fula_client::Config::new(&config.endpoint) - .with_timeout(Duration::from_secs(config.timeout_seconds)); - inner_config.per_chunk_download_timeout = - 
Duration::from_secs(config.per_chunk_download_timeout_seconds); - inner_config.buffered_download_max_bytes = config.buffered_download_max_bytes; - - let inner_config = if let Some(token) = config.access_token { - inner_config.with_token(token) - } else { - inner_config - }; + let inner_config = build_inner_config(&config); // Create encryption config let enc_config = if let Some(secret_key) = encryption.secret_key { @@ -101,17 +122,7 @@ pub fn create_encrypted_client_with_pinning( encryption: EncryptionConfig, pinning: PinningConfig, ) -> anyhow::Result { - let mut inner_config = fula_client::Config::new(&config.endpoint) - .with_timeout(Duration::from_secs(config.timeout_seconds)); - inner_config.per_chunk_download_timeout = - Duration::from_secs(config.per_chunk_download_timeout_seconds); - inner_config.buffered_download_max_bytes = config.buffered_download_max_bytes; - - let inner_config = if let Some(token) = config.access_token { - inner_config.with_token(token) - } else { - inner_config - }; + let inner_config = build_inner_config(&config); // Create encryption config let enc_config = if let Some(secret_key) = encryption.secret_key { @@ -304,6 +315,7 @@ mod tests { max_retries: 3, per_chunk_download_timeout_seconds: 120, buffered_download_max_bytes: 64 * 1024 * 1024, + ..FulaConfig::default() }; let handle = create_client(cfg).expect("create_client should succeed"); let inner_cfg = handle.inner.config(); @@ -333,4 +345,94 @@ mod tests { 256 * 1024 * 1024, ); } + + /// Phase 2.x — verify all new fields plumb from FulaConfig + /// (Dart-facing) through `build_inner_config` into the underlying + /// `fula_client::Config`. Without this test, a future refactor of + /// `build_inner_config` could silently drop a field and Dart apps + /// would observe Phase 2.x as inert (config flag set, runtime + /// flag still false). 
+ #[test] + fn fula_config_plumbs_phase_2_x_health_gate_fields() { + let cfg = FulaConfig { + health_gate_enabled: true, + health_gate_ttl_seconds: 45, + ..FulaConfig::default() + }; + let handle = create_client(cfg).expect("create_client"); + let inner = handle.inner.config(); + assert!(inner.health_gate_enabled, "health_gate_enabled must plumb"); + assert_eq!(inner.health_gate_ttl, Duration::from_secs(45)); + } + + #[test] + fn fula_config_plumbs_phase_2_x_block_cache_fields() { + // Use a path that won't actually open (we only assert the + // config plumbs; the cache opens lazily on first use). + let temp = tempfile::tempdir().expect("tempdir"); + let cache_path = temp.path().join("cache.redb"); + + let cfg = FulaConfig { + block_cache_enabled: true, + block_cache_path: cache_path.to_string_lossy().into_owned(), + block_cache_max_bytes: 64 * 1024 * 1024, + ..FulaConfig::default() + }; + let handle = create_client(cfg).expect("create_client"); + let inner = handle.inner.config(); + assert!(inner.block_cache_enabled); + assert_eq!(inner.block_cache_path, Some(cache_path)); + assert_eq!(inner.block_cache_max_bytes, 64 * 1024 * 1024); + } + + #[test] + fn fula_config_empty_block_cache_path_means_use_platform_default() { + // The Dart-facing field is `String` (FFI doesn't carry + // `Option`); empty string is the documented "use default" form. + // The bridge must translate to `None` so the SDK's `dirs`-based + // default kicks in. 
+ let cfg = FulaConfig { + block_cache_enabled: true, + block_cache_path: String::new(), + ..FulaConfig::default() + }; + let handle = create_client(cfg).expect("create_client"); + let inner = handle.inner.config(); + assert_eq!(inner.block_cache_path, None, + "empty block_cache_path string must translate to None so the SDK uses the platform default"); + } + + #[test] + fn fula_config_plumbs_phase_2_x_gateway_fields() { + let cfg = FulaConfig { + gateway_fallback_enabled: true, + gateway_fallback_urls: vec![ + "https://custom1.example/ipfs/{cid}".into(), + "https://custom2.example/ipfs/{cid}".into(), + ], + gateway_race_concurrency: 5, + ..FulaConfig::default() + }; + let handle = create_client(cfg).expect("create_client"); + let inner = handle.inner.config(); + assert!(inner.gateway_fallback_enabled); + assert_eq!(inner.gateway_fallback_urls.len(), 2); + assert_eq!(inner.gateway_fallback_urls[0], "https://custom1.example/ipfs/{cid}"); + assert_eq!(inner.gateway_race_concurrency, 5); + } + + #[test] + fn fula_config_default_phase_2_x_fields_are_off() { + // Backward-compat invariant: default-constructed Dart config + // produces a default-constructed Rust config. Apps that don't + // touch the new fields see byte-identical pre-Phase-2.x behavior. 
+ let cfg = FulaConfig::default(); + let handle = create_client(cfg).expect("create_client"); + let inner = handle.inner.config(); + assert!(!inner.health_gate_enabled); + assert!(!inner.block_cache_enabled); + assert!(!inner.gateway_fallback_enabled); + assert_eq!(inner.gateway_fallback_urls.len(), 0); + assert_eq!(inner.gateway_race_concurrency, 3); + } } diff --git a/crates/fula-flutter/src/api/error.rs b/crates/fula-flutter/src/api/error.rs index d082dae..bf1ae2e 100644 --- a/crates/fula-flutter/src/api/error.rs +++ b/crates/fula-flutter/src/api/error.rs @@ -69,6 +69,42 @@ pub enum FulaError { #[error("Forest error: {0}")] ForestError(String), + /// Phase 2.2 of master-independent reads: a single block exceeds + /// the configured `block_cache_max_bytes` budget. Surface to the + /// user with guidance to raise the cache size or skip the cache. + /// Native-only signal in practice (BlockCache is compiled out on + /// wasm32) but defined unconditionally so the Dart binding always + /// has the same enum shape across Android, iOS, Ubuntu, Windows, + /// and web (flutter-js + wasm). + #[error("Cache budget exceeded: size={size}, budget={budget}")] + CacheBudgetExceeded { size: u64, budget: u64 }, + + /// Phase 2.2 of master-independent reads: catch-all for the + /// persistent block cache's I/O / storage / commit errors. + /// Stringified at the FFI boundary; Dart code doesn't depend on + /// any Rust storage-engine specifics. Native-only in practice. + #[error("Cache error: {0}")] + CacheError(String), + + /// Phase 3.3 — cold-start hybrid resolver could not resolve the + /// master-published global users-index CID via IPNS or chain. + /// Surface to Dart apps as "offline mode unavailable for this + /// device until master is reachable again" — distinct from + /// `Network` (which is a transient master-side glitch). 
+ #[error("Users-index resolution failed: {0}")] + UsersIndexResolutionFailed(String), + + /// Phase 3.3 — replay defense: a payload's embedded sequence + /// regressed below what the SDK has seen before. Dart apps + /// should NOT silently retry; surface as a clear "stale-state" + /// signal (possibly with a retry-after-N-minutes hint). + #[error("Sequence regression in {channel}: observed={observed}, highest seen={highest_seen}")] + SequenceRegression { + observed: u64, + highest_seen: u64, + channel: String, + }, + /// Internal error #[error("Internal error: {0}")] Internal(String), @@ -131,6 +167,22 @@ impl From for FulaError { ClientError::MasterUnreachable { down_for_secs } => FulaError::Network( format!("master unreachable (health gate; down for ~{}s)", down_for_secs), ), + // Phase 2.2 — block cache surface. Map to first-class + // FulaError variants so Dart code can pattern-match without + // string parsing. Identical shape on every target (native + + // wasm) so flutter-js / web builds compile against the same + // enum. + ClientError::BlockTooLarge { size, budget } => { + FulaError::CacheBudgetExceeded { size, budget } + } + ClientError::BlockCache(msg) => FulaError::CacheError(msg), + // Phase 3.3 cold-start hybrid resolver. + ClientError::UsersIndexResolutionFailed { reason } => { + FulaError::UsersIndexResolutionFailed(reason) + } + ClientError::SequenceRegression { observed, highest_seen, channel } => { + FulaError::SequenceRegression { observed, highest_seen, channel } + } } } } @@ -194,7 +246,29 @@ impl FulaError { FulaError::ShareError(_) => "SHARE_ERROR", FulaError::RotationError(_) => "ROTATION_ERROR", FulaError::ForestError(_) => "FOREST_ERROR", + FulaError::CacheBudgetExceeded { .. } => "CACHE_BUDGET_EXCEEDED", + FulaError::CacheError(_) => "CACHE_ERROR", + FulaError::UsersIndexResolutionFailed(_) => "USERS_INDEX_RESOLUTION_FAILED", + FulaError::SequenceRegression { .. 
} => "SEQUENCE_REGRESSION", FulaError::Internal(_) => "INTERNAL", } } + + /// Phase 2.2 helper: detect block-cache-related errors so app code + /// can offer a "retry without cache" or "raise budget" prompt + /// without string-parsing the underlying message. + pub fn is_cache_error(&self) -> bool { + matches!(self, FulaError::CacheBudgetExceeded { .. } | FulaError::CacheError(_)) + } + + /// Phase 3.3 helper: detect cold-start resolution errors. Apps + /// should surface this as "offline mode unavailable" instead of + /// a generic "download failed" — the file is fine; we just can't + /// learn its CID without master. + pub fn is_users_index_error(&self) -> bool { + matches!( + self, + FulaError::UsersIndexResolutionFailed(_) | FulaError::SequenceRegression { .. } + ) + } } diff --git a/crates/fula-flutter/src/api/types.rs b/crates/fula-flutter/src/api/types.rs index 5567b30..01c7810 100644 --- a/crates/fula-flutter/src/api/types.rs +++ b/crates/fula-flutter/src/api/types.rs @@ -45,6 +45,67 @@ pub struct FulaConfig { /// buffered path returns an error instead of allocating the buffer. /// Default: 256 MiB. pub buffered_download_max_bytes: u64, + + // ============================================================ + // Phase 2.1 — master-down detection (health gate) + // ============================================================ + /// Enable the SDK's master health gate. Off by default + /// (backward-compat). When on, the SDK observes request outcomes + /// and short-circuits with `Network`/`MasterUnreachable` error + /// after two consecutive failures, instead of paying the per-read + /// timeout. Works on every platform fula-flutter ships against. + pub health_gate_enabled: bool, + + /// TTL of the `Down` state when `health_gate_enabled = true`. + /// After this duration elapses, the next request is allowed + /// through as a probe. Default: 30 seconds. 
+ pub health_gate_ttl_seconds: u64, + + // ============================================================ + // Phase 2.2 — persistent block cache + // ============================================================ + /// Enable the on-disk LRU block cache. + /// + /// **Native-only.** The cache is `redb`-backed and not available + /// in browser-targeted builds. Setting `true` on a wasm32 target + /// is silently inert — the underlying SDK skips construction and + /// the offline path stays unavailable in the browser. On + /// Android/iOS/Ubuntu/Windows the field activates Phase 2.2. + pub block_cache_enabled: bool, + + /// Filesystem path for the block-cache redb database. Empty + /// string = use the platform default (`dirs::data_local_dir()/ + /// fula/cache/blocks.redb`). Native-only; ignored on wasm32. + pub block_cache_path: String, + + /// Maximum on-disk bytes for the block cache. Default: 256 MiB. + /// The cache evicts to 80 % of this watermark when a `put` would + /// push it past `max_bytes`. Native-only; ignored on wasm32. + pub block_cache_max_bytes: u64, + + // ============================================================ + // Phase 2.3 / 2.4 — IPFS gateway race + offline GET fallback + // ============================================================ + /// Enable falling back to public IPFS gateways when master is + /// unreachable AND the SDK has previously cached the requested + /// object's CID via Phase 2.2's KEY_TO_CID table. + /// + /// Requires `block_cache_enabled = true` (the cache holds the + /// `(bucket, key) → cid` map the gateway race needs). Native-only; + /// ignored on wasm32. + pub gateway_fallback_enabled: bool, + + /// Custom gateway URL templates. Each must contain the literal + /// `{cid}` token, which the SDK substitutes per fetch. Empty Vec + /// means "use the SDK-shipped default list of six gateways" + /// (Cloudflare, dweb.link, ipfs.io, trustless-gateway.link, + /// 4everland.io, gateway.pinata.cloud). Native-only. 
+ pub gateway_fallback_urls: Vec, + + /// Number of gateways the SDK races in parallel for any single + /// CID. Default: 3. Capped at the gateway-pool length. + /// Native-only. + pub gateway_race_concurrency: u32, } impl Default for FulaConfig { @@ -56,6 +117,17 @@ impl Default for FulaConfig { max_retries: 3, per_chunk_download_timeout_seconds: 300, buffered_download_max_bytes: 256 * 1024 * 1024, + // Phase 2.x — all flags off by default (backward-compat). + // Apps must opt in explicitly; existing Dart code sees + // byte-identical behavior to pre-Phase-2.x builds. + health_gate_enabled: false, + health_gate_ttl_seconds: 30, + block_cache_enabled: false, + block_cache_path: String::new(), + block_cache_max_bytes: 256 * 1024 * 1024, + gateway_fallback_enabled: false, + gateway_fallback_urls: Vec::new(), + gateway_race_concurrency: 3, } } } diff --git a/crates/fula-flutter/src/frb_generated.rs b/crates/fula-flutter/src/frb_generated.rs index 1102ca3..b1b7398 100644 --- a/crates/fula-flutter/src/frb_generated.rs +++ b/crates/fula-flutter/src/frb_generated.rs @@ -4194,6 +4194,14 @@ impl SseDecode for crate::api::types::FulaConfig { let mut var_maxRetries = ::sse_decode(deserializer); let mut var_perChunkDownloadTimeoutSeconds = ::sse_decode(deserializer); let mut var_bufferedDownloadMaxBytes = ::sse_decode(deserializer); + // MANUAL PATCH (Phase 2.x cross-platform audit): the new + // health_gate / block_cache / gateway_fallback fields are NOT + // yet on the wire from Dart (frb_codegen has not been re-run). + // Defaulting them via struct-update keeps the Rust struct + // initializable while the legacy 6-field wire format is still + // what Dart sends. Re-running `flutter_rust_bridge_codegen + // generate` regenerates this file and the new fields become + // settable from Dart. 
return crate::api::types::FulaConfig { endpoint: var_endpoint, access_token: var_accessToken, @@ -4201,6 +4209,7 @@ impl SseDecode for crate::api::types::FulaConfig { max_retries: var_maxRetries, per_chunk_download_timeout_seconds: var_perChunkDownloadTimeoutSeconds, buffered_download_max_bytes: var_bufferedDownloadMaxBytes, + ..crate::api::types::FulaConfig::default() }; } } @@ -6336,6 +6345,11 @@ mod io { impl CstDecode for wire_cst_fula_config { // Codec=Cst (C-struct based), see doc to use other codecs fn cst_decode(self) -> crate::api::types::FulaConfig { + // MANUAL PATCH (Phase 2.x cross-platform audit): see + // matching note on `SseDecode for FulaConfig`. The wire + // C-struct still has only the legacy 6 fields; new fields + // default until `flutter_rust_bridge_codegen generate` + // regenerates this file. crate::api::types::FulaConfig { endpoint: self.endpoint.cst_decode(), access_token: self.access_token.cst_decode(), @@ -6345,6 +6359,7 @@ mod io { .per_chunk_download_timeout_seconds .cst_decode(), buffered_download_max_bytes: self.buffered_download_max_bytes.cst_decode(), + ..crate::api::types::FulaConfig::default() } } } @@ -8638,6 +8653,10 @@ mod web { "Expected 6 elements, got {}", self_.length() ); + // MANUAL PATCH (Phase 2.x cross-platform audit): wasm/JS + // CstDecode path. The 6-element JsValue array carries the + // legacy fields; new Phase 2.x fields default until FRB + // regen. 
crate::api::types::FulaConfig { endpoint: self_.get(0).cst_decode(), access_token: self_.get(1).cst_decode(), @@ -8645,6 +8664,7 @@ mod web { max_retries: self_.get(3).cst_decode(), per_chunk_download_timeout_seconds: self_.get(4).cst_decode(), buffered_download_max_bytes: self_.get(5).cst_decode(), + ..crate::api::types::FulaConfig::default() } } } diff --git a/crates/fula-js/src/lib.rs b/crates/fula-js/src/lib.rs index 567827e..ae3def2 100644 --- a/crates/fula-js/src/lib.rs +++ b/crates/fula-js/src/lib.rs @@ -54,9 +54,80 @@ pub struct JsFulaConfig { /// Request timeout in seconds (default: 30) #[serde(default = "default_timeout")] pub timeout_seconds: u64, + + // ============================================================ + // Phase 2.1 — master-down detection (functional on wasm/web) + // ============================================================ + /// Enable the SDK's master health gate. Off by default + /// (backward-compat). When on, two consecutive failed master + /// requests trip the gate and short-circuit subsequent reads + /// with a `MASTER_UNREACHABLE` error. **Functional on wasm/web.** + #[serde(default)] + pub health_gate_enabled: bool, + + /// TTL of the `Down` state when `healthGateEnabled = true`. + /// After this duration elapses, the next request is allowed + /// through as a probe. Default: 30 seconds. + #[serde(default = "default_health_gate_ttl")] + pub health_gate_ttl_seconds: u64, + + // ============================================================ + // Phase 2.2 / 2.3 / 2.4 — block cache + gateway race + // ============================================================ + // + // These fields are NATIVE-ONLY at runtime. The underlying + // `fula_client::Config` carries them across all builds, but on + // the wasm32 target the SDK gates out the `redb`-backed cache + // and the parking_lot-based gateway pool, so setting these + // flags has no effect in browsers. 
+ // + // We expose them anyway for **API symmetry** with `fula-flutter`: + // a TypeScript app sharing config types between mobile and web + // builds can construct one config object and have it accepted + // by both. On web the offline path silently no-ops; on native + // (Tauri / Electron-with-Rust / Node-via-N-API integrations) the + // path activates as documented for fula-flutter. + + /// Enable the on-disk LRU block cache. **Native-only at runtime.** + /// On wasm/web this flag is silently inert. + #[serde(default)] + pub block_cache_enabled: bool, + + /// Filesystem path for the block-cache redb database. Empty + /// string = use platform default. **Native-only at runtime.** + #[serde(default)] + pub block_cache_path: String, + + /// Maximum on-disk bytes for the block cache. Default: 256 MiB. + /// **Native-only at runtime.** + #[serde(default = "default_block_cache_max_bytes")] + pub block_cache_max_bytes: u64, + + /// Enable falling back to public IPFS gateways when master is + /// unreachable. **Native-only at runtime.** Requires + /// `blockCacheEnabled = true` to populate the `(bucket,key) → cid` + /// lookup table the offline race needs. + #[serde(default)] + pub gateway_fallback_enabled: bool, + + /// Custom gateway URL templates. Each must contain the literal + /// `{cid}` token. Empty Vec = use the SDK-shipped default list + /// of six gateways (Cloudflare, dweb.link, ipfs.io, + /// trustless-gateway.link, 4everland.io, gateway.pinata.cloud). + /// **Native-only at runtime.** + #[serde(default)] + pub gateway_fallback_urls: Vec, + + /// Number of gateways the SDK races in parallel. Default: 3. 
+    /// **Native-only at runtime.**
+    #[serde(default = "default_gateway_race_concurrency")]
+    pub gateway_race_concurrency: u32,
 }
 
 fn default_timeout() -> u64 {
     30
 }
+fn default_health_gate_ttl() -> u64 { 30 }
+fn default_block_cache_max_bytes() -> u64 { 256 * 1024 * 1024 }
+fn default_gateway_race_concurrency() -> u32 { 3 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
@@ -173,12 +244,7 @@ pub async fn create_encrypted_client(
         .map_err(|e| JsError::new(&format!("Invalid encryption config: {}", e)))?;
 
     // Build client config
-    let mut client_config = fula_client::Config::new(&config.endpoint)
-        .with_timeout(std::time::Duration::from_secs(config.timeout_seconds));
-
-    if let Some(token) = config.access_token {
-        client_config = client_config.with_token(token);
-    }
+    let client_config = build_inner_config(config);
 
     // Build encryption config
     let enc_config = if let Some(secret_key) = encryption.secret_key {
@@ -204,13 +270,123 @@
     };
 
     let client = fula_client::EncryptedClient::new(client_config, enc_config)
-        .map_err(|e| JsError::new(&format!("Failed to create client: {}", e)))?;
+        .map_err(|e| client_error_to_js_error("create_client_failed", e))?;
 
     Ok(EncryptedClient {
         inner: Arc::new(Mutex::new(client)),
     })
 }
 
+// ============================================================================
+// Phase 2.x helpers
+// ============================================================================
+
+/// Translate a JS-flavoured `JsFulaConfig` into the underlying
+/// `fula_client::Config`, plumbing every Phase 1.2 / 2.x field
+/// through. Used by every JS client constructor — adding a new field
+/// means changing this function only.
+///
+/// Note on wasm32: the block_cache + gateway_fallback fields are
+/// silently ignored at runtime (the underlying SDK gates out the
+/// redb-backed cache and parking_lot-based pool).
They're still +/// plumbed through so that a single shared config struct works +/// across native + web targets. +fn build_inner_config(config: JsFulaConfig) -> fula_client::Config { + let mut inner = fula_client::Config::new(&config.endpoint) + .with_timeout(std::time::Duration::from_secs(config.timeout_seconds)); + + if let Some(token) = config.access_token { + inner = inner.with_token(token); + } + + // Phase 2.1 — health gate (functional on wasm). + inner.health_gate_enabled = config.health_gate_enabled; + inner.health_gate_ttl = + std::time::Duration::from_secs(config.health_gate_ttl_seconds); + + // Phase 2.2 — block cache (native-only at runtime; plumbed for symmetry). + inner.block_cache_enabled = config.block_cache_enabled; + inner.block_cache_path = if config.block_cache_path.is_empty() { + None + } else { + Some(std::path::PathBuf::from(config.block_cache_path)) + }; + inner.block_cache_max_bytes = config.block_cache_max_bytes; + + // Phase 2.3 / 2.4 — gateway race (native-only at runtime). + inner.gateway_fallback_enabled = config.gateway_fallback_enabled; + inner.gateway_fallback_urls = config.gateway_fallback_urls; + inner.gateway_race_concurrency = config.gateway_race_concurrency as usize; + + inner +} + +/// Convert a `fula_client::ClientError` into a `JsError` whose +/// message is a JSON object carrying a stable error `code` plus +/// any structured fields. JS callers can `JSON.parse(err.message)` +/// to dispatch on the code and surface it to UI logic — e.g., +/// "show offline indicator" on `MASTER_UNREACHABLE` rather than +/// just a generic "download failed". +/// +/// The set of codes is stable across native and wasm so apps can +/// share an error-handling layer. +fn client_error_to_js_error(operation: &str, e: fula_client::ClientError) -> JsError { + use fula_client::ClientError; + + // Compose stable code + human-readable message. 
+ let (code, structured) = match &e { + ClientError::MasterUnreachable { down_for_secs } => ( + "MASTER_UNREACHABLE", + serde_json::json!({ "downForSecs": down_for_secs }), + ), + ClientError::BlockTooLarge { size, budget } => ( + "BLOCK_TOO_LARGE", + serde_json::json!({ "size": size, "budget": budget }), + ), + ClientError::BlockCache(_) => ("BLOCK_CACHE_ERROR", serde_json::json!(null)), + ClientError::UsersIndexResolutionFailed { reason } => ( + "USERS_INDEX_RESOLUTION_FAILED", + serde_json::json!({ "reason": reason }), + ), + ClientError::SequenceRegression { observed, highest_seen, channel } => ( + "SEQUENCE_REGRESSION", + serde_json::json!({ + "observed": observed, + "highestSeen": highest_seen, + "channel": channel, + }), + ), + ClientError::NotFound { bucket, key } => ( + "NOT_FOUND", + serde_json::json!({ "bucket": bucket, "key": key }), + ), + ClientError::BucketNotFound(name) => ( + "BUCKET_NOT_FOUND", + serde_json::json!({ "name": name }), + ), + ClientError::AccessDenied(_) => ("ACCESS_DENIED", serde_json::json!(null)), + ClientError::ConcurrentModification(_) + | ClientError::ConcurrentModificationExhausted { .. 
} => { + ("CONCURRENT_MODIFICATION", serde_json::json!(null)) + } + ClientError::MigrationLockHeld { bucket, expires_at } => ( + "MIGRATION_LOCK_HELD", + serde_json::json!({ "bucket": bucket, "expiresAt": expires_at }), + ), + ClientError::Encryption(_) => ("ENCRYPTION", serde_json::json!(null)), + ClientError::Http(_) => ("HTTP", serde_json::json!(null)), + _ => ("INTERNAL", serde_json::json!(null)), + }; + + let payload = serde_json::json!({ + "code": code, + "operation": operation, + "message": e.to_string(), + "data": structured, + }); + JsError::new(&payload.to_string()) +} + // ============================================================================ // Encrypted Operations // ============================================================================ @@ -270,6 +446,14 @@ pub async fn put_encrypted_with_type( /// @param bucket - Bucket name /// @param key - Original object key (path) /// @returns Decrypted data as Uint8Array +/// +/// Errors surface as `JsError` whose `message` is a JSON-encoded +/// `{ code, operation, message, data }` object — `code` is one of +/// the stable codes documented on `client_error_to_js_error`. Apps +/// should `JSON.parse(err.message)` to dispatch on `code` (e.g., +/// `"MASTER_UNREACHABLE"` is the Phase 2.1 signal that the SDK's +/// health gate has tripped — surface an offline UI rather than a +/// generic "download failed"). #[wasm_bindgen(js_name = getDecrypted)] pub async fn get_decrypted( client: &EncryptedClient, @@ -279,11 +463,13 @@ pub async fn get_decrypted( let guard = client.inner.lock().await; let data = guard.get_object_decrypted(bucket, key) .await - .map_err(|e| JsError::new(&format!("Download failed: {}", e)))?; + .map_err(|e| client_error_to_js_error("get_decrypted", e))?; Ok(data.to_vec()) } /// Download and decrypt data by storage key +/// +/// Same structured-error contract as `getDecrypted`. 
#[wasm_bindgen(js_name = getDecryptedByStorageKey)] pub async fn get_decrypted_by_storage_key( client: &EncryptedClient, @@ -293,7 +479,7 @@ pub async fn get_decrypted_by_storage_key( let guard = client.inner.lock().await; let data = guard.get_object_decrypted_by_storage_key(bucket, storage_key) .await - .map_err(|e| JsError::new(&format!("Download failed: {}", e)))?; + .map_err(|e| client_error_to_js_error("get_decrypted_by_storage_key", e))?; Ok(data.to_vec()) } From be9ee9f667f70b23e7cfa4fbad1d2929a4bc93cc Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Mon, 4 May 2026 13:12:21 -0400 Subject: [PATCH 4/6] closed gaps in flutter and wasm for offline download --- crates/fula-client/Cargo.toml | 10 +- crates/fula-client/src/lib.rs | 23 +- crates/fula-client/src/registry_resolver.rs | 18 +- crates/fula-client/src/user_key.rs | 48 +++ crates/fula-flutter/src/api/client.rs | 197 +++++++++- crates/fula-flutter/src/api/types.rs | 272 ++++++++++++++ crates/fula-js/src/lib.rs | 375 +++++++++++++++++++- 7 files changed, 907 insertions(+), 36 deletions(-) create mode 100644 crates/fula-client/src/user_key.rs diff --git a/crates/fula-client/Cargo.toml b/crates/fula-client/Cargo.toml index 5543805..945579b 100644 --- a/crates/fula-client/Cargo.toml +++ b/crates/fula-client/Cargo.toml @@ -35,6 +35,14 @@ url = "2.5" base64 = { workspace = true } hex = { workspace = true } blake3 = { workspace = true } +# Phase 3.3 — `derive_user_key_from_email` lives in `src/user_key.rs` +# and is exposed on every target (wasm + native) so the wasm-bindgen +# binding can compute the user_key without round-tripping through +# `fula-crypto::derive_key_argon2id`. sha2 is a pure-Rust dep that +# builds cleanly on wasm32; was previously gated to native-only when +# the helper still lived inside the native-gated `registry_resolver` +# module, but with the helper extracted we need cross-target. 
+sha2 = { workspace = true } mime_guess = "2.0" tokio = { version = "1.42", default-features = false, features = ["sync"] } dashmap = { workspace = true } @@ -52,8 +60,6 @@ dirs = "5" # Native-only — wasm builds skip the cache (no persistent storage there anyway). redb = { workspace = true } cid = { workspace = true } -# CID verification on gateway-fetched bytes (Phase 2.3 of master-independent reads). -sha2 = { workspace = true } # Mutex for per-gateway state in gateway_fetch (Phase 2.3). parking_lot = { workspace = true } # Phase 3.3 cold-start hybrid resolver — parses the master-published diff --git a/crates/fula-client/src/lib.rs b/crates/fula-client/src/lib.rs index 53e8240..aae6cb9 100644 --- a/crates/fula-client/src/lib.rs +++ b/crates/fula-client/src/lib.rs @@ -50,6 +50,11 @@ mod multipart; #[cfg(not(target_arch = "wasm32"))] mod registry_resolver; mod types; +/// Phase 3.3 helper module — wasm-friendly userKey derivation +/// extracted from `registry_resolver.rs` so the wasm-bindgen +/// binding can expose it. Source-of-truth lives here; the +/// resolver re-exports it on native. +mod user_key; #[cfg(not(target_arch = "wasm32"))] mod orphan_queue; #[cfg(not(target_arch = "wasm32"))] @@ -86,16 +91,24 @@ pub use types::*; /// callbacks without depending on internal module paths. pub use health_gate::{HealthCallback, MasterHealthEvent}; +/// Phase 3.3 — `derive_user_key_from_email` available on EVERY +/// target (wasm + native). Apps compute the userKey at sign-in +/// time from the OAuth-provided email and stash it in +/// `Config::users_index_user_key`. The same function is also +/// re-exported via `registry_resolver` on native for backward +/// compatibility with code that imports it from there. +pub use user_key::derive_user_key_from_email; + /// Phase 3.3 — cold-start hybrid resolver public API. Native-only; /// the resolver itself is gated to `cfg(not(target_arch = "wasm32"))`. 
-/// The free helper `derive_user_key_from_email` is also re-exported -/// so JS / Flutter bindings can compute the user_key without holding -/// a client. +/// `derive_user_key_from_email` is re-exported above (cross-target); +/// callers using the `fula_client::registry_resolver::derive_user_key_from_email` +/// path also still resolve through the in-module `pub use`. #[cfg(not(target_arch = "wasm32"))] pub use registry_resolver::{ decode_user_buckets_index, default_ipfs_gateway_urls, default_ipns_gateway_urls, - derive_user_key_from_email, fetch_cid_via_gateways, BucketEntry, GlobalUsersIndex, - ResolutionSource, ResolvedUsersIndex, ResolverConfig, UserBucketsIndex, UsersIndexResolver, + fetch_cid_via_gateways, BucketEntry, GlobalUsersIndex, ResolutionSource, + ResolvedUsersIndex, ResolverConfig, UserBucketsIndex, UsersIndexResolver, }; /// Process-wide count of WAL append failures (F11). diff --git a/crates/fula-client/src/registry_resolver.rs b/crates/fula-client/src/registry_resolver.rs index f602eb1..4f28e5a 100644 --- a/crates/fula-client/src/registry_resolver.rs +++ b/crates/fula-client/src/registry_resolver.rs @@ -217,15 +217,15 @@ impl ResolverConfig { /// stay in lockstep with the master's `hash_user_id`; the /// `derive_user_key_matches_master_state_rs_algorithm` test below /// reproduces the master algorithm step-by-step and asserts equality. -pub fn derive_user_key_from_email(email: &str) -> String { - use sha2::{Digest, Sha256}; - let user_id_digest = Sha256::digest(email.to_lowercase().as_bytes()); - let user_id_hex = hex::encode(user_id_digest); - let mut hasher = blake3::Hasher::new(); - hasher.update(b"fula:user_id:"); - hasher.update(user_id_hex.as_bytes()); - hex::encode(&hasher.finalize().as_bytes()[..16]) -} +/// +/// Source-of-truth lives in `crate::user_key` (extracted there so the +/// wasm-bindgen binding can expose it — the `registry_resolver` +/// module itself is gated to native targets). 
This re-export keeps +/// the historical `fula_client::registry_resolver::derive_user_key_from_email` +/// import path working for native callers AND lets the test module +/// in this file (line 1485+) call the function via `use super::*;`. +#[allow(unused_imports)] +pub use crate::user_key::derive_user_key_from_email; /// Default IPNS-aware gateway list. Excludes /// `trustless-gateway.link` (only serves `/ipfs/`, not `/ipns/`). diff --git a/crates/fula-client/src/user_key.rs b/crates/fula-client/src/user_key.rs new file mode 100644 index 0000000..4b50265 --- /dev/null +++ b/crates/fula-client/src/user_key.rs @@ -0,0 +1,48 @@ +//! Phase 3.3 — userKey derivation, available on every target. +//! +//! `derive_user_key_from_email` was originally inlined in +//! `registry_resolver.rs`, but that module is gated to native via +//! `#![cfg(not(target_arch = "wasm32"))]` because it depends on +//! `reqwest`, `parking_lot`, and other crates that don't compile on +//! wasm. The userKey computation itself is pure: just `sha2` + +//! `blake3` + `hex` — all of which build cleanly on wasm32 (these +//! are already transitive deps of the wasm SDK build). +//! +//! Extracting the helper here lets the FRB and wasm-bindgen +//! bindings expose `derive_user_key_from_email` without having to +//! re-implement the algorithm. Master and SDK both produce the +//! same `userKey` for the same email, regardless of which target +//! the SDK was built for. +//! +//! **Algorithm (must stay in lockstep with master's `state.rs::hash_user_id`):** +//! +//! ```text +//! email_lower = email.to_lowercase() +//! user_id_digest = sha256(email_lower.as_bytes()) +//! user_id_hex = hex(user_id_digest) +//! domain_separated = "fula:user_id:" || user_id_hex +//! user_key = hex( blake3(domain_separated)[..16] ) +//! ``` +//! +//! Drift here vs. master = silent cold-start failure (master +//! publishes under userKey A, SDK looks up userKey B). The +//! 
`derive_user_key_matches_master_state_rs_algorithm` test in +//! `registry_resolver.rs` reproduces master's algorithm step-by-step +//! and asserts equality. + +use sha2::{Digest, Sha256}; + +/// Compute the canonical fula `userKey` for cold-start config from a +/// plaintext email. Returns 32 hex chars (16-byte BLAKE3 truncated digest). +/// +/// Apps call this at sign-in time (the OAuth flow has plaintext email) +/// and pass the returned string into `Config::users_index_user_key`. +/// The SDK never persists or transmits the raw email. +pub fn derive_user_key_from_email(email: &str) -> String { + let user_id_digest = Sha256::digest(email.to_lowercase().as_bytes()); + let user_id_hex = hex::encode(user_id_digest); + let mut hasher = blake3::Hasher::new(); + hasher.update(b"fula:user_id:"); + hasher.update(user_id_hex.as_bytes()); + hex::encode(&hasher.finalize().as_bytes()[..16]) +} diff --git a/crates/fula-flutter/src/api/client.rs b/crates/fula-flutter/src/api/client.rs index 84786b0..a0da53c 100644 --- a/crates/fula-flutter/src/api/client.rs +++ b/crates/fula-flutter/src/api/client.rs @@ -16,12 +16,19 @@ use async_lock::RwLock; use crate::api::types::*; /// Build the underlying `fula_client::Config` from the Dart-facing -/// `FulaConfig`, plumbing every Phase 1.2 / 2.x field through. Used by -/// `create_client`, `create_encrypted_client`, and -/// `create_encrypted_client_with_pinning` to keep the three constructors -/// in lockstep — adding a new field to FulaConfig only requires a -/// change here. -fn build_inner_config(config: &FulaConfig) -> fula_client::Config { +/// `FulaConfig`, plumbing every Phase 1.2 / 2.x / 3.3 / 19 field +/// through. Used by `create_client`, `create_encrypted_client`, and +/// `create_encrypted_client_with_pinning` to keep the three +/// constructors in lockstep — adding a new field to FulaConfig only +/// requires a change here. 
+/// +/// `dispatcher` is the per-handle dispatcher that the FRB layer +/// always wires into `Config::health_callback` so apps can subscribe +/// to `MasterHealthEvent` events via `subscribe_master_health_events`. +fn build_inner_config( + config: &FulaConfig, + dispatcher: &Arc, +) -> fula_client::Config { let mut inner = fula_client::Config::new(&config.endpoint) .with_timeout(Duration::from_secs(config.timeout_seconds)); @@ -50,6 +57,45 @@ fn build_inner_config(config: &FulaConfig) -> fula_client::Config { inner.gateway_fallback_urls = config.gateway_fallback_urls.clone(); inner.gateway_race_concurrency = config.gateway_race_concurrency as usize; + // Phase 3.3 — cold-start hybrid resolver. The resolver activates + // iff all four required strings (rpc_url, anchor_address, + // ipns_name, user_key) are non-empty AND the user_key is `Some`. + // Empty strings collapse to "disabled" — same default behavior as + // pre-Phase-3.3 builds. + inner.users_index_chain_rpc_url = config.users_index_chain_rpc_url.clone(); + inner.users_index_anchor_address = config.users_index_anchor_address.clone(); + inner.users_index_ipns_name = config.users_index_ipns_name.clone(); + inner.users_index_user_key = if config.users_index_user_key.is_empty() { + None + } else { + Some(config.users_index_user_key.clone()) + }; + inner.users_index_ipns_gateway_urls = + config.users_index_ipns_gateway_urls.clone(); + inner.users_index_ipfs_gateway_urls = + config.users_index_ipfs_gateway_urls.clone(); + + // Phase 19 — always wire a forwarding callback into the gate so + // Dart-side subscribers can observe health transitions. The + // dispatcher is per-handle, so events from this client never + // leak to a different client's subscribers. Native-only — wasm + // doesn't include the health-callback Arc in fula_client::Config + // because `Arc` doesn't cross wasm-bindgen cleanly; the + // wasm path surfaces via typed errors. 
+ #[cfg(not(target_arch = "wasm32"))] + { + let dispatcher = Arc::clone(dispatcher); + let cb: fula_client::HealthCallback = Arc::new(move |ev| { + dispatcher.dispatch(ev); + }); + inner.health_callback = Some(cb); + } + // Suppress unused-variable warning on wasm where we don't read + // `dispatcher` at config-build time (subscribers still register; + // they just never receive events because no callback fires). + #[cfg(target_arch = "wasm32")] + let _ = dispatcher; + if let Some(token) = &config.access_token { inner = inner.with_token(token.clone()); } @@ -63,11 +109,13 @@ fn build_inner_config(config: &FulaConfig) -> fula_client::Config { /// Create a new Fula client with the given configuration pub fn create_client(config: FulaConfig) -> anyhow::Result { - let inner_config = build_inner_config(&config); + let dispatcher = Arc::new(HealthEventDispatcher::new()); + let inner_config = build_inner_config(&config, &dispatcher); let client = fula_client::FulaClient::new(inner_config)?; Ok(FulaClientHandle { inner: Arc::new(client), + health_dispatcher: dispatcher, }) } @@ -76,7 +124,8 @@ pub fn create_encrypted_client( config: FulaConfig, encryption: EncryptionConfig, ) -> anyhow::Result { - let inner_config = build_inner_config(&config); + let dispatcher = Arc::new(HealthEventDispatcher::new()); + let inner_config = build_inner_config(&config, &dispatcher); // Create encryption config let enc_config = if let Some(secret_key) = encryption.secret_key { @@ -113,6 +162,7 @@ pub fn create_encrypted_client( Ok(EncryptedClientHandle { inner: Arc::new(RwLock::new(client)), + health_dispatcher: dispatcher, }) } @@ -122,7 +172,8 @@ pub fn create_encrypted_client_with_pinning( encryption: EncryptionConfig, pinning: PinningConfig, ) -> anyhow::Result { - let inner_config = build_inner_config(&config); + let dispatcher = Arc::new(HealthEventDispatcher::new()); + let inner_config = build_inner_config(&config, &dispatcher); // Create encryption config let enc_config = if let 
Some(secret_key) = encryption.secret_key { @@ -168,9 +219,137 @@ pub fn create_encrypted_client_with_pinning( Ok(EncryptedClientHandle { inner: Arc::new(RwLock::new(client)), + health_dispatcher: dispatcher, }) } +// ============================================================================ +// Phase 3.3 — derive_user_key_from_email +// ============================================================================ + +/// Compute the canonical fula `userKey` for cold-start config from a +/// plaintext email. Mirrors `fula_client::derive_user_key_from_email` +/// — same domain separator, same hash chain (sha256(lower(email)) +/// → BLAKE3("fula:user_id:" || _).bytes[..16] → hex-encode). +/// +/// Apps call this once at sign-in (the OAuth flow has plaintext +/// email), then set `FulaConfig::users_index_user_key` to the +/// returned string. The SDK never sees the raw email. +/// +/// Native-only — wasm32 surfaces this via the JS-side `deriveKey` +/// helper because the cold-start resolver (Phase 3.3) itself isn't +/// wired on wasm. +#[cfg(not(target_arch = "wasm32"))] +pub fn derive_user_key_from_email(email: String) -> String { + fula_client::derive_user_key_from_email(&email) +} + +#[cfg(target_arch = "wasm32")] +pub fn derive_user_key_from_email(_email: String) -> String { + // The Rust cold-start resolver isn't wired on wasm32; expose + // the function for API symmetry but emit an empty key so the + // resolver self-disables (per build_inner_config: empty user_key + // → users_index_user_key=None → resolver inactive). + String::new() +} + +// ============================================================================ +// Phase 19 — health-event subscription +// ============================================================================ + +/// Drain every `MasterHealthEvent` observed since the last call to +/// this function. Returns events in the order they fired (oldest +/// first). After draining the buffer is empty. 
+/// +/// Apps poll this on a timer (or on UI rebuilds) and update their +/// online/offline indicator. Internal buffer is bounded at 64 +/// entries — if an app falls so far behind that the buffer +/// overflows, the oldest events are dropped first; the latest state +/// is preserved. For latest-only consumers, see +/// [`get_last_master_health_event`]. +/// +/// Events delivered: +/// - `Online` — master went Up after being Down +/// - `OfflineFallbackActive { reason }` — master went Down +/// - `SeverelyDegraded { reason }` — both master AND cold-start +/// channels (IPNS + chain) are unreachable; cold-start GETs +/// will fail +/// +/// Native-only at runtime: on wasm32 the function compiles for API +/// symmetry but never returns events because the health-callback +/// Arc isn't wired on wasm (`Arc` doesn't cross +/// wasm-bindgen cleanly). +pub fn poll_master_health_events( + client: &FulaClientHandle, +) -> Vec { + client.health_dispatcher.drain_events() +} + +/// Same as `poll_master_health_events` for an `EncryptedClientHandle`. +/// Exposed separately because Dart-side the encrypted client has +/// its own handle type and FRB doesn't auto-reflect "this method +/// works on either handle". +pub fn poll_master_health_events_encrypted( + client: &EncryptedClientHandle, +) -> Vec { + client.health_dispatcher.drain_events() +} + +/// Read the most recent `MasterHealthEvent` observed by the SDK +/// without draining the buffer. Returns `None` if no transition has +/// happened yet (master has been Up the whole session). Useful for +/// apps that build UI state from a single field on mount. +pub fn get_last_master_health_event( + client: &FulaClientHandle, +) -> Option { + client.health_dispatcher.last_event() +} + +/// Encrypted-client variant of `get_last_master_health_event`. 
+pub fn get_last_master_health_event_encrypted( + client: &EncryptedClientHandle, +) -> Option { + client.health_dispatcher.last_event() +} + +// ============================================================================ +// Phase 19 — get_object_with_offline_fallback +// ============================================================================ + +/// Phase 19 GET wrapper that returns transparency fields alongside +/// the bytes. Routes through the SDK's full Phase 2.x + 3.3 stack: +/// +/// | State | Returns | +/// |-----------------------------------|-------------------------------------------| +/// | Master up | source = Master, freshness = Live | +/// | Master down + warm cache hit | source = LocalCache or Gateway(url), | +/// | | freshness = Cached { observed_at } | +/// | Master down + cold-start | source = Gateway(url), | +/// | | freshness = Cached { observed_at } | +/// | Master down + cache miss + no | Err(UsersIndexResolutionFailed) | +/// | resolver configured | | +/// +/// Apps that don't care about transparency can read `result.inner.data`. +/// Apps that surface "you're offline" UI inspect `result.source` / +/// `result.freshness`. +/// +/// Native-only at runtime: on wasm32 the SDK currently only wraps +/// `get_object_with_metadata` (no offline fallback infrastructure on +/// browsers — block_cache + gateway_fetch are gated out). The wasm +/// path returns `OfflineGetResult` with `source = Master, freshness = +/// Live` so the API shape is identical across platforms. 
+pub async fn get_object_with_offline_fallback( + client: &FulaClientHandle, + bucket: String, + key: String, +) -> anyhow::Result { + let result = client + .inner + .get_object_with_offline_fallback(&bucket, &key) + .await?; + Ok(result.into()) +} + // ============================================================================ // Bucket Operations // ============================================================================ diff --git a/crates/fula-flutter/src/api/types.rs b/crates/fula-flutter/src/api/types.rs index 01c7810..0ba41f6 100644 --- a/crates/fula-flutter/src/api/types.rs +++ b/crates/fula-flutter/src/api/types.rs @@ -106,6 +106,52 @@ pub struct FulaConfig { /// CID. Default: 3. Capped at the gateway-pool length. /// Native-only. pub gateway_race_concurrency: u32, + + // ============================================================ + // Phase 3.3 — cold-start hybrid resolver + // ============================================================ + // + // The resolver activates iff ALL of the following four fields + // are populated: + // - users_index_chain_rpc_url (non-empty) + // - users_index_anchor_address (non-empty) + // - users_index_ipns_name (non-empty) + // - users_index_user_key (non-empty) + // + // When any one is empty the resolver stays disabled; cold-start + // GETs fall through with `UsersIndexResolutionFailed`. Default + // values are all empty strings → resolver disabled (backward + // compat with pre-Phase-3.3 builds). + + /// JSON-RPC URL for the chain anchor contract (Base or SKALE). + /// Required to enable Phase 3.3 cold-start. Empty → disabled. + pub users_index_chain_rpc_url: String, + + /// `FulaUsersIndexAnchor.sol` contract address (20 bytes hex, + /// optionally `0x`-prefixed). Required to enable Phase 3.3. + pub users_index_anchor_address: String, + + /// IPNS NAME (libp2p public-key hash, e.g. `k51qzi5...`) under + /// which the master publishes the global users-index CBOR. + /// Required to enable Phase 3.3. 
+ pub users_index_ipns_name: String, + + /// 32-hex-char `userKey` (= `BLAKE3("fula:user_id:" || sha256(lower(email)))[..16]`). + /// Compute via the free function [`derive_user_key_from_email`] + /// at sign-in time and pass in here. The SDK does not store the + /// raw email. Required to enable Phase 3.3. + pub users_index_user_key: String, + + /// IPNS-aware gateway URL templates (each must contain `{name}`). + /// Empty Vec = use SDK-shipped defaults + /// (Cloudflare/dweb.link/ipfs.io/4everland/Pinata). + /// Native-only — wasm cold-start uses the typed-error path. + pub users_index_ipns_gateway_urls: Vec, + + /// `/ipfs/{cid}` gateway URL templates (each must contain `{cid}`). + /// Empty Vec = use SDK-shipped 6-gateway default list. + /// Native-only. + pub users_index_ipfs_gateway_urls: Vec, } impl Default for FulaConfig { @@ -128,10 +174,92 @@ impl Default for FulaConfig { gateway_fallback_enabled: false, gateway_fallback_urls: Vec::new(), gateway_race_concurrency: 3, + // Phase 3.3 — resolver disabled by default. Operator + // sets the four required fields at sign-in to enable + // cold-start; cold-start surfaces UsersIndexResolutionFailed + // until they're set, mirroring the Rust core's behavior. + users_index_chain_rpc_url: String::new(), + users_index_anchor_address: String::new(), + users_index_ipns_name: String::new(), + users_index_user_key: String::new(), + users_index_ipns_gateway_urls: Vec::new(), + users_index_ipfs_gateway_urls: Vec::new(), } } } +// ============================================================ +// Phase 19 — transparency surfaces +// ============================================================ + +/// Where the bytes of a successfully-served read came from. Mirrors +/// `fula_client::ReadSource`. Apps surface "you're reading from +/// cache" / "served by a public gateway" UI based on this. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FulaReadSource { + /// Master S3 served the request directly (fast path). 
+ Master, + /// On-disk redb BLOCKS table served the bytes — no network at all. + LocalCache, + /// Public IPFS gateway served the bytes (master-down fallback). + /// The string is the URL template (e.g. `https://ipfs.io/ipfs/{cid}`) + /// that won the gateway race — useful for diagnostics or + /// "served by ipfs.io" labeling. + Gateway(String), +} + +/// Freshness of a successfully-served read. Mirrors +/// `fula_client::ReadFreshness`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FulaReadFreshness { + /// Master-served bytes (fresh, by definition). + Live, + /// Served from on-disk cache. `observed_at` is the unix-millis + /// when the entry was first written to cache. + Cached { observed_at: u64 }, + /// Cold-start cross-device read; snapshot age within the + /// publisher cadence (≤ `USERS_INDEX_FLUSH_INTERVAL`). Apps + /// can surface "synced N min ago". + StaleByDesign { snapshot_age_secs: u64 }, + /// Cold-start cross-device read; snapshot age exceeds the + /// publisher cadence — likely indicates an actual master outage. + StaleByOutage { snapshot_age_secs: u64 }, +} + +/// Wrapper around `GetObjectResult` adding Phase 19 transparency +/// fields. Mirrors `fula_client::OfflineGetResult`. Apps that +/// don't care about transparency just read `.inner.data`. +#[derive(Debug, Clone)] +pub struct OfflineGetResult { + /// Underlying `GetObjectResult` — `data`, `etag`, `content_type`, + /// `content_length`, `last_modified`, `metadata` are on `inner`. + pub inner: GetObjectResult, + /// Where the bytes ultimately came from. + pub source: FulaReadSource, + /// How fresh the bytes are. + pub freshness: FulaReadFreshness, +} + +/// Master-server reachability transition events. Mirrors +/// `fula_client::MasterHealthEvent`. Subscribed via +/// `subscribe_master_health_events`; apps wire the stream to +/// online/offline UI affordances. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum MasterHealthEvent { + /// Master S3 is reachable; reads use the fast path. 
+ Online, + /// Master S3 is unreachable; SDK is falling back to IPFS + /// gateways (Phase 2.4) or cold-start resolver (Phase 3.3). + OfflineFallbackActive { reason: String }, + /// Both master S3 AND chain RPC are unreachable. Cold-start + /// reads fail; warm reads (cached metadata) still work via + /// gateways. Apps should disable "open new bucket" / "first- + /// read" UI affordances when this fires. Emitted only from + /// the cold-start failure path — the health gate alone can't + /// authoritatively detect "both down" without trying. + SeverelyDegraded { reason: String }, +} + /// Configuration for client-side encryption #[derive(Debug, Clone)] pub struct EncryptionConfig { @@ -471,6 +599,14 @@ pub struct UploadProgress { #[derive(Clone)] pub struct FulaClientHandle { pub(crate) inner: Arc, + /// Phase 19 — dispatcher for `MasterHealthEvent` stream + /// subscribers. The Config's `health_callback` (set by + /// `build_inner_config`) captures a clone of this `Arc` and + /// forwards each transition to all live subscribers AND to a + /// "last event seen" slot exposed via `get_last_master_health_event`. + /// Always present so apps can subscribe at any time without + /// re-creating the client. + pub(crate) health_dispatcher: Arc, } /// Handle to an EncryptedClient instance @@ -479,6 +615,86 @@ pub struct FulaClientHandle { #[derive(Clone)] pub struct EncryptedClientHandle { pub(crate) inner: Arc>, + /// Phase 19 — same dispatcher pattern as FulaClientHandle. + /// Encrypted-client construction also threads the callback into + /// the underlying `fula_client::Config` so warm-cache + cold- + /// start transitions both surface to subscribers. + pub(crate) health_dispatcher: Arc, +} + +/// Phase 19 — internal dispatcher that captures `MasterHealthEvent` +/// transitions for two consumption patterns: +/// +/// 1. 
**Polling drain** (`poll_master_health_events`): apps call +/// this periodically (or on UI rebuilds) and receive every +/// event observed since the last call. The internal buffer is +/// a bounded `VecDeque` capped at `MAX_BUFFERED_EVENTS`; if the +/// app falls so far behind that the buffer overflows, oldest +/// events are dropped first (apps care about the *latest* state, +/// not the entire history). +/// +/// 2. **Latest-state read** (`get_last_master_health_event`): +/// returns the most recent event without draining. Useful for +/// apps that want to display "you're offline" immediately on +/// mount based on whatever the SDK has observed so far. +/// +/// **Why polling instead of a Dart `Stream`:** wiring `StreamSink` +/// requires the FRB codegen to have seen `MasterHealthEvent` — a +/// chicken-and-egg dependency on `flutter_rust_bridge_codegen +/// generate` having run after this commit. Polling sidesteps that +/// while still giving apps every event in order. A future iteration +/// can layer a `Stream` on top once codegen has +/// registered the type, without breaking this polling API. +/// +/// Wrapping the buffer in `parking_lot::Mutex` (sync, no `await`) +/// is required because the dispatcher is invoked from the SDK's hot +/// path inside `health_gate::fire_event`, which doesn't tolerate +/// async locks. +pub struct HealthEventDispatcher { + /// Pending events not yet drained by `poll_master_health_events`. + /// Bounded by [`MAX_BUFFERED_EVENTS`]. + buffer: parking_lot::Mutex>, + /// Most recent event observed, regardless of whether it was + /// drained. Read by `get_last_master_health_event`. + last_event: parking_lot::Mutex>, +} + +/// Maximum number of pending events held by [`HealthEventDispatcher`] +/// before older ones get dropped. 64 is plenty for typical apps — +/// a healthy session sees a handful of transitions per hour at most. 
+const MAX_BUFFERED_EVENTS: usize = 64; + +impl HealthEventDispatcher { + pub(crate) fn new() -> Self { + Self { + buffer: parking_lot::Mutex::new(std::collections::VecDeque::new()), + last_event: parking_lot::Mutex::new(None), + } + } + + /// Called from the `health_callback` set on the underlying + /// `fula_client::Config`. Captures the event for both polling + /// drain and latest-state read. + pub(crate) fn dispatch(&self, event: fula_client::MasterHealthEvent) { + let app_event: MasterHealthEvent = event.into(); + *self.last_event.lock() = Some(app_event.clone()); + let mut buf = self.buffer.lock(); + if buf.len() >= MAX_BUFFERED_EVENTS { + // Drop oldest to make room for newest. Apps care about + // the latest state more than ancient history. + buf.pop_front(); + } + buf.push_back(app_event); + } + + pub(crate) fn drain_events(&self) -> Vec { + let mut buf = self.buffer.lock(); + buf.drain(..).collect() + } + + pub(crate) fn last_event(&self) -> Option { + self.last_event.lock().clone() + } } /// Handle to an accepted share @@ -604,6 +820,62 @@ impl From for GetObjectResult { } } +// Phase 19 transparency conversions. These bridge the Rust-core +// `fula_client::*` types to FRB-friendly Dart-side equivalents. +// They're plain unit/struct/string-payload variants so FRB v2's +// codegen produces a sealed Dart class without any custom adapter. 
+ +impl From for FulaReadSource { + fn from(s: fula_client::ReadSource) -> Self { + match s { + fula_client::ReadSource::Master => FulaReadSource::Master, + fula_client::ReadSource::LocalCache => FulaReadSource::LocalCache, + fula_client::ReadSource::Gateway(url) => FulaReadSource::Gateway(url), + } + } +} + +impl From for FulaReadFreshness { + fn from(f: fula_client::ReadFreshness) -> Self { + match f { + fula_client::ReadFreshness::Live => FulaReadFreshness::Live, + fula_client::ReadFreshness::Cached { observed_at } => { + FulaReadFreshness::Cached { observed_at } + } + fula_client::ReadFreshness::StaleByDesign { snapshot_age_secs } => { + FulaReadFreshness::StaleByDesign { snapshot_age_secs } + } + fula_client::ReadFreshness::StaleByOutage { snapshot_age_secs } => { + FulaReadFreshness::StaleByOutage { snapshot_age_secs } + } + } + } +} + +impl From for OfflineGetResult { + fn from(r: fula_client::OfflineGetResult) -> Self { + Self { + inner: r.inner.into(), + source: r.source.into(), + freshness: r.freshness.into(), + } + } +} + +impl From for MasterHealthEvent { + fn from(e: fula_client::MasterHealthEvent) -> Self { + match e { + fula_client::MasterHealthEvent::Online => MasterHealthEvent::Online, + fula_client::MasterHealthEvent::OfflineFallbackActive { reason } => { + MasterHealthEvent::OfflineFallbackActive { reason } + } + fula_client::MasterHealthEvent::SeverelyDegraded { reason } => { + MasterHealthEvent::SeverelyDegraded { reason } + } + } + } +} + impl From for DecryptedObjectInfo { fn from(r: fula_client::DecryptedObjectInfo) -> Self { Self { diff --git a/crates/fula-js/src/lib.rs b/crates/fula-js/src/lib.rs index ae3def2..ed3b3fb 100644 --- a/crates/fula-js/src/lib.rs +++ b/crates/fula-js/src/lib.rs @@ -122,6 +122,53 @@ pub struct JsFulaConfig { /// **Native-only at runtime.** #[serde(default = "default_gateway_race_concurrency")] pub gateway_race_concurrency: u32, + + // ============================================================ + // Phase 3.3 — 
cold-start hybrid resolver (native-only at runtime) + // ============================================================ + // + // The cold-start resolver itself is gated to native targets in + // `fula-client` (the JSON-RPC eth_call + IPNS gateway race rely + // on `reqwest` + `parking_lot` paths that aren't compiled on + // wasm32). These fields are accepted on wasm for **API symmetry** + // — a TS app sharing a config object across mobile + web can + // pass them through unconditionally; the wasm build silently + // disables cold-start. Apps that need offline reads on the web + // still get Phase 2.1 (health gate + typed `MASTER_UNREACHABLE` + // error); cold-start cross-device support is mobile-only today. + + /// JSON-RPC URL for the chain anchor (Base or SKALE). Empty = + /// disabled. **Native-only at runtime.** + #[serde(default)] + pub users_index_chain_rpc_url: String, + + /// `FulaUsersIndexAnchor.sol` proxy address (20 bytes hex, + /// optionally `0x`-prefixed). Empty = disabled. **Native-only + /// at runtime.** + #[serde(default)] + pub users_index_anchor_address: String, + + /// IPNS NAME (libp2p public-key hash, e.g. `k51qzi5...`). + /// Empty = disabled. **Native-only at runtime.** + #[serde(default)] + pub users_index_ipns_name: String, + + /// 32-hex-char `userKey` derived from the user's email via + /// [`derive_user_key_from_email`]. Empty = disabled. + /// **Native-only at runtime.** + #[serde(default)] + pub users_index_user_key: String, + + /// IPNS-aware gateway URL templates (each must contain `{name}`). + /// Empty Vec = use SDK-shipped defaults. **Native-only at runtime.** + #[serde(default)] + pub users_index_ipns_gateway_urls: Vec, + + /// `/ipfs/{cid}` gateway URL templates (each must contain `{cid}`). + /// Empty Vec = use SDK-shipped 6-gateway default. 
**Native-only + /// at runtime.** + #[serde(default)] + pub users_index_ipfs_gateway_urls: Vec, } fn default_timeout() -> u64 { 30 } @@ -208,14 +255,191 @@ pub struct JsSharePermissions { pub expires_at: Option, } +// ============================================================================ +// Phase 19 — transparency types +// ============================================================================ +// +// All three are `serde`-tagged enums / structs so JS sees an idiomatic +// shape: +// ReadSource: { kind: "Master" } +// { kind: "LocalCache" } +// { kind: "Gateway", url: "https://ipfs.io/ipfs/{cid}" } +// ReadFreshness: { kind: "Live" } +// { kind: "Cached", observedAt: 1234567890 } +// { kind: "StaleByDesign", snapshotAgeSecs: 60 } +// { kind: "StaleByOutage", snapshotAgeSecs: 7200 } +// MasterHealthEvent: same `kind` discriminant +// Apps `switch` on `result.source.kind` to drive UI. + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase", tag = "kind")] +pub enum JsReadSource { + Master, + LocalCache, + Gateway { url: String }, +} + +impl From for JsReadSource { + fn from(s: fula_client::ReadSource) -> Self { + match s { + fula_client::ReadSource::Master => JsReadSource::Master, + fula_client::ReadSource::LocalCache => JsReadSource::LocalCache, + fula_client::ReadSource::Gateway(url) => JsReadSource::Gateway { url }, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase", tag = "kind")] +pub enum JsReadFreshness { + Live, + #[serde(rename_all = "camelCase")] + Cached { observed_at: u64 }, + #[serde(rename_all = "camelCase")] + StaleByDesign { snapshot_age_secs: u64 }, + #[serde(rename_all = "camelCase")] + StaleByOutage { snapshot_age_secs: u64 }, +} + +impl From for JsReadFreshness { + fn from(f: fula_client::ReadFreshness) -> Self { + match f { + fula_client::ReadFreshness::Live => JsReadFreshness::Live, + fula_client::ReadFreshness::Cached { observed_at } => { + 
JsReadFreshness::Cached { observed_at } + } + fula_client::ReadFreshness::StaleByDesign { snapshot_age_secs } => { + JsReadFreshness::StaleByDesign { snapshot_age_secs } + } + fula_client::ReadFreshness::StaleByOutage { snapshot_age_secs } => { + JsReadFreshness::StaleByOutage { snapshot_age_secs } + } + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct JsOfflineGetResult { + /// Object data (bytes). + pub data: Vec, + /// ETag (CID string when bytes came from gateway race / cache; + /// master-issued ETag when bytes came from master). + pub etag: String, + /// Content type if known (always `None` on offline-fallback paths + /// today; master-served reads carry the response Content-Type). + pub content_type: Option, + /// Object size in bytes. + pub size: u64, + /// Last-modified timestamp (Unix epoch seconds) if master served + /// the bytes; 0 on offline-fallback paths. + pub last_modified: i64, + /// Where the bytes ultimately came from. + pub source: JsReadSource, + /// How fresh the bytes are. 
+ pub freshness: JsReadFreshness, +} + +impl From for JsOfflineGetResult { + fn from(r: fula_client::OfflineGetResult) -> Self { + let inner = r.inner; + Self { + data: inner.data.to_vec(), + etag: inner.etag, + content_type: inner.content_type, + size: inner.content_length, + last_modified: inner.last_modified.map(|d| d.timestamp()).unwrap_or(0), + source: r.source.into(), + freshness: r.freshness.into(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase", tag = "kind")] +pub enum JsMasterHealthEvent { + Online, + OfflineFallbackActive { reason: String }, + SeverelyDegraded { reason: String }, +} + +impl From for JsMasterHealthEvent { + fn from(e: fula_client::MasterHealthEvent) -> Self { + match e { + fula_client::MasterHealthEvent::Online => JsMasterHealthEvent::Online, + fula_client::MasterHealthEvent::OfflineFallbackActive { reason } => { + JsMasterHealthEvent::OfflineFallbackActive { reason } + } + fula_client::MasterHealthEvent::SeverelyDegraded { reason } => { + JsMasterHealthEvent::SeverelyDegraded { reason } + } + } + } +} + // ============================================================================ // Client Handles (opaque types exposed to JS) // ============================================================================ +/// Phase 19 — wasm dispatcher capturing `MasterHealthEvent` +/// transitions for polling consumers. The Rust callback set on the +/// inner `Config::health_callback` pushes events here; JS apps drain +/// via `pollMasterHealthEvents` or read latest via +/// `getLastMasterHealthEvent`. +/// +/// Buffer is bounded at 64 entries — apps that fall further behind +/// drop the oldest events (latest state is what UI cares about). 
+struct WasmHealthEventDispatcher { + buffer: std::sync::Mutex>, + last_event: std::sync::Mutex>, +} + +const WASM_MAX_BUFFERED_EVENTS: usize = 64; + +impl WasmHealthEventDispatcher { + fn new() -> Self { + Self { + buffer: std::sync::Mutex::new(std::collections::VecDeque::new()), + last_event: std::sync::Mutex::new(None), + } + } + + /// Called from the `health_callback` set on the inner Config. + /// Captures the event for both polling drain + latest-state read. + fn dispatch(&self, event: fula_client::MasterHealthEvent) { + let app_event: JsMasterHealthEvent = event.into(); + if let Ok(mut last) = self.last_event.lock() { + *last = Some(app_event.clone()); + } + if let Ok(mut buf) = self.buffer.lock() { + if buf.len() >= WASM_MAX_BUFFERED_EVENTS { + buf.pop_front(); + } + buf.push_back(app_event); + } + } + + fn drain_events(&self) -> Vec { + self.buffer + .lock() + .map(|mut buf| buf.drain(..).collect()) + .unwrap_or_default() + } + + fn last_event(&self) -> Option { + self.last_event.lock().ok().and_then(|guard| guard.clone()) + } +} + /// Handle to an encrypted Fula client #[wasm_bindgen] pub struct EncryptedClient { inner: Arc>, + /// Phase 19 — per-client health-event dispatcher. Always present + /// so apps can poll regardless of whether they wired + /// `healthGateEnabled = true`. When the gate is off, the buffer + /// stays empty (no events fire); polling returns `[]`. + health_dispatcher: Arc, } /// Handle to an accepted share for accessing shared files @@ -243,8 +467,11 @@ pub async fn create_encrypted_client( let encryption: JsEncryptionConfig = serde_wasm_bindgen::from_value(encryption) .map_err(|e| JsError::new(&format!("Invalid encryption config: {}", e)))?; - // Build client config - let client_config = build_inner_config(config); + // Phase 19 dispatcher — created per client so events from one + // EncryptedClient never leak to another's poll buffer. 
+ let dispatcher = Arc::new(WasmHealthEventDispatcher::new()); + // Build client config (callback wired to dispatcher). + let client_config = build_inner_config(config, &dispatcher); // Build encryption config let enc_config = if let Some(secret_key) = encryption.secret_key { @@ -274,6 +501,7 @@ pub async fn create_encrypted_client( Ok(EncryptedClient { inner: Arc::new(Mutex::new(client)), + health_dispatcher: dispatcher, }) } @@ -282,16 +510,23 @@ pub async fn create_encrypted_client( // ============================================================================ /// Translate a Dart-flavoured `JsFulaConfig` into the underlying -/// `fula_client::Config`, plumbing every Phase 1.2 / 2.x field -/// through. Used by every JS client constructor — adding a new field -/// means changing this function only. +/// `fula_client::Config`, plumbing every Phase 1.2 / 2.x / 3.3 / 19 +/// field through. Used by every JS client constructor — adding a new +/// field means changing this function only. /// -/// Note on wasm32: the block_cache + gateway_fallback fields are -/// silently ignored at runtime (the underlying SDK gates out the -/// redb-backed cache and parking_lot-based pool). They're still -/// plumbed through so that a single shared config struct works -/// across native + web targets. -fn build_inner_config(config: JsFulaConfig) -> fula_client::Config { +/// `dispatcher` is the per-client Phase 19 dispatcher; the callback +/// wired into `Config::health_callback` forwards each transition to +/// it so JS apps can poll via `pollMasterHealthEvents`. +/// +/// Note on wasm32: the block_cache + gateway_fallback + cold-start +/// resolver fields are silently inert at runtime (the underlying SDK +/// gates out the redb-backed cache, parking_lot-based pool, and +/// reqwest-based resolver). They're still plumbed through so a single +/// shared config struct works across native + web targets. 
+fn build_inner_config( + config: JsFulaConfig, + dispatcher: &Arc, +) -> fula_client::Config { let mut inner = fula_client::Config::new(&config.endpoint) .with_timeout(std::time::Duration::from_secs(config.timeout_seconds)); @@ -304,6 +539,17 @@ fn build_inner_config(config: JsFulaConfig) -> fula_client::Config { inner.health_gate_ttl = std::time::Duration::from_secs(config.health_gate_ttl_seconds); + // Phase 19 — wire forwarding callback into the gate. The callback + // is `Arc` which lives entirely in Rust; it never crosses + // the wasm-bindgen boundary (the wasm boundary is between Rust + // and JS — the Arc stays inside Rust). HealthGate fires + // it from `record_success` / `record_failure` regardless of target. + let dispatcher_for_cb = Arc::clone(dispatcher); + let cb: fula_client::HealthCallback = Arc::new(move |ev| { + dispatcher_for_cb.dispatch(ev); + }); + inner.health_callback = Some(cb); + // Phase 2.2 — block cache (native-only at runtime; plumbed for symmetry). inner.block_cache_enabled = config.block_cache_enabled; inner.block_cache_path = if config.block_cache_path.is_empty() { @@ -318,6 +564,21 @@ fn build_inner_config(config: JsFulaConfig) -> fula_client::Config { inner.gateway_fallback_urls = config.gateway_fallback_urls; inner.gateway_race_concurrency = config.gateway_race_concurrency as usize; + // Phase 3.3 — cold-start hybrid resolver (native-only at runtime; + // plumbed for symmetry). Empty strings → resolver disabled (the + // four required fields are all string-empty in JsFulaConfig's + // Default impl-equivalent via `#[serde(default)]`). 
+ inner.users_index_chain_rpc_url = config.users_index_chain_rpc_url; + inner.users_index_anchor_address = config.users_index_anchor_address; + inner.users_index_ipns_name = config.users_index_ipns_name; + inner.users_index_user_key = if config.users_index_user_key.is_empty() { + None + } else { + Some(config.users_index_user_key) + }; + inner.users_index_ipns_gateway_urls = config.users_index_ipns_gateway_urls; + inner.users_index_ipfs_gateway_urls = config.users_index_ipfs_gateway_urls; + inner } @@ -793,6 +1054,98 @@ pub async fn is_flat_namespace(client: &EncryptedClient) -> bool { guard.is_flat_namespace() } +// ============================================================================ +// Phase 3.3 — userKey derivation +// ============================================================================ + +/// Compute the canonical fula `userKey` for cold-start config from a +/// plaintext email. Mirrors `fula_client::derive_user_key_from_email` +/// — same domain separator + double-hash chain (sha256(lower(email)) +/// → BLAKE3("fula:user_id:" || _).bytes[..16] → hex). +/// +/// Apps call this once at sign-in (the OAuth flow has plaintext +/// email), then set `users_index_user_key` on the config object +/// passed to `createEncryptedClient`. The SDK never persists or +/// transmits the raw email. +/// +/// On wasm32 the cold-start RESOLVER itself isn't wired (it depends +/// on reqwest + parking_lot which aren't compiled for browsers), so +/// this helper is exposed for API symmetry — apps can compute the +/// userKey on web for sharing across native + web identity flows. 
+#[wasm_bindgen(js_name = deriveUserKeyFromEmail)] +pub fn derive_user_key_from_email(email: String) -> String { + fula_client::derive_user_key_from_email(&email) +} + +// ============================================================================ +// Phase 19 — get_object_with_offline_fallback + transparency polling +// ============================================================================ + +/// Phase 19 GET wrapper that returns transparency fields alongside +/// the bytes. Mirrors `fula-flutter`'s `getObjectWithOfflineFallback`. +/// On wasm32 the offline fallback infrastructure is gated out (no +/// block cache, no gateway race), so this delegates to the +/// master-only `get_object_with_metadata` path; the returned shape +/// always carries `source = Master, freshness = Live`. Exposed for +/// API symmetry with the Flutter binding. +/// +/// @param client - EncryptedClient (the underlying wraps a FulaClient too) +/// @param bucket - Bucket name +/// @param key - Object key +/// @returns - JSON object matching `JsOfflineGetResult` +/// (`data: number[]`, `etag: string`, `source: {kind: ...}`, +/// `freshness: {kind: ...}`, ...) +#[wasm_bindgen(js_name = getObjectWithOfflineFallback)] +pub async fn get_object_with_offline_fallback( + client: &EncryptedClient, + bucket: String, + key: String, +) -> Result { + let guard = client.inner.lock().await; + // The `EncryptedClient` doesn't expose `get_object_with_offline_fallback` + // directly; it's on the underlying `FulaClient`. Reach in via + // `inner()`. + let result = guard + .inner() + .get_object_with_offline_fallback(&bucket, &key) + .await + .map_err(|e| client_error_to_js_error("get_offline_fallback_failed", e))?; + let js_result: JsOfflineGetResult = result.into(); + serde_wasm_bindgen::to_value(&js_result) + .map_err(|e| JsError::new(&format!("serialize OfflineGetResult: {}", e))) +} + +/// Drain every `MasterHealthEvent` observed since the last call to +/// this function. 
Returns events in the order they fired (oldest +/// first); after draining the buffer is empty. +/// +/// JS apps poll this on a timer (or on UI rebuilds) and update an +/// online/offline indicator. Internal buffer bounded at 64 entries — +/// if an app falls behind, oldest events drop first, latest state is +/// preserved. For latest-only consumers, see `getLastMasterHealthEvent`. +/// +/// Returned shape: `Array<{kind: 'Online'} | {kind: 'OfflineFallbackActive', reason: string} | {kind: 'SeverelyDegraded', reason: string}>`. +#[wasm_bindgen(js_name = pollMasterHealthEvents)] +pub fn poll_master_health_events(client: &EncryptedClient) -> Result { + let events = client.health_dispatcher.drain_events(); + serde_wasm_bindgen::to_value(&events) + .map_err(|e| JsError::new(&format!("serialize health events: {}", e))) +} + +/// Read the most recent `MasterHealthEvent` observed by the SDK +/// without draining the buffer. Returns `null` if no transition has +/// happened yet (master has been Up the whole session). Useful for +/// apps that build UI state from a single field on mount. +/// +/// Returned shape: same as a single element from `pollMasterHealthEvents`, +/// or `null`. 
+#[wasm_bindgen(js_name = getLastMasterHealthEvent)] +pub fn get_last_master_health_event(client: &EncryptedClient) -> Result { + let last = client.health_dispatcher.last_event(); + serde_wasm_bindgen::to_value(&last) + .map_err(|e| JsError::new(&format!("serialize last health event: {}", e))) +} + /// Get SDK version #[wasm_bindgen(js_name = getVersion)] pub fn get_version() -> String { From 6bfca4e0d5417f59dfe7d2d168a2e7a891803d60 Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Mon, 4 May 2026 13:43:29 -0400 Subject: [PATCH 5/6] updated version + doc fix + CI tests fix --- Cargo.lock | 16 +- Cargo.toml | 2 +- crates/fula-client/src/health_gate.rs | 21 +- crates/fula-crypto/src/time.rs | 43 +++++ .../tests/flutter_bridge_tests.rs | 7 + docs/flutter-integration.md | 172 ++++++++++++++++- docs/wasm-compatibility.md | 34 ++++ docs/website/api.html | 136 ++++++++++++- docs/website/benchmark.html | 2 +- docs/website/index.html | 2 +- docs/website/platforms.html | 2 +- docs/website/sdk.html | 180 +++++++++++++++++- docs/website/security.html | 4 +- docs/website/x402.html | 2 +- packages/fula_client/CHANGELOG.md | 45 +++++ packages/fula_client/ios/fula_client.podspec | 2 +- packages/fula_client/pubspec.yaml | 2 +- 17 files changed, 640 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3e6cfd..d4d9e49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1676,7 +1676,7 @@ dependencies = [ [[package]] name = "fula-api" -version = "0.3.7" +version = "0.4.0" dependencies = [ "anyhow", "axum", @@ -1704,7 +1704,7 @@ dependencies = [ [[package]] name = "fula-blockstore" -version = "0.3.7" +version = "0.4.0" dependencies = [ "anyhow", "async-trait", @@ -1742,7 +1742,7 @@ dependencies = [ [[package]] name = "fula-cli" -version = "0.3.7" +version = "0.4.0" dependencies = [ "anyhow", "async-trait", @@ -1792,7 +1792,7 @@ dependencies = [ [[package]] name = "fula-client" -version = "0.3.7" +version = "0.4.0" dependencies = [ "anyhow", "async-trait", @@ -1831,7 
+1831,7 @@ dependencies = [ [[package]] name = "fula-core" -version = "0.3.7" +version = "0.4.0" dependencies = [ "anyhow", "async-trait", @@ -1866,7 +1866,7 @@ dependencies = [ [[package]] name = "fula-crypto" -version = "0.3.7" +version = "0.4.0" dependencies = [ "aes-gcm", "anyhow", @@ -1910,7 +1910,7 @@ dependencies = [ [[package]] name = "fula-flutter" -version = "0.3.7" +version = "0.4.0" dependencies = [ "anyhow", "async-lock", @@ -1933,7 +1933,7 @@ dependencies = [ [[package]] name = "fula-js" -version = "0.3.7" +version = "0.4.0" dependencies = [ "base64 0.22.1", "bytes", diff --git a/Cargo.toml b/Cargo.toml index e835bbc..bfad95e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,7 @@ name = "encrypted_upload_test" path = "examples/encrypted_upload_test.rs" [workspace.package] -version = "0.3.7" +version = "0.4.0" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/functionland/fula-api" diff --git a/crates/fula-client/src/health_gate.rs b/crates/fula-client/src/health_gate.rs index 0942743..7e9081b 100644 --- a/crates/fula-client/src/health_gate.rs +++ b/crates/fula-client/src/health_gate.rs @@ -30,7 +30,7 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::Duration; /// Phase 19 transparency surface — events the SDK emits when its /// view of master-server reachability changes. Apps wire a @@ -218,15 +218,18 @@ pub enum GateDecision { ShortCircuit { down_for_secs: u64 }, } -/// Current unix-time in milliseconds. Wall-clock based (so SystemTime -/// adjustments can shift the gate's perceived "since" — acceptable here -/// since we only compare durations, and a clock jump is at worst a slight -/// TTL anomaly). +/// Current unix-time in milliseconds. 
Wall-clock based (so a system- +/// clock adjustment can shift the gate's perceived "since" — +/// acceptable here since we only compare durations, and a clock jump +/// is at worst a slight TTL anomaly). +/// +/// Routed through `fula_crypto::time::now_millis` so the wasm32 build +/// uses `js_sys::Date::now()` instead of `SystemTime::now()` (the +/// latter panics on wasm32 with "time not implemented on this +/// platform" — the wasm clippy `disallowed-methods` config catches +/// this at lint time). fn now_ms() -> u64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0) + fula_crypto::time::now_millis() } #[cfg(test)] diff --git a/crates/fula-crypto/src/time.rs b/crates/fula-crypto/src/time.rs index f44d032..a39af7f 100644 --- a/crates/fula-crypto/src/time.rs +++ b/crates/fula-crypto/src/time.rs @@ -21,6 +21,27 @@ pub fn now_timestamp() -> i64 { .as_secs() as i64 } +/// Get current Unix timestamp in milliseconds (WASM-compatible) +/// +/// Returns the current time as milliseconds since the Unix epoch. +/// Companion to `now_timestamp` for callers that need millisecond +/// resolution (e.g., the master health gate's TTL bookkeeping where +/// sub-second precision matters across rapid Up↔Down transitions). +/// Works in both native Rust and WASM environments. 
+#[cfg(target_arch = "wasm32")] +pub fn now_millis() -> u64 { + js_sys::Date::now() as u64 +} + +/// Get current Unix timestamp in milliseconds (native) +#[cfg(not(target_arch = "wasm32"))] +pub fn now_millis() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0) +} + #[cfg(test)] mod tests { use super::*; @@ -33,4 +54,26 @@ mod tests { // Should be before Jan 1, 2100 (timestamp: 4102444800) assert!(ts < 4102444800, "Timestamp should be before 2100"); } + + #[test] + fn test_now_millis_reasonable() { + let ms = now_millis(); + // Should be after Jan 1, 2020 in ms (1577836800000) + assert!(ms > 1_577_836_800_000, "ms timestamp should be after 2020"); + // Should be before Jan 1, 2100 in ms + assert!(ms < 4_102_444_800_000, "ms timestamp should be before 2100"); + } + + #[test] + fn test_now_millis_matches_seconds_within_tolerance() { + // Sanity: the millis helper agrees with the seconds helper to + // the second. Catches an accidental scaling bug. + let ms = now_millis(); + let s = now_timestamp() as u64; + let derived_s = ms / 1000; + assert!( + derived_s.abs_diff(s) <= 1, + "now_millis()/1000 ({derived_s}) and now_timestamp() ({s}) must agree to within 1s", + ); + } } diff --git a/crates/fula-flutter/tests/flutter_bridge_tests.rs b/crates/fula-flutter/tests/flutter_bridge_tests.rs index eaca096..6ef503b 100644 --- a/crates/fula-flutter/tests/flutter_bridge_tests.rs +++ b/crates/fula-flutter/tests/flutter_bridge_tests.rs @@ -25,6 +25,12 @@ fn test_fula_config_default() { #[test] fn test_fula_config_with_values() { + // Construct via `..Default::default()` so adding new fields to + // `FulaConfig` (e.g., Phase 2.x / 3.3 / 19) doesn't require + // updating this test. The pre-Phase-2.x fields below are the + // ones this test specifically exercises; everything else inherits + // from `Default::default()` which is the documented backward- + // compat shape (all new flags off / empty). 
let config = FulaConfig { endpoint: "https://api.example.com".to_string(), access_token: Some("test-token".to_string()), @@ -32,6 +38,7 @@ fn test_fula_config_with_values() { max_retries: 5, per_chunk_download_timeout_seconds: 120, buffered_download_max_bytes: 64 * 1024 * 1024, + ..Default::default() }; assert_eq!(config.endpoint, "https://api.example.com"); assert_eq!(config.access_token, Some("test-token".to_string())); diff --git a/docs/flutter-integration.md b/docs/flutter-integration.md index fd6942e..9d664a1 100644 --- a/docs/flutter-integration.md +++ b/docs/flutter-integration.md @@ -104,15 +104,46 @@ application/wasm ### Configuration Types #### FulaConfig + ```dart class FulaConfig { - final String endpoint; // Gateway URL (e.g., "http://localhost:9000") - final String? accessToken; // JWT authentication token - final int timeoutSeconds; // Request timeout (default: 30) - final int maxRetries; // Retry attempts (default: 3) + // Connection + final String endpoint; // Gateway URL (e.g., "http://localhost:9000") + final String? 
accessToken; // JWT authentication token + final int timeoutSeconds; // Request timeout (default: 30) + final int maxRetries; // Retry attempts (default: 3) + final int perChunkDownloadTimeoutSeconds; // F10: per-chunk timeout (default: 300) + final int bufferedDownloadMaxBytes; // F8: buffered download cap (default: 256 MiB) + + // Phase 2.1 — master-down detection (functional on every target) + final bool healthGateEnabled; // default: false + final int healthGateTtlSeconds; // default: 30 + + // Phase 2.2 — persistent block cache (native-only at runtime; flags + // accepted on web for config symmetry, silently inert in browsers) + final bool blockCacheEnabled; // default: false + final String blockCachePath; // default: "" → platform default + final int blockCacheMaxBytes; // default: 256 MiB + + // Phase 2.3 / 2.4 — IPFS gateway race + warm-device offline GET + final bool gatewayFallbackEnabled; // default: false (requires blockCacheEnabled) + final List gatewayFallbackUrls; // default: [] → ships 6 public gateways + final int gatewayRaceConcurrency; // default: 3 + + // Phase 3.3 — cold-start hybrid resolver (native-only at runtime). + // The resolver activates iff ALL four required fields are populated; + // empty values disable cold-start (the warm-device path still works). + final String usersIndexChainRpcUrl; // operator-supplied (Base/SKALE) + final String usersIndexAnchorAddress; // operator-supplied + final String usersIndexIpnsName; // operator-supplied (k51qzi5...) + final String usersIndexUserKey; // app-derived via deriveUserKeyFromEmail + final List usersIndexIpnsGatewayUrls; // default: [] → SDK defaults + final List usersIndexIpfsGatewayUrls; // default: [] → SDK defaults } ``` +All flags default OFF — apps that don't opt in see byte-identical behavior to pre-Phase-2.x builds. 
+ #### EncryptionConfig ```dart class EncryptionConfig { @@ -372,6 +403,111 @@ final restoredClient = await createEncryptedClient( ); ``` +## Offline Reads (Phase 2 + 3) + +When the master gateway is unreachable, the SDK can transparently fall back to public IPFS gateways AND, on a fresh device install, cold-start by resolving a globally-published users-index from IPNS or the chain anchor — no client wallet, no fresh master required. + +### Two-tier offline read + +| Scenario | Path | +|---|---| +| **Warm device** (signed in before, has block cache) | Phase 2.x — gateway race using cached `(bucket, key) → cid` | +| **Fresh install** (no cache) | Phase 3.3 — cold-start resolver fetches global users-index via IPNS or chain, then walks per-user manifest | +| **Master up** | Direct master read (fast path, byte-identical to today) | + +### Step 1 — Enable warm-device offline reads + +```dart +final config = FulaConfig( + endpoint: 'https://your-fula-gateway.com:9000', + accessToken: jwt, + // Phase 2.1 — detect master-down without per-read timeout tax + healthGateEnabled: true, + // Phase 2.2 — persistent block cache (gateway hits land here) + blockCacheEnabled: true, + // Phase 2.4 — fall back to public gateways when master is down + gatewayFallbackEnabled: true, +); +``` + +### Step 2 — (Optional) Enable cold-start for fresh installs + +In addition to the Phase 2.x flags, pass the four operator-supplied resolver fields and the per-user `userKey` derived from the user's email: + +```dart +import 'package:fula_client/fula_client.dart'; + +// Compute userKey once at sign-in. Email is hashed locally; +// the SDK never sees the plaintext on the wire. 
+final userKey = deriveUserKeyFromEmail(userEmail); + +final config = FulaConfig( + endpoint: 'https://your-fula-gateway.com:9000', + accessToken: jwt, + healthGateEnabled: true, + blockCacheEnabled: true, + gatewayFallbackEnabled: true, + // Phase 3.3 — cold-start hybrid resolver + usersIndexChainRpcUrl: 'https://mainnet.base.org', // or SKALE + usersIndexAnchorAddress: '0x...FulaUsersIndexAnchor...', + usersIndexIpnsName: 'k51qzi5uqu5dh...', // operator's published IPNS NAME + usersIndexUserKey: userKey, +); +``` + +### Step 3 — Read with transparency fields + +```dart +final result = await getObjectWithOfflineFallback(client, 'my-bucket', 'photos/cat.jpg'); +final bytes = result.inner.data; + +// Surface "you're offline" UI +switch (result.source) { + case FulaReadSource.master: + // fast path — master served the bytes directly + break; + case FulaReadSource.localCache: + // BLOCKS hit — no network round-trip at all + showToast('Reading from cache (offline)'); + break; + case FulaReadSource.gateway: + // gateway race served the bytes; result.source.url has the gateway URL + showToast('Reading via public IPFS (master is down)'); + break; +} +``` + +### Step 4 — Subscribe to master health transitions + +Two patterns are exposed; pick whichever fits your app: + +```dart +// Pattern A: drain events on a timer / on UI rebuild +final events = pollMasterHealthEvents(client); +for (final event in events) { + switch (event) { + case MasterHealthEvent.online: + setState(() => isOffline = false); + break; + case MasterHealthEvent.offlineFallbackActive: + setState(() => isOffline = true); + break; + case MasterHealthEvent.severelyDegraded: + // both master AND chain unreachable — disable "create new bucket" UI + setState(() => canStartFresh = false); + break; + } +} + +// Pattern B: read latest event on mount (no buffer drain) +final last = getLastMasterHealthEvent(client); +if (last is MasterHealthEvent.offlineFallbackActive) { + // app started while master is down +} +``` 
+ +The `EncryptedClient` has corresponding `pollMasterHealthEventsEncrypted` and `getLastMasterHealthEventEncrypted` variants. + ## Error Handling All operations can throw `FulaError` with specific error types: @@ -388,11 +524,39 @@ try { print('Access denied: ${e.message}'); break; case FulaError.network: + // includes Phase 2.1 MasterUnreachable print('Network error: ${e.message}'); break; case FulaError.encryption: print('Encryption error: ${e.message}'); break; + // Phase 2.x cache errors + case FulaError.cacheBudgetExceeded: + // Phase 2.2: block too large for the cache budget; not fatal — + // the read still succeeded, just not cached. + print('Cache budget exceeded for ${e.size} bytes (budget: ${e.budget})'); + break; + case FulaError.cacheError: + // Phase 2.2: redb open / read / write failure; offline path + // disabled for this session. + print('Block cache unavailable: ${e.message}'); + break; + // Phase 3.3 cold-start errors + case FulaError.usersIndexResolutionFailed: + // Both IPNS and chain channels failed — cold-start unavailable. + // Surface to user as "can't reach storage; please try again later". + print('Cold-start resolver exhausted: ${e.reason}'); + break; + case FulaError.sequenceRegression: + // Replay-defense rejection — the resolver observed a sequence + // older than what it has previously seen. Either a stale gateway + // response or (rarely) a tampered payload. SDK retries the + // alternate channel automatically; this surface is for logging. + print( + 'Sequence regression on ${e.channel}: ' + 'observed=${e.observed}, highestSeen=${e.highestSeen}', + ); + break; default: print('Error: $e'); } diff --git a/docs/wasm-compatibility.md b/docs/wasm-compatibility.md index 72bc2b0..1bbc65b 100644 --- a/docs/wasm-compatibility.md +++ b/docs/wasm-compatibility.md @@ -248,6 +248,40 @@ The API surface is identical between native and WASM builds. However: 2. **Multithreading:** WASM is single-threaded. Async operations use the event loop. 3. 
**Timing:** High-resolution timers may be limited in WASM for security reasons. +## Master-Independent Reads (v0.4.0) — what works on wasm + +v0.4.0 adds the offline-read story (Phase 2.1 / 2.2 / 2.3 / 2.4 / 3.3 / 19). The full surface is exposed in **both** `fula-flutter` and `fula-js` bindings for API symmetry, but several layers are **inert at runtime on wasm32** because their dependencies (redb, `parking_lot`-based gateway pool, reqwest-with-tls, `std::time::SystemTime`) don't compile cleanly in browsers. + +### Functional on wasm32 + +| Surface | Notes | +|---|---| +| **Health gate (Phase 2.1)** | `health_gate_enabled` + `health_gate_ttl` work. Internal `now_ms()` routes through `fula_crypto::time::now_millis()` which uses `js_sys::Date::now()` on wasm32 (the `clippy::disallowed_methods` config in `.github/clippy-wasm/clippy.toml` bans `std::time::SystemTime::now` to catch regressions at lint time). Two consecutive request failures still trip the gate; reads short-circuit with `MasterUnreachable` instead of paying the timeout tax. | +| **Transparency polling (Phase 19)** | `pollMasterHealthEvents()` and `getLastMasterHealthEvent()` work. The dispatcher captures every transition fired by the in-Rust health gate. On wasm32 the gate fires for the same conditions as native (failed master requests), so apps see the same event stream. `MasterHealthEvent::SeverelyDegraded` is only emitted by the cold-start resolver, which is native-only — on wasm you'll see `Online` / `OfflineFallbackActive` only. | +| **`derive_user_key_from_email`** | Pure `sha256` + `blake3` + `hex` — no native-only deps. Apps can compute the userKey on web for cross-platform identity flows (e.g., compute on a desktop Tauri app, replicate on a web companion using the same email + algorithm). | +| **`get_object_with_offline_fallback`** | Compiles and runs on wasm32. 
With block_cache + gateway_fallback inert (see below), the wasm path always returns `source: Master, freshness: Live` — i.e., it's effectively `get_object_with_metadata` wrapped in the `OfflineGetResult` shape. Apps can consume the result identically across native + web. | + +### Inert on wasm32 (fields accepted, runtime no-op) + +| Surface | Why inert | Effect | +|---|---|---| +| **Block cache (Phase 2.2)** | `redb` is a native-only embedded KV (mmap + file locks). The whole `crates/fula-client/src/block_cache.rs` file is gated `#![cfg(not(target_arch = "wasm32"))]`. | Setting `block_cache_enabled = true` on wasm is silently ignored. The SDK never persists block bytes and never observes the `(bucket, key) → cid` map needed by the gateway-race fallback. | +| **Gateway race (Phase 2.3 / 2.4)** | Depends on the block cache (for the `(bucket, key) → cid` lookup table) AND on `parking_lot`'s native-only mutex behavior in the gateway-state ring. Whole `gateway_fetch.rs` is gated. | Setting `gateway_fallback_enabled = true` on wasm is silently ignored. Master-down reads on web surface as `MasterUnreachable` errors instead of falling through to a public gateway. | +| **Cold-start hybrid resolver (Phase 3.3)** | `registry_resolver.rs` is gated `#![cfg(not(target_arch = "wasm32"))]` because it depends on `reqwest` with native-tls, `parking_lot`, and `serde_ipld_dagcbor` paths that the wasm build chain doesn't currently support. | Setting `users_index_*` fields on wasm is silently ignored. Cold-start GETs (master-down + cache miss) surface `UsersIndexResolutionFailed`. | + +### Why expose inert flags at all + +The fields are accepted on every target so a TypeScript app sharing a config struct between mobile (where everything works) and web (where some flags are silently inert) can construct one config object without per-platform branches. On web, the offline path simply degrades to "no offline path" — typed errors come back instead of fallback paths firing. 
Apps that want web-side offline reads today should rely on browser caching (HTTP-level service workers) until the wasm-side gateway race lands in a future release. + +### Adding new wasm-incompatible API to fula-client + +When adding a new SDK surface that depends on `std::time::SystemTime::now`, `std::time::Instant::now`, file I/O, or any other native-only call: + +1. **Either** gate the function with `#[cfg(not(target_arch = "wasm32"))]` so it's excluded from wasm builds entirely. +2. **Or** route the call through `fula_crypto::time::now_timestamp()` / `now_millis()` (or `web_time::Instant::now()` for monotonic timing). + +The CI's `test-wasm` job loads `.github/clippy-wasm/clippy.toml` via `CLIPPY_CONF_DIR` and runs `cargo clippy --target wasm32-unknown-unknown -D clippy::disallowed-methods`. This catches `SystemTime::now` / `Instant::now` regressions before merge. Native clippy ignores the config. + ## Performance Considerations 1. **Crypto operations:** libcrux-ml-kem is optimized but may be ~10-20% slower than native C in WASM. diff --git a/docs/website/api.html b/docs/website/api.html index c0d1dff..d436689 100644 --- a/docs/website/api.html +++ b/docs/website/api.html @@ -43,7 +43,7 @@ +
@@ -1214,6 +1224,130 @@

Permissions builder helpers

+
+
+
+

Master-Independent Reads (v0.4.0)

+

+ v0.4.0 adds a coordinated server + SDK story so non-blox clients can read their own files even when the master gateway is offline. + The SDK-side surfaces (gateway race, block cache, cold-start resolver, transparency types) live in SDK Examples; + this section documents the HTTP-level additions on the master server itself. +

+

What's new on the master

+
    +
  • Optional PUT header x-amz-meta-fula-bucket-lookup-h that the SDK attaches on Phase 2 manifest-root commits (Phase 1.2).
  • +
  • Background users-index publisher that pins per-user + global CBORs to ipfs-cluster, publishes to IPNS every 5 min, and exposes the latest state via an internal endpoint (Phase 3.2).
  • +
  • Bearer-protected internal admin endpoints at /_internal/* (master) and /admin/users-index-anchor/trigger (mainnet-rewards-server) so operators can force a publish / chain-submit on demand instead of waiting up to 12 hours.
  • +
+

Every new server-side path is gated by an env flag that defaults to OFF; old fula-clients see byte-identical behavior to pre-v0.4.0 builds.

+
+
+
+ +
+
+
+

Phase 1.2 — x-amz-meta-fula-bucket-lookup-h

+

+ Optional user-metadata header that the encrypted SDK attaches on the Phase 2 manifest-root PUT. + Carries a 16-byte client-derived blinded bucket lookup key (BLAKE3 of MetadataKey || bucket_name), + so the published global users-index CBOR can key its bucket entries without leaking plaintext bucket names to anyone who fetches it. +

+

Format

+
x-amz-meta-fula-bucket-lookup-h: <32 hex chars>
+

32 lowercase hex chars (16 bytes). Master-side handler at fula-cli/src/handlers/object.rs calls BucketManager::populate_lookup_h_if_missing after the flush.

+

Behavior

+
    +
  • Idempotent — once populated for a bucket, the field is never overwritten on subsequent PUTs.
  • +
  • Non-fatal — a malformed or missing header is logged at warn! level; the PUT response is unchanged.
  • +
  • Env-gated — master ignores the header unless FULA_BUCKET_LOOKUP_H_ENABLED=1.
  • +
  • Backward-compat — old clients (no header) work unchanged. Buckets created without the header are emitted in the published CBOR with legacy: true and a plaintext-name key, so cold-start can still find them.
  • +
+
+
+
+ +
+
+
+

Phase 3.2 — GET /_internal/users-index-state

+

Returns the master's current published users-index state. Consumed by the chain-anchor cron in mainnet-rewards-server; rarely useful to apps directly.

+

Auth

+

Authorization: Bearer <FULA_USERS_INDEX_INTERNAL_TOKEN> — generated by the operator setup script and shared between master + cron + (optionally) pinning-webui.

+

Response codes

+
    +
  • 200 — success; body documented below.
  • +
  • 401 — bearer missing or wrong (constant-time compared).
  • +
  • 503 — fail-closed: publisher disabled (FULA_USERS_INDEX_PUBLISHER_ENABLED unset) OR token unset.
  • +
+

Response body (JSON)

+
{
+  "cid": "bafyrei...",          // CID of the latest pinned global users-index CBOR (or null on pre-first-tick)
+  "sequence": 17,                // monotonic sequence inside the CBOR payload
+  "updated_at_unix": 1714780000, // wall-clock timestamp of last commit
+  "ipns_key_name": "fula-users-index"
+}
+
+
+
+ +
+
+
+

Operator — POST /_internal/publish-now

+

Triggers an immediate publisher tick instead of waiting up to FULA_USERS_INDEX_FLUSH_INTERVAL_SECS (default 5 min). Useful during deploy verification.

+

Auth

+

Same bearer token as /_internal/users-index-state. Same 401 / 503 fail-closed semantics.

+

Response (200) body

+
{
+  "global_cid": "bafyrei...",
+  "sequence": 18,
+  "changed_users": 1,    // users whose per-user CBOR was newly pinned
+  "failed_users": 0,     // per-user pins that failed (tick continues; failed users retry next tick)
+  "total_users": 6,
+  "global_rebuilt": true
+}
+

Operator UI

+

+ The pinning-webui admin section (/admin/fula) ships a "Publish now" button that proxies to this endpoint. + Reuses the operator's session cookie for the inbound auth and the bearer token for the outbound call. +

+
+
+
+ +
+
+
+

Operator — POST /admin/users-index-anchor/trigger

+

+ On the mainnet-rewards-server (not the master gateway). Triggers an immediate users-index chain-anchor submission instead of waiting up to 12 h for the periodic cron. +

+

Auth

+

Reuses FULA_USERS_INDEX_INTERNAL_TOKEN via Authorization: Bearer .... Constant-time compare.

+

Response codes

+
    +
  • 200 — tick committed; per-network results in body.
  • +
  • 401 — wrong/missing bearer.
  • +
  • 409 — another tick is already in flight (cron OR a prior HTTP trigger). Retry after a moment.
  • +
  • 503 — fail-closed: anchor service disabled (FULA_USERS_INDEX_ANCHOR_ENABLED unset) OR token unset.
  • +
+

Response (200) body

+
{
+  "committed": true,
+  "masterCid": "bafyrei...",
+  "masterSequence": "18",
+  "networks": [
+    { "network": "base",  "status": "fulfilled", "submitted": true },
+    { "network": "skale", "status": "fulfilled", "submitted": false }
+  ]
+}
+

Concurrency

+

An in-flight flag inside runTick prevents two simultaneous ticks from racing the on-chain latest() reads + publish() calls (which would cause one tx to revert with NonMonotonicSequence). HTTP triggers that contend with the periodic cron get a clean 409.

+
+
+
+

Fula API Documentation • Built with ❤️ for decentralized storage

diff --git a/docs/website/benchmark.html b/docs/website/benchmark.html index b751c62..18739a8 100644 --- a/docs/website/benchmark.html +++ b/docs/website/benchmark.html @@ -42,7 +42,7 @@ +
+
+
+

📡 Offline Reads (v0.4.0)

+

+ When the master gateway is unreachable, the SDK can transparently fall back to public IPFS gateways (warm-device path) and, on a fresh device install, cold-start by resolving a globally-published users-index from IPNS or the chain anchor — no client wallet, no fresh master required. +

+

+ Every flag defaults OFF for backward compatibility. Apps that don't opt in see byte-identical behavior to pre-v0.4.0 builds. +

+ +

Three-tier read path

+
    +
  • Master up — direct master read (fast path, byte-identical to today)
  • +
  • Master down + warm cache — gateway race using the cached (bucket, key) → cid map (Phase 2.x)
  • +
  • Master down + fresh install — cold-start hybrid resolver fetches the global users-index via IPNS first, then chain anchor as backup; walks the per-user manifest from there (Phase 3.3)
  • +
+ +

Rust — enable warm-device offline reads

+
use fula_client::{Config, FulaClient};
+use std::time::Duration;
+
+let mut config = Config::new("https://your-fula-gateway.com:9000")
+    .with_token(jwt);
+
+// Phase 2.1 — detect master-down without per-read timeout tax
+config.health_gate_enabled = true;
+config.health_gate_ttl = Duration::from_secs(30);
+
+// Phase 2.2 — persistent block cache (gateway hits land here)
+config.block_cache_enabled = true;
+// config.block_cache_path = Some(...);  // None → platform default
+config.block_cache_max_bytes = 256 * 1024 * 1024;
+
+// Phase 2.4 — fall back to public gateways when master is down
+config.gateway_fallback_enabled = true;
+// config.gateway_fallback_urls = vec![...];  // [] → SDK ships 6 default gateways
+config.gateway_race_concurrency = 3;
+
+let client = FulaClient::new(config)?;
+
+ +

Rust — also enable cold-start (fresh device install)

+
use fula_client::derive_user_key_from_email;
+
+// Compute userKey ONCE at sign-in. Email is hashed locally; the SDK
+// never persists or transmits it.
+let user_key = derive_user_key_from_email(&user_email);
+
+// In addition to the warm-device flags above:
+config.users_index_chain_rpc_url = "https://mainnet.base.org".into();
+config.users_index_anchor_address =
+    "0x...FulaUsersIndexAnchor...".into();
+config.users_index_ipns_name = "k51qzi5uqu5dh...".into();
+config.users_index_user_key = Some(user_key);
+// config.users_index_ipns_gateway_urls = vec![...];  // [] → SDK defaults
+// config.users_index_ipfs_gateway_urls = vec![...];  // [] → SDK defaults
+
+ +

Rust — read with transparency fields

+
use fula_client::{ReadSource, ReadFreshness};
+
+let result = client
+    .get_object_with_offline_fallback("my-bucket", "photos/cat.jpg")
+    .await?;
+
+let bytes = &result.inner.data;
+match result.source {
+    ReadSource::Master           => log::debug!("served by master"),
+    ReadSource::LocalCache       => log::info!("served from local cache (offline)"),
+    ReadSource::Gateway(url)     => log::info!("served via {}", url),
+}
+match result.freshness {
+    ReadFreshness::Live                                 => {}
+    ReadFreshness::Cached { observed_at }               => {
+        log::info!("cached entry from unix-millis {}", observed_at);
+    }
+    ReadFreshness::StaleByDesign { snapshot_age_secs }  => { /* Phase 3.3 */ }
+    ReadFreshness::StaleByOutage { snapshot_age_secs }  => { /* Phase 3.3 */ }
+}
+
+ +

Flutter / Dart — same surface via FRB bindings

+
// 1. Compute userKey at sign-in
+final userKey = deriveUserKeyFromEmail(userEmail);
+
+// 2. Construct config with all relevant flags
+final config = FulaConfig(
+  endpoint: 'https://your-fula-gateway.com:9000',
+  accessToken: jwt,
+  healthGateEnabled: true,
+  blockCacheEnabled: true,
+  gatewayFallbackEnabled: true,
+  // cold-start (Phase 3.3) — native-only at runtime
+  usersIndexChainRpcUrl: 'https://mainnet.base.org',
+  usersIndexAnchorAddress: '0x...',
+  usersIndexIpnsName: 'k51qzi5uqu5dh...',
+  usersIndexUserKey: userKey,
+);
+final client = await createClient(config);
+
+// 3. Read with transparency
+final result = await getObjectWithOfflineFallback(client, 'my-bucket', 'photos/cat.jpg');
+final bytes = result.inner.data;
+print('source: ${result.source}, freshness: ${result.freshness}');
+
+ +

JavaScript / TypeScript — wasm-bindgen surface

+
import {
+  createEncryptedClient,
+  getObjectWithOfflineFallback,
+  deriveUserKeyFromEmail,
+  pollMasterHealthEvents,
+  getLastMasterHealthEvent,
+} from '@functionland/fula-client';
+
+const userKey = deriveUserKeyFromEmail(userEmail);
+
+const client = await createEncryptedClient(
+  {
+    endpoint: 'https://your-fula-gateway.com:9000',
+    accessToken: jwt,
+    healthGateEnabled: true,                 // functional on web
+    blockCacheEnabled: true,                 // accepted but inert on web
+    gatewayFallbackEnabled: true,            // accepted but inert on web
+    usersIndexChainRpcUrl: '...',            // accepted but inert on web
+    usersIndexAnchorAddress: '0x...',
+    usersIndexIpnsName: 'k51qzi5...',
+    usersIndexUserKey: userKey,
+  },
+  encryptionConfig,
+);
+
+const result = await getObjectWithOfflineFallback(client, 'my-bucket', 'photos/cat.jpg');
+console.log(result.source);     // {kind: 'Master'} | {kind: 'LocalCache'} | {kind: 'Gateway', url: ...}
+console.log(result.freshness);  // {kind: 'Live'} | {kind: 'Cached', observedAt: ...}
+
+ +

Health-event subscription (Phase 19)

+

Two patterns are exposed in every binding (Rust closure / Dart polling / JS polling); pick whichever fits your app.

+ +
// JS / TS — drain on a timer or UI rebuild
+const events = pollMasterHealthEvents(client);  // Array<MasterHealthEvent>
+for (const event of events) {
+  switch (event.kind) {
+    case 'Online':                  setOffline(false); break;
+    case 'OfflineFallbackActive':   setOffline(true); break;
+    case 'SeverelyDegraded':        disableNewBucketUI(); break;
+  }
+}
+
+// Or read latest on mount
+const last = getLastMasterHealthEvent(client);
+if (last && last.kind === 'OfflineFallbackActive') {
+  // app started while master is down
+}
+
+ +

What works where

+<table>
+  <tr><th>Surface</th><th>Native (Rust / Flutter)</th><th>Web (wasm / JS)</th></tr>
+  <tr><td>Health gate (Phase 2.1)</td><td>✅ functional</td><td>✅ functional</td></tr>
+  <tr><td>Block cache (Phase 2.2)</td><td>✅ functional</td><td>⚪ flag accepted, inert</td></tr>
+  <tr><td>Gateway race (Phase 2.3 / 2.4)</td><td>✅ functional</td><td>⚪ flag accepted, inert</td></tr>
+  <tr><td>Cold-start resolver (Phase 3.3)</td><td>✅ functional</td><td>⚪ fields accepted, inert</td></tr>
+  <tr><td>Transparency types &amp; polling (Phase 19)</td><td>✅ functional</td><td>✅ functional (returns Master/Live on the inert paths)</td></tr>
+  <tr><td>derive_user_key_from_email helper</td><td>✅</td><td>✅ (computed locally; useful for cross-platform identity flows)</td></tr>
+</table>
+

+ See WASM Compatibility for the full gating story. +

+
+
+
+
diff --git a/docs/website/security.html b/docs/website/security.html index 39d69b2..5ec0b98 100644 --- a/docs/website/security.html +++ b/docs/website/security.html @@ -42,7 +42,7 @@
diff --git a/docs/website/x402.html b/docs/website/x402.html index cfb1578..a1a78e7 100644 --- a/docs/website/x402.html +++ b/docs/website/x402.html @@ -232,7 +232,7 @@