From 3d595ed5f299ac4f3da671eca8c9dc652672535f Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Wed, 25 Mar 2026 13:38:00 -0400 Subject: [PATCH 01/20] Slow but correct impls --- .../examples/columnar/columnar_support.rs | 670 +++++++----------- 1 file changed, 274 insertions(+), 396 deletions(-) diff --git a/differential-dataflow/examples/columnar/columnar_support.rs b/differential-dataflow/examples/columnar/columnar_support.rs index ced700bec..b6a67af25 100644 --- a/differential-dataflow/examples/columnar/columnar_support.rs +++ b/differential-dataflow/examples/columnar/columnar_support.rs @@ -70,14 +70,17 @@ pub use updates::Updates; pub struct RecordedUpdates { pub updates: Updates, pub records: usize, + /// Whether `updates` is known to be sorted and consolidated + /// (no duplicate (key, val, time) triples, no zero diffs). + pub consolidated: bool, } impl Default for RecordedUpdates { - fn default() -> Self { Self { updates: Default::default(), records: 0 } } + fn default() -> Self { Self { updates: Default::default(), records: 0, consolidated: true } } } impl Clone for RecordedUpdates { - fn clone(&self) -> Self { Self { updates: self.updates.clone(), records: self.records } } + fn clone(&self) -> Self { Self { updates: self.updates.clone(), records: self.records, consolidated: self.consolidated } } } impl timely::Accountable for RecordedUpdates { @@ -133,7 +136,9 @@ mod container_impls { let t2 = T2::to_inner(t1_owned.clone()); new_times.push(&t2); } + // TODO: Assumes Enter (to_inner) is order-preserving on times. 
RecordedUpdates { + consolidated: self.consolidated, updates: Updates { keys: self.updates.keys, vals: self.updates.vals, @@ -168,6 +173,7 @@ mod container_impls { RecordedUpdates { updates: flat.consolidate(), records: self.records, + consolidated: true, } } } @@ -185,7 +191,9 @@ mod container_impls { output.push((k, v, &new_time, d)); } } - RecordedUpdates { updates: output, records: self.records } + // TODO: Time advancement may not be order preserving, but .. it could be. + // TODO: Before this is consolidated the above would need to be `form`ed. + RecordedUpdates { updates: output, records: self.records, consolidated: false } } } } @@ -222,7 +230,8 @@ mod column_builder { let mut refs = self.current.borrow().into_index_iter().collect::>(); refs.sort(); let updates = Updates::form(refs.into_iter()); - self.pending.push_back(RecordedUpdates { updates, records }); + if std::env::var("DDIR_VALIDATE").is_ok() { updates.validate("ValColBuilder::push_into"); } + self.pending.push_back(RecordedUpdates { updates, records, consolidated: true }); self.current.clear(); } } @@ -260,7 +269,8 @@ mod column_builder { let mut refs = self.current.borrow().into_index_iter().collect::>(); refs.sort(); let updates = Updates::form(refs.into_iter()); - self.pending.push_back(RecordedUpdates { updates, records }); + if std::env::var("DDIR_VALIDATE").is_ok() { updates.validate("ValColBuilder::finish"); } + self.pending.push_back(RecordedUpdates { updates, records, consolidated: true }); self.current.clear(); } self.empty = self.pending.pop_front(); @@ -327,7 +337,7 @@ mod distributor { let mut first_records = total_records.saturating_sub(non_empty.saturating_sub(1)); for (pusher, output) in pushers.iter_mut().zip(outputs) { if !output.keys.values.is_empty() { - let recorded = RecordedUpdates { updates: output, records: first_records }; + let recorded = RecordedUpdates { updates: output, records: first_records, consolidated: container.consolidated }; first_records = 1; let mut recorded 
= recorded; Message::push_at(&mut recorded, time.clone(), pusher); @@ -499,150 +509,21 @@ pub mod arrangement { type TimeOwned = U::Time; - fn len(&self) -> usize { self.diffs.values.len() } + fn len(&self) -> usize { self.keys.values.len() } fn clear(&mut self) { *self = Self::default(); } #[inline(never)] fn merge_from(&mut self, others: &mut [Self], positions: &mut [usize]) { - match others.len() { - 0 => {}, - 1 => { - // Bulk copy: take remaining keys from position onward. - let other = &mut others[0]; - let pos = &mut positions[0]; - if self.keys.values.len() == 0 && *pos == 0 { - std::mem::swap(self, other); - return; - } - let other_len = other.keys.values.len(); - self.extend_from_keys(other, *pos .. other_len); - *pos = other_len; - }, - 2 => { - let mut this_sum = U::Diff::default(); - let mut that_sum = U::Diff::default(); - - let (left, right) = others.split_at_mut(1); - let this = &left[0]; - let that = &right[0]; - let this_keys = this.keys.values.borrow(); - let that_keys = that.keys.values.borrow(); - let mut this_key_range = positions[0] .. this_keys.len(); - let mut that_key_range = positions[1] .. that_keys.len(); - - while !this_key_range.is_empty() && !that_key_range.is_empty() && !timely::container::SizableContainer::at_capacity(self) { - let this_key = this_keys.get(this_key_range.start); - let that_key = that_keys.get(that_key_range.start); - match this_key.cmp(&that_key) { - std::cmp::Ordering::Less => { - let lower = this_key_range.start; - gallop(this_keys, &mut this_key_range, |x| x < that_key); - self.extend_from_keys(this, lower .. this_key_range.start); - }, - std::cmp::Ordering::Equal => { - let values_len = self.vals.values.len(); - let mut this_val_range = this.vals_bounds(this_key_range.start .. this_key_range.start+1); - let mut that_val_range = that.vals_bounds(that_key_range.start .. 
that_key_range.start+1); - while !this_val_range.is_empty() && !that_val_range.is_empty() { - let this_val = this.vals.values.borrow().get(this_val_range.start); - let that_val = that.vals.values.borrow().get(that_val_range.start); - match this_val.cmp(&that_val) { - std::cmp::Ordering::Less => { - let lower = this_val_range.start; - gallop(this.vals.values.borrow(), &mut this_val_range, |x| x < that_val); - self.extend_from_vals(this, lower .. this_val_range.start); - }, - std::cmp::Ordering::Equal => { - let updates_len = self.times.values.len(); - let mut this_time_range = this.times_bounds(this_val_range.start .. this_val_range.start+1); - let mut that_time_range = that.times_bounds(that_val_range.start .. that_val_range.start+1); - while !this_time_range.is_empty() && !that_time_range.is_empty() { - let this_time = this.times.values.borrow().get(this_time_range.start); - let this_diff = this.diffs.values.borrow().get(this_time_range.start); - let that_time = that.times.values.borrow().get(that_time_range.start); - let that_diff = that.diffs.values.borrow().get(that_time_range.start); - match this_time.cmp(&that_time) { - std::cmp::Ordering::Less => { - let lower = this_time_range.start; - gallop(this.times.values.borrow(), &mut this_time_range, |x| x < that_time); - self.times.values.extend_from_self(this.times.values.borrow(), lower .. this_time_range.start); - self.diffs.extend_from_self(this.diffs.borrow(), lower .. 
this_time_range.start); - }, - std::cmp::Ordering::Equal => { - this_sum.copy_from(this_diff); - that_sum.copy_from(that_diff); - this_sum.plus_equals(&that_sum); - if !this_sum.is_zero() { - self.times.values.push(this_time); - self.diffs.values.push(&this_sum); - self.diffs.bounds.push(self.diffs.values.len() as u64); - } - this_time_range.start += 1; - that_time_range.start += 1; - }, - std::cmp::Ordering::Greater => { - let lower = that_time_range.start; - gallop(that.times.values.borrow(), &mut that_time_range, |x| x < this_time); - self.times.values.extend_from_self(that.times.values.borrow(), lower .. that_time_range.start); - self.diffs.extend_from_self(that.diffs.borrow(), lower .. that_time_range.start); - }, - } - } - // Remaining from this side - if !this_time_range.is_empty() { - self.times.values.extend_from_self(this.times.values.borrow(), this_time_range.clone()); - self.diffs.extend_from_self(this.diffs.borrow(), this_time_range.clone()); - } - // Remaining from that side - if !that_time_range.is_empty() { - self.times.values.extend_from_self(that.times.values.borrow(), that_time_range.clone()); - self.diffs.extend_from_self(that.diffs.borrow(), that_time_range.clone()); - } - if self.times.values.len() > updates_len { - self.times.bounds.push(self.times.values.len() as u64); - self.vals.values.push(this_val); - } - this_val_range.start += 1; - that_val_range.start += 1; - }, - std::cmp::Ordering::Greater => { - let lower = that_val_range.start; - gallop(that.vals.values.borrow(), &mut that_val_range, |x| x < this_val); - self.extend_from_vals(that, lower .. 
that_val_range.start); - }, - } - } - self.extend_from_vals(this, this_val_range); - self.extend_from_vals(that, that_val_range); - if self.vals.values.len() > values_len { - self.vals.bounds.push(self.vals.values.len() as u64); - self.keys.values.push(this_key); - } - this_key_range.start += 1; - that_key_range.start += 1; - }, - std::cmp::Ordering::Greater => { - let lower = that_key_range.start; - gallop(that_keys, &mut that_key_range, |x| x < this_key); - self.extend_from_keys(that, lower .. that_key_range.start); - }, - } - } - // Copy remaining from whichever side has data, up to capacity. - while !this_key_range.is_empty() && !timely::container::SizableContainer::at_capacity(self) { - let lower = this_key_range.start; - this_key_range.start = this_key_range.end; // take all remaining - self.extend_from_keys(this, lower .. this_key_range.start); - } - while !that_key_range.is_empty() && !timely::container::SizableContainer::at_capacity(self) { - let lower = that_key_range.start; - that_key_range.start = that_key_range.end; - self.extend_from_keys(that, lower .. that_key_range.start); - } - positions[0] = this_key_range.start; - positions[1] = that_key_range.start; - }, - n => unimplemented!("{n}-way merge not supported"), + // Slow but correct: collect self + all others (skipping already-consumed updates), + // rebuild via form_unsorted which sorts and consolidates. + let self_iter = self.iter(); + let others_iter = others.iter().zip(positions.iter()).flat_map(|(other, pos)| { + other.iter().skip(*pos) + }); + *self = Self::form_unsorted(self_iter.chain(others_iter)); + // Mark all inputs as fully consumed. + for (other, pos) in others.iter().zip(positions.iter_mut()) { + *pos = other.len(); } } @@ -653,49 +534,35 @@ pub mod arrangement { keep: &mut Self, ship: &mut Self, ) { + use columnar::Columnar; + // Slow but correct: partition, form_unsorted onto keep/ship. 
+ let mut keep_vec: Vec<(U::Key, U::Val, U::Time, U::Diff)> = Vec::new(); + let mut ship_vec: Vec<(U::Key, U::Val, U::Time, U::Diff)> = Vec::new(); + let mut time = U::Time::default(); - for key_idx in 0 .. self.keys.values.len() { - let key = self.keys.values.borrow().get(key_idx); - let keep_vals_len = keep.vals.values.len(); - let ship_vals_len = ship.vals.values.len(); - for val_idx in self.vals_bounds(key_idx..key_idx+1) { - let val = self.vals.values.borrow().get(val_idx); - let keep_times_len = keep.times.values.len(); - let ship_times_len = ship.times.values.len(); - for time_idx in self.times_bounds(val_idx..val_idx+1) { - let t = self.times.values.borrow().get(time_idx); - let diff = self.diffs.values.borrow().get(time_idx); - time.copy_from(t); - if upper.less_equal(&time) { - frontier.insert_ref(&time); - keep.times.values.push(t); - keep.diffs.values.push(diff); - keep.diffs.bounds.push(keep.diffs.values.len() as u64); - } - else { - ship.times.values.push(t); - ship.diffs.values.push(diff); - ship.diffs.bounds.push(ship.diffs.values.len() as u64); - } - } - if keep.times.values.len() > keep_times_len { - keep.times.bounds.push(keep.times.values.len() as u64); - keep.vals.values.push(val); - } - if ship.times.values.len() > ship_times_len { - ship.times.bounds.push(ship.times.values.len() as u64); - ship.vals.values.push(val); - } - } - if keep.vals.values.len() > keep_vals_len { - keep.vals.bounds.push(keep.vals.values.len() as u64); - keep.keys.values.push(key); - } - if ship.vals.values.len() > ship_vals_len { - ship.vals.bounds.push(ship.vals.values.len() as u64); - ship.keys.values.push(key); + for (k, v, t, d) in self.iter() { + Columnar::copy_from(&mut time, t); + if upper.less_equal(&time) { + frontier.insert_ref(&time); + keep_vec.push((Columnar::into_owned(k), Columnar::into_owned(v), time.clone(), Columnar::into_owned(d))); + } else { + ship_vec.push((Columnar::into_owned(k), Columnar::into_owned(v), time.clone(), Columnar::into_owned(d))); 
} } + + // Merge new data into keep/ship: push existing + new into flat Updates, consolidate. + if !keep_vec.is_empty() { + let mut flat = Self::default(); + for (k, v, t, d) in keep.iter() { flat.push((k, v, t, d)); } + for (k, v, t, d) in keep_vec.iter() { flat.push((k, v, t, d)); } + *keep = flat.consolidate(); + } + if !ship_vec.is_empty() { + let mut flat = Self::default(); + for (k, v, t, d) in ship.iter() { flat.push((k, v, t, d)); } + for (k, v, t, d) in ship_vec.iter() { flat.push((k, v, t, d)); } + *ship = flat.consolidate(); + } } } @@ -748,7 +615,7 @@ pub mod arrangement { } pub struct ValMirror { - current: Updates, + chunks: Vec>, } impl differential_dataflow::trace::Builder for ValMirror { type Time = U::Time; @@ -756,64 +623,49 @@ pub mod arrangement { type Output = OrdValBatch>; fn with_capacity(_keys: usize, _vals: usize, _upds: usize) -> Self { - Self { current: Updates::default() } + Self { chunks: Vec::new() } } fn push(&mut self, chunk: &mut Self::Input) { - use columnar::Len; - let len = chunk.keys.values.len(); - if len > 0 { - self.current.extend_from_keys(chunk, 0..len); + if chunk.len() > 0 { + self.chunks.push(std::mem::take(chunk)); } } fn done(self, description: Description) -> Self::Output { - let mut chain = if self.current.len() > 0 { - vec![self.current] - } else { - vec![] - }; + let mut chain = self.chunks; Self::seal(&mut chain, description) } fn seal(chain: &mut Vec, description: Description) -> Self::Output { - if chain.len() == 0 { + use columnar::Len; + + // Slow but correct: merge all chain entries via form_unsorted. 
+ let merged: Updates = Updates::form_unsorted(chain.iter().flat_map(|c| c.iter())); + chain.clear(); + + let updates = Len::len(&merged.diffs.values); + if updates == 0 { let storage = OrdValStorage { keys: Default::default(), vals: Default::default(), upds: Default::default(), }; OrdValBatch { storage, description, updates: 0 } - } - else if chain.len() == 1 { - use columnar::Len; - let storage = chain.pop().unwrap(); - let updates = storage.diffs.values.len(); - let val_offs = strides_to_offset_list(&storage.vals.bounds, storage.keys.values.len()); - let time_offs = strides_to_offset_list(&storage.times.bounds, storage.vals.values.len()); + } else { + let val_offs = strides_to_offset_list(&merged.vals.bounds, Len::len(&merged.keys.values)); + let time_offs = strides_to_offset_list(&merged.times.bounds, Len::len(&merged.vals.values)); let storage = OrdValStorage { - keys: Coltainer { container: storage.keys.values }, + keys: Coltainer { container: merged.keys.values }, vals: Vals { offs: val_offs, - vals: Coltainer { container: storage.vals.values }, + vals: Coltainer { container: merged.vals.values }, }, upds: Upds { offs: time_offs, - times: Coltainer { container: storage.times.values }, - diffs: Coltainer { container: storage.diffs.values }, + times: Coltainer { container: merged.times.values }, + diffs: Coltainer { container: merged.diffs.values }, }, }; OrdValBatch { storage, description, updates } } - else { - use columnar::Len; - let mut merged = chain.remove(0); - for other in chain.drain(..) { - let len = other.keys.values.len(); - if len > 0 { - merged.extend_from_keys(&other, 0..len); - } - } - chain.push(merged); - Self::seal(chain, description) - } } } @@ -888,6 +740,57 @@ pub mod updates { lower..upper } + /// A streaming consolidation iterator for sorted `(key, val, time, diff)` data. + /// + /// Accumulates diffs for equal `(key, val, time)` triples, yielding at most + /// one output per distinct triple, with a non-zero accumulated diff. 
+ /// Input must be sorted by `(key, val, time)`. + pub struct Consolidating { + iter: std::iter::Peekable, + diff: D, + } + + impl Consolidating + where + K: Copy + Eq, + V: Copy + Eq, + T: Copy + Eq, + D: Semigroup + IsZero + Default, + I: Iterator, + { + pub fn new(iter: I) -> Self { + Self { iter: iter.peekable(), diff: D::default() } + } + } + + impl Iterator for Consolidating + where + K: Copy + Eq, + V: Copy + Eq, + T: Copy + Eq, + D: Semigroup + IsZero + Default + Clone, + I: Iterator, + { + type Item = (K, V, T, D); + fn next(&mut self) -> Option { + loop { + let (k, v, t, d) = self.iter.next()?; + self.diff = d; + while let Some(&(k2, v2, t2, _)) = self.iter.peek() { + if k2 == k && v2 == v && t2 == t { + let (_, _, _, d2) = self.iter.next().unwrap(); + self.diff.plus_equals(&d2); + } else { + break; + } + } + if !self.diff.is_zero() { + return Some((k, v, t, self.diff.clone())); + } + } + } + } + impl Updates { pub fn vals_bounds(&self, key_range: std::ops::Range) -> std::ops::Range { @@ -933,98 +836,64 @@ pub mod updates { self.diffs.extend_from_self(other.diffs.borrow(), time_range); } - /// Forms a consolidated `Updates` from sorted `(key, val, time, diff)` refs. - /// - /// Tracks a `prev` reference to the previous element. On each new element, - /// compares against `prev` to detect key/val/time changes. Only pushes - /// accumulated diffs when they are nonzero, and only emits times/vals/keys - /// that have at least one nonzero diff beneath them. - pub fn form<'a>(mut sorted: impl Iterator>>) -> Self { + /// Forms a consolidated `Updates` trie from unsorted `(key, val, time, diff)` refs. + pub fn form_unsorted<'a>(unsorted: impl Iterator>>) -> Self { + let mut data = unsorted.collect::>(); + data.sort(); + Self::form(data.into_iter()) + } + + /// Forms a consolidated `Updates` trie from sorted `(key, val, time, diff)` refs. + pub fn form<'a>(sorted: impl Iterator>>) -> Self { + // Step 1: Streaming consolidation — accumulate diffs, drop zeros. 
+ let consolidated = Consolidating::new( + sorted.map(|(k, v, t, d)| (k, v, t, ::into_owned(d))) + ); + + // Step 2: Build the trie from consolidated, sorted, non-zero data. let mut output = Self::default(); - let mut diff_stash = U::Diff::default(); - let mut diff_temp = U::Diff::default(); - - if let Some(first) = sorted.next() { - - let mut prev = first; - Columnar::copy_from(&mut diff_stash, prev.3); - - for curr in sorted { - let key_differs = ContainerOf::::reborrow_ref(curr.0) != ContainerOf::::reborrow_ref(prev.0); - let val_differs = key_differs || ContainerOf::::reborrow_ref(curr.1) != ContainerOf::::reborrow_ref(prev.1); - let time_differs = val_differs || ContainerOf::::reborrow_ref(curr.2) != ContainerOf::::reborrow_ref(prev.2); - - if time_differs { - // Flush the accumulated diff for prev's (key, val, time). - if !diff_stash.is_zero() { - // We have a real update to emit. Push time (and val/key - // if this is the first time under them). - let times_len = output.times.values.len(); - let vals_len = output.vals.values.len(); - - if val_differs { - // Seal the previous val's time list, if any times were emitted. - if times_len > 0 { - output.times.bounds.push(times_len as u64); - } - if key_differs { - // Seal the previous key's val list, if any vals were emitted. - if vals_len > 0 { - output.vals.bounds.push(vals_len as u64); - } - output.keys.values.push(prev.0); - } - output.vals.values.push(prev.1); - } - output.times.values.push(prev.2); - output.diffs.values.push(&diff_stash); - output.diffs.bounds.push(output.diffs.values.len() as u64); - } - Columnar::copy_from(&mut diff_stash, curr.3); - } else { - // Same (key, val, time): accumulate diff. 
- Columnar::copy_from(&mut diff_temp, curr.3); - diff_stash.plus_equals(&diff_temp); + let mut updates = consolidated; + if let Some((key, val, time, diff)) = updates.next() { + let mut prev = (key, val, time); + output.keys.values.push(key); + output.vals.values.push(val); + output.times.values.push(time); + output.diffs.values.push(&diff); + output.diffs.bounds.push(output.diffs.values.len() as u64); + + // As we proceed, seal up known complete runs. + for (key, val, time, diff) in updates { + + // If keys differ, record key and seal vals and times. + if key != prev.0 { + output.vals.bounds.push(output.vals.values.len() as u64); + output.times.bounds.push(output.times.values.len() as u64); + output.keys.values.push(key); + output.vals.values.push(val); } - prev = curr; - } - - // Flush the final accumulated diff. - if !diff_stash.is_zero() { - let keys_len = output.keys.values.len(); - let vals_len = output.vals.values.len(); - let times_len = output.times.values.len(); - let need_key = keys_len == 0 || ContainerOf::::reborrow_ref(prev.0) != output.keys.values.borrow().get(keys_len - 1); - let need_val = need_key || vals_len == 0 || ContainerOf::::reborrow_ref(prev.1) != output.vals.values.borrow().get(vals_len - 1); - - if need_val { - if times_len > 0 { - output.times.bounds.push(times_len as u64); - } - if need_key { - if vals_len > 0 { - output.vals.bounds.push(vals_len as u64); - } - output.keys.values.push(prev.0); - } - output.vals.values.push(prev.1); + // If vals differ, record val and seal times. + else if val != prev.1 { + output.times.bounds.push(output.times.values.len() as u64); + output.vals.values.push(val); } - output.times.values.push(prev.2); - output.diffs.values.push(&diff_stash); + else { + // We better not find a duplicate time. + assert!(time != prev.2); + } + + // Always record (time, diff). 
+ output.times.values.push(time); + output.diffs.values.push(&diff); output.diffs.bounds.push(output.diffs.values.len() as u64); - } - // Seal the final groups at each level. - if !output.times.values.is_empty() { - output.times.bounds.push(output.times.values.len() as u64); - } - if !output.vals.values.is_empty() { - output.vals.bounds.push(output.vals.values.len() as u64); - } - if !output.keys.values.is_empty() { - output.keys.bounds.push(output.keys.values.len() as u64); + prev = (key, val, time); } + + // Seal up open lists. + output.keys.bounds.push(output.keys.values.len() as u64); + output.vals.bounds.push(output.vals.values.len() as u64); + output.times.bounds.push(output.times.values.len() as u64); } output @@ -1034,102 +903,60 @@ pub mod updates { /// single outer key list, all lists sorted and deduplicated, /// diff lists are singletons (or absent if cancelled). pub fn consolidate(self) -> Self { + Self::form_unsorted(self.iter()) + } - let Self { keys, vals, times, diffs } = self; - - let keys_b = keys.borrow(); - let vals_b = vals.borrow(); - let times_b = times.borrow(); - let diffs_b = diffs.borrow(); - - // Flatten to index tuples: [key_abs, val_abs, time_abs, diff_abs]. - let mut tuples: Vec<[usize; 4]> = Vec::new(); - for outer in 0..Len::len(&keys_b) { - for k in child_range(keys_b.bounds, outer) { - for v in child_range(vals_b.bounds, k) { - for t in child_range(times_b.bounds, v) { - for d in child_range(diffs_b.bounds, t) { - tuples.push([k, v, t, d]); - } - } - } - } - } - - // Sort by (key, val, time). Diff is payload. - tuples.sort_by(|a, b| { - keys_b.values.get(a[0]).cmp(&keys_b.values.get(b[0])) - .then_with(|| vals_b.values.get(a[1]).cmp(&vals_b.values.get(b[1]))) - .then_with(|| times_b.values.get(a[2]).cmp(×_b.values.get(b[2]))) - }); - - // Build consolidated output, bottom-up cancellation. 
- let mut output = Self::default(); - let mut diff_stash = U::Diff::default(); - let mut diff_temp = U::Diff::default(); - - let mut idx = 0; - while idx < tuples.len() { - let key_ref = keys_b.values.get(tuples[idx][0]); - let key_start_vals = output.vals.values.len(); - - // All entries with this key. - while idx < tuples.len() && keys_b.values.get(tuples[idx][0]) == key_ref { - let val_ref = vals_b.values.get(tuples[idx][1]); - let val_start_times = output.times.values.len(); - - // All entries with this (key, val). - while idx < tuples.len() - && keys_b.values.get(tuples[idx][0]) == key_ref - && vals_b.values.get(tuples[idx][1]) == val_ref - { - let time_ref = times_b.values.get(tuples[idx][2]); - - // Sum all diffs for this (key, val, time). - Columnar::copy_from(&mut diff_stash, diffs_b.values.get(tuples[idx][3])); - idx += 1; - while idx < tuples.len() - && keys_b.values.get(tuples[idx][0]) == key_ref - && vals_b.values.get(tuples[idx][1]) == val_ref - && times_b.values.get(tuples[idx][2]) == time_ref - { - Columnar::copy_from(&mut diff_temp, diffs_b.values.get(tuples[idx][3])); - diff_stash.plus_equals(&diff_temp); - idx += 1; - } - - // Emit time + singleton diff if nonzero. - if !diff_stash.is_zero() { - output.times.values.push(time_ref); - output.diffs.values.push(&diff_stash); - output.diffs.bounds.push(output.diffs.values.len() as u64); - } - } - - // Seal time list for this val; emit val if any times survived. - if output.times.values.len() > val_start_times { - output.times.bounds.push(output.times.values.len() as u64); - output.vals.values.push(val_ref); - } - } + /// The number of leaf-level diff entries (total updates). + pub fn len(&self) -> usize { self.diffs.values.len() } - // Seal val list for this key; emit key if any vals survived. 
- if output.vals.values.len() > key_start_vals { - output.vals.bounds.push(output.vals.values.len() as u64); - output.keys.values.push(key_ref); + /// Checks trie invariants and panics with a descriptive message if any fail. + pub fn validate(&self, label: &str) { + let keys_len = Len::len(&self.keys.values); + let vals_bounds_len = Len::len(&self.vals.bounds); + let vals_len = Len::len(&self.vals.values); + let times_bounds_len = Len::len(&self.times.bounds); + let times_len = Len::len(&self.times.values); + let diffs_bounds_len = Len::len(&self.diffs.bounds); + let diffs_len = Len::len(&self.diffs.values); + + // 1. One val-group bound per key. + assert_eq!(vals_bounds_len, keys_len, + "{label}: vals.bounds.len() ({vals_bounds_len}) != keys.values.len() ({keys_len})"); + + // 2. One time-group bound per val. + assert_eq!(times_bounds_len, vals_len, + "{label}: times.bounds.len() ({times_bounds_len}) != vals.values.len() ({vals_len})"); + + // 3. One diff-group bound per time (singleton diffs). + assert_eq!(diffs_bounds_len, times_len, + "{label}: diffs.bounds.len() ({diffs_bounds_len}) != times.values.len() ({times_len})"); + + // 4. Cumulative bounds are monotonically non-decreasing at each level. + // 5. Final bound at each level matches the child values count. + fn check_monotone_and_final + Len>( + bounds: &B, + child_len: usize, + level: &str, + label: &str, + ) { + let n = Len::len(bounds); + if n == 0 { return; } + let mut prev = bounds.index_as(0); + for i in 1..n { + let curr = bounds.index_as(i); + assert!(curr >= prev, + "{label}: {level}.bounds not monotonic at index {i}: {prev} > {curr}"); + prev = curr; } + let last = bounds.index_as(n - 1) as usize; + assert_eq!(last, child_len, + "{label}: last {level}.bounds ({last}) != child values len ({child_len})"); } - // Seal the single outer key list. 
- if !output.keys.values.is_empty() { - output.keys.bounds.push(output.keys.values.len() as u64); - } - - output + check_monotone_and_final(&self.vals.bounds.borrow(), vals_len, "vals", label); + check_monotone_and_final(&self.times.bounds.borrow(), times_len, "times", label); + check_monotone_and_final(&self.diffs.bounds.borrow(), diffs_len, "diffs", label); } - - /// The number of leaf-level diff entries (total updates). - pub fn len(&self) -> usize { self.diffs.values.len() } } /// Push a single flat update as a stride-1 entry. @@ -1203,6 +1030,54 @@ pub mod updates { fn into_bytes(&self, _writer: &mut W) { unimplemented!() } } + /// An incremental trie builder that accepts sorted, consolidated `Updates` chunks + /// and melds them into a single `Updates` trie. + /// + /// The internal `Updates` has open (unsealed) bounds at the keys, vals, and times + /// levels — the last group at each level has its values pushed but no corresponding + /// bounds entry. `diffs.bounds` is always 1:1 with `times.values`. + /// + /// `meld` accepts a consolidated `Updates` whose first `(key, val, time)` is + /// strictly greater than the builder's last `(key, val, time)`. The key and val + /// may equal the builder's current open key/val, as long as the time is greater. + /// + /// `done` seals all open bounds and returns the completed `Updates`. + pub struct UpdatesBuilder { + /// Non-empty, consolidated updates. + updates: Updates, + } + + impl UpdatesBuilder { + /// Construct a new builder from consolidated updates. + /// + /// If the updates are not consolidated none of this works. + pub fn new_from(updates: Updates) -> Self { Self { updates } } + + /// Meld a sorted, consolidated `Updates` chunk into this builder. + /// + /// The chunk's first `(key, val, time)` must be strictly greater than + /// the builder's last `(key, val, time)`. Keys and vals may overlap + /// (continue the current group), but times must be strictly increasing + /// within the same `(key, val)`. 
+ pub fn meld(&mut self, chunk: &Updates) { + todo!("to be implemented") + } + + /// Seal all open bounds and return the completed `Updates`. + pub fn done(mut self) -> Updates { + use columnar::Len; + if Len::len(&self.updates.keys.values) > 0 { + // Seal the open time group. + self.updates.times.bounds.push(Len::len(&self.updates.times.values) as u64); + // Seal the open val group. + self.updates.vals.bounds.push(Len::len(&self.updates.vals.values) as u64); + // Seal the outer key group. + self.updates.keys.bounds.push(Len::len(&self.updates.keys.values) as u64); + } + self.updates + } + } + #[cfg(test)] mod tests { use super::*; @@ -1348,6 +1223,9 @@ where .unary::, _, _, _>(Pipeline, "JoinFunction", move |_, _| { move |input, output| { input.for_each(|time, data| { + if std::env::var("DDIR_VALIDATE").is_ok() { + data.updates.validate("JoinFunction input"); + } let mut session = output.session_with_builder(&time); for (k1, v1, t1, d1) in data.updates.iter() { let t1o: U::Time = Columnar::into_owned(t1); From b25fd430c0b709e84efeb352ff53d1c07332c0ed Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Thu, 26 Mar 2026 06:21:08 -0400 Subject: [PATCH 02/20] WIP --- .../examples/columnar/columnar_support.rs | 317 +++++++++++++++--- differential-dataflow/src/operators/reduce.rs | 2 + 2 files changed, 279 insertions(+), 40 deletions(-) diff --git a/differential-dataflow/examples/columnar/columnar_support.rs b/differential-dataflow/examples/columnar/columnar_support.rs index b6a67af25..c4f17de87 100644 --- a/differential-dataflow/examples/columnar/columnar_support.rs +++ b/differential-dataflow/examples/columnar/columnar_support.rs @@ -443,8 +443,8 @@ pub mod arrangement { } use crate::{Updates, RecordedUpdates}; - use differential_dataflow::trace::implementations::merge_batcher::{MergeBatcher, InternalMerger}; - type ValBatcher2 = MergeBatcher, TrieChunker, InternalMerger>>; + use differential_dataflow::trace::implementations::merge_batcher::MergeBatcher; + type 
ValBatcher2 = MergeBatcher, TrieChunker, trie_merger::TrieMerger>; /// A chunker that unwraps `RecordedUpdates` into bare `Updates` for the merge batcher. /// The `records` accounting is discarded here — it has served its purpose for exchange. @@ -488,8 +488,7 @@ pub mod arrangement { pub mod batcher { - use std::ops::Range; - use columnar::{Borrow, Columnar, Container, Index, Len, Push}; + use columnar::{Borrow, Columnar, Index, Len, Push}; use differential_dataflow::difference::{Semigroup, IsZero}; use timely::progress::frontier::{Antichain, AntichainRef}; use differential_dataflow::trace::implementations::merge_batcher::container::InternalMerge; @@ -499,34 +498,34 @@ pub mod arrangement { impl timely::container::SizableContainer for Updates { fn at_capacity(&self) -> bool { - use columnar::Len; self.diffs.values.len() >= 64 * 1024 } fn ensure_capacity(&mut self, _stash: &mut Option) { } } + /// Required by `reduce_abelian`'s bound `Builder::Input: InternalMerge`. + /// Not called at runtime — our batcher uses `TrieMerger` instead. + /// TODO: Relax the bound in DD's reduce to remove this requirement. impl InternalMerge for Updates { - type TimeOwned = U::Time; - - fn len(&self) -> usize { self.keys.values.len() } - fn clear(&mut self) { *self = Self::default(); } - - #[inline(never)] + fn len(&self) -> usize { Len::len(&self.keys.values) } + fn clear(&mut self) { + use columnar::Clear; + self.keys.clear(); + self.vals.clear(); + self.times.clear(); + self.diffs.clear(); + } fn merge_from(&mut self, others: &mut [Self], positions: &mut [usize]) { - // Slow but correct: collect self + all others (skipping already-consumed updates), - // rebuild via form_unsorted which sorts and consolidates. let self_iter = self.iter(); let others_iter = others.iter().zip(positions.iter()).flat_map(|(other, pos)| { other.iter().skip(*pos) }); *self = Self::form_unsorted(self_iter.chain(others_iter)); - // Mark all inputs as fully consumed. 
for (other, pos) in others.iter().zip(positions.iter_mut()) { *pos = other.len(); } } - fn extract( &mut self, upper: AntichainRef, @@ -534,12 +533,9 @@ pub mod arrangement { keep: &mut Self, ship: &mut Self, ) { - use columnar::Columnar; - // Slow but correct: partition, form_unsorted onto keep/ship. + let mut time = U::Time::default(); let mut keep_vec: Vec<(U::Key, U::Val, U::Time, U::Diff)> = Vec::new(); let mut ship_vec: Vec<(U::Key, U::Val, U::Time, U::Diff)> = Vec::new(); - - let mut time = U::Time::default(); for (k, v, t, d) in self.iter() { Columnar::copy_from(&mut time, t); if upper.less_equal(&time) { @@ -549,8 +545,6 @@ pub mod arrangement { ship_vec.push((Columnar::into_owned(k), Columnar::into_owned(v), time.clone(), Columnar::into_owned(d))); } } - - // Merge new data into keep/ship: push existing + new into flat Updates, consolidate. if !keep_vec.is_empty() { let mut flat = Self::default(); for (k, v, t, d) in keep.iter() { flat.push((k, v, t, d)); } @@ -565,28 +559,135 @@ pub mod arrangement { } } } + } + + pub mod trie_merger { + + use columnar::{Columnar, Len}; + use timely::PartialOrder; + use timely::progress::frontier::{Antichain, AntichainRef}; + use differential_dataflow::trace::implementations::merge_batcher::Merger; + + use crate::ColumnarUpdate as Update; + use crate::Updates; - #[inline(always)] - pub(crate) fn gallop(input: TC, range: &mut Range, mut cmp: impl FnMut(::Ref) -> bool) { - // if empty input, or already >= element, return - if !Range::::is_empty(range) && cmp(input.get(range.start)) { - let mut step = 1; - while range.start + step < range.end && cmp(input.get(range.start + step)) { - range.start += step; - step <<= 1; + pub struct TrieMerger { + _marker: std::marker::PhantomData, + } + + impl Default for TrieMerger { + fn default() -> Self { Self { _marker: std::marker::PhantomData } } + } + + /// A merging iterator over two sorted iterators. 
+ struct Merging { + iter1: std::iter::Peekable, + iter2: std::iter::Peekable, + } + + impl Iterator for Merging + where + K: Copy + Ord, + V: Copy + Ord, + T: Copy + Ord, + I1: Iterator, + I2: Iterator, + { + type Item = (K, V, T, D); + #[inline] + fn next(&mut self) -> Option { + match (self.iter1.peek(), self.iter2.peek()) { + (Some(a), Some(b)) => { + if (a.0, a.1, a.2) <= (b.0, b.1, b.2) { + self.iter1.next() + } else { + self.iter2.next() + } + } + (Some(_), None) => self.iter1.next(), + (None, Some(_)) => self.iter2.next(), + (None, None) => None, + } + } + } + + /// Build sorted `Updates` chunks from a sorted iterator of refs, + /// using `Updates::form` (which consolidates internally) on batches. + fn form_chunks<'a, U: Update>( + sorted: impl Iterator>>, + output: &mut Vec>, + ) { + let mut sorted = sorted.peekable(); + while sorted.peek().is_some() { + let chunk = Updates::::form((&mut sorted).take(64 * 1024)); + if chunk.len() > 0 { + output.push(chunk); } + } + } + + impl Merger for TrieMerger + where + U::Time: Ord + PartialOrder + Clone + 'static, + { + type Chunk = Updates; + type Time = U::Time; + + fn merge( + &mut self, + list1: Vec>, + list2: Vec>, + output: &mut Vec>, + _stash: &mut Vec>, + ) { + let iter1 = list1.iter().flat_map(|chunk| chunk.iter()); + let iter2 = list2.iter().flat_map(|chunk| chunk.iter()); + + let merged = Merging { + iter1: iter1.peekable(), + iter2: iter2.peekable(), + }; + + form_chunks::(merged, output); + } - step >>= 1; - while step > 0 { - if range.start + step < range.end && cmp(input.get(range.start + step)) { - range.start += step; + fn extract( + &mut self, + merged: Vec, + upper: AntichainRef, + frontier: &mut Antichain, + ship: &mut Vec, + kept: &mut Vec, + _stash: &mut Vec, + ) { + // Flatten the sorted, consolidated chain into refs. + let all = merged.iter().flat_map(|chunk| chunk.iter()); + + // Partition into two sorted streams by time. 
+ let mut time_owned = U::Time::default(); + let mut keep_vec = Vec::new(); + let mut ship_vec = Vec::new(); + for (k, v, t, d) in all { + Columnar::copy_from(&mut time_owned, t); + if upper.less_equal(&time_owned) { + frontier.insert_ref(&time_owned); + keep_vec.push((k, v, t, d)); + } else { + ship_vec.push((k, v, t, d)); } - step >>= 1; } - range.start += 1; + // Build chunks via form (which consolidates internally). + form_chunks::(keep_vec.into_iter(), kept); + form_chunks::(ship_vec.into_iter(), ship); + } + + fn account(chunk: &Self::Chunk) -> (usize, usize, usize, usize) { + use timely::Accountable; + (chunk.record_count() as usize, 0, 0, 0) } } + } use builder::ValMirror; @@ -637,8 +738,19 @@ pub mod arrangement { fn seal(chain: &mut Vec, description: Description) -> Self::Output { use columnar::Len; - // Slow but correct: merge all chain entries via form_unsorted. - let merged: Updates = Updates::form_unsorted(chain.iter().flat_map(|c| c.iter())); + // Meld sorted, consolidated chain entries in order. + // Pre-allocate to avoid reallocations during meld. + use columnar::{Borrow, Container}; + let mut updates = Updates::::default(); + updates.keys.reserve_for(chain.iter().map(|c| c.keys.borrow())); + updates.vals.reserve_for(chain.iter().map(|c| c.vals.borrow())); + updates.times.reserve_for(chain.iter().map(|c| c.times.borrow())); + updates.diffs.reserve_for(chain.iter().map(|c| c.diffs.borrow())); + let mut builder = crate::updates::UpdatesBuilder::new_from(updates); + for chunk in chain.iter() { + builder.meld(chunk); + } + let merged = builder.done(); chain.clear(); let updates = Len::len(&merged.diffs.values); @@ -1048,10 +1160,20 @@ pub mod updates { } impl UpdatesBuilder { - /// Construct a new builder from consolidated updates. + /// Construct a new builder from consolidated, sealed updates. /// + /// Unseals the last group at keys, vals, and times levels so that + /// subsequent `meld` calls can extend the open groups. 
/// If the updates are not consolidated none of this works. - pub fn new_from(updates: Updates) -> Self { Self { updates } } + pub fn new_from(mut updates: Updates) -> Self { + use columnar::Len; + if Len::len(&updates.keys.values) > 0 { + updates.keys.bounds.pop(); + updates.vals.bounds.pop(); + updates.times.bounds.pop(); + } + Self { updates } + } /// Meld a sorted, consolidated `Updates` chunk into this builder. /// @@ -1060,7 +1182,122 @@ pub mod updates { /// (continue the current group), but times must be strictly increasing /// within the same `(key, val)`. pub fn meld(&mut self, chunk: &Updates) { - todo!("to be implemented") + use columnar::{Borrow, Index, Len}; + + if chunk.len() == 0 { return; } + + // Empty builder: clone the chunk and unseal it. + if Len::len(&self.updates.keys.values) == 0 { + self.updates = chunk.clone(); + self.updates.keys.bounds.pop(); + self.updates.vals.bounds.pop(); + self.updates.times.bounds.pop(); + return; + } + + // Pre-compute boundary comparisons before mutating. + let keys_match = { + let skb = self.updates.keys.values.borrow(); + let ckb = chunk.keys.values.borrow(); + skb.get(Len::len(&skb) - 1) == ckb.get(0) + }; + let vals_match = keys_match && { + let svb = self.updates.vals.values.borrow(); + let cvb = chunk.vals.values.borrow(); + svb.get(Len::len(&svb) - 1) == cvb.get(0) + }; + + let chunk_num_keys = Len::len(&chunk.keys.values); + let chunk_num_vals = Len::len(&chunk.vals.values); + let chunk_num_times = Len::len(&chunk.times.values); + + // Child ranges for the first element at each level of the chunk. + let first_key_vals = child_range(chunk.vals.borrow().bounds, 0); + let first_val_times = child_range(chunk.times.borrow().bounds, 0); + + // There is a first position where coordinates disagree. + // Strictly beyond that position: seal bounds, extend lists, re-open the last bound. + // At that position: meld the first list, extend subsequent lists, re-open. 
+ let mut differ = false; + + // --- Keys --- + if keys_match { + // Skip the duplicate first key; add remaining keys. + if chunk_num_keys > 1 { + self.updates.keys.values.extend_from_self(chunk.keys.values.borrow(), 1..chunk_num_keys); + } + } else { + // All keys are new. + self.updates.keys.values.extend_from_self(chunk.keys.values.borrow(), 0..chunk_num_keys); + differ = true; + } + + // --- Vals --- + if differ { + // Keys differed: seal open val group, extend all val lists, unseal last. + self.updates.vals.bounds.push(Len::len(&self.updates.vals.values) as u64); + self.updates.vals.extend_from_self(chunk.vals.borrow(), 0..chunk_num_keys); + self.updates.vals.bounds.pop(); + } else { + // Keys matched: meld vals for the shared key. + if vals_match { + // Skip the duplicate first val; add remaining vals from the first key's list. + if first_key_vals.len() > 1 { + self.updates.vals.values.extend_from_self( + chunk.vals.values.borrow(), + (first_key_vals.start + 1)..first_key_vals.end, + ); + } + } else { + // First val differs: add all vals from the first key's list. + self.updates.vals.values.extend_from_self( + chunk.vals.values.borrow(), + first_key_vals.clone(), + ); + differ = true; + } + // Seal the matched key's val group, extend remaining keys' val lists, unseal. + if chunk_num_keys > 1 { + self.updates.vals.bounds.push(Len::len(&self.updates.vals.values) as u64); + self.updates.vals.extend_from_self(chunk.vals.borrow(), 1..chunk_num_keys); + self.updates.vals.bounds.pop(); + } + } + + // --- Times --- + if differ { + // Seal open time group, extend all time lists, unseal last. + self.updates.times.bounds.push(Len::len(&self.updates.times.values) as u64); + self.updates.times.extend_from_self(chunk.times.borrow(), 0..chunk_num_vals); + self.updates.times.bounds.pop(); + } else { + // Keys and vals matched. Times must be strictly greater (precondition), + // so we always set differ = true here. 
+ debug_assert!({ + let stb = self.updates.times.values.borrow(); + let ctb = chunk.times.values.borrow(); + stb.get(Len::len(&stb) - 1) != ctb.get(0) + }, "meld: duplicate time within same (key, val)"); + // Add times from the first val's time list into the open group. + self.updates.times.values.extend_from_self( + chunk.times.values.borrow(), + first_val_times.clone(), + ); + differ = true; + // Seal the matched val's time group, extend remaining vals' time lists, unseal. + if chunk_num_vals > 1 { + self.updates.times.bounds.push(Len::len(&self.updates.times.values) as u64); + self.updates.times.extend_from_self(chunk.times.borrow(), 1..chunk_num_vals); + self.updates.times.bounds.pop(); + } + } + + // --- Diffs --- + // Diffs are always sealed (1:1 with times). By the precondition that + // times are strictly increasing for the same (key, val), differ is + // always true by this point — just extend all diff lists. + debug_assert!(differ); + self.updates.diffs.extend_from_self(chunk.diffs.borrow(), 0..chunk_num_times); } /// Seal all open bounds and return the completed `Updates`. diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index fe3622d74..77834ab42 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -23,6 +23,8 @@ use crate::trace::implementations::containers::BatchContainer; use crate::trace::implementations::merge_batcher::container::InternalMerge; use crate::trace::TraceReader; +// TODO: Remove the InternalMerge constraint on Bu::Input. It only needs Clear. + /// A key-wise reduction of values in an input trace. /// /// This method exists to provide reduce functionality without opinions about qualifying trace types. 
From a7e92f69e067f00fa6246de202f5b87a001b6707 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 28 Mar 2026 20:33:22 -0400 Subject: [PATCH 03/20] Wildly overcomplicated --- .../examples/columnar/columnar_support.rs | 638 ++++++++++++++---- 1 file changed, 522 insertions(+), 116 deletions(-) diff --git a/differential-dataflow/examples/columnar/columnar_support.rs b/differential-dataflow/examples/columnar/columnar_support.rs index c4f17de87..149a93b60 100644 --- a/differential-dataflow/examples/columnar/columnar_support.rs +++ b/differential-dataflow/examples/columnar/columnar_support.rs @@ -230,7 +230,6 @@ mod column_builder { let mut refs = self.current.borrow().into_index_iter().collect::>(); refs.sort(); let updates = Updates::form(refs.into_iter()); - if std::env::var("DDIR_VALIDATE").is_ok() { updates.validate("ValColBuilder::push_into"); } self.pending.push_back(RecordedUpdates { updates, records, consolidated: true }); self.current.clear(); } @@ -269,7 +268,6 @@ mod column_builder { let mut refs = self.current.borrow().into_index_iter().collect::>(); refs.sort(); let updates = Updates::form(refs.into_iter()); - if std::env::var("DDIR_VALIDATE").is_ok() { updates.validate("ValColBuilder::finish"); } self.pending.push_back(RecordedUpdates { updates, records, consolidated: true }); self.current.clear(); } @@ -466,7 +464,9 @@ pub mod arrangement { impl<'a, U: crate::layout::ColumnarUpdate> timely::container::PushInto<&'a mut RecordedUpdates> for TrieChunker { fn push_into(&mut self, container: &'a mut RecordedUpdates) { - self.ready.push_back(std::mem::take(&mut container.updates)); + let mut updates = std::mem::take(&mut container.updates); + if !container.consolidated { updates = updates.consolidate(); } + if updates.len() > 0 { self.ready.push_back(updates); } } } @@ -497,9 +497,7 @@ pub mod arrangement { use crate::Updates; impl timely::container::SizableContainer for Updates { - fn at_capacity(&self) -> bool { - self.diffs.values.len() >= 64 * 1024 - 
} + fn at_capacity(&self) -> bool { self.diffs.values.len() >= 64 * 1024 } fn ensure_capacity(&mut self, _stash: &mut Option) { } } @@ -508,7 +506,7 @@ pub mod arrangement { /// TODO: Relax the bound in DD's reduce to remove this requirement. impl InternalMerge for Updates { type TimeOwned = U::Time; - fn len(&self) -> usize { Len::len(&self.keys.values) } + fn len(&self) -> usize { unimplemented!() } fn clear(&mut self) { use columnar::Clear; self.keys.clear(); @@ -516,48 +514,13 @@ pub mod arrangement { self.times.clear(); self.diffs.clear(); } - fn merge_from(&mut self, others: &mut [Self], positions: &mut [usize]) { - let self_iter = self.iter(); - let others_iter = others.iter().zip(positions.iter()).flat_map(|(other, pos)| { - other.iter().skip(*pos) - }); - *self = Self::form_unsorted(self_iter.chain(others_iter)); - for (other, pos) in others.iter().zip(positions.iter_mut()) { - *pos = other.len(); - } - } - fn extract( - &mut self, - upper: AntichainRef, - frontier: &mut Antichain, - keep: &mut Self, - ship: &mut Self, - ) { - let mut time = U::Time::default(); - let mut keep_vec: Vec<(U::Key, U::Val, U::Time, U::Diff)> = Vec::new(); - let mut ship_vec: Vec<(U::Key, U::Val, U::Time, U::Diff)> = Vec::new(); - for (k, v, t, d) in self.iter() { - Columnar::copy_from(&mut time, t); - if upper.less_equal(&time) { - frontier.insert_ref(&time); - keep_vec.push((Columnar::into_owned(k), Columnar::into_owned(v), time.clone(), Columnar::into_owned(d))); - } else { - ship_vec.push((Columnar::into_owned(k), Columnar::into_owned(v), time.clone(), Columnar::into_owned(d))); - } - } - if !keep_vec.is_empty() { - let mut flat = Self::default(); - for (k, v, t, d) in keep.iter() { flat.push((k, v, t, d)); } - for (k, v, t, d) in keep_vec.iter() { flat.push((k, v, t, d)); } - *keep = flat.consolidate(); - } - if !ship_vec.is_empty() { - let mut flat = Self::default(); - for (k, v, t, d) in ship.iter() { flat.push((k, v, t, d)); } - for (k, v, t, d) in ship_vec.iter() { 
 flat.push((k, v, t, d)); } - *ship = flat.consolidate(); - } - } + fn merge_from(&mut self, _others: &mut [Self], _positions: &mut [usize]) { unimplemented!() } + fn extract(&mut self, + _upper: AntichainRef, + _frontier: &mut Antichain, + _keep: &mut Self, + _ship: &mut Self, + ) { unimplemented!() } } } @@ -640,15 +603,7 @@ pub mod arrangement { output: &mut Vec>, _stash: &mut Vec>, ) { - let iter1 = list1.iter().flat_map(|chunk| chunk.iter()); - let iter2 = list2.iter().flat_map(|chunk| chunk.iter()); - - let merged = Merging { - iter1: iter1.peekable(), - iter2: iter2.peekable(), - }; - - form_chunks::(merged, output); + Self::merge_batches(list1, list2, output, _stash); } fn extract( @@ -688,6 +643,507 @@ pub mod arrangement { } } + impl TrieMerger + where + U::Time: Ord + PartialOrder + Clone + 'static, + { + /// Iterator-based merge: flatten, merge, consolidate, form. + /// Correct but slow — used as fallback. + #[allow(dead_code)] + fn merge_iterator( + list1: &[Updates], + list2: &[Updates], + output: &mut Vec>, + ) { + let iter1 = list1.iter().flat_map(|chunk| chunk.iter()); + let iter2 = list2.iter().flat_map(|chunk| chunk.iter()); + + let merged = Merging { + iter1: iter1.peekable(), + iter2: iter2.peekable(), + }; + + form_chunks::(merged, output); + } + + /// A merge implementation that operates batch-at-a-time. + #[inline(never)] + fn merge_batches( + list1: Vec>, + list2: Vec>, + output: &mut Vec>, + stash: &mut Vec>, + ) { + + // The design for efficient "batch" merging of chains of links is: + // 0. We choose a target link size, K, and will keep the average link size at least K and the max size at 2K. + // K should be large enough to amortize some set-up, but not so large that one or two extra break the bank. + // 1. We will repeatedly consider pairs of links, and fully merge one with a prefix of the other. + // The last elements of each link will tell us which of the two suffixes must be held back. + // 2. 
We then have a chain of as many links as we started with, with potential defects to correct: + // a. A link may contain some number of zeros: we can remove them if we are eager, based on size. + // b. A link may contain more than 2K updates; we can split it. + // c. Two adjacent links may contain fewer than 2K updates; we can meld (careful append) them. + // 3. After a pass of the above, we should have restored the invariant. + // We can try and be smarter and fuse some of the above work rather than explicitly stage results. + // + // The challenging moment is the merge that can start with a suffix of one link, involving a prefix of one link. + // These could be the same link, different links, and generally there is the potential for complexity here. + + let mut builder = ChainBuilder::default(); + + let mut queue1: std::collections::VecDeque<_> = list1.into(); + let mut queue2: std::collections::VecDeque<_> = list2.into(); + + // The first unconsumed update in each block, via (k_idx, v_idx, t_idx), or None if exhausted. + // These are (0,0,0) for a new block, and should become None once there are no remaining updates. + let mut cursor1 = queue1.pop_front().map(|b| ((0,0,0), b)); + let mut cursor2 = queue2.pop_front().map(|b| ((0,0,0), b)); + + // For each pair of batches + while cursor1.is_some() && cursor2.is_some() { + Self::merge_batch(&mut cursor1, &mut cursor2, &mut builder, stash); + if cursor1.is_none() { cursor1 = queue1.pop_front().map(|b| ((0,0,0), b)); } + if cursor2.is_none() { cursor2 = queue2.pop_front().map(|b| ((0,0,0), b)); } + } + + // TODO: create batch for the non-empty cursor. 
+ if let Some(((k,v,t),batch)) = cursor1 { + let mut out_batch = stash.pop().unwrap_or_default(); + let empty: Updates = Default::default(); + write_from_surveys( + &batch, + &empty, + &[Report::This(0, 1)], + &[Report::This(k, batch.keys.values.len())], + &[Report::This(v, batch.vals.values.len())], + &[Report::This(t, batch.times.values.len())], + &mut out_batch, + ); + builder.push(out_batch); + } + if let Some(((k,v,t),batch)) = cursor2 { + let mut out_batch = stash.pop().unwrap_or_default(); + let empty: Updates = Default::default(); + write_from_surveys( + &empty, + &batch, + &[Report::That(0, 1)], + &[Report::That(k, batch.keys.values.len())], + &[Report::That(v, batch.vals.values.len())], + &[Report::That(t, batch.times.values.len())], + &mut out_batch, + ); + builder.push(out_batch); + } + + builder.extend(queue1); + builder.extend(queue2); + *output = builder.done(); + // TODO: Tidy output to satisfy structural invariants. + } + + /// Merge two batches, one completely and another through the corresponding prefix. + /// + /// Each invocation determines the maximum amount of both batches we can merge, determined + /// by comparing the elements at the tails of each batch, and locating the lesser in other. + /// We will merge the whole of the batch containing the lesser, and the prefix up through + /// the lesser element in the other batch, setting the cursor to the first element strictly + /// greater than that lesser element. + /// + /// The algorithm uses a list of `Report` findings to map the interleavings of the layers. + /// Each indicates either a range exclusive to one of the inputs, or a one element common + /// to the layers from both inputs, which must be further explored. This map would normally + /// allow the full merge to happen, but we need to carefully start at each cursor, and end + /// just before the first element greater than the lesser bound. 
+ /// + /// The consumed prefix and disjoint suffix should be single report entries, and it seems + /// fine to first produce all reports and then reflect on the cursors, rather than use the + /// cursors as part of the mapping. + #[inline(never)] + fn merge_batch( + batch1: &mut Option<((usize, usize, usize), Updates)>, + batch2: &mut Option<((usize, usize, usize), Updates)>, + builder: &mut ChainBuilder, + stash: &mut Vec>, + ) { + let ((k0_idx, v0_idx, t0_idx), updates0) = batch1.take().unwrap(); + let ((k1_idx, v1_idx, t1_idx), updates1) = batch2.take().unwrap(); + + use columnar::Borrow; + let keys0 = updates0.keys.borrow(); + let keys1 = updates1.keys.borrow(); + let vals0 = updates0.vals.borrow(); + let vals1 = updates1.vals.borrow(); + let times0 = updates0.times.borrow(); + let times1 = updates1.times.borrow(); + + // Survey the interleaving of the two inputs. + let mut key_survey = survey::>(keys0, keys1, &[Report::Both(0,0)]); + let mut val_survey = survey::>(vals0, vals1, &key_survey); + let mut time_survey = survey::>(times0, times1, &val_survey); + + // We now know enough to start writing into an output batch. + // We should update the input surveys to reflect the subset + // of data that we want. + // + // At most one cursor should be non-zero (assert!). + // A non-zero cursor must correspond to the first entry of the surveys, + // as there is at least one consumed update that precedes the other batch. + // We need to nudge that report forward to align with the cursor, potentially + // squeezing the report to nothing (to the upper bound). + + // We start by updating the surveys to reflect the cursors. + // If either cursor is set, then its batch has an element strictly less than the other batch. + // We therefore expect to find a prefix of This/That at the start of the survey. 
+ if (k0_idx, v0_idx, t0_idx) != (0,0,0) { + let mut done = false; while !done { if let Report::This(l,u) = &mut key_survey[0] { if *u <= k0_idx { key_survey.remove(0); } else { *l = k0_idx; done = true; } } else { done = true; } } + let mut done = false; while !done { if let Report::This(l,u) = &mut val_survey[0] { if *u <= v0_idx { val_survey.remove(0); } else { *l = v0_idx; done = true; } } else { done = true; } } + let mut done = false; while !done { if let Report::This(l,u) = &mut time_survey[0] { if *u <= t0_idx { time_survey.remove(0); } else { *l = t0_idx; done = true; } } else { done = true; } } + } + + if (k1_idx, v1_idx, t1_idx) != (0,0,0) { + let mut done = false; while !done { if let Report::That(l,u) = &mut key_survey[0] { if *u <= k1_idx { key_survey.remove(0); } else { *l = k1_idx; done = true; } } else { done = true; } } + let mut done = false; while !done { if let Report::That(l,u) = &mut val_survey[0] { if *u <= v1_idx { val_survey.remove(0); } else { *l = v1_idx; done = true; } } else { done = true; } } + let mut done = false; while !done { if let Report::That(l,u) = &mut time_survey[0] { if *u <= t1_idx { time_survey.remove(0); } else { *l = t1_idx; done = true; } } else { done = true; } } + } + + // We want to trim the tails of the surveys to only cover ranges present in both inputs. + // We can determine which was "longer" by looking at the last entry of the bottom layer, + // which tells us which input (or both) contained the last element. + // + // From the bottom layer up, we'll identify the index of the last item, and then determine + // the index of the list it belongs to. We use that index in the next layer, to locate the + // index of the list it belongs to, on upward. + let next_cursor = match time_survey.last().unwrap() { + Report::This(_,_) => { + // Collect the last value indexes known to strictly exceed an entry in the other batch. 
+ let mut t = times0.values.len(); + while let Some(Report::This(l,_)) = time_survey.last() { t = *l; time_survey.pop(); } + let mut v = vals0.values.len(); + while let Some(Report::This(l,_)) = val_survey.last() { v = *l; val_survey.pop(); } + let mut k = keys0.values.len(); + while let Some(Report::This(l,_)) = key_survey.last() { k = *l; key_survey.pop(); } + // Now we may need to correct by nudging down. + if v == times0.len() || times0.bounds.bounds(v).0 > t { v -= 1; } + if k == vals0.len() || vals0.bounds.bounds(k).0 > v { k -= 1; } + Some(Ok((k,v,t))) + } + Report::Both(_,_) => { None } + Report::That(_,_) => { + // Collect the last value indexes known to strictly exceed an entry in the other batch. + let mut t = times1.values.len(); + while let Some(Report::That(l,_)) = time_survey.last() { t = *l; time_survey.pop(); } + let mut v = vals1.values.len(); + while let Some(Report::That(l,_)) = val_survey.last() { v = *l; val_survey.pop(); } + let mut k = keys1.values.len(); + while let Some(Report::That(l,_)) = key_survey.last() { k = *l; key_survey.pop(); } + // Now we may need to correct by nudging down. + if v == times1.len() || times1.bounds.bounds(v).0 > t { v -= 1; } + if k == vals1.len() || vals1.bounds.bounds(k).0 > v { k -= 1; } + Some(Err((k,v,t))) + } + }; + + // Having updated the surveys, we now copy over the ranges they identify. + let mut out_batch = stash.pop().unwrap_or_default(); + // TODO: We should be able to size `out_batch` pretty accurately from the survey. + write_from_surveys(&updates0, &updates1, &[Report::Both(0,0)], &key_survey, &val_survey, &time_survey, &mut out_batch); + builder.push(out_batch); + + match next_cursor { + Some(Ok(kvt)) => { *batch1 = Some((kvt, updates0)); } + Some(Err(kvt)) => {*batch2 = Some((kvt, updates1)); } + None => { } + } + } + + } + + /// Write merged output from four levels of survey reports. 
+ /// + /// Each layer is written independently: `write_layer` handles keys, vals, + /// and times; `write_diffs` handles diff consolidation. + #[inline(never)] + fn write_from_surveys( + updates0: &Updates, + updates1: &Updates, + root_survey: &[Report], + key_survey: &[Report], + val_survey: &[Report], + time_survey: &[Report], + output: &mut Updates, + ) { + use columnar::Borrow; + + write_layer(updates0.keys.borrow(), updates1.keys.borrow(), root_survey, key_survey, &mut output.keys); + write_layer(updates0.vals.borrow(), updates1.vals.borrow(), key_survey, val_survey, &mut output.vals); + write_layer(updates0.times.borrow(), updates1.times.borrow(), val_survey, time_survey, &mut output.times); + write_diffs::(updates0.diffs.borrow(), updates1.diffs.borrow(), time_survey, &mut output.diffs); + } + + /// From two sequences of interleaved lists, map out the interleaving of their values. + /// + /// The sequence of input reports identifies constraints on the sorted order of lists in the two inputs, + /// calling out ranges of each that are exclusively ordered, and elements that have equal prefixes and + /// therefore "overlap" and should be further investigated through the values of the lists. + /// + /// The output should have the same form but for the next layer: subject to the ordering of `reports`, + /// a similar report for the values of the two lists, appropriate for the next layer. + #[inline(never)] + pub fn survey<'a, C: columnar::Container: Ord>>( + lists0: as columnar::Borrow>::Borrowed<'a>, + lists1: as columnar::Borrow>::Borrowed<'a>, + reports: &[Report], + ) -> Vec { + use columnar::Index; + let mut output = Vec::with_capacity(reports.len()); // may grow larger, but at least this large. 
+ for report in reports.iter() { + match report { + Report::This(lower0, upper0) => { + let (new_lower, _) = lists0.bounds.bounds(*lower0); + let (_, new_upper) = lists0.bounds.bounds(*upper0-1); + output.push(Report::This(new_lower, new_upper)); + } + Report::Both(index0, index1) => { + + // Fetch the bounds from the layers. + let (mut lower0, upper0) = lists0.bounds.bounds(*index0); + let (mut lower1, upper1) = lists1.bounds.bounds(*index1); + + // Scour the intersecting range for matches. + while lower0 < upper0 && lower1 < upper1 { + let val0 = lists0.values.get(lower0); + let val1 = lists1.values.get(lower1); + match val0.cmp(&val1) { + std::cmp::Ordering::Less => { + let start = lower0; + lower0 += 1; + gallop(lists0.values, &mut lower0, upper0, |x| x < val1); + output.push(Report::This(start, lower0)); + }, + std::cmp::Ordering::Equal => { + output.push(Report::Both(lower0, lower1)); + lower0 += 1; + lower1 += 1; + }, + std::cmp::Ordering::Greater => { + let start = lower1; + lower1 += 1; + gallop(lists1.values, &mut lower1, upper1, |x| x < val0); + output.push(Report::That(start, lower1)); + }, + } + } + if lower0 < upper0 { output.push(Report::This(lower0, upper0)); } + if lower1 < upper1 { output.push(Report::That(lower1, upper1)); } + + } + Report::That(lower1, upper1) => { + let (new_lower, _) = lists1.bounds.bounds(*lower1); + let (_, new_upper) = lists1.bounds.bounds(*upper1-1); + output.push(Report::That(new_lower, new_upper)); + } + } + } + + output + } + + /// Write one layer of merged output from a list survey and item survey. + /// + /// The list survey describes which lists to produce (from the layer above). + /// The item survey describes how the items within those lists interleave. + /// Both surveys are consumed completely; a mismatch is a bug. + /// + /// Pruning (from cursor adjustments) can affect the first and last list + /// survey entries: the item survey's ranges may not match the natural + /// bounds of those lists. 
Middle entries are guaranteed unpruned and can + /// be bulk-copied. + #[inline(never)] + pub fn write_layer<'a, C: columnar::Container: Ord>>( + lists0: as columnar::Borrow>::Borrowed<'a>, + lists1: as columnar::Borrow>::Borrowed<'a>, + list_survey: &[Report], + item_survey: &[Report], + output: &mut crate::updates::Lists, + ) { + use columnar::{Container, Index, Len, Push}; + + let mut item_idx = 0; + + for (pos, list_report) in list_survey.iter().enumerate() { + let is_first = pos == 0; + let is_last = pos == list_survey.len() - 1; + let may_be_pruned = is_first || is_last; + + match list_report { + Report::This(lo, hi) => { + let Report::This(item_lo, item_hi) = item_survey[item_idx] else { unreachable!("Expected This in item survey for This list") }; + item_idx += 1; + if may_be_pruned { + // Item range may not match natural bounds; copy items in bulk + // but compute per-list bounds from natural bounds clamped to + // the item range. + let base = output.values.len(); + output.values.extend_from_self(lists0.values, item_lo..item_hi); + for i in *lo..*hi { + let (_, nat_hi) = lists0.bounds.bounds(i); + output.bounds.push((base + nat_hi.min(item_hi) - item_lo) as u64); + } + } else { + output.extend_from_self(lists0, *lo..*hi); + } + } + Report::That(lo, hi) => { + let Report::That(item_lo, item_hi) = item_survey[item_idx] else { unreachable!("Expected That in item survey for That list") }; + item_idx += 1; + if may_be_pruned { + let base = output.values.len(); + output.values.extend_from_self(lists1.values, item_lo..item_hi); + for i in *lo..*hi { + let (_, nat_hi) = lists1.bounds.bounds(i); + output.bounds.push((base + nat_hi.min(item_hi) - item_lo) as u64); + } + } else { + output.extend_from_self(lists1, *lo..*hi); + } + } + Report::Both(i0, i1) => { + // Merge: consume item survey entries until both sides are covered. 
+ let (mut c0, end0) = lists0.bounds.bounds(*i0); + let (mut c1, end1) = lists1.bounds.bounds(*i1); + while (c0 < end0 || c1 < end1) && item_idx < item_survey.len() { + match item_survey[item_idx] { + Report::This(lo, hi) => { + if lo >= end0 { break; } + output.values.extend_from_self(lists0.values, lo..hi); + c0 = hi; + } + Report::That(lo, hi) => { + if lo >= end1 { break; } + output.values.extend_from_self(lists1.values, lo..hi); + c1 = hi; + } + Report::Both(v0, v1) => { + if v0 >= end0 && v1 >= end1 { break; } + output.values.push(lists0.values.get(v0)); + c0 = v0 + 1; + c1 = v1 + 1; + } + } + item_idx += 1; + } + output.bounds.push(output.values.len() as u64); + } + } + } + } + + /// Write the diff layer from a time survey and two diff inputs. + /// + /// The time survey is the item-level survey for the time layer, which + /// doubles as the list survey for diffs (one diff list per time entry). + /// + /// - `This(lo, hi)`: bulk-copy diff lists from input 0. + /// - `That(lo, hi)`: bulk-copy diff lists from input 1. + /// - `Both(t0, t1)`: consolidate the two singleton diffs. Push `[sum]` + /// if non-zero, or an empty list `[]` if they cancel. + #[inline(never)] + pub fn write_diffs( + diffs0: > as columnar::Borrow>::Borrowed<'_>, + diffs1: > as columnar::Borrow>::Borrowed<'_>, + time_survey: &[Report], + output: &mut crate::updates::Lists>, + ) { + use columnar::{Columnar, Container, Index, Len, Push}; + use differential_dataflow::difference::{Semigroup, IsZero}; + + for report in time_survey.iter() { + match report { + Report::This(lo, hi) => { output.extend_from_self(diffs0, *lo..*hi); } + Report::That(lo, hi) => { output.extend_from_self(diffs1, *lo..*hi); } + Report::Both(t0, t1) => { + // Read singleton diffs via list bounds, consolidate. 
+ let (d0_lo, d0_hi) = diffs0.bounds.bounds(*t0); + let (d1_lo, d1_hi) = diffs1.bounds.bounds(*t1); + assert_eq!(d0_hi - d0_lo, 1, "Expected singleton diff list at t0={t0}"); + assert_eq!(d1_hi - d1_lo, 1, "Expected singleton diff list at t1={t1}"); + let mut diff: U::Diff = Columnar::into_owned(diffs0.values.get(d0_lo)); + diff.plus_equals(&Columnar::into_owned(diffs1.values.get(d1_lo))); + if !diff.is_zero() { output.values.push(&diff); } + output.bounds.push(output.values.len() as u64); + } + } + } + } + + /// Increments `index` until just after the last element of `input` to satisfy `cmp`. + /// + /// The method assumes that `cmp` is monotonic, never becoming true once it is false. + /// If an `upper` is supplied, it acts as a constraint on the interval of `input` explored. + #[inline(always)] + pub(crate) fn gallop(input: C, lower: &mut usize, upper: usize, mut cmp: impl FnMut(::Ref) -> bool) { + // if empty input, or already >= element, return + if *lower < upper && cmp(input.get(*lower)) { + let mut step = 1; + while *lower + step < upper && cmp(input.get(*lower + step)) { + *lower += step; + step <<= 1; + } + + step >>= 1; + while step > 0 { + if *lower + step < upper && cmp(input.get(*lower + step)) { + *lower += step; + } + step >>= 1; + } + + *lower += 1; + } + } + + /// A report we would expect to see in a sequence about two layers. + /// + /// A sequence of these reports reveal an ordered traversal of the keys + /// of two layers, with ranges exclusive to one, ranges exclusive to the + /// other, and individual elements (not ranges) common to both. + #[derive(Copy, Clone, Columnar, Debug)] + pub enum Report { + /// Range of indices in this input. + This(usize, usize), + /// Range of indices in that input. + That(usize, usize), + /// Matching indices in both inputs. 
+ Both(usize, usize), + } + + pub struct ChainBuilder { + updates: Vec>, + } + + impl Default for ChainBuilder { fn default() -> Self { Self { updates: Default::default() } } } + + impl ChainBuilder { + fn push(&mut self, mut link: Updates) { + link = link.filter_zero(); + if link.len() > 0 { + if let Some(last) = self.updates.last_mut() { + if last.len() + link.len() < 2 * 64 * 1024 { + let mut build = crate::updates::UpdatesBuilder::new_from(std::mem::take(last)); + build.meld(&link); + *last = build.done(); + } + else { self.updates.push(link); } + + } + else { self.updates.push(link); } + } + } + fn extend(&mut self, iter: impl IntoIterator>) { for link in iter { self.push(link); }} + fn done(self) -> Vec> { self.updates } + } } use builder::ValMirror; @@ -1014,61 +1470,11 @@ pub mod updates { /// Consolidates into canonical trie form: /// single outer key list, all lists sorted and deduplicated, /// diff lists are singletons (or absent if cancelled). - pub fn consolidate(self) -> Self { - Self::form_unsorted(self.iter()) - } + pub fn consolidate(self) -> Self { Self::form_unsorted(self.iter()) } + pub fn filter_zero(self) -> Self { Self::form(self.iter()) } /// The number of leaf-level diff entries (total updates). pub fn len(&self) -> usize { self.diffs.values.len() } - - /// Checks trie invariants and panics with a descriptive message if any fail. - pub fn validate(&self, label: &str) { - let keys_len = Len::len(&self.keys.values); - let vals_bounds_len = Len::len(&self.vals.bounds); - let vals_len = Len::len(&self.vals.values); - let times_bounds_len = Len::len(&self.times.bounds); - let times_len = Len::len(&self.times.values); - let diffs_bounds_len = Len::len(&self.diffs.bounds); - let diffs_len = Len::len(&self.diffs.values); - - // 1. One val-group bound per key. - assert_eq!(vals_bounds_len, keys_len, - "{label}: vals.bounds.len() ({vals_bounds_len}) != keys.values.len() ({keys_len})"); - - // 2. One time-group bound per val. 
- assert_eq!(times_bounds_len, vals_len, - "{label}: times.bounds.len() ({times_bounds_len}) != vals.values.len() ({vals_len})"); - - // 3. One diff-group bound per time (singleton diffs). - assert_eq!(diffs_bounds_len, times_len, - "{label}: diffs.bounds.len() ({diffs_bounds_len}) != times.values.len() ({times_len})"); - - // 4. Cumulative bounds are monotonically non-decreasing at each level. - // 5. Final bound at each level matches the child values count. - fn check_monotone_and_final + Len>( - bounds: &B, - child_len: usize, - level: &str, - label: &str, - ) { - let n = Len::len(bounds); - if n == 0 { return; } - let mut prev = bounds.index_as(0); - for i in 1..n { - let curr = bounds.index_as(i); - assert!(curr >= prev, - "{label}: {level}.bounds not monotonic at index {i}: {prev} > {curr}"); - prev = curr; - } - let last = bounds.index_as(n - 1) as usize; - assert_eq!(last, child_len, - "{label}: last {level}.bounds ({last}) != child values len ({child_len})"); - } - - check_monotone_and_final(&self.vals.bounds.borrow(), vals_len, "vals", label); - check_monotone_and_final(&self.times.bounds.borrow(), times_len, "times", label); - check_monotone_and_final(&self.diffs.bounds.borrow(), diffs_len, "diffs", label); - } } /// Push a single flat update as a stride-1 entry. 
@@ -1459,14 +1865,13 @@ where .inner .unary::, _, _, _>(Pipeline, "JoinFunction", move |_, _| { move |input, output| { + let mut t1o = U::Time::default(); + let mut d1o = U::Diff::default(); input.for_each(|time, data| { - if std::env::var("DDIR_VALIDATE").is_ok() { - data.updates.validate("JoinFunction input"); - } let mut session = output.session_with_builder(&time); for (k1, v1, t1, d1) in data.updates.iter() { - let t1o: U::Time = Columnar::into_owned(t1); - let d1o: U::Diff = Columnar::into_owned(d1); + Columnar::copy_from(&mut t1o, t1); + Columnar::copy_from(&mut d1o, d1); for (k2, v2, t2, d2) in logic(k1, v1, t1, d1) { let t3 = t2.join(&t1o); let d3 = d2.multiply(&d1o); @@ -1528,6 +1933,7 @@ where builder.build(move |_capability| { let mut col_builder = ValColBuilder::<(K, V, DynTime, R)>::default(); + let mut time = DynTime::default(); move |_frontier| { let mut output = output.activate(); op_input.for_each(|cap, data| { @@ -1544,7 +1950,7 @@ where // that accepts pre-sorted, potentially-collapsing timestamps // could avoid the re-sort inside the builder. 
for (k, v, t, d) in data.updates.iter() { - let mut time: DynTime = Columnar::into_owned(t); + Columnar::copy_from(&mut time, t); let mut inner_vec = std::mem::take(&mut time.inner).into_inner(); inner_vec.truncate(level - 1); time.inner = PointStamp::new(inner_vec); From ee080179b9df81de6bac81202f729ccb86192976 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:12:29 -0400 Subject: [PATCH 04/20] Remove commented code --- differential-dataflow/src/operators/reduce.rs | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 77834ab42..6433a3d33 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -479,18 +479,6 @@ mod history_replay { let mut meet = None; update_meet(&mut meet, self.meets.get(0)); update_meet(&mut meet, batch_replay.meet()); - // if let Some(time) = self.meets.get(0) { - // meet = match meet { - // None => Some(self.meets[0].clone()), - // Some(x) => Some(x.meet(&self.meets[0])), - // }; - // } - // if let Some(time) = batch_replay.meet() { - // meet = match meet { - // None => Some(time.clone()), - // Some(x) => Some(x.meet(&time)), - // }; - // } // Having determined the meet, we can load the input and output histories, where we // advance all times by joining them with `meet`. 
The resulting times are more compact @@ -643,24 +631,6 @@ mod history_replay { self.output_buffer.clear(); } - // output_replay.advance_buffer_by(&meet); - // for &((ref value, ref time), diff) in output_replay.buffer().iter() { - // if time.less_equal(&next_time) { - // self.output_buffer.push(((*value).clone(), -diff)); - // } - // else { - // self.temporary.push(next_time.join(time)); - // } - // } - // for &((ref value, ref time), diff) in self.output_produced.iter() { - // if time.less_equal(&next_time) { - // self.output_buffer.push(((*value).clone(), -diff)); - // } - // else { - // self.temporary.push(next_time.join(&time)); - // } - // } - // Having subtracted output updates from user output, consolidate the results to determine // if there is anything worth reporting. Note: this also orders the results by value, so // that could make the above merging plan even easier. @@ -754,12 +724,7 @@ mod history_replay { update_meet(&mut meet, input_replay.meet()); update_meet(&mut meet, output_replay.meet()); for time in self.synth_times.iter() { update_meet(&mut meet, Some(time)); } - // if let Some(time) = batch_replay.meet() { meet = meet.meet(time); } - // if let Some(time) = input_replay.meet() { meet = meet.meet(time); } - // if let Some(time) = output_replay.meet() { meet = meet.meet(time); } - // for time in self.synth_times.iter() { meet = meet.meet(time); } update_meet(&mut meet, meets_slice.first()); - // if let Some(time) = meets_slice.first() { meet = meet.meet(time); } // Update `times_current` by the frontier. 
if let Some(meet) = meet.as_ref() { From 1535c4b0e3499d165b3de12b996e6e6e44f948c5 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:14:57 -0400 Subject: [PATCH 05/20] Removed one-off trait --- differential-dataflow/src/operators/reduce.rs | 37 ++----------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 6433a3d33..7e03139ff 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -347,35 +347,6 @@ fn sort_dedup(list: &mut Vec) { list.dedup(); } -trait PerKeyCompute<'a, C1, C2, C3, V> -where - C1: Cursor, - C2: for<'b> Cursor = C1::Key<'a>, ValOwn = V, Time = C1::Time>, - C3: Cursor = C1::Key<'a>, Val<'a> = C1::Val<'a>, Time = C1::Time, Diff = C1::Diff>, - V: Clone + Ord, -{ - fn new() -> Self; - fn compute( - &mut self, - key: C1::Key<'a>, - source_cursor: (&mut C1, &'a C1::Storage), - output_cursor: (&mut C2, &'a C2::Storage), - batch_cursor: (&mut C3, &'a C3::Storage), - times: &mut Vec, - logic: &mut L, - upper_limit: &Antichain, - outputs: &mut [(C2::Time, Vec<(V, C2::Time, C2::Diff)>)], - new_interesting: &mut Vec) -> (usize, usize) - where - L: FnMut( - C1::Key<'a>, - &[(C1::Val<'a>, C1::Diff)], - &mut Vec<(V, C2::Diff)>, - &mut Vec<(V, C2::Diff)>, - ); -} - - /// Implementation based on replaying historical and new updates together. mod history_replay { @@ -386,7 +357,7 @@ mod history_replay { use crate::trace::Cursor; use crate::operators::ValueHistory; - use super::{PerKeyCompute, sort_dedup}; + use super::sort_dedup; /// The `HistoryReplayer` is a compute strategy based on moving through existing inputs, interesting times, etc in /// time order, maintaining consolidated representations of updates with respect to future interesting times. 
@@ -410,14 +381,14 @@ mod history_replay { temporary: Vec, } - impl<'a, C1, C2, C3, V> PerKeyCompute<'a, C1, C2, C3, V> for HistoryReplayer<'a, C1, C2, C3, V> + impl<'a, C1, C2, C3, V> HistoryReplayer<'a, C1, C2, C3, V> where C1: Cursor, C2: for<'b> Cursor = C1::Key<'a>, ValOwn = V, Time = C1::Time>, C3: Cursor = C1::Key<'a>, Val<'a> = C1::Val<'a>, Time = C1::Time, Diff = C1::Diff>, V: Clone + Ord, { - fn new() -> Self { + pub fn new() -> Self { HistoryReplayer { input_history: ValueHistory::new(), output_history: ValueHistory::new(), @@ -433,7 +404,7 @@ mod history_replay { } } #[inline(never)] - fn compute( + pub fn compute( &mut self, key: C1::Key<'a>, (source_cursor, source_storage): (&mut C1, &'a C1::Storage), From e6af4581d4e838167880867a3f3298478b1444df Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:16:04 -0400 Subject: [PATCH 06/20] Remove unread counters --- differential-dataflow/src/operators/reduce.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 7e03139ff..fdf7bbec3 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -221,7 +221,7 @@ where sort_dedup(&mut interesting_times); // do the per-key computation. 
- let _counters = thinker.compute( + thinker.compute( key, (&mut source_cursor, source_storage), (&mut output_cursor, output_storage), @@ -414,7 +414,7 @@ mod history_replay { logic: &mut L, upper_limit: &Antichain, outputs: &mut [(C2::Time, Vec<(V, C2::Time, C2::Diff)>)], - new_interesting: &mut Vec) -> (usize, usize) + new_interesting: &mut Vec) where L: FnMut( C1::Key<'a>, @@ -487,9 +487,6 @@ mod history_replay { let mut times_slice = ×[..]; let mut meets_slice = &self.meets[..]; - let mut compute_counter = 0; - let mut output_counter = 0; - // We have candidate times from `batch` and `times`, as well as times identified by either // `input` or `output`. Finally, we may have synthetic times produced as the join of times // we consider in the course of evaluation. As long as any of these times exist, we need to @@ -553,8 +550,6 @@ mod history_replay { // output produced. This sounds like a good test to have for debug builds! if interesting { - compute_counter += 1; - // Assemble the input collection at `next_time`. (`self.input_buffer` cleared just after use). debug_assert!(self.input_buffer.is_empty()); meet.as_ref().map(|meet| input_replay.advance_buffer_by(meet)); @@ -613,8 +608,6 @@ mod history_replay { // times. if !self.update_buffer.is_empty() { - output_counter += 1; - // We *should* be able to find a capability for `next_time`. Any thing else would // indicate a logical error somewhere along the way; either we release a capability // we should have kept, or we have computed the output incorrectly (or both!) @@ -709,8 +702,6 @@ mod history_replay { // Normalize the representation of `new_interesting`, deduplicating and ordering. 
sort_dedup(new_interesting); - - (compute_counter, output_counter) } } From 8f9e929d9bb68cd5b9fda98344830428ae51c492 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:19:16 -0400 Subject: [PATCH 07/20] Convert silent errors to panics --- differential-dataflow/src/operators/reduce.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index fdf7bbec3..2b4fa29c2 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -83,8 +83,6 @@ where let mut output_upper = Antichain::from_elem(::minimum()); let mut output_lower = Antichain::from_elem(::minimum()); - let id = scope.index(); - move |(input, _frontier), output| { // The `reduce` operator receives fully formed batches, which each serve as an indication @@ -301,15 +299,8 @@ where // Update `capabilities` to reflect interesting pairs described by `frontier`. 
let mut new_capabilities = Vec::new(); for time in frontier.borrow().iter() { - if let Some(cap) = capabilities.iter().find(|c| c.time().less_equal(time)) { - new_capabilities.push(cap.delayed(time)); - } - else { - println!("{}:\tfailed to find capability less than new frontier time:", id); - println!("{}:\t time: {:?}", id, time); - println!("{}:\t caps: {:?}", id, capabilities); - println!("{}:\t uppr: {:?}", id, upper_limit); - } + let cap = capabilities.iter().find(|c| c.time().less_equal(time)).expect("failed to find capability"); + new_capabilities.push(cap.delayed(time)); } capabilities = new_capabilities; From 51aef09bcd12bda0c94526566c6af790d24601ce Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:27:13 -0400 Subject: [PATCH 08/20] Simplify logic --- differential-dataflow/src/operators/reduce.rs | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 2b4fa29c2..50b4d55ee 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -447,26 +447,18 @@ mod history_replay { // and guaranteed to accumulate identically for times greater or equal to `meet`. // Load the input and output histories. 
- let mut input_replay = if let Some(meet) = meet.as_ref() { - self.input_history.replay_key(source_cursor, source_storage, key, |time| { - let mut time = C1::owned_time(time); - time.join_assign(meet); - time - }) - } - else { - self.input_history.replay_key(source_cursor, source_storage, key, |time| C1::owned_time(time)) - }; - let mut output_replay = if let Some(meet) = meet.as_ref() { - self.output_history.replay_key(output_cursor, output_storage, key, |time| { - let mut time = C2::owned_time(time); - time.join_assign(meet); - time - }) - } - else { - self.output_history.replay_key(output_cursor, output_storage, key, |time| C2::owned_time(time)) - }; + let mut input_replay = + self.input_history.replay_key(source_cursor, source_storage, key, |time| { + let mut time = C1::owned_time(time); + if let Some(meet) = meet.as_ref() { time.join_assign(meet); } + time + }); + let mut output_replay = + self.output_history.replay_key(output_cursor, output_storage, key, |time| { + let mut time = C2::owned_time(time); + if let Some(meet) = meet.as_ref() { time.join_assign(meet); } + time + }); self.synth_times.clear(); self.times_current.clear(); From 3901fdb9b9928c9350b60e7e959c7928db7061c1 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:30:22 -0400 Subject: [PATCH 09/20] More commented code removed --- differential-dataflow/src/operators/reduce.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 50b4d55ee..596c63e53 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -60,11 +60,8 @@ where let (mut output_reader, mut output_writer) = TraceAgent::new(empty, operator_info, logger); - // let mut output_trace = TraceRc::make_from(agent).0; *result_trace = Some(output_reader.clone()); - // let mut thinker1 = history_replay_prior::HistoryReplayer::::new(); - // let mut thinker 
= history_replay::HistoryReplayer::::new(); let mut new_interesting_times = Vec::::new(); // Our implementation maintains a list of outstanding `(key, time)` synthetic interesting times, @@ -666,7 +663,7 @@ mod history_replay { // Update `meet` to track the meet of each source of times. - meet = None;//T::maximum(); + meet = None; update_meet(&mut meet, batch_replay.meet()); update_meet(&mut meet, input_replay.meet()); update_meet(&mut meet, output_replay.meet()); From b157359b286bec095611cfc5ba90033a58940cf3 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:30:56 -0400 Subject: [PATCH 10/20] Extract unconditional behavior --- differential-dataflow/src/operators/reduce.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 596c63e53..f9551a3a0 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -300,14 +300,11 @@ where new_capabilities.push(cap.delayed(time)); } capabilities = new_capabilities; - - // ensure that observed progress is reflected in the output. - output_writer.seal(upper_limit.clone()); - } - else { - output_writer.seal(upper_limit.clone()); } + // ensure that observed progress is reflected in the output. + output_writer.seal(upper_limit.clone()); + // We only anticipate future times in advance of `upper_limit`. 
source_trace.set_logical_compaction(upper_limit.borrow()); output_reader.set_logical_compaction(upper_limit.borrow()); From fadcd7b89b45040533ab007bc9a4a31d1817f715 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:35:13 -0400 Subject: [PATCH 11/20] Idiomatic Rust --- differential-dataflow/src/operators/reduce.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index f9551a3a0..23ad76094 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -529,7 +529,7 @@ mod history_replay { // Assemble the input collection at `next_time`. (`self.input_buffer` cleared just after use). debug_assert!(self.input_buffer.is_empty()); - meet.as_ref().map(|meet| input_replay.advance_buffer_by(meet)); + if let Some(meet) = meet.as_ref() { input_replay.advance_buffer_by(meet) }; for &((value, ref time), ref diff) in input_replay.buffer().iter() { if time.less_equal(&next_time) { self.input_buffer.push((value, diff.clone())); @@ -548,7 +548,7 @@ mod history_replay { } crate::consolidation::consolidate(&mut self.input_buffer); - meet.as_ref().map(|meet| output_replay.advance_buffer_by(meet)); + if let Some(meet) = meet.as_ref() { output_replay.advance_buffer_by(meet) }; for &((value, ref time), ref diff) in output_replay.buffer().iter() { if time.less_equal(&next_time) { self.output_buffer.push((C2::owned_val(value), diff.clone())); From 2ec3293b148b4556637a9ab4e9c4ae4cd5f6bb62 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 04:53:49 -0400 Subject: [PATCH 12/20] Less time cloning --- differential-dataflow/src/operators/reduce.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 23ad76094..3715d2843 100644 --- 
a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -473,7 +473,7 @@ mod history_replay { input_replay.time(), output_replay.time(), self.synth_times.last(), - ].iter().cloned().flatten().min().cloned() { + ].into_iter().flatten().min().cloned() { // Advance input and output history replayers. This marks applicable updates as active. input_replay.step_while_time_is(&next_time); @@ -511,7 +511,7 @@ mod history_replay { // and become the time itself. They may not equal the current time because whatever frontier we // are tracking may not have advanced far enough. // TODO: `batch_history` may or may not be super compact at this point, and so this check might - // yield false positives if not sufficiently compact. Maybe we should into this and see. + // yield false positives if not sufficiently compact. Maybe we should look into this and see. interesting = interesting || batch_replay.buffer().iter().any(|&((_, ref t),_)| t.less_equal(&next_time)); interesting = interesting || self.times_current.iter().any(|t| t.less_equal(&next_time)); @@ -658,7 +658,6 @@ mod history_replay { debug_assert!(outputs.iter().any(|(t,_)| t.less_equal(&next_time))) } - // Update `meet` to track the meet of each source of times. meet = None; update_meet(&mut meet, batch_replay.meet()); @@ -685,12 +684,8 @@ mod history_replay { /// Updates an optional meet by an optional time. 
fn update_meet(meet: &mut Option, other: Option<&T>) { if let Some(time) = other { - if let Some(meet) = meet.as_mut() { - *meet = meet.meet(time); - } - if meet.is_none() { - *meet = Some(time.clone()); - } + if let Some(meet) = meet.as_mut() { meet.meet_assign(time); } + else { *meet = Some(time.clone()); } } } } From 81533872e2d317ac8a89382c485d344106779085 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 05:09:47 -0400 Subject: [PATCH 13/20] Idiomatic capability use --- differential-dataflow/src/operators/reduce.rs | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 3715d2843..4e3695e8f 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -8,13 +8,11 @@ use timely::container::PushInto; use crate::Data; -use timely::order::PartialOrder; use timely::progress::frontier::Antichain; use timely::progress::Timestamp; use timely::dataflow::*; use timely::dataflow::operators::Operator; use timely::dataflow::channels::pact::Pipeline; -use timely::dataflow::operators::Capability; use crate::operators::arrange::{Arranged, TraceAgent}; use crate::trace::{BatchReader, Cursor, Trace, Builder, ExertionLogic, Description}; @@ -67,7 +65,7 @@ where // Our implementation maintains a list of outstanding `(key, time)` synthetic interesting times, // as well as capabilities for these times (or their lower envelope, at least). let mut interesting = Vec::<(T1::KeyOwn, G::Timestamp)>::new(); - let mut capabilities = Vec::>::new(); + let mut capabilities = timely::dataflow::operators::CapabilitySet::::default(); // buffers and logic for computing per-key interesting times "efficiently". let mut interesting_times = Vec::::new(); @@ -119,10 +117,7 @@ where } // Ensure that `capabilities` covers the capability of the batch. 
- capabilities.retain(|cap| !capability.time().less_than(cap.time())); - if !capabilities.iter().any(|cap| cap.time().less_equal(capability.time())) { - capabilities.push(capability.retain(0)); - } + capabilities.insert(capability.retain(0)); }); // Pull in any subsequent empty batches we believe to exist. @@ -287,19 +282,10 @@ where // uses exactly `upper_limit` to determine the upper bound. Good to check though. assert!(output_upper.borrow() == upper_limit.borrow()); - // Determine the frontier of our interesting times. + // Update `capabilities` to reflect interesting times. let mut frontier = Antichain::::new(); - for (_, time) in &interesting { - frontier.insert_ref(time); - } - - // Update `capabilities` to reflect interesting pairs described by `frontier`. - let mut new_capabilities = Vec::new(); - for time in frontier.borrow().iter() { - let cap = capabilities.iter().find(|c| c.time().less_equal(time)).expect("failed to find capability"); - new_capabilities.push(cap.delayed(time)); - } - capabilities = new_capabilities; + for (_, time) in &interesting { frontier.insert_ref(time); } + capabilities.downgrade(frontier); } // ensure that observed progress is reflected in the output. From d0215402dfbf9af407c9ba01c46d934e5aa39362 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 05:19:13 -0400 Subject: [PATCH 14/20] Ref keyword to borrow --- differential-dataflow/src/operators/reduce.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 4e3695e8f..44f3727a1 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -168,12 +168,9 @@ where let mut buffer = Bu::Input::default(); // cursors for navigating input and output traces. 
- let (mut source_cursor, source_storage): (T1::Cursor, _) = source_trace.cursor_through(lower_limit.borrow()).expect("failed to acquire source cursor"); - let source_storage = &source_storage; - let (mut output_cursor, output_storage): (T2::Cursor, _) = output_reader.cursor_through(lower_limit.borrow()).expect("failed to acquire output cursor"); - let output_storage = &output_storage; - let (mut batch_cursor, batch_storage) = (CursorList::new(batch_cursors, &batch_storage), batch_storage); - let batch_storage = &batch_storage; + let (mut source_cursor, ref source_storage): (T1::Cursor, _) = source_trace.cursor_through(lower_limit.borrow()).expect("failed to acquire source cursor"); + let (mut output_cursor, ref output_storage): (T2::Cursor, _) = output_reader.cursor_through(lower_limit.borrow()).expect("failed to acquire output cursor"); + let (mut batch_cursor, ref batch_storage) = (CursorList::new(batch_cursors, &batch_storage), batch_storage); let mut thinker = history_replay::HistoryReplayer::new(); From 7a0283486c5e4d03eea6952127e93cefa2c9dfaa Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 05:53:51 -0400 Subject: [PATCH 15/20] Further tightening --- differential-dataflow/src/operators/reduce.rs | 118 +++++++----------- 1 file changed, 43 insertions(+), 75 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 44f3727a1..650feeb0d 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -469,11 +469,7 @@ mod history_replay { // Advance batch history, and capture whether an update exists at `next_time`. 
let mut interesting = batch_replay.step_while_time_is(&next_time); - if interesting { - if let Some(meet) = meet.as_ref() { - batch_replay.advance_buffer_by(meet); - } - } + if interesting { if let Some(meet) = meet.as_ref() { batch_replay.advance_buffer_by(meet); } } // advance both `synth_times` and `times_slice`, marking this time interesting if in either. while self.synth_times.last() == Some(&next_time) { @@ -513,81 +509,62 @@ mod history_replay { // Assemble the input collection at `next_time`. (`self.input_buffer` cleared just after use). debug_assert!(self.input_buffer.is_empty()); if let Some(meet) = meet.as_ref() { input_replay.advance_buffer_by(meet) }; - for &((value, ref time), ref diff) in input_replay.buffer().iter() { - if time.less_equal(&next_time) { - self.input_buffer.push((value, diff.clone())); - } - else { - self.temporary.push(next_time.join(time)); - } + for ((value, time), diff) in input_replay.buffer().iter() { + if time.less_equal(&next_time) { self.input_buffer.push((*value, diff.clone())); } + else { self.temporary.push(next_time.join(time)); } } - for &((value, ref time), ref diff) in batch_replay.buffer().iter() { - if time.less_equal(&next_time) { - self.input_buffer.push((value, diff.clone())); - } - else { - self.temporary.push(next_time.join(time)); - } + for ((value, time), diff) in batch_replay.buffer().iter() { + if time.less_equal(&next_time) { self.input_buffer.push((*value, diff.clone())); } + else { self.temporary.push(next_time.join(time)); } } crate::consolidation::consolidate(&mut self.input_buffer); + // Assemble the output collection at `next_time`. (`self.output_buffer` cleared just after use). 
if let Some(meet) = meet.as_ref() { output_replay.advance_buffer_by(meet) }; - for &((value, ref time), ref diff) in output_replay.buffer().iter() { - if time.less_equal(&next_time) { - self.output_buffer.push((C2::owned_val(value), diff.clone())); - } - else { - self.temporary.push(next_time.join(time)); - } + for ((value, time), diff) in output_replay.buffer().iter() { + if time.less_equal(&next_time) { self.output_buffer.push((C2::owned_val(*value), diff.clone())); } + else { self.temporary.push(next_time.join(time)); } } - for &((ref value, ref time), ref diff) in self.output_produced.iter() { - if time.less_equal(&next_time) { - self.output_buffer.push(((*value).to_owned(), diff.clone())); - } - else { - self.temporary.push(next_time.join(time)); - } + for ((value, time), diff) in self.output_produced.iter() { + if time.less_equal(&next_time) { self.output_buffer.push(((*value).to_owned(), diff.clone())); } + else { self.temporary.push(next_time.join(time)); } } crate::consolidation::consolidate(&mut self.output_buffer); - // Apply user logic if non-empty input and see what happens! + // Apply user logic if non-empty input or output and see what happens! if !self.input_buffer.is_empty() || !self.output_buffer.is_empty() { logic(key, &self.input_buffer[..], &mut self.output_buffer, &mut self.update_buffer); self.input_buffer.clear(); self.output_buffer.clear(); - } - - // Having subtracted output updates from user output, consolidate the results to determine - // if there is anything worth reporting. Note: this also orders the results by value, so - // that could make the above merging plan even easier. - crate::consolidation::consolidate(&mut self.update_buffer); - - // Stash produced updates into both capability-indexed buffers and `output_produced`. - // The two locations are important, in that we will compact `output_produced` as we move - // through times, but we cannot compact the output buffers because we need their actual - // times. 
- if !self.update_buffer.is_empty() { - - // We *should* be able to find a capability for `next_time`. Any thing else would - // indicate a logical error somewhere along the way; either we release a capability - // we should have kept, or we have computed the output incorrectly (or both!) - let idx = outputs.iter().rev().position(|(time, _)| time.less_equal(&next_time)); - let idx = outputs.len() - idx.expect("failed to find index") - 1; - for (val, diff) in self.update_buffer.drain(..) { - self.output_produced.push(((val.clone(), next_time.clone()), diff.clone())); - outputs[idx].1.push((val, next_time.clone(), diff)); - } - // Advance times in `self.output_produced` and consolidate the representation. - // NOTE: We only do this when we add records; it could be that there are situations - // where we want to consolidate even without changes (because an initially - // large collection can now be collapsed). - if let Some(meet) = meet.as_ref() { - for entry in &mut self.output_produced { - (entry.0).1 = (entry.0).1.join(meet); + // Having subtracted output updates from user output, consolidate the results to determine + // if there is anything worth reporting. Note: this also orders the results by value, so + // that could make the above merging plan even easier. + // + // Stash produced updates into both capability-indexed buffers and `output_produced`. + // The two locations are important, in that we will compact `output_produced` as we move + // through times, but we cannot compact the output buffers because we need their actual + // times. + crate::consolidation::consolidate(&mut self.update_buffer); + if !self.update_buffer.is_empty() { + + // We *should* be able to find a capability for `next_time`. Any thing else would + // indicate a logical error somewhere along the way; either we release a capability + // we should have kept, or we have computed the output incorrectly (or both!) 
+ let idx = outputs.iter().rev().position(|(time, _)| time.less_equal(&next_time)); + let idx = outputs.len() - idx.expect("failed to find index") - 1; + for (val, diff) in self.update_buffer.drain(..) { + self.output_produced.push(((val.clone(), next_time.clone()), diff.clone())); + outputs[idx].1.push((val, next_time.clone(), diff)); } + + // Advance times in `self.output_produced` and consolidate the representation. + // NOTE: We only do this when we add records; it could be that there are situations + // where we want to consolidate even without changes (because an initially + // large collection can now be collapsed). + if let Some(meet) = meet.as_ref() { for entry in &mut self.output_produced { (entry.0).1.join_assign(meet); } } + crate::consolidation::consolidate(&mut self.output_produced); } - crate::consolidation::consolidate(&mut self.output_produced); } } @@ -601,17 +578,8 @@ mod history_replay { // Any time, even uninteresting times, must be joined with the current accumulation of // batch times as well as the current accumulation of `times_current`. - for &((_, ref time), _) in batch_replay.buffer().iter() { - if !time.less_equal(&next_time) { - self.temporary.push(time.join(&next_time)); - } - } - for time in self.times_current.iter() { - if !time.less_equal(&next_time) { - self.temporary.push(time.join(&next_time)); - } - } - + self.temporary.extend(batch_replay.buffer().iter().map(|((_,time),_)| time).filter(|time| !time.less_equal(&next_time)).map(|time| time.join(&next_time))); + self.temporary.extend(self.times_current.iter().filter(|time| !time.less_equal(&next_time)).map(|time| time.join(&next_time))); sort_dedup(&mut self.temporary); // Introduce synthetic times, and re-organize if we add any. 
From 8e75167dba1ca16d2c89c90b5836666d9d875d0c Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 06:18:31 -0400 Subject: [PATCH 16/20] Tidy explanatory text, and reflect frontier --- differential-dataflow/src/operators/reduce.rs | 37 +++++++------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index 650feeb0d..9e159e428 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -78,25 +78,15 @@ where let mut output_upper = Antichain::from_elem(::minimum()); let mut output_lower = Antichain::from_elem(::minimum()); - move |(input, _frontier), output| { + move |(input, frontier), output| { - // The `reduce` operator receives fully formed batches, which each serve as an indication - // that the frontier has advanced to the upper bound of their description. + // The operator receives input batches, which it treats as contiguous and will collect and + // then process as one batch. It captures the input frontier from the batches, from the upstream + // trace, and from the input frontier, and retires the work through that interval. // - // Although we could act on each individually, several may have been sent, and it makes - // sense to accumulate them first to coordinate their re-evaluation. We will need to pay - // attention to which times need to be collected under which capability, so that we can - // assemble output batches correctly. We will maintain several builders concurrently, and - // place output updates into the appropriate builder. - // - // It turns out we must use notificators, as we cannot await empty batches from arrange to - // indicate progress, as the arrange may not hold the capability to send such. Instead, we - // must watch for progress here (and the upper bound of received batches) to tell us how - // far we can process work. 
- // - // We really want to retire all batches we receive, so we want a frontier which reflects - // both information from batches as well as progress information. I think this means that - // we keep times that are greater than or equal to a time in the other frontier, deduplicated. + // Reduce may retain capabilities and need to perform work and produce output at times that + // may not be seen in its input. The standard example is that updates at `(0, 1)` and `(1, 0)` + // may result in outputs at `(1, 1)` as well, even with no input at that time. let mut batch_cursors = Vec::new(); let mut batch_storage = Vec::new(); @@ -105,23 +95,22 @@ where lower_limit.clear(); lower_limit.extend(upper_limit.borrow().iter().cloned()); - // Drain the input stream of batches, validating the contiguity of the batch descriptions and - // capturing a cursor for each of the batches as well as ensuring we hold a capability for the - // times in the batch. + // Drain input batches in order, capturing capabilities and the last upper. input.for_each(|capability, batches| { - + capabilities.insert(capability.retain(0)); for batch in batches.drain(..) { upper_limit.clone_from(batch.upper()); batch_cursors.push(batch.cursor()); batch_storage.push(batch); } - - // Ensure that `capabilities` covers the capability of the batch. - capabilities.insert(capability.retain(0)); }); // Pull in any subsequent empty batches we believe to exist. source_trace.advance_upper(&mut upper_limit); + // Incorporate the input frontier guarantees as well. + let mut joined = Antichain::new(); + crate::lattice::antichain_join_into(&upper_limit.borrow()[..], &frontier.frontier()[..], &mut joined); + upper_limit = joined; // Only if our upper limit has advanced should we do work. 
if upper_limit != lower_limit {

From 7c053ade8bb2afe6183a95860d076e99296fa998 Mon Sep 17 00:00:00 2001
From: Frank McSherry
Date: Sat, 4 Apr 2026 06:46:33 -0400
Subject: [PATCH 17/20] Tighten comments, remove mutable borrow

---
 differential-dataflow/src/operators/reduce.rs | 48 ++++++-------------
 1 file changed, 14 insertions(+), 34 deletions(-)

diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs
index 9e159e428..f6e5ad60b 100644
--- a/differential-dataflow/src/operators/reduce.rs
+++ b/differential-dataflow/src/operators/reduce.rs
@@ -112,18 +112,15 @@ where
             crate::lattice::antichain_join_into(&upper_limit.borrow()[..], &frontier.frontier()[..], &mut joined);
             upper_limit = joined;

-            // Only if our upper limit has advanced should we do work.
+            // We plan to retire the interval [lower_limit, upper_limit), which should be non-empty to proceed.
             if upper_limit != lower_limit {

-                // If we have no capabilities, then we (i) should not produce any outputs and (ii) could not send
-                // any produced outputs even if they were (incorrectly) produced. We cannot even send empty batches
-                // to indicate forward progress, and must hope that downstream operators look at progress frontiers
-                // as well as batch descriptions.
-                //
-                // We can (and should) advance source and output traces if `upper_limit` indicates this is possible.
+                // If we hold no capabilities in the interval [lower_limit, upper_limit) then we have no compute needs,
+                // and could not transmit the outputs even if they were (incorrectly) non-zero.
+                // We do have maintenance work after this logic, and should not fuse this test with the above test.
                 if capabilities.iter().any(|c| !upper_limit.less_equal(c.time())) {

-                    // `interesting` contains "warnings" about keys and times that may need to be re-considered.
+                    // `interesting` contains "todos" about key and time pairs that should be re-considered.
// We first extract those times from this list that lie in the interval we will process. sort_dedup(&mut interesting); // `exposed` contains interesting (key, time)s now below `upper_limit` @@ -139,12 +136,6 @@ where }); // Prepare an output buffer and builder for each capability. - // - // We buffer and build separately, as outputs are produced grouped by time, whereas the - // builder wants to see outputs grouped by value. While the per-key computation could - // do the re-sorting itself, buffering per-key outputs lets us double check the results - // against other implementations for accuracy. - // // TODO: It would be better if all updates went into one batch, but timely dataflow prevents // this as long as it requires that there is only one capability for each message. let mut buffers = Vec::<(G::Timestamp, Vec<(T2::ValOwn, G::Timestamp, T2::Diff)>)>::new(); @@ -163,11 +154,7 @@ where let mut thinker = history_replay::HistoryReplayer::new(); - // We now march through the keys we must work on, drawing from `batch_cursors` and `exposed`. - // - // We only keep valid cursors (those with more data) in `batch_cursors`, and so its length - // indicates whether more data remain. We move through `exposed` using (index) `exposed_position`. - // There could perhaps be a less provocative variable name. + // March through the keys we must work on, merging `batch_cursors` and `exposed`. let mut exposed_position = 0; while batch_cursor.key_valid(batch_storage) || exposed_position < exposed_keys.len() { @@ -181,13 +168,9 @@ where (None, None) => unreachable!(), }; - // `interesting_times` contains those times between `lower_issued` and `upper_limit` - // that we need to re-consider. We now populate it, but perhaps this should be left - // to the per-key computation, which may be able to avoid examining the times of some - // values (for example, in the case of min/max/topk). + // Populate `interesting_times` with interesting times not beyond `upper_limit`. 
+ // TODO: This could just be `exposed_time` and `lower .. upper` bounds. interesting_times.clear(); - - // Populate `interesting_times` with synthetic interesting times (below `upper_limit`) for this key. while exposed_keys.get(exposed_position) == Some(key) { interesting_times.push(T1::owned_time(exposed_time.index(exposed_position))); exposed_position += 1; @@ -202,22 +185,19 @@ where (&mut source_cursor, source_storage), (&mut output_cursor, output_storage), (&mut batch_cursor, batch_storage), - &mut interesting_times, + &interesting_times, &mut logic, &upper_limit, &mut buffers[..], &mut new_interesting_times, ); - if batch_cursor.get_key(batch_storage) == Some(key) { - batch_cursor.step_key(batch_storage); - } + // Advance the cursor if this key, so that the loop's validity check registers the work as done. + if batch_cursor.get_key(batch_storage) == Some(key) { batch_cursor.step_key(batch_storage); } // Record future warnings about interesting times (and assert they should be "future"). - for time in new_interesting_times.drain(..) { - debug_assert!(upper_limit.less_equal(&time)); - interesting.push((T1::owned_key(key), time)); - } + debug_assert!(new_interesting_times.iter().all(|t| upper_limit.less_equal(t))); + interesting.extend(new_interesting_times.drain(..).map(|t| (T1::owned_key(key), t))); // Sort each buffer by value and move into the corresponding builder. 
// TODO: This makes assumptions about at least one of (i) the stability of `sort_by`, @@ -367,7 +347,7 @@ mod history_replay { (source_cursor, source_storage): (&mut C1, &'a C1::Storage), (output_cursor, output_storage): (&mut C2, &'a C2::Storage), (batch_cursor, batch_storage): (&mut C3, &'a C3::Storage), - times: &mut Vec, + times: &Vec, logic: &mut L, upper_limit: &Antichain, outputs: &mut [(C2::Time, Vec<(V, C2::Time, C2::Diff)>)], From 5281266452aea5e6894dafc02c31409342aa6d6f Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 07:00:05 -0400 Subject: [PATCH 18/20] Prefer insert_ref to insert --- differential-dataflow/src/operators/reduce.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index f6e5ad60b..b5b2aac4c 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -227,7 +227,7 @@ where output_upper.clear(); output_upper.extend(upper_limit.borrow().iter().cloned()); for capability in &capabilities[index + 1 ..] 
{ - output_upper.insert(capability.time().clone()); + output_upper.insert_ref(capability.time()); } if output_upper.borrow() != output_lower.borrow() { From 671de13808b4500a86238c254196226aed2e0a88 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sat, 4 Apr 2026 17:15:46 -0400 Subject: [PATCH 19/20] Use containers for interesting (keys, time) (#710) * Use containers for interesting (keys, time) * Remove use of owned keys in reduce.rs * Remove all uses of KeyOwn * Remove KeyOwn * Improve documentation * Walk back overly prescriptive dogs^3 constraints --- .../examples/columnar/main.rs | 12 +- differential-dataflow/src/collection.rs | 28 ++- .../src/operators/arrange/agent.rs | 6 +- .../src/operators/arrange/arrangement.rs | 37 ++-- .../src/operators/arrange/upsert.rs | 24 +-- differential-dataflow/src/operators/reduce.rs | 169 ++++++++++-------- .../src/trace/implementations/mod.rs | 8 +- differential-dataflow/src/trace/mod.rs | 3 - differential-dataflow/tests/import.rs | 12 +- dogsdogsdogs/src/operators/count.rs | 3 +- dogsdogsdogs/src/operators/half_join.rs | 9 +- dogsdogsdogs/src/operators/half_join2.rs | 6 +- dogsdogsdogs/src/operators/lookup_map.rs | 2 +- dogsdogsdogs/src/operators/propose.rs | 4 +- dogsdogsdogs/src/operators/validate.rs | 2 +- interactive/src/plan/mod.rs | 6 +- 16 files changed, 200 insertions(+), 131 deletions(-) diff --git a/differential-dataflow/examples/columnar/main.rs b/differential-dataflow/examples/columnar/main.rs index 8404e7aa4..85b573157 100644 --- a/differential-dataflow/examples/columnar/main.rs +++ b/differential-dataflow/examples/columnar/main.rs @@ -168,8 +168,16 @@ mod reachability { let result = combined_arr.reduce_abelian::<_, ValBuilder, ValSpine, - >("Distinct", |_node, _input, output| { - output.push(((), 1)); + _, + >("Distinct", |_node, _input, output| { output.push(((), 1)); }, + |col, key, upds| { + use columnar::{Clear, Push}; + col.keys.clear(); + col.vals.clear(); + col.times.clear(); + col.diffs.clear(); + for 
(val, time, diff) in upds.drain(..) { col.push((key, &val, &time, &diff)); } + *col = std::mem::take(col).consolidate(); }); // Extract RecordedUpdates from the Arranged's batch stream. diff --git a/differential-dataflow/src/collection.rs b/differential-dataflow/src/collection.rs index c13227f2c..6981cc3da 100644 --- a/differential-dataflow/src/collection.rs +++ b/differential-dataflow/src/collection.rs @@ -752,7 +752,11 @@ pub mod vec { use crate::trace::implementations::{ValBuilder, ValSpine}; self.arrange_by_key_named(&format!("Arrange: {}", name)) - .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine>(name, logic) + .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine,_>( + name, + logic, + |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, + ) .as_collection(|k,v| (k.clone(), v.clone())) } @@ -782,7 +786,7 @@ pub mod vec { /// ``` pub fn reduce_abelian(self, name: &str, mut logic: L) -> Arranged> where - T2: for<'a> Trace= &'a K, KeyOwn = K, ValOwn = V, Time=G::Timestamp, Diff: Abelian>+'static, + T2: for<'a> Trace= &'a K, ValOwn = V, Time=G::Timestamp, Diff: Abelian>+'static, Bu: Builder, Output = T2::Batch>, L: FnMut(&K, &[(&V, R)], &mut Vec<(V, T2::Diff)>)+'static, { @@ -801,12 +805,16 @@ pub mod vec { pub fn reduce_core(self, name: &str, logic: L) -> Arranged> where V: Clone+'static, - T2: for<'a> Trace=&'a K, KeyOwn = K, ValOwn = V, Time=G::Timestamp>+'static, + T2: for<'a> Trace=&'a K, ValOwn = V, Time=G::Timestamp>+'static, Bu: Builder, Output = T2::Batch>, L: FnMut(&K, &[(&V, R)], &mut Vec<(V,T2::Diff)>, &mut Vec<(V, T2::Diff)>)+'static, { self.arrange_by_key_named(&format!("Arrange: {}", name)) - .reduce_core::<_,Bu,_>(name, logic) + .reduce_core::<_,Bu,_,_>( + name, + logic, + |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, + ) } } @@ -871,7 +879,11 @@ pub mod vec { use crate::trace::implementations::{KeyBuilder, KeySpine}; 
self.arrange_by_self_named(&format!("Arrange: {}", name)) - .reduce_abelian::<_,KeyBuilder,KeySpine>(name, move |k,s,t| t.push(((), thresh(k, &s[0].1)))) + .reduce_abelian::<_,KeyBuilder,KeySpine,_>( + name, + move |k,s,t| t.push(((), thresh(k, &s[0].1))), + |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, + ) .as_collection(|k,_| k.clone()) } @@ -908,7 +920,11 @@ pub mod vec { pub fn count_core + 'static>(self) -> Collection { use crate::trace::implementations::{ValBuilder, ValSpine}; self.arrange_by_self_named("Arrange: Count") - .reduce_abelian::<_,ValBuilder,ValSpine>("Count", |_k,s,t| t.push((s[0].1.clone(), R2::from(1i8)))) + .reduce_abelian::<_,ValBuilder,ValSpine,_>( + "Count", + |_k,s,t| t.push((s[0].1.clone(), R2::from(1i8))), + |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, + ) .as_collection(|k,c| (k.clone(), c.clone())) } } diff --git a/differential-dataflow/src/operators/arrange/agent.rs b/differential-dataflow/src/operators/arrange/agent.rs index e1c2fde5f..715542f55 100644 --- a/differential-dataflow/src/operators/arrange/agent.rs +++ b/differential-dataflow/src/operators/arrange/agent.rs @@ -205,7 +205,11 @@ impl TraceAgent { /// // create a second dataflow /// worker.dataflow(move |scope| { /// trace.import(scope) - /// .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine<_,_,_,_>>("Reduce", |_key, src, dst| dst.push((*src[0].0, 1))) + /// .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine<_,_,_,_>,_>( + /// "Reduce", + /// |_key, src, dst| dst.push((*src[0].0, 1)), + /// |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, + /// ) /// .as_collection(|k,v| (k.clone(), v.clone())); /// }); /// diff --git a/differential-dataflow/src/operators/arrange/arrangement.rs b/differential-dataflow/src/operators/arrange/arrangement.rs index 69965e65b..cd1bd0894 100644 --- 
a/differential-dataflow/src/operators/arrange/arrangement.rs +++ b/differential-dataflow/src/operators/arrange/arrangement.rs @@ -30,7 +30,6 @@ use crate::{Data, VecCollection, AsCollection}; use crate::difference::Semigroup; use crate::lattice::Lattice; use crate::trace::{self, Trace, TraceReader, BatchReader, Batcher, Builder, Cursor}; -use crate::trace::implementations::merge_batcher::container::InternalMerge; use trace::wrappers::enter::{TraceEnter, BatchEnter,}; use trace::wrappers::enter_at::TraceEnter as TraceEnterAt; @@ -75,7 +74,6 @@ where use ::timely::dataflow::scopes::Child; use ::timely::progress::timestamp::Refines; use timely::Container; -use timely::container::PushInto; impl Arranged where @@ -169,12 +167,17 @@ where /// The underlying `Stream>>` is a much more efficient way to access the data, /// and this method should only be used when the data need to be transformed or exchanged, rather than /// supplied as arguments to an operator using the same key-value structure. - pub fn as_vecs(self) -> VecCollection + /// + /// The method takes `K` and `V` as generic arguments, in order to constrain the reference types to support + /// cloning into owned types. If this bound does not work, the `as_collection` method allows arbitrary logic + /// on the reference types. + pub fn as_vecs(self) -> VecCollection where - Tr::KeyOwn: crate::ExchangeData, - Tr::ValOwn: crate::ExchangeData, + K: crate::ExchangeData, + V: crate::ExchangeData, + Tr: for<'a> TraceReader = &'a K, Val<'a> = &'a V>, { - self.flat_map_ref(move |key, val| [(Tr::owned_key(key), Tr::owned_val(val))]) + self.flat_map_ref(move |key, val| [(key.clone(), val.clone())]) } /// Extracts elements from an arrangement as a `VecCollection`. @@ -271,43 +274,43 @@ where T1: TraceReader + Clone + 'static, { /// A direct implementation of `ReduceCore::reduce_abelian`. 
- pub fn reduce_abelian(self, name: &str, mut logic: L) -> Arranged> + pub fn reduce_abelian(self, name: &str, mut logic: L, push: P) -> Arranged> where - T1: TraceReader, + T1: TraceReader, T2: for<'a> Trace< Key<'a>= T1::Key<'a>, - KeyOwn=T1::KeyOwn, ValOwn: Data, Time=T1::Time, Diff: Abelian, >+'static, - Bu: Builder>, + Bu: Builder, L: FnMut(T1::Key<'_>, &[(T1::Val<'_>, T1::Diff)], &mut Vec<(T2::ValOwn, T2::Diff)>)+'static, + P: FnMut(&mut Bu::Input, T1::Key<'_>, &mut Vec<(T2::ValOwn, T2::Time, T2::Diff)>) + 'static, { - self.reduce_core::<_,Bu,T2>(name, move |key, input, output, change| { + self.reduce_core::<_,Bu,T2,_>(name, move |key, input, output, change| { if !input.is_empty() { logic(key, input, change); } change.extend(output.drain(..).map(|(x,mut d)| { d.negate(); (x, d) })); crate::consolidation::consolidate(change); - }) + }, push) } /// A direct implementation of `ReduceCore::reduce_core`. - pub fn reduce_core(self, name: &str, logic: L) -> Arranged> + pub fn reduce_core(self, name: &str, logic: L, push: P) -> Arranged> where - T1: TraceReader, + T1: TraceReader, T2: for<'a> Trace< Key<'a>=T1::Key<'a>, - KeyOwn=T1::KeyOwn, ValOwn: Data, Time=T1::Time, >+'static, - Bu: Builder>, + Bu: Builder, L: FnMut(T1::Key<'_>, &[(T1::Val<'_>, T1::Diff)], &mut Vec<(T2::ValOwn, T2::Diff)>, &mut Vec<(T2::ValOwn, T2::Diff)>)+'static, + P: FnMut(&mut Bu::Input, T1::Key<'_>, &mut Vec<(T2::ValOwn, T2::Time, T2::Diff)>) + 'static, { use crate::operators::reduce::reduce_trace; - reduce_trace::<_,_,Bu,_,_>(self, name, logic) + reduce_trace::<_,_,Bu,_,_,_>(self, name, logic, push) } } diff --git a/differential-dataflow/src/operators/arrange/upsert.rs b/differential-dataflow/src/operators/arrange/upsert.rs index e9dbe9cdb..a8322eaa3 100644 --- a/differential-dataflow/src/operators/arrange/upsert.rs +++ b/differential-dataflow/src/operators/arrange/upsert.rs @@ -59,7 +59,7 @@ //! use differential_dataflow::operators::arrange::upsert; //! //! 
let stream = scope.input_from(&mut input); -//! let arranged = upsert::arrange_from_upsert::<_, ValBuilder, ValSpine>(stream, &"test"); +//! let arranged = upsert::arrange_from_upsert::<_, ValBuilder, ValSpine,String,String>(stream, &"test"); //! //! arranged //! .as_collection(|k,v| (k.clone(), v.clone())) @@ -127,19 +127,21 @@ use super::TraceAgent; /// This method is only implemented for totally ordered times, as we do not yet /// understand what a "sequence" of upserts would mean for partially ordered /// timestamps. -pub fn arrange_from_upsert( - stream: Stream, G::Timestamp)>>, +pub fn arrange_from_upsert( + stream: Stream, G::Timestamp)>>, name: &str, ) -> Arranged> where G: Scope, + K: ExchangeData+Hashable+std::hash::Hash, + V: ExchangeData, Tr: for<'a> Trace< - KeyOwn: ExchangeData+Hashable+std::hash::Hash, - ValOwn: ExchangeData, + Key<'a> = &'a K, + Val<'a> = &'a V, Time: TotalOrder+ExchangeData, Diff=isize, >+'static, - Bu: Builder, Output = Tr::Batch>, + Bu: Builder, Output = Tr::Batch>, { let mut reader: Option> = None; @@ -148,7 +150,7 @@ where let reader = &mut reader; - let exchange = Exchange::new(move |update: &(Tr::KeyOwn,Option,G::Timestamp)| (update.0).hashed().into()); + let exchange = Exchange::new(move |update: &(K,Option,G::Timestamp)| (update.0).hashed().into()); let scope = stream.scope(); stream.unary_frontier(exchange, name, move |_capability, info| { @@ -174,7 +176,7 @@ where let mut prev_frontier = Antichain::from_elem(::minimum()); // For stashing input upserts, ordered increasing by time (`BinaryHeap` is a max-heap). - let mut priority_queue = BinaryHeap::)>>::new(); + let mut priority_queue = BinaryHeap::)>>::new(); let mut updates = Vec::new(); move |(input, frontier), output| { @@ -237,10 +239,10 @@ where let mut key_con = Tr::KeyContainer::with_capacity(1); for (key, mut list) in to_process { - key_con.clear(); key_con.push_own(&key); + key_con.clear(); key_con.push_ref(&key); // The prior value associated with the key. 
- let mut prev_value: Option = None; + let mut prev_value: Option = None; // Attempt to find the key in the trace. trace_cursor.seek_key(&trace_storage, key_con.index(0)); @@ -252,7 +254,7 @@ where assert!(count == 0 || count == 1); if count == 1 { assert!(prev_value.is_none()); - prev_value = Some(Tr::owned_val(val)); + prev_value = Some(val.clone()); } trace_cursor.step_val(&trace_storage); } diff --git a/differential-dataflow/src/operators/reduce.rs b/differential-dataflow/src/operators/reduce.rs index b5b2aac4c..909199295 100644 --- a/differential-dataflow/src/operators/reduce.rs +++ b/differential-dataflow/src/operators/reduce.rs @@ -5,7 +5,6 @@ //! to the key and the list of values. //! The function is expected to populate a list of output values. -use timely::container::PushInto; use crate::Data; use timely::progress::frontier::Antichain; @@ -18,21 +17,29 @@ use crate::operators::arrange::{Arranged, TraceAgent}; use crate::trace::{BatchReader, Cursor, Trace, Builder, ExertionLogic, Description}; use crate::trace::cursor::CursorList; use crate::trace::implementations::containers::BatchContainer; -use crate::trace::implementations::merge_batcher::container::InternalMerge; use crate::trace::TraceReader; -// TODO: Remove the InternalMerge constraint on Bu::Input. It only needs Clear. - /// A key-wise reduction of values in an input trace. /// /// This method exists to provide reduce functionality without opinions about qualifying trace types. -pub fn reduce_trace(trace: Arranged, name: &str, mut logic: L) -> Arranged> +/// +/// The `logic` closure is expected to take a key, accumulated input, and tentative accumulated output, +/// and populate its final argument with whatever it feels to be appropriate updates. The behavior and +/// correctness of the implementation rely on this making sense, and e.g. ideally the updates would if +/// applied to the tentative output bring it in line with some function applied to the input. 
+/// +/// The `push` closure is expected to clear its first argument, then populate it with the key and drain +/// the value updates, as appropriate for the container. It is critical that it clear the container as +/// the operator has no ability to do this otherwise, and failing to do so represents a leak from one +/// key's computation to another, and will likely introduce non-determinism. +pub fn reduce_trace(trace: Arranged, name: &str, mut logic: L, mut push: P) -> Arranged> where G: Scope, - T1: TraceReader + Clone + 'static, - T2: for<'a> Trace=T1::Key<'a>, KeyOwn=T1::KeyOwn, ValOwn: Data, Time=T1::Time> + 'static, - Bu: Builder>, + T1: TraceReader + Clone + 'static, + T2: for<'a> Trace=T1::Key<'a>, ValOwn: Data, Time=T1::Time> + 'static, + Bu: Builder, L: FnMut(T1::Key<'_>, &[(T1::Val<'_>, T1::Diff)], &mut Vec<(T2::ValOwn,T2::Diff)>, &mut Vec<(T2::ValOwn, T2::Diff)>)+'static, + P: FnMut(&mut Bu::Input, T1::Key<'_>, &mut Vec<(T2::ValOwn, T2::Time, T2::Diff)>) + 'static, { let mut result_trace = None; @@ -53,7 +60,6 @@ where empty.set_exert_logic(exert_logic); } - let mut source_trace = trace.trace.clone(); let (mut output_reader, mut output_writer) = TraceAgent::new(empty, operator_info, logger); @@ -63,8 +69,11 @@ where let mut new_interesting_times = Vec::::new(); // Our implementation maintains a list of outstanding `(key, time)` synthetic interesting times, - // as well as capabilities for these times (or their lower envelope, at least). - let mut interesting = Vec::<(T1::KeyOwn, G::Timestamp)>::new(); + // sorted by (key, time), as well as capabilities for the lower envelope of the times. 
+ let mut pending_keys = T1::KeyContainer::with_capacity(0); + let mut pending_time = T1::TimeContainer::with_capacity(0); + let mut next_pending_keys = T1::KeyContainer::with_capacity(0); + let mut next_pending_time = T1::TimeContainer::with_capacity(0); let mut capabilities = timely::dataflow::operators::CapabilitySet::::default(); // buffers and logic for computing per-key interesting times "efficiently". @@ -115,25 +124,15 @@ where // We plan to retire the interval [lower_limit, upper_limit), which should be non-empty to proceed. if upper_limit != lower_limit { - // If we hold no capabilitys in the interval [lower_limit, upper_limit) then we have no compute needs, + // If we hold no capabilities in the interval [lower_limit, upper_limit) then we have no compute needs, // and could not transmit the outputs even if they were (incorrectly) non-zero. // We do have maintenance work after this logic, and should not fuse this test with the above test. if capabilities.iter().any(|c| !upper_limit.less_equal(c.time())) { - // `interesting` contains "todos" about key and time pairs that should be re-considered. - // We first extract those times from this list that lie in the interval we will process. - sort_dedup(&mut interesting); - // `exposed` contains interesting (key, time)s now below `upper_limit` - let mut exposed_keys = T1::KeyContainer::with_capacity(0); - let mut exposed_time = T1::TimeContainer::with_capacity(0); - // Keep pairs greater or equal to `upper_limit`, and "expose" other pairs. - interesting.retain(|(key, time)| { - if upper_limit.less_equal(time) { true } else { - exposed_keys.push_own(key); - exposed_time.push_own(time); - false - } - }); + // cursors for navigating input and output traces. 
+ let (mut source_cursor, ref source_storage): (T1::Cursor, _) = source_trace.cursor_through(lower_limit.borrow()).expect("failed to acquire source cursor"); + let (mut output_cursor, ref output_storage): (T2::Cursor, _) = output_reader.cursor_through(lower_limit.borrow()).expect("failed to acquire output cursor"); + let (mut batch_cursor, ref batch_storage) = (CursorList::new(batch_cursors, &batch_storage), batch_storage); // Prepare an output buffer and builder for each capability. // TODO: It would be better if all updates went into one batch, but timely dataflow prevents @@ -144,22 +143,21 @@ buffers.push((cap.time().clone(), Vec::new())); builders.push(Bu::new()); } - + // Temporary staging for output building. let mut buffer = Bu::Input::default(); - // cursors for navigating input and output traces. - let (mut source_cursor, ref source_storage): (T1::Cursor, _) = source_trace.cursor_through(lower_limit.borrow()).expect("failed to acquire source cursor"); - let (mut output_cursor, ref output_storage): (T2::Cursor, _) = output_reader.cursor_through(lower_limit.borrow()).expect("failed to acquire output cursor"); - let (mut batch_cursor, ref batch_storage) = (CursorList::new(batch_cursors, &batch_storage), batch_storage); - + // Reusable state for performing the computation. let mut thinker = history_replay::HistoryReplayer::new(); + // Merge the received batch cursor with our list of interesting (key, time) moments. + // The interesting moments need to be in the interval to prompt work. + // March through the keys we must work on, merging `batch_cursors` and `exposed`. - let mut exposed_position = 0; - while batch_cursor.key_valid(batch_storage) || exposed_position < exposed_keys.len() { + let mut pending_pos = 0; + while batch_cursor.key_valid(batch_storage) || pending_pos < pending_keys.len() { // Determine the next key we will work on; could be synthetic, could be from a batch. 
- let key1 = exposed_keys.get(exposed_position); + let key1 = pending_keys.get(pending_pos); let key2 = batch_cursor.get_key(batch_storage); let key = match (key1, key2) { (Some(key1), Some(key2)) => ::std::cmp::min(key1, key2), @@ -169,49 +167,71 @@ where }; // Populate `interesting_times` with interesting times not beyond `upper_limit`. - // TODO: This could just be `exposed_time` and `lower .. upper` bounds. + // TODO: This could just be `pending_time` and indexes within `lower .. upper`. + let prior_pos = pending_pos; interesting_times.clear(); - while exposed_keys.get(exposed_position) == Some(key) { - interesting_times.push(T1::owned_time(exposed_time.index(exposed_position))); - exposed_position += 1; + while pending_keys.get(pending_pos) == Some(key) { + let owned_time = T1::owned_time(pending_time.index(pending_pos)); + if !upper_limit.less_equal(&owned_time) { interesting_times.push(owned_time); } + pending_pos += 1; } // tidy up times, removing redundancy. sort_dedup(&mut interesting_times); - // do the per-key computation. - thinker.compute( - key, - (&mut source_cursor, source_storage), - (&mut output_cursor, output_storage), - (&mut batch_cursor, batch_storage), - &interesting_times, - &mut logic, - &upper_limit, - &mut buffers[..], - &mut new_interesting_times, - ); - - // Advance the cursor if this key, so that the loop's validity check registers the work as done. - if batch_cursor.get_key(batch_storage) == Some(key) { batch_cursor.step_key(batch_storage); } - - // Record future warnings about interesting times (and assert they should be "future"). - debug_assert!(new_interesting_times.iter().all(|t| upper_limit.less_equal(t))); - interesting.extend(new_interesting_times.drain(..).map(|t| (T1::owned_key(key), t))); - - // Sort each buffer by value and move into the corresponding builder. 
- // TODO: This makes assumptions about at least one of (i) the stability of `sort_by`, - // (ii) that the buffers are time-ordered, and (iii) that the builders accept - // arbitrarily ordered times. - for index in 0 .. buffers.len() { - buffers[index].1.sort_by(|x,y| x.0.cmp(&y.0)); - for (val, time, diff) in buffers[index].1.drain(..) { - buffer.push_into(((T1::owned_key(key), val), time, diff)); + // If there are new updates, or pending times, we must investigate! + if batch_cursor.get_key(batch_storage) == Some(key) || !interesting_times.is_empty() { + + // do the per-key computation. + thinker.compute( + key, + (&mut source_cursor, source_storage), + (&mut output_cursor, output_storage), + (&mut batch_cursor, batch_storage), + &interesting_times, + &mut logic, + &upper_limit, + &mut buffers[..], + &mut new_interesting_times, + ); + + // Advance the cursor if this key, so that the loop's validity check registers the work as done. + if batch_cursor.get_key(batch_storage) == Some(key) { batch_cursor.step_key(batch_storage); } + + // Merge novel pending times with any prior pending times we did not process. + // TODO: This could be a merge, not a sort_dedup, because both lists should be sorted. + for pos in prior_pos .. pending_pos { + let owned_time = T1::owned_time(pending_time.index(pos)); + if upper_limit.less_equal(&owned_time) { new_interesting_times.push(owned_time); } + } + sort_dedup(&mut new_interesting_times); + for time in new_interesting_times.drain(..) { + next_pending_keys.push_ref(key); + next_pending_time.push_own(&time); + } + + // Sort each buffer by value and move into the corresponding builder. + // TODO: This makes assumptions about at least one of (i) the stability of `sort_by`, + // (ii) that the buffers are time-ordered, and (iii) that the builders accept + // arbitrarily ordered times. + for index in 0 .. 
buffers.len() { + buffers[index].1.sort_by(|x,y| x.0.cmp(&y.0)); + push(&mut buffer, key, &mut buffers[index].1); + buffers[index].1.clear(); builders[index].push(&mut buffer); - buffer.clear(); + + } + } + else { + // copy over the pending key and times. + for pos in prior_pos .. pending_pos { + next_pending_keys.push_ref(pending_keys.index(pos)); + next_pending_time.push_ref(pending_time.index(pos)); } } } + // Drop to avoid lifetime issues that would lock `pending_{keys, time}`. + drop(thinker); // We start sealing output batches from the lower limit (previous upper limit). // In principle, we could update `lower_limit` itself, and it should arrive at @@ -243,14 +263,21 @@ where output_lower.extend(output_upper.borrow().iter().cloned()); } } - // This should be true, as the final iteration introduces no capabilities, and // uses exactly `upper_limit` to determine the upper bound. Good to check though. assert!(output_upper.borrow() == upper_limit.borrow()); - // Update `capabilities` to reflect interesting times. + // Refresh pending keys and times, then downgrade capabilities to the frontier of times. + pending_keys.clear(); std::mem::swap(&mut next_pending_keys, &mut pending_keys); + pending_time.clear(); std::mem::swap(&mut next_pending_time, &mut pending_time); + + // Update `capabilities` to reflect pending times. let mut frontier = Antichain::::new(); - for (_, time) in &interesting { frontier.insert_ref(time); } + let mut owned_time = T1::Time::minimum(); + for pos in 0 .. 
pending_time.len() { + T1::clone_time_onto(pending_time.index(pos), &mut owned_time); + frontier.insert_ref(&owned_time); + } capabilities.downgrade(frontier); } diff --git a/differential-dataflow/src/trace/implementations/mod.rs b/differential-dataflow/src/trace/implementations/mod.rs index d73eb71f4..082af6812 100644 --- a/differential-dataflow/src/trace/implementations/mod.rs +++ b/differential-dataflow/src/trace/implementations/mod.rs @@ -113,8 +113,6 @@ pub trait WithLayout { /// Automatically implemented trait for types with layouts. pub trait LayoutExt : WithLayout> { - /// Alias for an owned key of a layout. - type KeyOwn; /// Alias for an borrowed key of a layout. type Key<'a>: Copy + Ord; /// Alias for an owned val of a layout. @@ -131,7 +129,7 @@ pub trait LayoutExt : WithLayout: Copy + Ord; /// Container for update keys. - type KeyContainer: for<'a> BatchContainer = Self::Key<'a>, Owned = Self::KeyOwn>; + type KeyContainer: for<'a> BatchContainer = Self::Key<'a>>; /// Container for update vals. type ValContainer: for<'a> BatchContainer = Self::Val<'a>, Owned = Self::ValOwn>; /// Container for times. @@ -139,8 +137,6 @@ pub trait LayoutExt : WithLayout BatchContainer = Self::DiffGat<'a>, Owned = Self::Diff>; - /// Construct an owned key from a reference. - fn owned_key(key: Self::Key<'_>) -> Self::KeyOwn; /// Construct an owned val from a reference. fn owned_val(val: Self::Val<'_>) -> Self::ValOwn; /// Construct an owned time from a reference. 
@@ -153,7 +149,6 @@ pub trait LayoutExt : WithLayout LayoutExt for L { - type KeyOwn = <::KeyContainer as BatchContainer>::Owned; type Key<'a> = <::KeyContainer as BatchContainer>::ReadItem<'a>; type ValOwn = <::ValContainer as BatchContainer>::Owned; type Val<'a> = <::ValContainer as BatchContainer>::ReadItem<'a>; @@ -167,7 +162,6 @@ impl LayoutExt for L { type TimeContainer = ::TimeContainer; type DiffContainer = ::DiffContainer; - #[inline(always)] fn owned_key(key: Self::Key<'_>) -> Self::KeyOwn { ::KeyContainer::into_owned(key) } #[inline(always)] fn owned_val(val: Self::Val<'_>) -> Self::ValOwn { ::ValContainer::into_owned(val) } #[inline(always)] fn owned_time(time: Self::TimeGat<'_>) -> Self::Time { ::TimeContainer::into_owned(time) } #[inline(always)] fn owned_diff(diff: Self::DiffGat<'_>) -> Self::Diff { ::DiffContainer::into_owned(diff) } diff --git a/differential-dataflow/src/trace/mod.rs b/differential-dataflow/src/trace/mod.rs index 0197e09c0..582a6da62 100644 --- a/differential-dataflow/src/trace/mod.rs +++ b/differential-dataflow/src/trace/mod.rs @@ -54,7 +54,6 @@ pub trait TraceReader : LayoutExt { WithLayout + for<'a> LayoutExt< Key<'a> = Self::Key<'a>, - KeyOwn = Self::KeyOwn, Val<'a> = Self::Val<'a>, ValOwn = Self::ValOwn, Time = Self::Time, @@ -77,7 +76,6 @@ pub trait TraceReader : LayoutExt { WithLayout + for<'a> LayoutExt< Key<'a> = Self::Key<'a>, - KeyOwn = Self::KeyOwn, Val<'a> = Self::Val<'a>, ValOwn = Self::ValOwn, Time = Self::Time, @@ -253,7 +251,6 @@ pub trait BatchReader : LayoutExt + Sized { WithLayout + for<'a> LayoutExt< Key<'a> = Self::Key<'a>, - KeyOwn = Self::KeyOwn, Val<'a> = Self::Val<'a>, ValOwn = Self::ValOwn, Time = Self::Time, diff --git a/differential-dataflow/tests/import.rs b/differential-dataflow/tests/import.rs index 747e45b72..1bb2c9cc1 100644 --- a/differential-dataflow/tests/import.rs +++ b/differential-dataflow/tests/import.rs @@ -60,7 +60,11 @@ fn test_import_vanilla() { ::std::mem::drop(trace); let captured = 
imported - .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine>("Reduce", |_k, s, t| t.push((s.iter().map(|&(_, w)| w).sum(), 1i64))) + .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine,_>( + "Reduce", + |_k, s, t| t.push((s.iter().map(|&(_, w)| w).sum(), 1i64)), + |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, + ) .as_collection(|k,v| (k.clone(), v.clone())) .inner .exchange(|_| 0) @@ -135,7 +139,11 @@ fn test_import_completed_dataflow() { ::std::mem::drop(trace); let stream = imported - .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine>("Reduce", |_k, s, t| t.push((s.iter().map(|&(_, w)| w).sum(), 1i64))) + .reduce_abelian::<_,ValBuilder<_,_,_,_>,ValSpine,_>( + "Reduce", + |_k, s, t| t.push((s.iter().map(|&(_, w)| w).sum(), 1i64)), + |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); + }) .as_collection(|k,v| (k.clone(), v.clone())) .inner .exchange(|_| 0); diff --git a/dogsdogsdogs/src/operators/count.rs b/dogsdogsdogs/src/operators/count.rs index 0cd363a76..42367a2e1 100644 --- a/dogsdogsdogs/src/operators/count.rs +++ b/dogsdogsdogs/src/operators/count.rs @@ -19,7 +19,8 @@ pub fn count( ) -> VecCollection where G: Scope, - Tr: TraceReader+Clone+'static, + Tr: TraceReader+Clone+'static, + Tr::KeyContainer: differential_dataflow::trace::implementations::BatchContainer, for<'a> Tr::Diff : Semigroup>, K: Hashable + Ord + Default + 'static, R: Monoid+Multiply+ExchangeData, diff --git a/dogsdogsdogs/src/operators/half_join.rs b/dogsdogsdogs/src/operators/half_join.rs index ddb2a5979..2a18a9aa1 100644 --- a/dogsdogsdogs/src/operators/half_join.rs +++ b/dogsdogsdogs/src/operators/half_join.rs @@ -85,7 +85,8 @@ where K: Hashable + ExchangeData, V: ExchangeData, R: ExchangeData + Monoid, - Tr: TraceReader+Clone+'static, + Tr: TraceReader+Clone+'static, + Tr::KeyContainer: BatchContainer, R: Mul, FF: Fn(&G::Timestamp, &mut Antichain) + 'static, CF: Fn(Tr::TimeGat<'_>, 
&G::Timestamp) -> bool + 'static, @@ -151,7 +152,8 @@ where K: Hashable + ExchangeData, V: ExchangeData, R: ExchangeData + Monoid, - Tr: for<'a> TraceReader+Clone+'static, + Tr: TraceReader+Clone+'static, + Tr::KeyContainer: BatchContainer, FF: Fn(&G::Timestamp, &mut Antichain) + 'static, CF: Fn(Tr::TimeGat<'_>, &Tr::Time) -> bool + 'static, Y: Fn(std::time::Instant, usize) -> bool + 'static, @@ -312,7 +314,8 @@ fn process_proposals( ) -> bool where G: Scope, - Tr: for<'a> TraceReader, + Tr: TraceReader, + Tr::KeyContainer: BatchContainer, CF: Fn(Tr::TimeGat<'_>, &Tr::Time) -> bool + 'static, Y: Fn(Instant, usize) -> bool + 'static, S: FnMut(&mut SessionFor, &K, &V, Tr::Val<'_>, &G::Timestamp, &R, &mut Vec<(G::Timestamp, Tr::Diff)>) + 'static, diff --git a/dogsdogsdogs/src/operators/half_join2.rs b/dogsdogsdogs/src/operators/half_join2.rs index 650dc433d..08029f96e 100644 --- a/dogsdogsdogs/src/operators/half_join2.rs +++ b/dogsdogsdogs/src/operators/half_join2.rs @@ -73,7 +73,8 @@ where K: Hashable + ExchangeData, V: ExchangeData, R: ExchangeData + Monoid, - Tr: TraceReader+Clone+'static, + Tr: TraceReader+Clone+'static, + Tr::KeyContainer: BatchContainer, R: Mul, FF: Fn(&G::Timestamp, &mut Antichain) + 'static, CF: Fn(Tr::TimeGat<'_>, &G::Timestamp) -> bool + 'static, @@ -130,7 +131,8 @@ where K: Hashable + ExchangeData, V: ExchangeData, R: ExchangeData + Monoid, - Tr: for<'a> TraceReader+Clone+'static, + Tr: TraceReader+Clone+'static, + Tr::KeyContainer: BatchContainer, FF: Fn(&G::Timestamp, &mut Antichain) + 'static, CF: Fn(Tr::TimeGat<'_>, &Tr::Time) -> bool + 'static, Y: Fn(std::time::Instant, usize) -> bool + 'static, diff --git a/dogsdogsdogs/src/operators/lookup_map.rs b/dogsdogsdogs/src/operators/lookup_map.rs index 7908ce858..65108ed59 100644 --- a/dogsdogsdogs/src/operators/lookup_map.rs +++ b/dogsdogsdogs/src/operators/lookup_map.rs @@ -29,10 +29,10 @@ pub fn lookup_map( where G: Scope, Tr: for<'a> TraceReader< - KeyOwn = K, Time: std::hash::Hash, Diff 
: Semigroup>+Monoid+ExchangeData, >+Clone+'static, + Tr::KeyContainer: BatchContainer, K: Hashable + Ord + 'static, F: FnMut(&D, &mut K)+Clone+'static, D: ExchangeData, diff --git a/dogsdogsdogs/src/operators/propose.rs b/dogsdogsdogs/src/operators/propose.rs index 9f2e69f78..6cdcb130d 100644 --- a/dogsdogsdogs/src/operators/propose.rs +++ b/dogsdogsdogs/src/operators/propose.rs @@ -21,11 +21,11 @@ pub fn propose( where G: Scope, Tr: for<'a> TraceReader< - KeyOwn = K, ValOwn = V, Time: std::hash::Hash, Diff: Monoid+Multiply+ExchangeData+Semigroup>, >+Clone+'static, + Tr::KeyContainer: differential_dataflow::trace::implementations::BatchContainer, K: Hashable + Default + Ord + 'static, F: Fn(&P)->K+Clone+'static, P: ExchangeData, @@ -55,11 +55,11 @@ pub fn propose_distinct( where G: Scope, Tr: for<'a> TraceReader< - KeyOwn = K, ValOwn = V, Time: std::hash::Hash, Diff : Semigroup>+Monoid+Multiply+ExchangeData, >+Clone+'static, + Tr::KeyContainer: differential_dataflow::trace::implementations::BatchContainer, K: Hashable + Default + Ord + 'static, F: Fn(&P)->K+Clone+'static, P: ExchangeData, diff --git a/dogsdogsdogs/src/operators/validate.rs b/dogsdogsdogs/src/operators/validate.rs index f610d55be..0ab7297bb 100644 --- a/dogsdogsdogs/src/operators/validate.rs +++ b/dogsdogsdogs/src/operators/validate.rs @@ -20,10 +20,10 @@ pub fn validate( where G: Scope, Tr: for<'a> TraceReader< - KeyOwn = (K, V), Time: std::hash::Hash, Diff : Semigroup>+Monoid+Multiply+ExchangeData, >+Clone+'static, + Tr::KeyContainer: differential_dataflow::trace::implementations::BatchContainer, K: Ord+Hash+Clone+Default + 'static, V: ExchangeData+Hash+Default, F: Fn(&P)->K+Clone+'static, diff --git a/interactive/src/plan/mod.rs b/interactive/src/plan/mod.rs index deff18a4a..31d99041d 100644 --- a/interactive/src/plan/mod.rs +++ b/interactive/src/plan/mod.rs @@ -169,7 +169,11 @@ impl Render for Plan { input_arrangement }; - let output = 
input.reduce_abelian::<_,KeyBuilder<_,_,_>,KeySpine<_,_,_>>("Distinct", move |_,_,t| t.push(((), 1))); + let output = input.reduce_abelian::<_,KeyBuilder<_,_,_>,KeySpine<_,_,_>,_>( + "Distinct", + move |_,_,t| t.push(((), 1)), + |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, + ); arrangements.set_unkeyed(&self, &output.trace); output.as_collection(|k,&()| k.clone()) From 4f86ef23e477348920d438ab6901f9913b4c1d8c Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Sun, 5 Apr 2026 09:25:44 -0400 Subject: [PATCH 20/20] WIP caching trace wrapper --- differential-dataflow/src/trace/mod.rs | 9 + .../src/trace/wrappers/cached.rs | 251 ++++++++++++++++++ .../src/trace/wrappers/mod.rs | 1 + 3 files changed, 261 insertions(+) create mode 100644 differential-dataflow/src/trace/wrappers/cached.rs diff --git a/differential-dataflow/src/trace/mod.rs b/differential-dataflow/src/trace/mod.rs index 582a6da62..3f6ee03ab 100644 --- a/differential-dataflow/src/trace/mod.rs +++ b/differential-dataflow/src/trace/mod.rs @@ -108,6 +108,15 @@ pub trait TraceReader : LayoutExt { /// should allow `upper` such as `&[]` as used by `self.cursor()`, though it is difficult to imagine other uses. fn cursor_through(&mut self, upper: AntichainRef) -> Option<(Self::Cursor, Self::Storage)>; + /// Acquires a cursor for a known set of keys. + /// + /// The default implementation ignores the keys and returns a full cursor via `cursor_through`. + /// Implementations may override this to optimize for the specified key set (e.g., serving + /// from a cache or performing bulk key lookups). + fn cursor_through_keyed(&mut self, upper: AntichainRef, _keys: &Self::KeyContainer) -> Option<(Self::Cursor, Self::Storage)> { + self.cursor_through(upper) + } + /// Advances the frontier that constrains logical compaction. /// /// Logical compaction is the ability of the trace to change the times of the updates it contains. 
diff --git a/differential-dataflow/src/trace/wrappers/cached.rs b/differential-dataflow/src/trace/wrappers/cached.rs new file mode 100644 index 000000000..e9f3bbf22 --- /dev/null +++ b/differential-dataflow/src/trace/wrappers/cached.rs @@ -0,0 +1,251 @@ +//! A spine wrapper that caches keys on demand. +//! +//! `CachedSpine` wraps a `Spine` and maintains a hot cache of batches +//! for keys that have been explicitly requested. Cache misses are +//! loaded from the spine using the provided builder and push closure. +//! +//! Since it wraps `Spine` directly, `Storage = Vec` and +//! `Cursor = CursorList` are known concretely, avoiding +//! opaque associated type issues. + +use std::cell::RefCell; +use std::marker::PhantomData; +use std::rc::Rc; + +use timely::progress::{Antichain, frontier::AntichainRef}; + +use crate::trace::{Batch, Builder, Description, TraceReader}; +use crate::trace::cursor::{Cursor, CursorList}; +use crate::trace::implementations::WithLayout; +use crate::trace::implementations::containers::BatchContainer; +use crate::trace::implementations::spine_fueled::Spine; +use crate::trace::implementations::Layout; + +/// Key container type for a batch's layout. +type KeyCon = <::Layout as Layout>::KeyContainer; + +/// A spine with a hot cache for frequently-accessed keys. +/// +/// - `B`: batch type +/// - `Bu`: builder type for constructing cache batches +/// - `P`: closure that loads a key's data from a cursor into `Bu::Input` +pub struct CachedSpine +where + B: Batch + Clone + 'static, +{ + /// The backing spine, containing all data. + spine: Spine, + /// Cached batches for loaded keys. + hot: Vec, + /// The set of keys currently in the cache, sorted. + cached_keys: KeyCon, + /// The frontier through which cached data is complete. + /// Cached keys have data for `[0, hot_physical)`. + hot_physical: Antichain<::Time>, + /// Closure to push cursor data into a builder's input container. + /// Behind Rc so clones share it. 
+ push: Rc>, + _marker: PhantomData, +} + +impl CachedSpine +where + B: Batch + Clone + 'static, +{ + /// Wraps a spine with caching support. + pub fn new(spine: Spine, push: P) -> Self { + CachedSpine { + spine, + hot: Vec::new(), + cached_keys: KeyCon::::with_capacity(0), + hot_physical: Antichain::from_elem(<::Time as timely::progress::Timestamp>::minimum()), + push: Rc::new(RefCell::new(push)), + _marker: PhantomData, + } + } + + /// Returns true if the given key is in the cache. + pub fn contains_key(&self, key: as BatchContainer>::ReadItem<'_>) -> bool { + (0..self.cached_keys.len()).any(|i| self.cached_keys.index(i) == as BatchContainer>::reborrow(key)) + } + + /// Installs a batch of cached key data. + pub fn insert_hot(&mut self, batch: B) { + if !batch.is_empty() { + self.hot.push(batch); + } + } + + /// Clears all cached data and the key set. + pub fn clear_hot(&mut self) { + self.hot.clear(); + self.cached_keys.clear(); + self.hot_physical = Antichain::from_elem(<::Time as timely::progress::Timestamp>::minimum()); + } + + /// The number of cached keys. + pub fn cached_key_count(&self) -> usize { + self.cached_keys.len() + } + + /// The number of hot batches. + pub fn hot_batch_count(&self) -> usize { + self.hot.len() + } + + /// Returns a reference to the inner spine. + pub fn spine(&self) -> &Spine { &self.spine } + + /// Returns a mutable reference to the inner spine. + pub fn spine_mut(&mut self) -> &mut Spine { &mut self.spine } +} + +// CachedSpine is not Clone — it wraps a mutable Spine. +// Use TraceAgent> for shared access. + +impl WithLayout for CachedSpine +where + B: Batch + Clone + 'static, +{ + type Layout = B::Layout; +} + +impl TraceReader for CachedSpine +where + B: Batch + Clone + 'static, + Bu: Builder