igerber · igerber · Jun 26, 2026 · Jun 26, 2026
diff --git a/TODO.md b/TODO.md
@@ -92,7 +92,6 @@ Deferred items from PR reviews that were not addressed before merge.
 | Survey-weighted Silverman bandwidth in EfficientDiD conditional Omega* — `_silverman_bandwidth()` uses unweighted mean/std for bandwidth selection; survey-weighted statistics would better reflect the population distribution but is a second-order refinement | `efficient_did_covariates.py` | — | Low |
 | Survey sandwich SE is not exactly invariant to zero-weight (subpopulation / padded) rows: the shared `_compute_stratified_psu_meat` finite-sample correction counts zero-weight units as PSUs (an `n_psu/(n_psu-1)`-style factor), so adding zero-weight rows shifts the SE by a second-order amount (~2e-4 relative in the EfficientDiD e2e). The point estimate is exactly invariant and the weighted scores of zero-weight rows are already zero — only the DOF correction's PSU count includes them. Cross-cutting across all survey-enabled estimators; fix by counting only positive-weight PSUs in the correction. | `survey.py` (`_compute_stratified_psu_meat`) | PR-B follow-up | Low |
 | ImputationDiD: leave-one-out (LOO) conservative-variance refinement (BJS 2024 Supplementary Appendix A.9) not implemented — a finite-sample improvement to the auxiliary-model residuals that reduces overfitting of `tau_tilde_g` to `epsilon`. The asymptotic Theorem-3 variance is implemented and matches R `didimputation` (which also omits LOO by default). | `imputation.py` | imputation-validation follow-up | Low |
-| TROP: extend Wave 4's `_setup_trop_data` helper to also cover the duplicated bootstrap resampling loop in `_bootstrap_variance` / `_bootstrap_variance_global` (~40 LoC dedup; mirrors the data-setup helper pattern with a `fit_callable` parameter for the per-draw refit step). | `trop_local.py`, `trop_global.py` | follow-up | Low |
 | StaggeredTripleDifference R cross-validation: CSV fixtures not committed (gitignored); tests skip without local R + triplediff. Commit fixtures or generate deterministically. | `tests/test_methodology_staggered_triple_diff.py` | #245 | Medium |
 | StaggeredTripleDifference R parity: benchmark only tests no-covariate path (xformla=~1). Add covariate-adjusted scenarios and aggregation SE parity assertions. | `benchmarks/R/benchmark_staggered_triplediff.R` | #245 | Medium |
 | StaggeredTripleDifference: per-cohort group-effect SEs include WIF (conservative vs R's wif=NULL). Documented in REGISTRY. Could override mixin for exact R match. | `staggered_triple_diff.py` | #245 | Low |

diff --git a/diff_diff/trop_global.py b/diff_diff/trop_global.py
@@ -28,7 +28,11 @@
     stratified_bootstrap_indices,
     warn_bootstrap_failure_rate,
 )
-from diff_diff.trop_local import _setup_trop_data, _soft_threshold_svd
+from diff_diff.trop_local import (
+    _run_trop_bootstrap_loop,
+    _setup_trop_data,
+    _soft_threshold_svd,
+)
 from diff_diff.trop_results import TROPResults
 from diff_diff.utils import safe_inference, warn_if_not_converged
 
@@ -978,43 +982,28 @@ def _bootstrap_variance_global(
                 )
 
         # Python fallback: consume the same indices the Rust branch would have used.
-        bootstrap_estimates_list: List[float] = []
-        nonconverg_tracker: List[int] = []
-
-        for b in range(self.n_bootstrap):
-            sampled_control = (
-                control_units[control_idx[b]] if n_control_units > 0 else np.array([], dtype=object)
-            )
-            sampled_treated = (
-                treated_units[treated_idx[b]] if n_treated_units > 0 else np.array([], dtype=object)
-            )
-            sampled_units = np.concatenate([sampled_control, sampled_treated])
-
-            # Create bootstrap sample
-            boot_data = pd.concat(
-                [
-                    data[data[unit] == u].assign(**{unit: f"{u}_{idx}"})
-                    for idx, u in enumerate(sampled_units)
-                ],
-                ignore_index=True,
-            )
-
-            try:
-                tau = self._fit_global_with_fixed_lambda(
-                    boot_data,
-                    outcome,
-                    treatment,
-                    unit,
-                    time,
-                    optimal_lambda,
-                    treated_periods,
-                    survey_design=survey_design,
-                    _nonconvergence_tracker=nonconverg_tracker,
-                )
-                if np.isfinite(tau):
-                    bootstrap_estimates_list.append(tau)
-            except (ValueError, np.linalg.LinAlgError, KeyError):
-                continue
+        bootstrap_estimates_list, nonconverg_tracker = _run_trop_bootstrap_loop(
+            data,
+            unit,
+            control_units,
+            treated_units,
+            control_idx,
+            treated_idx,
+            n_control_units,
+            n_treated_units,
+            self.n_bootstrap,
+            lambda boot_data, tracker: self._fit_global_with_fixed_lambda(
+                boot_data,
+                outcome,
+                treatment,
+                unit,
+                time,
+                optimal_lambda,
+                treated_periods,
+                survey_design=survey_design,
+                _nonconvergence_tracker=tracker,
+            ),
+        )
 
         bootstrap_estimates = np.array(bootstrap_estimates_list)
 

diff --git a/diff_diff/trop_local.py b/diff_diff/trop_local.py
@@ -12,7 +12,7 @@
 
 import logging
 import warnings
-from typing import List, Optional, Tuple
+from typing import Callable, List, Optional, Tuple
 
 import numpy as np
 import pandas as pd
@@ -194,6 +194,66 @@ def _setup_trop_data(data, outcome, treatment, unit, time, resolved_survey, surv
     }
 
 
+def _run_trop_bootstrap_loop(
+    data: pd.DataFrame,
+    unit: str,
+    control_units: np.ndarray,
+    treated_units: np.ndarray,
+    control_idx: np.ndarray,
+    treated_idx: np.ndarray,
+    n_control_units: int,
+    n_treated_units: int,
+    n_bootstrap: int,
+    fit_callable: Callable[[pd.DataFrame, List[int]], float],
+) -> Tuple[List[float], List[int]]:
+    """Shared per-draw resample-and-refit loop for the TROP pairs bootstrap.
+
+    Used by ``TROP._bootstrap_variance`` (local) and ``TROP._bootstrap_variance_global``,
+    whose Python-fallback loops matched apart from the refit call and the empty-stratum
+    placeholder dtype (the global copy used ``object``; this helper uses the unit array's
+    own dtype). RNG-free: ``control_idx`` / ``treated_idx`` are pre-generated by the
+    caller via :func:`stratified_bootstrap_indices`, so the Rust and Python paths consume
+    the identical draw sequence. ``fit_callable(boot_data, nonconverg_tracker) -> float``
+    performs the fixed-lambda refit. A draw is dropped when the refit raises
+    ``ValueError``, ``np.linalg.LinAlgError`` or ``KeyError``, or returns a non-finite
+    value. Returns the kept estimates and the shared non-convergence tracker; warnings
+    and the SE computation remain with the caller.
+    """
+    bootstrap_estimates_list: List[float] = []
+    nonconverg_tracker: List[int] = []
+
+    for b in range(n_bootstrap):
+        sampled_control = (
+            control_units[control_idx[b]]
+            if n_control_units > 0
+            else np.array([], dtype=control_units.dtype)
+        )
+        sampled_treated = (
+            treated_units[treated_idx[b]]
+            if n_treated_units > 0
+            else np.array([], dtype=treated_units.dtype)
+        )
+        sampled_units = np.concatenate([sampled_control, sampled_treated])
+
+        # Create bootstrap sample with unique unit IDs
+        boot_data = pd.concat(
+            [
+                data[data[unit] == u].assign(**{unit: f"{u}_{idx}"})
+                for idx, u in enumerate(sampled_units)
+            ],
+            ignore_index=True,
+        )
+
+        try:
+            est = fit_callable(boot_data, nonconverg_tracker)
+            if np.isfinite(est):
+                bootstrap_estimates_list.append(est)
+        except (ValueError, np.linalg.LinAlgError, KeyError):
+            continue
+
+    return bootstrap_estimates_list, nonconverg_tracker
+
+
 # Module-level convergence tolerance for SVD singular value truncation.
 # Singular values below this threshold after soft-thresholding are treated
 # as zero to improve numerical stability.
@@ -1119,47 +1179,28 @@ def _bootstrap_variance(
                 )
 
         # Python fallback: consume the same indices the Rust branch would have used.
-        bootstrap_estimates_list = []
-        nonconverg_tracker: List[int] = []
-
-        for b in range(self.n_bootstrap):
-            sampled_control = (
-                control_units[control_idx[b]]
-                if n_control_units > 0
-                else np.array([], dtype=control_units.dtype)
-            )
-            sampled_treated = (
-                treated_units[treated_idx[b]]
-                if n_treated_units > 0
-                else np.array([], dtype=treated_units.dtype)
-            )
-            sampled_units = np.concatenate([sampled_control, sampled_treated])
-
-            # Create bootstrap sample with unique unit IDs
-            boot_data = pd.concat(
-                [
-                    data[data[unit] == u].assign(**{unit: f"{u}_{idx}"})
-                    for idx, u in enumerate(sampled_units)
-                ],
-                ignore_index=True,
-            )
-
-            try:
-                # Fit with fixed lambda (skip LOOCV for speed)
-                att = self._fit_with_fixed_lambda(
-                    boot_data,
-                    outcome,
-                    treatment,
-                    unit,
-                    time,
-                    optimal_lambda,
-                    survey_design=survey_design,
-                    _nonconvergence_tracker=nonconverg_tracker,
-                )
-                if np.isfinite(att):
-                    bootstrap_estimates_list.append(att)
-            except (ValueError, np.linalg.LinAlgError, KeyError):
-                continue
+        bootstrap_estimates_list, nonconverg_tracker = _run_trop_bootstrap_loop(
+            data,
+            unit,
+            control_units,
+            treated_units,
+            control_idx,
+            treated_idx,
+            n_control_units,
+            n_treated_units,
+            self.n_bootstrap,
+            # Fit with fixed lambda (skip LOOCV for speed)
+            lambda boot_data, tracker: self._fit_with_fixed_lambda(
+                boot_data,
+                outcome,
+                treatment,
+                unit,
+                time,
+                optimal_lambda,
+                survey_design=survey_design,
+                _nonconvergence_tracker=tracker,
+            ),
+        )
 
         bootstrap_estimates = np.array(bootstrap_estimates_list)