From cb58478381d627b9a2c776380f5325b922a7d0cc Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 08:24:53 -0400 Subject: [PATCH 01/12] Add HonestDiD integration, summary() Phase 3 blocks, trends_nonparam regression test - Add ChaisemartinDHaultfoeuilleResults extraction to _extract_event_study_params() in honest_did.py (maps placebo horizons to pre-periods, event study to post-periods) - Lift honest_did gate in fit(), add early L_max>=1 validation, post-computation compute_honest_did() call with fallback warning on solver failures - Add 5 new summary() sections: covariate diagnostics, cumulated level effects, heterogeneity test, design-2 descriptive, HonestDiD sensitivity bounds - Update honest_did_results field type annotation and docstring - 17 new tests: 7 HonestDiD integration, 5 summary rendering, 3 honest_did.py extraction/integration, 1 trends_nonparam unequal-support, 1 gate update - REGISTRY.md: HonestDiD note, checklist update; ROADMAP: 3g shipped Co-Authored-By: Claude Opus 4.6 (1M context) --- ROADMAP.md | 2 +- diff_diff/chaisemartin_dhaultfoeuille.py | 36 ++- .../chaisemartin_dhaultfoeuille_results.py | 137 ++++++++- diff_diff/honest_did.py | 131 ++++++++- docs/methodology/REGISTRY.md | 5 +- tests/test_chaisemartin_dhaultfoeuille.py | 273 +++++++++++++++++- tests/test_honest_did.py | 65 +++++ 7 files changed, 634 insertions(+), 15 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index db56947c..c24b33a0 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -173,7 +173,7 @@ The dynamic companion paper subsumes the AER 2020 paper: `DID_1 = DID_M`. The si | **3d.** Heterogeneity testing `beta^{het}_l` (Web Appendix Section 1.5) | LOW | Shipped (PR B) | | **3e.** Design-2 switch-in / switch-out separation (Web Appendix Section 1.6) | LOW | Shipped (PR B; convenience wrapper) | | **3f.** Non-binary treatment support (the formula already handles it; this row is documentation + tests) | MEDIUM | Shipped (PR #300; also ships placebo SE, L_max=1 per-group path, parity SE assertions) | -| **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Not started | +| **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Shipped (PR C) | | **3h.** **Single comprehensive tutorial notebook** covering all three phases — Favara-Imbs (2015) banking deregulation replication as the headline application, with comparison plots vs LP / TWFE | HIGH | Not started | | **3i.** Parity tests vs `did_multiplegt_dyn` for covariate and extension specifications | HIGH | Shipped (PR B; controls, trends_lin, combined) | diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 9959ad2d..9dfa0627 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -946,6 +946,13 @@ def fit( f"is {n_post_baseline}." ) + if honest_did and L_max is None: + raise ValueError( + "honest_did=True requires L_max >= 1 for multi-horizon placebos. " + "Set L_max to compute DID^{pl}_l placebos that HonestDiD uses as " + "pre-period coefficients." + ) + # Pivot to (group x time) matrices for vectorized computations d_pivot = cell.pivot(index=group, columns=time, values="d_gt").reindex( index=all_groups, columns=all_periods @@ -2394,6 +2401,27 @@ def fit( _estimator_ref=self, ) + # ------------------------------------------------------------------ + # HonestDiD integration (when honest_did=True) + # ------------------------------------------------------------------ + if honest_did and results.placebo_event_study: + try: + from diff_diff.honest_did import compute_honest_did + + results.honest_did_results = compute_honest_did( + results, method="relative_magnitude", M=1.0 + ) + except (ValueError, np.linalg.LinAlgError) as exc: + warnings.warn( + f"HonestDiD computation failed: {exc}. " + f"results.honest_did_results will be None. " + f"You can retry with compute_honest_did(results, ...) " + f"using different parameters.", + UserWarning, + stacklevel=2, + ) + results.honest_did_results = None + self.results_ = results self.is_fitted_ = True return results @@ -2432,12 +2460,8 @@ def _check_forward_compat_gates( # Validation (L_max >= 1, n_periods >= 3 required) is in fit(). # trends_nonparam gate lifted - state-set trends implemented. # Validation (L_max >= 1, column exists, time-invariant) is in fit(). - if honest_did: - raise NotImplementedError( - "HonestDiD integration for dCDH is reserved for Phase 3, applied to " - "the placebo DID^{pl}_l output. Phase 1 provides only the placebo " - "point estimate via results.placebo_effect. See ROADMAP.md Phase 3." - ) + # honest_did gate lifted - integration implemented. + # Validation (L_max >= 1 required) is in fit() after L_max detection. def _drop_crossing_cells( diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 9c85b438..1a363e24 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -18,8 +18,13 @@ NBER Working Paper 29873. """ +from __future__ import annotations + from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple + +if TYPE_CHECKING: + from diff_diff.honest_did import HonestDiDResults import numpy as np import pandas as pd @@ -331,8 +336,11 @@ class ChaisemartinDHaultfoeuilleResults: design2_effects : dict, optional Design-2 switch-in/switch-out descriptive summary. Populated when ``design2=True``. - honest_did_results : Any, optional - Reserved for HonestDiD integration on placebos. + honest_did_results : HonestDiDResults, optional + HonestDiD sensitivity analysis bounds (Rambachan & Roth 2023). + Populated when ``honest_did=True`` in ``fit()`` or by calling + ``compute_honest_did(results)`` post-hoc. Contains identified + set bounds, robust confidence intervals, and breakdown analysis. survey_metadata : Any, optional Always ``None`` in Phase 1 — survey integration is deferred to a separate effort after all phases ship. @@ -415,7 +423,7 @@ class ChaisemartinDHaultfoeuilleResults: linear_trends_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False) heterogeneity_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False) design2_effects: Optional[Dict[str, Any]] = field(default=None, repr=False) - honest_did_results: Optional[Any] = field(default=None, repr=False) + honest_did_results: Optional["HonestDiDResults"] = field(default=None, repr=False) # --- Repr-suppressed metadata --- survey_metadata: Optional[Any] = field(default=None, repr=False) @@ -798,6 +806,127 @@ def summary(self, alpha: Optional[float] = None) -> str: lines.extend([""]) + # --- Covariate adjustment diagnostics (DID^X) --- + if self.covariate_residuals is not None: + cov_df = self.covariate_residuals + control_names = sorted(cov_df["covariate"].unique()) + n_baselines = cov_df["baseline_treatment"].nunique() + failed = int((cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum()) + lines.extend( + [ + thin, + "Covariate Adjustment (DID^X) Diagnostics".center(width), + thin, + f"{'Controls:':<35} {', '.join(control_names):>10}", + f"{'Baselines residualized:':<35} {n_baselines:>10}", + f"{'Failed strata:':<35} {failed:>10}", + thin, + "", + ] + ) + + # --- Linear trends cumulated level effects --- + if self.linear_trends_effects is not None: + lines.extend( + [ + thin, + "Cumulated Level Effects (DID^{fd}, trends_linear)".center(width), + thin, + header_row, + thin, + ] + ) + for l_h in sorted(self.linear_trends_effects.keys()): + entry = self.linear_trends_effects[l_h] + lines.append( + _format_inference_row( + f"Level_{l_h}", + entry["effect"], + entry["se"], + entry["t_stat"], + entry["p_value"], + ) + ) + lines.extend([thin, ""]) + + # --- Heterogeneity test --- + if self.heterogeneity_effects is not None: + lines.extend( + [ + thin, + "Heterogeneity Test (Section 1.5, partial)".center(width), + thin, + f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} " + f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}", + thin, + ] + ) + for l_h in sorted(self.heterogeneity_effects.keys()): + entry = self.heterogeneity_effects[l_h] + lines.append( + _format_inference_row( + f"l={l_h}", + entry["beta"], + entry["se"], + entry["t_stat"], + entry["p_value"], + ) + ) + lines.extend( + [ + thin, + "Note: Post-treatment regressions only (no placebo/joint test).", + "", + ] + ) + + # --- Design-2 switch-in / switch-out --- + if self.design2_effects is not None: + d2 = self.design2_effects + si = d2.get("switch_in", {}) + so = d2.get("switch_out", {}) + lines.extend( + [ + thin, + "Design-2: Switch-In / Switch-Out (Section 1.6)".center(width), + thin, + f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}", + f"{'Switch-in effect (mean):':<35} " + f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}" + f" (N={si.get('n_groups', 0)})", + f"{'Switch-out effect (mean):':<35} " + f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}" + f" (N={so.get('n_groups', 0)})", + thin, + "", + ] + ) + + # --- HonestDiD sensitivity --- + if self.honest_did_results is not None: + hd = self.honest_did_results + method_label = hd.method.replace("_", " ").title() + m_val = hd.M + sig_label = "Yes" if hd.is_significant else "No" + conf_pct = int((1 - hd.alpha) * 100) + lines.extend( + [ + thin, + "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width), + thin, + f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})", + f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}", + f"{'Identified set:':<35} " + f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]", + f"{'Robust ' + str(conf_pct) + '% CI:':<35} " + f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]", + f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} " + f"{sig_label:>10}", + thin, + "", + ] + ) + # --- TWFE diagnostic --- if self.twfe_beta_fe is not None: lines.extend( diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index 02f84a83..e7de6a3c 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -817,9 +817,138 @@ def _extract_event_study_params( except ImportError: pass + # Try ChaisemartinDHaultfoeuilleResults (dCDH estimator) + try: + from diff_diff.chaisemartin_dhaultfoeuille_results import ( + ChaisemartinDHaultfoeuilleResults, + ) + + if isinstance(results, ChaisemartinDHaultfoeuilleResults): + if results.placebo_event_study is None: + raise ValueError( + "ChaisemartinDHaultfoeuilleResults must have placebo_event_study " + "for HonestDiD. Re-run ChaisemartinDHaultfoeuille.fit() with " + "L_max >= 1 to compute multi-horizon placebos." + ) + if results.event_study_effects is None: + raise ValueError( + "ChaisemartinDHaultfoeuilleResults must have event_study_effects " + "for HonestDiD." + ) + + # Filter for finite SEs in both surfaces + placebo_finite = { + h: data + for h, data in results.placebo_event_study.items() + if np.isfinite(data.get("se", np.nan)) + } + effects_finite = { + h: data + for h, data in results.event_study_effects.items() + if np.isfinite(data.get("se", np.nan)) + } + + pre_times = sorted(placebo_finite.keys()) # -P, ..., -1 + post_times = sorted(effects_finite.keys()) # 1, ..., L_max + + if len(pre_times) == 0: + raise ValueError( + "No placebo horizons with finite SEs found in dCDH results. " + "HonestDiD requires at least one identified pre-period " + "coefficient." + ) + if len(post_times) == 0: + raise ValueError( + "No event study horizons with finite SEs found in dCDH results. " + "HonestDiD requires at least one post-period coefficient." + ) + + # Consecutiveness check: more permissive than CS because + # trends_nonparam support-trimming can create legitimate gaps. + # Filter to the largest consecutive block spanning the -1/+1 + # boundary; warn about dropped horizons. + def _largest_consecutive_block(times, boundary_val): + """Find largest consecutive block containing boundary_val.""" + if not times: + return [] + if boundary_val not in times: + # No boundary value - take the block closest to it + return times + # Expand outward from boundary_val + block = [boundary_val] + idx = times.index(boundary_val) + # Expand left + for i in range(idx - 1, -1, -1): + if times[i] == block[0] - 1: + block.insert(0, times[i]) + else: + break + # Expand right + for i in range(idx + 1, len(times)): + if times[i] == block[-1] + 1: + block.append(times[i]) + else: + break + return block + + pre_consec = _largest_consecutive_block(pre_times, -1) + post_consec = _largest_consecutive_block(post_times, 1) + + dropped_pre = set(pre_times) - set(pre_consec) + dropped_post = set(post_times) - set(post_consec) + + if dropped_pre or dropped_post: + import warnings + + dropped = sorted(dropped_pre | dropped_post) + warnings.warn( + f"HonestDiD requires a consecutive event-time grid. " + f"Dropping non-consecutive horizons {dropped} from dCDH " + f"results. This can happen when trends_nonparam " + f"support-trimming removes horizons. Retained: " + f"pre={pre_consec}, post={post_consec}.", + UserWarning, + stacklevel=3, + ) + pre_times = pre_consec + post_times = post_consec + + if len(pre_times) == 0 or len(post_times) == 0: + raise ValueError( + "After filtering for consecutive horizons, no pre- or " + "post-periods remain. Cannot compute HonestDiD bounds." + ) + + # Build beta_hat and sigma (diagonal - no full VCV for dCDH) + all_times = pre_times + post_times + effects = [] + ses = [] + for h in pre_times: + effects.append(placebo_finite[h]["effect"]) + ses.append(placebo_finite[h]["se"]) + for h in post_times: + effects.append(effects_finite[h]["effect"]) + ses.append(effects_finite[h]["se"]) + + beta_hat = np.array(effects) + sigma = np.diag(np.array(ses) ** 2) + + return ( + beta_hat, + sigma, + len(pre_times), + len(post_times), + pre_times, + post_times, + None, # df_survey: dCDH has no survey support + ) + except ImportError: + pass + raise TypeError( f"Unsupported results type: {type(results)}. " - "Expected MultiPeriodDiDResults or CallawaySantAnnaResults." + "Expected MultiPeriodDiDResults, CallawaySantAnnaResults, " + "or ChaisemartinDHaultfoeuilleResults." ) diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 645d577c..9d39725c 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -617,6 +617,8 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. +- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning. + - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. **Reference implementation(s):** @@ -625,7 +627,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param **Requirements checklist:** - [x] Single class `ChaisemartinDHaultfoeuille` (alias `DCDH`); not a family -- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for remaining parameters (`aggregate`, `honest_did`, `survey_design`); Phase 3 gates lifted for `controls`, `trends_linear`, `trends_nonparam` +- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for remaining parameters (`aggregate`, `survey_design`); Phase 3 gates lifted for `controls`, `trends_linear`, `trends_nonparam`, `honest_did` - [x] `DID_M` point estimate with cohort-recentered analytical SE - [x] Joiners-only `DID_+` and leavers-only `DID_-` decompositions with their own inference - [x] Single-lag placebo `DID_M^pl` (point estimate; SE deferred to Phase 2) @@ -645,6 +647,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - [x] State-set-specific trends via control-pool restriction (Web Appendix Section 1.4) - [x] Heterogeneity testing via saturated OLS (Web Appendix Section 1.5, Lemma 7) - [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6) +- [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface --- diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index e8de230b..29101d20 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -374,8 +374,8 @@ def test_trends_nonparam_requires_lmax(self, data): trends_nonparam="state", ) - def test_honest_did_raises_not_implemented(self, data): - with pytest.raises(NotImplementedError, match="Phase 3"): + def test_honest_did_requires_lmax(self, data): + with pytest.raises(ValueError, match="honest_did=True requires L_max"): self._est().fit( data, outcome="outcome", @@ -2703,6 +2703,46 @@ def test_nonparam_with_covariates(self): assert np.isfinite(r.overall_att) assert r.covariate_residuals is not None + def test_trends_nonparam_unequal_support(self): + """Unequal switcher/control support across state sets. + + State A: 3 switchers + 5 controls -> finite effects. + State B: 2 switchers + 0 controls -> empty control pool, groups + excluded at horizons with empty pools (Assumption 14 support-trimming). + """ + rng = np.random.RandomState(99) + rows = [] + n_periods = 6 + # State A: groups 0-7 (0-2 switch at t=3, 3-7 never switch) + for g in range(8): + switches = g < 3 + for t in range(n_periods): + d = 1 if (switches and t >= 3) else 0 + y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "state": "A", + }) + # State B: groups 8-9 (both switch at t=3, NO controls in this set) + for g in range(8, 10): + for t in range(n_periods): + d = 1 if t >= 3 else 0 + y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "state": "B", + }) + df = pd.DataFrame(rows) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, trends_nonparam="state", + ) + # Should not error; State A groups contribute, State B excluded + assert np.isfinite(r.overall_att) + assert r.event_study_effects is not None + class TestHeterogeneityTesting: """Heterogeneity testing beta^{het}_l (ROADMAP item 3d).""" @@ -3194,3 +3234,232 @@ def test_normalized_effects_general_formula(self): # For dose 0->2: denominator at l=1 should be ~2 (not 1) denom = r.normalized_effects[1]["denominator"] assert denom > 1.5, f"Denominator should reflect dose=2, got {denom}" + + +# ============================================================================= +# HonestDiD Integration +# ============================================================================= + + +class TestHonestDiDIntegration: + """HonestDiD (Rambachan-Roth 2023) integration on dCDH placebos.""" + + @staticmethod + def _make_data(n_groups=40, n_periods=6, seed=42): + return generate_reversible_did_data( + n_groups=n_groups, n_periods=n_periods, seed=seed + ) + + def test_honest_did_basic(self): + """honest_did=True with L_max>=2 produces HonestDiDResults.""" + from diff_diff.honest_did import HonestDiDResults + + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + assert r.honest_did_results is not None + assert isinstance(r.honest_did_results, HonestDiDResults) + assert np.isfinite(r.honest_did_results.ci_lb) + assert np.isfinite(r.honest_did_results.ci_ub) + + def test_honest_did_requires_lmax(self): + """honest_did=True with L_max=None raises ValueError.""" + df = self._make_data() + with pytest.raises(ValueError, match="honest_did=True requires L_max"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + honest_did=True, + ) + + def test_honest_did_standalone(self): + """compute_honest_did() on dCDH results matches honest_did=True.""" + from diff_diff.honest_did import compute_honest_did + + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r_auto = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + r_plain = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, + ) + r_manual = compute_honest_did( + r_plain, method="relative_magnitude", M=1.0 + ) + # Deterministic - bitwise identical + np.testing.assert_allclose( + r_auto.honest_did_results.ci_lb, r_manual.ci_lb, rtol=0 + ) + np.testing.assert_allclose( + r_auto.honest_did_results.ci_ub, r_manual.ci_ub, rtol=0 + ) + + def test_honest_did_with_controls(self): + """HonestDiD runs on DID^X placebos.""" + df = self._make_data(n_periods=6) + df["X1"] = np.random.RandomState(77).normal(0, 1, len(df)) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + controls=["X1"], L_max=2, honest_did=True, + ) + assert r.honest_did_results is not None + assert np.isfinite(r.honest_did_results.ci_lb) + + def test_honest_did_with_trends_linear(self): + """HonestDiD on second-differenced DID^{fd} estimand.""" + df = self._make_data(n_periods=7) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + trends_linear=True, L_max=2, honest_did=True, + ) + # Bounds should be computed on second-differenced estimand + assert r.honest_did_results is not None + assert np.isfinite(r.honest_did_results.ci_lb) + + def test_honest_did_sensitivity(self): + """sensitivity_analysis() on dCDH results.""" + from diff_diff.honest_did import HonestDiD + + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, + ) + honest = HonestDiD(method="relative_magnitude") + sens = honest.sensitivity_analysis( + r, M_grid=list(np.linspace(0, 2, 5)) + ) + assert sens.breakdown_M is not None or len(sens.bounds) == 5 + + def test_honest_did_smoothness(self): + """Smoothness method gives different bounds than RM.""" + from diff_diff.honest_did import compute_honest_did + + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, + ) + rm_bounds = compute_honest_did(r, method="relative_magnitude", M=1.0) + sd_bounds = compute_honest_did(r, method="smoothness", M=0.5) + # Different methods should generally give different bounds + assert rm_bounds.ci_lb != sd_bounds.ci_lb or rm_bounds.ci_ub != sd_bounds.ci_ub + + +# ============================================================================= +# Summary Phase 3 Rendering +# ============================================================================= + + +class TestSummaryPhase3: + """Verify summary() renders Phase 3 result blocks.""" + + @staticmethod + def _make_data(n_groups=40, n_periods=6, seed=42): + return generate_reversible_did_data( + n_groups=n_groups, n_periods=n_periods, seed=seed + ) + + def test_summary_renders_covariate_diagnostics(self): + """Covariate Adjustment section appears in summary().""" + df = self._make_data() + df["X1"] = np.random.RandomState(77).normal(0, 1, len(df)) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + controls=["X1"], L_max=1, + ) + text = r.summary() + assert "Covariate Adjustment" in text + + def test_summary_renders_linear_trends(self): + """Cumulated Level Effects section appears in summary().""" + df = self._make_data(n_periods=7) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + trends_linear=True, L_max=2, + ) + text = r.summary() + assert "Cumulated Level Effects" in text + + def test_summary_renders_heterogeneity(self): + """Heterogeneity Test section appears in summary().""" + rng = np.random.RandomState(42) + rows = [] + for g in range(40): + x_g = 1 if g < 20 else 0 + switches = g < 30 + for t in range(6): + d = 1 if (switches and t >= 3) else 0 + y = 10 + 2.0 * t + 5.0 * d + 3.0 * x_g * d + rng.normal(0, 0.5) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "het_x": x_g, + }) + df = pd.DataFrame(rows) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=1, heterogeneity="het_x", + ) + text = r.summary() + assert "Heterogeneity Test" in text + + def test_summary_renders_design2(self): + """Design-2 section appears in summary().""" + rng = np.random.RandomState(42) + rows = [] + for g in range(30): + for t in range(8): + if g < 10: + d = 1 if 3 <= t < 6 else 0 # join then leave + elif g < 20: + d = 1 if t >= 3 else 0 # join only + else: + d = 0 # never switch + y = 10 + t + 5.0 * d + rng.normal(0, 0.5) + rows.append({ + "group": g, "period": t, "treatment": d, "outcome": y, + }) + df = pd.DataFrame(rows) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille( + seed=1, drop_larger_lower=False + ).fit( + df, "outcome", "group", "period", "treatment", + L_max=1, design2=True, + ) + text = r.summary() + assert "Design-2" in text + + def test_summary_renders_honest_did(self): + """HonestDiD Sensitivity section appears in summary().""" + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + text = r.summary() + assert "HonestDiD Sensitivity" in text diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py index 74330897..7d7c5b84 100644 --- a/tests/test_honest_did.py +++ b/tests/test_honest_did.py @@ -1333,3 +1333,68 @@ def test_sensitivity_results_has_plot_method(self, mock_multiperiod_results): assert hasattr(sensitivity, "plot") assert callable(sensitivity.plot) + + +# ============================================================================= +# dCDH Integration Tests +# ============================================================================= + + +class TestDCDHIntegration: + """HonestDiD integration with ChaisemartinDHaultfoeuille results.""" + + @staticmethod + def _fit_dcdh(n_groups=40, n_periods=6, seed=42, L_max=2): + import warnings + + from diff_diff import ChaisemartinDHaultfoeuille + from diff_diff.prep import generate_reversible_did_data + + df = generate_reversible_did_data( + n_groups=n_groups, n_periods=n_periods, seed=seed + ) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=L_max, + ) + + def test_dcdh_integration(self): + """compute_honest_did works on dCDH results (mirrors CS pattern).""" + results = self._fit_dcdh() + bounds = compute_honest_did(results, method="relative_magnitude", M=1.0) + assert isinstance(bounds, HonestDiDResults) + assert np.isfinite(bounds.ci_lb) + assert np.isfinite(bounds.ci_ub) + assert bounds.method == "relative_magnitude" + + def test_dcdh_extraction(self): + """_extract_event_study_params returns correct shapes for dCDH.""" + results = self._fit_dcdh() + beta_hat, sigma, n_pre, n_post, pre_t, post_t, df_s = ( + _extract_event_study_params(results) + ) + assert n_pre >= 1 + assert n_post >= 1 + assert beta_hat.shape == (n_pre + n_post,) + assert sigma.shape == (n_pre + n_post, n_pre + n_post) + assert all(t < 0 for t in pre_t) + assert all(t > 0 for t in post_t) + assert df_s is None # dCDH has no survey support + + def test_dcdh_no_placebos_raises(self): + """dCDH results without placebos raise ValueError.""" + import warnings + + from diff_diff import ChaisemartinDHaultfoeuille + from diff_diff.prep import generate_reversible_did_data + + df = generate_reversible_did_data(n_groups=20, n_periods=4, seed=1) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1, placebo=False).fit( + df, "outcome", "group", "period", "treatment", + ) + with pytest.raises(ValueError, match="placebo_event_study"): + compute_honest_did(r) From b732007b3e95e2f93d6afecbce94553519532536 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 08:40:28 -0400 Subject: [PATCH 02/12] Address AI review: runtime warning, docstring, summary refactor, edge-case test - Add UserWarning in dCDH HonestDiD extraction about placebo-based pre-periods - Update REGISTRY.md to explicitly document library extension semantics - Update fit() docstring for honest_did (was "Reserved for Phase 3") - Include exception class name in HonestDiD failure warning - Factor summary() Phase 3 blocks into 5 private helper methods - Add test_dcdh_emits_placebo_warning and test_dcdh_empty_consecutive_block_raises Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 11 +- .../chaisemartin_dhaultfoeuille_results.py | 268 ++++++++++-------- diff_diff/honest_did.py | 14 + docs/methodology/REGISTRY.md | 2 +- tests/test_honest_did.py | 31 ++ 5 files changed, 202 insertions(+), 124 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 9dfa0627..38c9d111 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -538,7 +538,12 @@ def fit( pool to groups in the same set (Web Appendix Section 1.4). Requires ``L_max >= 1`` and time-invariant values per group. honest_did : bool, default=False - **Reserved for Phase 3** (HonestDiD integration on placebos). + Run HonestDiD sensitivity analysis (Rambachan & Roth 2023) on + the placebo + event study surface. Requires ``L_max >= 1``. + Default: relative magnitudes (DeltaRM, Mbar=1.0). Results + stored on ``results.honest_did_results``; ``None`` with a + warning if the solver fails. For custom parameters, call + ``compute_honest_did(results, ...)`` post-hoc instead. heterogeneity : str, optional Column name for a time-invariant covariate to test for heterogeneous effects (Web Appendix Section 1.5, Lemma 7). @@ -2413,8 +2418,8 @@ def fit( ) except (ValueError, np.linalg.LinAlgError) as exc: warnings.warn( - f"HonestDiD computation failed: {exc}. " - f"results.honest_did_results will be None. " + f"HonestDiD computation failed ({type(exc).__name__}): " + f"{exc}. results.honest_did_results will be None. " f"You can retry with compute_honest_did(results, ...) " f"using different parameters.", UserWarning, diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 1a363e24..2c96136b 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -806,126 +806,12 @@ def summary(self, alpha: Optional[float] = None) -> str: lines.extend([""]) - # --- Covariate adjustment diagnostics (DID^X) --- - if self.covariate_residuals is not None: - cov_df = self.covariate_residuals - control_names = sorted(cov_df["covariate"].unique()) - n_baselines = cov_df["baseline_treatment"].nunique() - failed = int((cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum()) - lines.extend( - [ - thin, - "Covariate Adjustment (DID^X) Diagnostics".center(width), - thin, - f"{'Controls:':<35} {', '.join(control_names):>10}", - f"{'Baselines residualized:':<35} {n_baselines:>10}", - f"{'Failed strata:':<35} {failed:>10}", - thin, - "", - ] - ) - - # --- Linear trends cumulated level effects --- - if self.linear_trends_effects is not None: - lines.extend( - [ - thin, - "Cumulated Level Effects (DID^{fd}, trends_linear)".center(width), - thin, - header_row, - thin, - ] - ) - for l_h in sorted(self.linear_trends_effects.keys()): - entry = self.linear_trends_effects[l_h] - lines.append( - _format_inference_row( - f"Level_{l_h}", - entry["effect"], - entry["se"], - entry["t_stat"], - entry["p_value"], - ) - ) - lines.extend([thin, ""]) - - # --- Heterogeneity test --- - if self.heterogeneity_effects is not None: - lines.extend( - [ - thin, - "Heterogeneity Test (Section 1.5, partial)".center(width), - thin, - f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} " - f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}", - thin, - ] - ) - for l_h in sorted(self.heterogeneity_effects.keys()): - entry = self.heterogeneity_effects[l_h] - lines.append( - _format_inference_row( - f"l={l_h}", - entry["beta"], - entry["se"], - entry["t_stat"], - entry["p_value"], - ) - ) - lines.extend( - [ - thin, - "Note: Post-treatment regressions only (no placebo/joint test).", - "", - ] - ) - - # --- Design-2 switch-in / switch-out --- - if self.design2_effects is not None: - d2 = self.design2_effects - si = d2.get("switch_in", {}) - so = d2.get("switch_out", {}) - lines.extend( - [ - thin, - "Design-2: Switch-In / Switch-Out (Section 1.6)".center(width), - thin, - f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}", - f"{'Switch-in effect (mean):':<35} " - f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}" - f" (N={si.get('n_groups', 0)})", - f"{'Switch-out effect (mean):':<35} " - f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}" - f" (N={so.get('n_groups', 0)})", - thin, - "", - ] - ) - - # --- HonestDiD sensitivity --- - if self.honest_did_results is not None: - hd = self.honest_did_results - method_label = hd.method.replace("_", " ").title() - m_val = hd.M - sig_label = "Yes" if hd.is_significant else "No" - conf_pct = int((1 - hd.alpha) * 100) - lines.extend( - [ - thin, - "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width), - thin, - f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})", - f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}", - f"{'Identified set:':<35} " - f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]", - f"{'Robust ' + str(conf_pct) + '% CI:':<35} " - f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]", - f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} " - f"{sig_label:>10}", - thin, - "", - ] - ) + # --- Phase 3 extension blocks (factored into helpers) --- + self._render_covariate_section(lines, width, thin) + self._render_linear_trends_section(lines, width, thin, header_row) + self._render_heterogeneity_section(lines, width, thin) + self._render_design2_section(lines, width, thin) + self._render_honest_did_section(lines, width, thin) # --- TWFE diagnostic --- if self.twfe_beta_fe is not None: @@ -971,6 +857,148 @@ def print_summary(self, alpha: Optional[float] = None) -> None: """Print the formatted summary to stdout.""" print(self.summary(alpha)) + # ------------------------------------------------------------------ + # Summary section helpers (Phase 3 blocks) + # ------------------------------------------------------------------ + + def _render_covariate_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.covariate_residuals is None: + return + cov_df = self.covariate_residuals + control_names = sorted(cov_df["covariate"].unique()) + n_baselines = cov_df["baseline_treatment"].nunique() + failed = int( + (cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum() + ) + lines.extend( + [ + thin, + "Covariate Adjustment (DID^X) Diagnostics".center(width), + thin, + f"{'Controls:':<35} {', '.join(control_names):>10}", + f"{'Baselines residualized:':<35} {n_baselines:>10}", + f"{'Failed strata:':<35} {failed:>10}", + thin, + "", + ] + ) + + def _render_linear_trends_section( + self, lines: List[str], width: int, thin: str, header_row: str + ) -> None: + if self.linear_trends_effects is None: + return + lines.extend( + [ + thin, + "Cumulated Level Effects (DID^{fd}, trends_linear)".center(width), + thin, + header_row, + thin, + ] + ) + for l_h in sorted(self.linear_trends_effects.keys()): + entry = self.linear_trends_effects[l_h] + lines.append( + _format_inference_row( + f"Level_{l_h}", + entry["effect"], + entry["se"], + entry["t_stat"], + entry["p_value"], + ) + ) + lines.extend([thin, ""]) + + def _render_heterogeneity_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.heterogeneity_effects is None: + return + lines.extend( + [ + thin, + "Heterogeneity Test (Section 1.5, partial)".center(width), + thin, + f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} " + f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}", + thin, + ] + ) + for l_h in sorted(self.heterogeneity_effects.keys()): + entry = self.heterogeneity_effects[l_h] + lines.append( + _format_inference_row( + f"l={l_h}", + entry["beta"], + entry["se"], + entry["t_stat"], + entry["p_value"], + ) + ) + lines.extend( + [ + thin, + "Note: Post-treatment regressions only (no placebo/joint test).", + "", + ] + ) + + def _render_design2_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.design2_effects is None: + return + d2 = self.design2_effects + si = d2.get("switch_in", {}) + so = d2.get("switch_out", {}) + lines.extend( + [ + thin, + "Design-2: Switch-In / Switch-Out (Section 1.6)".center(width), + thin, + f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}", + f"{'Switch-in effect (mean):':<35} " + f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}" + f" (N={si.get('n_groups', 0)})", + f"{'Switch-out effect (mean):':<35} " + f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}" + f" (N={so.get('n_groups', 0)})", + thin, + "", + ] + ) + + def _render_honest_did_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.honest_did_results is None: + return + hd = self.honest_did_results + method_label = hd.method.replace("_", " ").title() + m_val = hd.M + sig_label = "Yes" if hd.is_significant else "No" + conf_pct = int((1 - hd.alpha) * 100) + lines.extend( + [ + thin, + "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width), + thin, + f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})", + f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}", + f"{'Identified set:':<35} " + f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]", + f"{'Robust ' + str(conf_pct) + '% CI:':<35} " + f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]", + f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} " + f"{sig_label:>10}", + thin, + "", + ] + ) + # ------------------------------------------------------------------ # to_dataframe # ------------------------------------------------------------------ diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index e7de6a3c..3da0aba4 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -824,6 +824,20 @@ def _extract_event_study_params( ) if isinstance(results, ChaisemartinDHaultfoeuilleResults): + import warnings + + warnings.warn( + "HonestDiD on dCDH results uses DID^{pl}_l placebo " + "estimates as pre-period coefficients, not standard " + "event-study pre-treatment coefficients. The Rambachan-" + "Roth restrictions bound violations of the parallel " + "trends assumption underlying the dCDH placebo " + "estimand. This is a library extension; interpretation " + "differs from canonical event-study HonestDiD.", + UserWarning, + stacklevel=3, + ) + if results.placebo_event_study is None: raise ValueError( "ChaisemartinDHaultfoeuilleResults must have placebo_event_study " diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 9d39725c..1f94064b 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -617,7 +617,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. -- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning. +- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning. - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py index 7d7c5b84..9bd1f753 100644 --- a/tests/test_honest_did.py +++ b/tests/test_honest_did.py @@ -1398,3 +1398,34 @@ def test_dcdh_no_placebos_raises(self): ) with pytest.raises(ValueError, match="placebo_event_study"): compute_honest_did(r) + + def test_dcdh_emits_placebo_warning(self): + """compute_honest_did on dCDH emits warning about placebo-based pre-periods.""" + import warnings + + results = self._fit_dcdh() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + compute_honest_did(results) + placebo_warnings = [ + x for x in w + if "placebo" in str(x.message).lower() + and "pre-period" in str(x.message).lower() + ] + assert len(placebo_warnings) >= 1, ( + "Expected a UserWarning about placebo-based pre-period inputs" + ) + + def test_dcdh_empty_consecutive_block_raises(self): + """ValueError when all placebos have NaN SE (no valid pre-periods).""" + import warnings + + # Fit real results, then corrupt placebo SEs to NaN + results = self._fit_dcdh() + for h in results.placebo_event_study: + results.placebo_event_study[h]["se"] = float("nan") + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(ValueError, match="No placebo horizons with finite SEs"): + compute_honest_did(results) From ee6ad2f9f037c16aa7c36a74f9ac8118d7aff2a0 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 10:35:41 -0400 Subject: [PATCH 03/12] Fix CI review R1: boundary-horizon validation, placebo=False guard, docstrings P0: _largest_consecutive_block now raises ValueError when boundary horizon (-1 or +1) is missing after finite-SE filtering instead of silently returning the full list (would produce wrong HonestDiD bounds). P1: honest_did=True now rejects placebo=False early instead of silently returning honest_did_results=None with no warning. P2: Added 3 regression tests (boundary -1 missing, boundary +1 missing, placebo=False + honest_did). P3: Updated docstrings in honest_did.py (6 locations) and docs/llms.txt to include ChaisemartinDHaultfoeuilleResults alongside MultiPeriodDiD/CS. Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 6 ++++++ diff_diff/honest_did.py | 22 ++++++++++++------- docs/llms.txt | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 9 ++++++++ tests/test_honest_did.py | 26 +++++++++++++++++++++++ 5 files changed, 56 insertions(+), 9 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 38c9d111..3ec12e25 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -957,6 +957,12 @@ def fit( "Set L_max to compute DID^{pl}_l placebos that HonestDiD uses as " "pre-period coefficients." ) + if honest_did and not self.placebo: + raise ValueError( + "honest_did=True requires placebo computation. The estimator was " + "constructed with placebo=False. Use " + "ChaisemartinDHaultfoeuille(placebo=True) (the default)." + ) # Pivot to (group x time) matrices for vectorized computations d_pivot = cell.pivot(index=group, columns=time, values="d_gt").reindex( diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index 3da0aba4..d5aa3e0f 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -559,7 +559,7 @@ def _extract_event_study_params( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Estimation results with event study structure. Returns @@ -886,8 +886,14 @@ def _largest_consecutive_block(times, boundary_val): if not times: return [] if boundary_val not in times: - # No boundary value - take the block closest to it - return times + raise ValueError( + f"HonestDiD requires horizon {boundary_val} in " + f"the dCDH " + f"{'placebo' if boundary_val < 0 else 'event study'}" + f" surface, but it was removed by finite-SE " + f"filtering. Retained horizons: {times}. Ensure " + f"horizon {boundary_val} has a finite SE." + ) # Expand outward from boundary_val block = [boundary_val] idx = times.index(boundary_val) @@ -2197,7 +2203,7 @@ def fit( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. M : float, optional Override the M parameter for this fit. @@ -2515,7 +2521,7 @@ def sensitivity_analysis( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. M_grid : list of float, optional Grid of M values to evaluate. If None, uses default grid @@ -2614,7 +2620,7 @@ def breakdown_value( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. tol : float Tolerance for binary search. @@ -2669,7 +2675,7 @@ def compute_honest_did( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. method : str Type of restriction ("smoothness", "relative_magnitude", "combined"). @@ -2705,7 +2711,7 @@ def sensitivity_plot( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. method : str Type of restriction. diff --git a/docs/llms.txt b/docs/llms.txt index 8f6b5b02..cb59f16a 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -20,7 +20,7 @@ diagnostic steps produces unreliable results. 3. **Test parallel trends** — simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect CS event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds. 4. **Choose estimator** — staggered adoption → CS/SA/BJS (NOT plain TWFE); few treated units → SDiD; factor confounding → TROP; simple 2x2 → DiD. Run `BaconDecomposition` to diagnose TWFE bias. 5. **Estimate** — `estimator.fit(data, ...)`. Always print the cluster count first and choose inference method based on the result (cluster-robust if >= 50 clusters, wild bootstrap if fewer). -6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD/CS only), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs. +6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD, CS, or dCDH), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs. 7. **Heterogeneity** — CS: `aggregate='group'`/`'event_study'`; SA: `results.event_study_effects`/`to_dataframe(level='cohort')`; subgroup re-estimation. 8. **Robustness** — compare 2-3 estimators (CS vs SA vs BJS), MUST report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds. diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 29101d20..d968935c 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3275,6 +3275,15 @@ def test_honest_did_requires_lmax(self): honest_did=True, ) + def test_honest_did_rejects_placebo_false(self): + """honest_did=True with placebo=False raises ValueError.""" + df = self._make_data() + with pytest.raises(ValueError, match="placebo=False"): + ChaisemartinDHaultfoeuille(seed=1, placebo=False).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + def test_honest_did_standalone(self): """compute_honest_did() on dCDH results matches honest_did=True.""" from diff_diff.honest_did import compute_honest_did diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py index 9bd1f753..97213651 100644 --- a/tests/test_honest_did.py +++ b/tests/test_honest_did.py @@ -1429,3 +1429,29 @@ def test_dcdh_empty_consecutive_block_raises(self): warnings.simplefilter("ignore") with pytest.raises(ValueError, match="No placebo horizons with finite SEs"): compute_honest_did(results) + + def test_dcdh_missing_boundary_minus1_raises(self): + """ValueError when horizon -1 has NaN SE (boundary required).""" + import warnings + + results = self._fit_dcdh() + # Corrupt only horizon -1 SE; leave -2 intact + results.placebo_event_study[-1]["se"] = float("nan") + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(ValueError, match="requires horizon -1"): + compute_honest_did(results) + + def test_dcdh_missing_boundary_plus1_raises(self): + """ValueError when horizon +1 has NaN SE (boundary required).""" + import warnings + + results = self._fit_dcdh() + # Corrupt only horizon +1 SE + results.event_study_effects[1]["se"] = float("nan") + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(ValueError, match="requires horizon 1"): + compute_honest_did(results) From ad7698cd11a0291363dea21a8e266fa502f1e509 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 10:49:02 -0400 Subject: [PATCH 04/12] Fix CI review R2: document l_vec target, label in summary, pin in test - Update fit() docstring to specify equal-weight average over post horizons (l_vec=None default) and note R's HonestDiD targets on-impact instead - Update REGISTRY.md with l_vec deviation from R's default - Add "Target: Equal-weight avg over post horizons" line to summary - Add test_honest_did_original_estimate_is_post_average regression Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 9 ++++++--- diff_diff/chaisemartin_dhaultfoeuille_results.py | 1 + docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 16 ++++++++++++++++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 3ec12e25..2acee121 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -540,9 +540,12 @@ def fit( honest_did : bool, default=False Run HonestDiD sensitivity analysis (Rambachan & Roth 2023) on the placebo + event study surface. Requires ``L_max >= 1``. - Default: relative magnitudes (DeltaRM, Mbar=1.0). Results - stored on ``results.honest_did_results``; ``None`` with a - warning if the solver fails. For custom parameters, call + Default: relative magnitudes (DeltaRM, Mbar=1.0), targeting + the equal-weight average over all post-treatment horizons + (``l_vec=None``). Results stored on + ``results.honest_did_results``; ``None`` with a warning if + the solver fails. For custom parameters (e.g., targeting + the on-impact effect only via ``l_vec``), call ``compute_honest_did(results, ...)`` post-hoc instead. heterogeneity : str, optional Column name for a time-invariant covariate to test for diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 2c96136b..1d07813c 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -987,6 +987,7 @@ def _render_honest_did_section( "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width), thin, f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})", + f"{'Target:':<35} {'Equal-weight avg over post horizons'}", f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}", f"{'Identified set:':<35} " f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]", diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 1f94064b..7601a8e9 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -617,7 +617,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. -- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning. +- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`, targeting the equal-weight average over all post-treatment horizons (`l_vec=None`). R's HonestDiD defaults to the first post/on-impact effect; use `compute_honest_did(results, ...)` with a custom `l_vec` to match that behavior. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning. - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index d968935c..7381f762 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3369,6 +3369,22 @@ def test_honest_did_smoothness(self): # Different methods should generally give different bounds assert rm_bounds.ci_lb != sd_bounds.ci_lb or rm_bounds.ci_ub != sd_bounds.ci_ub + def test_honest_did_original_estimate_is_post_average(self): + """original_estimate targets equal-weight average over post horizons.""" + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + hd = r.honest_did_results + assert hd is not None + # Equal-weight average = mean of event_study_effects[1..L_max] + es = r.event_study_effects + avg = np.mean([es[h]["effect"] for h in sorted(es.keys())]) + np.testing.assert_allclose(hd.original_estimate, avg, rtol=1e-10) + # ============================================================================= # Summary Phase 3 Rendering From a351e2e2f68a1bb60c8a3584107dc56fb56491dd Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 12:09:32 -0400 Subject: [PATCH 05/12] Fix CI review R3: add l_vec to compute_honest_did, end-to-end tests - Add l_vec parameter to compute_honest_did() so the advertised custom-target path actually works (was missing from wrapper) - Add test_honest_did_custom_l_vec_on_impact: l_vec=[1,0] targets on-impact effect, asserts original_estimate matches DID_1 - Add test_honest_did_with_trends_nonparam: end-to-end trends_nonparam + honest_did=True integration Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/honest_did.py | 9 ++++- tests/test_chaisemartin_dhaultfoeuille.py | 43 +++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index d5aa3e0f..6fa1f376 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -2669,6 +2669,7 @@ def compute_honest_did( method: str = "relative_magnitude", M: float = 1.0, alpha: float = 0.05, + l_vec: Optional[np.ndarray] = None, ) -> HonestDiDResults: """ Convenience function for computing Honest DiD bounds. @@ -2683,6 +2684,12 @@ def compute_honest_did( Restriction parameter. alpha : float Significance level. + l_vec : np.ndarray, optional + Weight vector defining the scalar target ``theta = l_vec' tau`` + over post-treatment horizons. Length must equal the number of + post-treatment periods. ``None`` (default) uses equal weights + (uniform average). To target the on-impact effect only (R's + default), pass ``np.array([1, 0, ..., 0])``. Returns ------- @@ -2694,7 +2701,7 @@ def compute_honest_did( >>> bounds = compute_honest_did(event_study_results, method='relative_magnitude', M=1.0) >>> print(f"Robust CI: [{bounds.ci_lb:.3f}, {bounds.ci_ub:.3f}]") """ - honest = HonestDiD(method=method, M=M, alpha=alpha) + honest = HonestDiD(method=method, M=M, alpha=alpha, l_vec=l_vec) return honest.fit(results) diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 7381f762..c7f04643 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3385,6 +3385,49 @@ def test_honest_did_original_estimate_is_post_average(self): avg = np.mean([es[h]["effect"] for h in sorted(es.keys())]) np.testing.assert_allclose(hd.original_estimate, avg, rtol=1e-10) + def test_honest_did_custom_l_vec_on_impact(self): + """compute_honest_did with l_vec=[1,0] targets on-impact effect.""" + from diff_diff.honest_did import compute_honest_did + + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, + ) + # l_vec=[1, 0] targets only DID_1 (on-impact, R's default) + bounds = compute_honest_did(r, l_vec=np.array([1.0, 0.0])) + np.testing.assert_allclose( + bounds.original_estimate, + r.event_study_effects[1]["effect"], + rtol=1e-10, + ) + + def test_honest_did_with_trends_nonparam(self): + """End-to-end trends_nonparam + honest_did=True.""" + rng = np.random.RandomState(42) + rows = [] + for g in range(40): + state = g % 4 + switches = g < 20 + for t in range(7): + d = 1 if (switches and t >= 3) else 0 + y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "state": state, + }) + df = pd.DataFrame(rows) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, trends_nonparam="state", honest_did=True, + ) + assert r.honest_did_results is not None + assert np.isfinite(r.honest_did_results.ci_lb) + # ============================================================================= # Summary Phase 3 Rendering From 419d872c2da379516949f91ef22837dd072daccb Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 12:45:02 -0400 Subject: [PATCH 06/12] Fix CI review R4: persist target_label on HonestDiDResults, render dynamically - Add target_label field to HonestDiDResults (default: equal-weight avg) - HonestDiD.fit() detects common l_vec patterns and sets human-readable label (on-impact, equal-weight, or custom with vector) - Summary renders hd.target_label instead of hard-coded string - Add test_honest_did_custom_l_vec_summary_label: attaches custom-target results and asserts summary shows "on-impact" not "Equal-weight" Co-Authored-By: Claude Opus 4.6 (1M context) --- .../chaisemartin_dhaultfoeuille_results.py | 2 +- diff_diff/honest_did.py | 14 +++++++++++++- tests/test_chaisemartin_dhaultfoeuille.py | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 1d07813c..cadc1008 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -987,7 +987,7 @@ def _render_honest_did_section( "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width), thin, f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})", - f"{'Target:':<35} {'Equal-weight avg over post horizons'}", + f"{'Target:':<35} {hd.target_label}", f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}", f"{'Identified set:':<35} " f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]", diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index 6fa1f376..f46ad58c 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -191,6 +191,7 @@ class HonestDiDResults: original_se: float alpha: float = 0.05 ci_method: str = "FLCI" + target_label: str = "Equal-weight avg over post horizons" original_results: Optional[Any] = field(default=None, repr=False) # Event study bounds (optional) event_study_bounds: Optional[Dict[Any, Dict[str, float]]] = field(default=None, repr=False) @@ -2252,13 +2253,23 @@ def fit( "coefficient to compute bounds." ) - # Set up weighting vector + # Set up weighting vector and target label if self.l_vec is None: l_vec = np.ones(num_post) / num_post # Uniform weights + target_label = "Equal-weight avg over post horizons" else: l_vec = np.asarray(self.l_vec) if len(l_vec) != num_post: raise ValueError(f"l_vec must have length {num_post}, got {len(l_vec)}") + # Detect common patterns for a human-readable label + basis = np.zeros(num_post) + basis[0] = 1.0 + if np.allclose(l_vec, basis): + target_label = "First post-treatment effect (on-impact)" + elif np.allclose(l_vec, np.ones(num_post) / num_post): + target_label = "Equal-weight avg over post horizons" + else: + target_label = f"Custom l_vec ({l_vec.tolist()})" # Compute original estimate and SE original_estimate = np.dot(l_vec, beta_post) @@ -2318,6 +2329,7 @@ def fit( original_se=original_se, alpha=self.alpha, ci_method=ci_method, + target_label=target_label, original_results=results, survey_metadata=survey_metadata, df_survey=df_survey, diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index c7f04643..4b2fa168 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3404,6 +3404,25 @@ def test_honest_did_custom_l_vec_on_impact(self): rtol=1e-10, ) + def test_honest_did_custom_l_vec_summary_label(self): + """summary() renders custom target label when l_vec is overridden.""" + from diff_diff.honest_did import compute_honest_did + + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, + ) + # Attach custom-target HonestDiD to results + r.honest_did_results = compute_honest_did( + r, l_vec=np.array([1.0, 0.0]) + ) + text = r.summary() + assert "on-impact" in text.lower() + assert "Equal-weight" not in text + def test_honest_did_with_trends_nonparam(self): """End-to-end trends_nonparam + honest_did=True.""" rng = np.random.RandomState(42) From 33e9c678dc2754341843a75a9df7865e8c534922 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 13:36:06 -0400 Subject: [PATCH 07/12] Fix CI review R5: persist pre/post_periods_used on HonestDiDResults - Add pre_periods_used and post_periods_used fields to HonestDiDResults so the retained horizon set is always available on the results object - HonestDiD.fit() populates both fields from the extracted period lists - Summary renders retained horizons below the target label - Add test_honest_did_retains_period_metadata asserting fields populated and summary shows "Post horizons used:" Co-Authored-By: Claude Opus 4.6 (1M context) --- .../chaisemartin_dhaultfoeuille_results.py | 12 ++++++++++++ diff_diff/honest_did.py | 4 ++++ tests/test_chaisemartin_dhaultfoeuille.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index cadc1008..153cc7fc 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -988,6 +988,18 @@ def _render_honest_did_section( thin, f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})", f"{'Target:':<35} {hd.target_label}", + ] + ) + if hd.post_periods_used is not None: + lines.append( + f"{'Post horizons used:':<35} {hd.post_periods_used}" + ) + if hd.pre_periods_used is not None: + lines.append( + f"{'Pre horizons used:':<35} {hd.pre_periods_used}" + ) + lines.extend( + [ f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}", f"{'Identified set:':<35} " f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]", diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index f46ad58c..87d944d0 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -192,6 +192,8 @@ class HonestDiDResults: alpha: float = 0.05 ci_method: str = "FLCI" target_label: str = "Equal-weight avg over post horizons" + pre_periods_used: Optional[List[Any]] = field(default=None, repr=False) + post_periods_used: Optional[List[Any]] = field(default=None, repr=False) original_results: Optional[Any] = field(default=None, repr=False) # Event study bounds (optional) event_study_bounds: Optional[Dict[Any, Dict[str, float]]] = field(default=None, repr=False) @@ -2330,6 +2332,8 @@ def fit( alpha=self.alpha, ci_method=ci_method, target_label=target_label, + pre_periods_used=list(pre_periods), + post_periods_used=list(post_periods), original_results=results, survey_metadata=survey_metadata, df_survey=df_survey, diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 4b2fa168..143c462e 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3404,6 +3404,24 @@ def test_honest_did_custom_l_vec_on_impact(self): rtol=1e-10, ) + def test_honest_did_retains_period_metadata(self): + """HonestDiDResults stores pre_periods_used and post_periods_used.""" + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + hd = r.honest_did_results + assert hd.pre_periods_used is not None + assert hd.post_periods_used is not None + assert all(p < 0 for p in hd.pre_periods_used) + assert all(p > 0 for p in hd.post_periods_used) + # Summary renders the retained horizons + text = r.summary() + assert "Post horizons used:" in text + def test_honest_did_custom_l_vec_summary_label(self): """summary() renders custom target label when l_vec is overridden.""" from diff_diff.honest_did import compute_honest_did From dd06b3bbfd2d68bfc356caba1cc704a662cbcec0 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 14:04:04 -0400 Subject: [PATCH 08/12] Fix CI review R6: surface target metadata in standalone HonestDiD summary/export - HonestDiDResults.summary() now renders target_label, pre/post_periods_used - HonestDiDResults.to_dict() includes target_label, pre/post_periods_used - to_dataframe() inherits from to_dict() automatically - Add test_dcdh_standalone_surfaces_target_metadata verifying all three surfaces include target metadata for custom l_vec Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/honest_did.py | 11 +++++++++++ tests/test_honest_did.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index 87d944d0..9e535fa9 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -276,6 +276,7 @@ def summary(self) -> str: "=" * 70, "", f"{'Method:':<30} {method_display}", + f"{'Target:':<30} {self.target_label}", f"{'Restriction parameter (M):':<30} {self.M:.4f}", f"{'CI method:':<30} {self.ci_method}", "", @@ -296,6 +297,13 @@ def summary(self) -> str: ] # Interpretation + if self.pre_periods_used is not None: + lines.append(f"{'Pre horizons used:':<30} {self.pre_periods_used}") + if self.post_periods_used is not None: + lines.append(f"{'Post horizons used:':<30} {self.post_periods_used}") + if self.pre_periods_used is not None or self.post_periods_used is not None: + lines.append("") + lines.extend( [ "-" * 70, @@ -343,6 +351,9 @@ def to_dict(self) -> Dict[str, Any]: "ci_ub": self.ci_ub, "M": self.M, "method": self.method, + "target_label": self.target_label, + "pre_periods_used": self.pre_periods_used, + "post_periods_used": self.post_periods_used, "original_estimate": self.original_estimate, "original_se": self.original_se, "alpha": self.alpha, diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py index 97213651..d0e99dd4 100644 --- a/tests/test_honest_did.py +++ b/tests/test_honest_did.py @@ -1430,6 +1430,22 @@ def test_dcdh_empty_consecutive_block_raises(self): with pytest.raises(ValueError, match="No placebo horizons with finite SEs"): compute_honest_did(results) + def test_dcdh_standalone_surfaces_target_metadata(self): + """Standalone HonestDiDResults summary/to_dict include target metadata.""" + results = self._fit_dcdh() + bounds = compute_honest_did(results, l_vec=np.array([1.0, 0.0])) + # summary() includes target and period metadata + text = bounds.summary() + assert "on-impact" in text.lower() + assert "Post horizons used:" in text + assert "Pre horizons used:" in text + # to_dict() includes the fields + d = bounds.to_dict() + assert "target_label" in d + assert "pre_periods_used" in d + assert "post_periods_used" in d + assert d["post_periods_used"] == [1, 2] + def test_dcdh_missing_boundary_minus1_raises(self): """ValueError when horizon -1 has NaN SE (boundary required).""" import warnings From 4943910e66b1d878d169e74b63d8ebc8da9fd753 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 14:22:55 -0400 Subject: [PATCH 09/12] Add end-to-end trends_nonparam support-trimming + HonestDiD test test_honest_did_trends_nonparam_trimming: State B's early-switching controls vanish at far horizons, causing N_l=0 at h=3 and h=-3. HonestDiD extraction drops NaN-SE horizons and retains [-2,-1,1,2]. Asserts n_obs=0 at trimmed horizons, finite bounds on retained block, and post_periods_used excludes h=3. This exercises the real trends_nonparam support-trimming path through HonestDiD (not SE mutation), addressing the recurring P2 finding. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_chaisemartin_dhaultfoeuille.py | 55 ++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 143c462e..ec966b33 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3442,7 +3442,7 @@ def test_honest_did_custom_l_vec_summary_label(self): assert "Equal-weight" not in text def test_honest_did_with_trends_nonparam(self): - """End-to-end trends_nonparam + honest_did=True.""" + """End-to-end trends_nonparam + honest_did=True (balanced support).""" rng = np.random.RandomState(42) rows = [] for g in range(40): @@ -3465,6 +3465,59 @@ def test_honest_did_with_trends_nonparam(self): assert r.honest_did_results is not None assert np.isfinite(r.honest_did_results.ci_lb) + def test_honest_did_trends_nonparam_trimming(self): + """End-to-end: trends_nonparam causes NaN at far horizons, HonestDiD trims. + + State A: switches late (t=5), has never-switching controls. + State B: switches early (t=2), "controls" switch at t=3 so + control pool vanishes at h>=2. At L_max=3, h=3 and h=-3 have + N_l=0 (NaN SE) because State A can't reach h=3 and State B + has no controls there. HonestDiD extraction drops the NaN + horizons and retains [-2, -1, 1, 2]. + """ + rng = np.random.RandomState(42) + rows = [] + n_periods = 7 + # State A: 3 switch at t=5, 4 controls + for g in range(7): + switches = g < 3 + for t in range(n_periods): + d = 1 if (switches and t >= 5) else 0 + y = 10 + 2.0*t + 5.0*d + rng.normal(0, 0.3) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "state": "A", + }) + # State B: 4 switch at t=2, 2 "controls" switch at t=3 + for g in range(7, 13): + switch_t = 2 if g < 11 else 3 + for t in range(n_periods): + d = 1 if t >= switch_t else 0 + y = 10 + 2.0*t + 5.0*d + rng.normal(0, 0.3) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "state": "B", + }) + df = pd.DataFrame(rows) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=3, trends_nonparam="state", honest_did=True, + ) + # h=3 and h=-3 should be NaN (N_l=0 from support trimming) + assert r.event_study_effects[3]["n_obs"] == 0 + assert r.placebo_event_study[-3]["n_obs"] == 0 + # HonestDiD should still compute on the retained block + hd = r.honest_did_results + assert hd is not None + assert np.isfinite(hd.ci_lb) + # Retained horizons should exclude the NaN endpoints + assert -3 not in hd.pre_periods_used + assert 3 not in hd.post_periods_used + assert hd.post_periods_used == [1, 2] + # ============================================================================= # Summary Phase 3 Rendering From 46e6dc50428135cca7dda8f9199b726106bc4b5b Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 14:56:26 -0400 Subject: [PATCH 10/12] Fix CI review R8 P0: propagate estimator alpha to HonestDiD compute_honest_did() call in fit() now passes alpha=self.alpha instead of inheriting the default 0.05. Added regression test asserting honest_did_results.alpha matches estimator alpha. Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 3 ++- tests/test_chaisemartin_dhaultfoeuille.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 2acee121..75d75d37 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -2423,7 +2423,8 @@ def fit( from diff_diff.honest_did import compute_honest_did results.honest_did_results = compute_honest_did( - results, method="relative_magnitude", M=1.0 + results, method="relative_magnitude", M=1.0, + alpha=self.alpha, ) except (ValueError, np.linalg.LinAlgError) as exc: warnings.warn( diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index ec966b33..88cca1a6 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3404,6 +3404,18 @@ def test_honest_did_custom_l_vec_on_impact(self): rtol=1e-10, ) + def test_honest_did_respects_alpha(self): + """honest_did=True propagates estimator alpha to HonestDiD.""" + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1, alpha=0.10).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + assert r.honest_did_results is not None + assert r.honest_did_results.alpha == 0.10 + def test_honest_did_retains_period_metadata(self): """HonestDiDResults stores pre_periods_used and post_periods_used.""" df = self._make_data() From 820c9dd0a1302590153bf091434956511ffec6b5 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 15:11:03 -0400 Subject: [PATCH 11/12] Fix CI review R9: bootstrap interaction test, assert trimming warning P1: Add test_honest_did_with_bootstrap - fits with n_bootstrap=49 and honest_did=True, asserts finite bounds and retained horizons. P2: test_honest_did_trends_nonparam_trimming now captures warnings and asserts the placebo-based pre-period warning is emitted (was suppressing all warnings before). Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_chaisemartin_dhaultfoeuille.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 88cca1a6..94022e78 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -3512,8 +3512,8 @@ def test_honest_did_trends_nonparam_trimming(self): }) df = pd.DataFrame(rows) - with warnings.catch_warnings(): - warnings.simplefilter("ignore") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") r = ChaisemartinDHaultfoeuille(seed=1).fit( df, "outcome", "group", "period", "treatment", L_max=3, trends_nonparam="state", honest_did=True, @@ -3529,6 +3529,25 @@ def test_honest_did_trends_nonparam_trimming(self): assert -3 not in hd.pre_periods_used assert 3 not in hd.post_periods_used assert hd.post_periods_used == [1, 2] + # The placebo-based pre-period warning should have been emitted + placebo_warns = [ + x for x in w if "placebo" in str(x.message).lower() + and "pre-period" in str(x.message).lower() + ] + assert len(placebo_warns) >= 1 + + def test_honest_did_with_bootstrap(self): + """honest_did=True works with bootstrap-fitted results.""" + df = self._make_data() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1, n_bootstrap=49).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, honest_did=True, + ) + assert r.honest_did_results is not None + assert np.isfinite(r.honest_did_results.ci_lb) + assert r.honest_did_results.post_periods_used == [1, 2] # ============================================================================= From b88a97e7d39bb75a3cc8ed00ee24ea96aa48e970 Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 14 Apr 2026 15:52:57 -0400 Subject: [PATCH 12/12] Fix CI review R10 P2: assert non-consecutive-horizon trimming warning Add test_dcdh_interior_gap_triggers_trimming_warning: corrupts h=-2 SE to create an interior gap [-3, -1], asserts the "Dropping non-consecutive horizons" warning is emitted and pre_periods_used == [-1]. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_honest_did.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py index d0e99dd4..5ca20efd 100644 --- a/tests/test_honest_did.py +++ b/tests/test_honest_did.py @@ -1446,6 +1446,29 @@ def test_dcdh_standalone_surfaces_target_metadata(self): assert "post_periods_used" in d assert d["post_periods_used"] == [1, 2] + def test_dcdh_interior_gap_triggers_trimming_warning(self): + """Non-consecutive horizons after SE filtering emit trimming warning.""" + import warnings + + # L_max=3 gives horizons [-3,-2,-1,1,2,3]. Corrupt h=-2 to create + # interior gap [-3, -1], which triggers consecutive-block trimming + # that drops -3 and keeps only [-1]. + results = self._fit_dcdh(n_periods=8, L_max=3) + results.placebo_event_study[-2]["se"] = float("nan") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + bounds = compute_honest_did(results) + trim_warns = [ + x for x in w + if "dropping non-consecutive" in str(x.message).lower() + ] + assert len(trim_warns) >= 1, ( + "Expected a warning about dropping non-consecutive horizons" + ) + # Retained pre should be [-1] only (h=-3 dropped due to gap at -2) + assert bounds.pre_periods_used == [-1] + def test_dcdh_missing_boundary_minus1_raises(self): """ValueError when horizon -1 has NaN SE (boundary required).""" import warnings