diff --git a/ROADMAP.md b/ROADMAP.md index db56947c..c24b33a0 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -173,7 +173,7 @@ The dynamic companion paper subsumes the AER 2020 paper: `DID_1 = DID_M`. The si | **3d.** Heterogeneity testing `beta^{het}_l` (Web Appendix Section 1.5) | LOW | Shipped (PR B) | | **3e.** Design-2 switch-in / switch-out separation (Web Appendix Section 1.6) | LOW | Shipped (PR B; convenience wrapper) | | **3f.** Non-binary treatment support (the formula already handles it; this row is documentation + tests) | MEDIUM | Shipped (PR #300; also ships placebo SE, L_max=1 per-group path, parity SE assertions) | -| **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Not started | +| **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Shipped (PR C) | | **3h.** **Single comprehensive tutorial notebook** covering all three phases — Favara-Imbs (2015) banking deregulation replication as the headline application, with comparison plots vs LP / TWFE | HIGH | Not started | | **3i.** Parity tests vs `did_multiplegt_dyn` for covariate and extension specifications | HIGH | Shipped (PR B; controls, trends_lin, combined) | diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 9959ad2d..75d75d37 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -538,7 +538,15 @@ def fit( pool to groups in the same set (Web Appendix Section 1.4). Requires ``L_max >= 1`` and time-invariant values per group. honest_did : bool, default=False - **Reserved for Phase 3** (HonestDiD integration on placebos). + Run HonestDiD sensitivity analysis (Rambachan & Roth 2023) on + the placebo + event study surface. Requires ``L_max >= 1``. + Default: relative magnitudes (DeltaRM, Mbar=1.0), targeting + the equal-weight average over all post-treatment horizons + (``l_vec=None``). 
Results stored on + ``results.honest_did_results``; ``None`` with a warning if + the solver fails. For custom parameters (e.g., targeting + the on-impact effect only via ``l_vec``), call + ``compute_honest_did(results, ...)`` post-hoc instead. heterogeneity : str, optional Column name for a time-invariant covariate to test for heterogeneous effects (Web Appendix Section 1.5, Lemma 7). @@ -946,6 +954,19 @@ def fit( f"is {n_post_baseline}." ) + if honest_did and L_max is None: + raise ValueError( + "honest_did=True requires L_max >= 1 for multi-horizon placebos. " + "Set L_max to compute DID^{pl}_l placebos that HonestDiD uses as " + "pre-period coefficients." + ) + if honest_did and not self.placebo: + raise ValueError( + "honest_did=True requires placebo computation. The estimator was " + "constructed with placebo=False. Use " + "ChaisemartinDHaultfoeuille(placebo=True) (the default)." + ) + # Pivot to (group x time) matrices for vectorized computations d_pivot = cell.pivot(index=group, columns=time, values="d_gt").reindex( index=all_groups, columns=all_periods @@ -2394,6 +2415,28 @@ def fit( _estimator_ref=self, ) + # ------------------------------------------------------------------ + # HonestDiD integration (when honest_did=True) + # ------------------------------------------------------------------ + if honest_did and results.placebo_event_study: + try: + from diff_diff.honest_did import compute_honest_did + + results.honest_did_results = compute_honest_did( + results, method="relative_magnitude", M=1.0, + alpha=self.alpha, + ) + except (ValueError, np.linalg.LinAlgError) as exc: + warnings.warn( + f"HonestDiD computation failed ({type(exc).__name__}): " + f"{exc}. results.honest_did_results will be None. " + f"You can retry with compute_honest_did(results, ...) 
" + f"using different parameters.", + UserWarning, + stacklevel=2, + ) + results.honest_did_results = None + self.results_ = results self.is_fitted_ = True return results @@ -2432,12 +2475,8 @@ def _check_forward_compat_gates( # Validation (L_max >= 1, n_periods >= 3 required) is in fit(). # trends_nonparam gate lifted - state-set trends implemented. # Validation (L_max >= 1, column exists, time-invariant) is in fit(). - if honest_did: - raise NotImplementedError( - "HonestDiD integration for dCDH is reserved for Phase 3, applied to " - "the placebo DID^{pl}_l output. Phase 1 provides only the placebo " - "point estimate via results.placebo_effect. See ROADMAP.md Phase 3." - ) + # honest_did gate lifted - integration implemented. + # Validation (L_max >= 1 required) is in fit() after L_max detection. def _drop_crossing_cells( diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 9c85b438..153cc7fc 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -18,8 +18,13 @@ NBER Working Paper 29873. """ +from __future__ import annotations + from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple + +if TYPE_CHECKING: + from diff_diff.honest_did import HonestDiDResults import numpy as np import pandas as pd @@ -331,8 +336,11 @@ class ChaisemartinDHaultfoeuilleResults: design2_effects : dict, optional Design-2 switch-in/switch-out descriptive summary. Populated when ``design2=True``. - honest_did_results : Any, optional - Reserved for HonestDiD integration on placebos. + honest_did_results : HonestDiDResults, optional + HonestDiD sensitivity analysis bounds (Rambachan & Roth 2023). + Populated when ``honest_did=True`` in ``fit()`` or by calling + ``compute_honest_did(results)`` post-hoc. 
Contains identified + set bounds, robust confidence intervals, and breakdown analysis. survey_metadata : Any, optional Always ``None`` in Phase 1 — survey integration is deferred to a separate effort after all phases ship. @@ -415,7 +423,7 @@ class ChaisemartinDHaultfoeuilleResults: linear_trends_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False) heterogeneity_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False) design2_effects: Optional[Dict[str, Any]] = field(default=None, repr=False) - honest_did_results: Optional[Any] = field(default=None, repr=False) + honest_did_results: Optional["HonestDiDResults"] = field(default=None, repr=False) # --- Repr-suppressed metadata --- survey_metadata: Optional[Any] = field(default=None, repr=False) @@ -798,6 +806,13 @@ def summary(self, alpha: Optional[float] = None) -> str: lines.extend([""]) + # --- Phase 3 extension blocks (factored into helpers) --- + self._render_covariate_section(lines, width, thin) + self._render_linear_trends_section(lines, width, thin, header_row) + self._render_heterogeneity_section(lines, width, thin) + self._render_design2_section(lines, width, thin) + self._render_honest_did_section(lines, width, thin) + # --- TWFE diagnostic --- if self.twfe_beta_fe is not None: lines.extend( @@ -842,6 +857,161 @@ def print_summary(self, alpha: Optional[float] = None) -> None: """Print the formatted summary to stdout.""" print(self.summary(alpha)) + # ------------------------------------------------------------------ + # Summary section helpers (Phase 3 blocks) + # ------------------------------------------------------------------ + + def _render_covariate_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.covariate_residuals is None: + return + cov_df = self.covariate_residuals + control_names = sorted(cov_df["covariate"].unique()) + n_baselines = cov_df["baseline_treatment"].nunique() + failed = int( + 
(cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum() + ) + lines.extend( + [ + thin, + "Covariate Adjustment (DID^X) Diagnostics".center(width), + thin, + f"{'Controls:':<35} {', '.join(control_names):>10}", + f"{'Baselines residualized:':<35} {n_baselines:>10}", + f"{'Failed strata:':<35} {failed:>10}", + thin, + "", + ] + ) + + def _render_linear_trends_section( + self, lines: List[str], width: int, thin: str, header_row: str + ) -> None: + if self.linear_trends_effects is None: + return + lines.extend( + [ + thin, + "Cumulated Level Effects (DID^{fd}, trends_linear)".center(width), + thin, + header_row, + thin, + ] + ) + for l_h in sorted(self.linear_trends_effects.keys()): + entry = self.linear_trends_effects[l_h] + lines.append( + _format_inference_row( + f"Level_{l_h}", + entry["effect"], + entry["se"], + entry["t_stat"], + entry["p_value"], + ) + ) + lines.extend([thin, ""]) + + def _render_heterogeneity_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.heterogeneity_effects is None: + return + lines.extend( + [ + thin, + "Heterogeneity Test (Section 1.5, partial)".center(width), + thin, + f"{'Horizon':<15} {'beta^het':>12} {'Std. 
Err.':>12} " + f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}", + thin, + ] + ) + for l_h in sorted(self.heterogeneity_effects.keys()): + entry = self.heterogeneity_effects[l_h] + lines.append( + _format_inference_row( + f"l={l_h}", + entry["beta"], + entry["se"], + entry["t_stat"], + entry["p_value"], + ) + ) + lines.extend( + [ + thin, + "Note: Post-treatment regressions only (no placebo/joint test).", + "", + ] + ) + + def _render_design2_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.design2_effects is None: + return + d2 = self.design2_effects + si = d2.get("switch_in", {}) + so = d2.get("switch_out", {}) + lines.extend( + [ + thin, + "Design-2: Switch-In / Switch-Out (Section 1.6)".center(width), + thin, + f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}", + f"{'Switch-in effect (mean):':<35} " + f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}" + f" (N={si.get('n_groups', 0)})", + f"{'Switch-out effect (mean):':<35} " + f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}" + f" (N={so.get('n_groups', 0)})", + thin, + "", + ] + ) + + def _render_honest_did_section( + self, lines: List[str], width: int, thin: str + ) -> None: + if self.honest_did_results is None: + return + hd = self.honest_did_results + method_label = hd.method.replace("_", " ").title() + m_val = hd.M + sig_label = "Yes" if hd.is_significant else "No" + conf_pct = int((1 - hd.alpha) * 100) + lines.extend( + [ + thin, + "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width), + thin, + f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})", + f"{'Target:':<35} {hd.target_label}", + ] + ) + if hd.post_periods_used is not None: + lines.append( + f"{'Post horizons used:':<35} {hd.post_periods_used}" + ) + if hd.pre_periods_used is not None: + lines.append( + f"{'Pre horizons used:':<35} {hd.pre_periods_used}" + ) + lines.extend( + [ + f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}", + f"{'Identified set:':<35} 
" + f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]", + f"{'Robust ' + str(conf_pct) + '% CI:':<35} " + f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]", + f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} " + f"{sig_label:>10}", + thin, + "", + ] + ) + # ------------------------------------------------------------------ # to_dataframe # ------------------------------------------------------------------ diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index 02f84a83..9e535fa9 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -191,6 +191,9 @@ class HonestDiDResults: original_se: float alpha: float = 0.05 ci_method: str = "FLCI" + target_label: str = "Equal-weight avg over post horizons" + pre_periods_used: Optional[List[Any]] = field(default=None, repr=False) + post_periods_used: Optional[List[Any]] = field(default=None, repr=False) original_results: Optional[Any] = field(default=None, repr=False) # Event study bounds (optional) event_study_bounds: Optional[Dict[Any, Dict[str, float]]] = field(default=None, repr=False) @@ -273,6 +276,7 @@ def summary(self) -> str: "=" * 70, "", f"{'Method:':<30} {method_display}", + f"{'Target:':<30} {self.target_label}", f"{'Restriction parameter (M):':<30} {self.M:.4f}", f"{'CI method:':<30} {self.ci_method}", "", @@ -293,6 +297,13 @@ def summary(self) -> str: ] # Interpretation + if self.pre_periods_used is not None: + lines.append(f"{'Pre horizons used:':<30} {self.pre_periods_used}") + if self.post_periods_used is not None: + lines.append(f"{'Post horizons used:':<30} {self.post_periods_used}") + if self.pre_periods_used is not None or self.post_periods_used is not None: + lines.append("") + lines.extend( [ "-" * 70, @@ -340,6 +351,9 @@ def to_dict(self) -> Dict[str, Any]: "ci_ub": self.ci_ub, "M": self.M, "method": self.method, + "target_label": self.target_label, + "pre_periods_used": self.pre_periods_used, + "post_periods_used": self.post_periods_used, "original_estimate": 
self.original_estimate, "original_se": self.original_se, "alpha": self.alpha, @@ -559,7 +573,7 @@ def _extract_event_study_params( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Estimation results with event study structure. Returns @@ -817,9 +831,158 @@ def _extract_event_study_params( except ImportError: pass + # Try ChaisemartinDHaultfoeuilleResults (dCDH estimator) + try: + from diff_diff.chaisemartin_dhaultfoeuille_results import ( + ChaisemartinDHaultfoeuilleResults, + ) + + if isinstance(results, ChaisemartinDHaultfoeuilleResults): + import warnings + + warnings.warn( + "HonestDiD on dCDH results uses DID^{pl}_l placebo " + "estimates as pre-period coefficients, not standard " + "event-study pre-treatment coefficients. The Rambachan-" + "Roth restrictions bound violations of the parallel " + "trends assumption underlying the dCDH placebo " + "estimand. This is a library extension; interpretation " + "differs from canonical event-study HonestDiD.", + UserWarning, + stacklevel=3, + ) + + if results.placebo_event_study is None: + raise ValueError( + "ChaisemartinDHaultfoeuilleResults must have placebo_event_study " + "for HonestDiD. Re-run ChaisemartinDHaultfoeuille.fit() with " + "L_max >= 1 to compute multi-horizon placebos." + ) + if results.event_study_effects is None: + raise ValueError( + "ChaisemartinDHaultfoeuilleResults must have event_study_effects " + "for HonestDiD." 
+ ) + + # Filter for finite SEs in both surfaces + placebo_finite = { + h: data + for h, data in results.placebo_event_study.items() + if np.isfinite(data.get("se", np.nan)) + } + effects_finite = { + h: data + for h, data in results.event_study_effects.items() + if np.isfinite(data.get("se", np.nan)) + } + + pre_times = sorted(placebo_finite.keys()) # -P, ..., -1 + post_times = sorted(effects_finite.keys()) # 1, ..., L_max + + if len(pre_times) == 0: + raise ValueError( + "No placebo horizons with finite SEs found in dCDH results. " + "HonestDiD requires at least one identified pre-period " + "coefficient." + ) + if len(post_times) == 0: + raise ValueError( + "No event study horizons with finite SEs found in dCDH results. " + "HonestDiD requires at least one post-period coefficient." + ) + + # Consecutiveness check: more permissive than CS because + # trends_nonparam support-trimming can create legitimate gaps. + # Filter to the largest consecutive block spanning the -1/+1 + # boundary; warn about dropped horizons. + def _largest_consecutive_block(times, boundary_val): + """Find largest consecutive block containing boundary_val.""" + if not times: + return [] + if boundary_val not in times: + raise ValueError( + f"HonestDiD requires horizon {boundary_val} in " + f"the dCDH " + f"{'placebo' if boundary_val < 0 else 'event study'}" + f" surface, but it was removed by finite-SE " + f"filtering. Retained horizons: {times}. Ensure " + f"horizon {boundary_val} has a finite SE." 
+ ) + # Expand outward from boundary_val + block = [boundary_val] + idx = times.index(boundary_val) + # Expand left + for i in range(idx - 1, -1, -1): + if times[i] == block[0] - 1: + block.insert(0, times[i]) + else: + break + # Expand right + for i in range(idx + 1, len(times)): + if times[i] == block[-1] + 1: + block.append(times[i]) + else: + break + return block + + pre_consec = _largest_consecutive_block(pre_times, -1) + post_consec = _largest_consecutive_block(post_times, 1) + + dropped_pre = set(pre_times) - set(pre_consec) + dropped_post = set(post_times) - set(post_consec) + + if dropped_pre or dropped_post: + import warnings + + dropped = sorted(dropped_pre | dropped_post) + warnings.warn( + f"HonestDiD requires a consecutive event-time grid. " + f"Dropping non-consecutive horizons {dropped} from dCDH " + f"results. This can happen when trends_nonparam " + f"support-trimming removes horizons. Retained: " + f"pre={pre_consec}, post={post_consec}.", + UserWarning, + stacklevel=3, + ) + pre_times = pre_consec + post_times = post_consec + + if len(pre_times) == 0 or len(post_times) == 0: + raise ValueError( + "After filtering for consecutive horizons, no pre- or " + "post-periods remain. Cannot compute HonestDiD bounds." + ) + + # Build beta_hat and sigma (diagonal - no full VCV for dCDH) + all_times = pre_times + post_times + effects = [] + ses = [] + for h in pre_times: + effects.append(placebo_finite[h]["effect"]) + ses.append(placebo_finite[h]["se"]) + for h in post_times: + effects.append(effects_finite[h]["effect"]) + ses.append(effects_finite[h]["se"]) + + beta_hat = np.array(effects) + sigma = np.diag(np.array(ses) ** 2) + + return ( + beta_hat, + sigma, + len(pre_times), + len(post_times), + pre_times, + post_times, + None, # df_survey: dCDH has no survey support + ) + except ImportError: + pass + raise TypeError( f"Unsupported results type: {type(results)}. " - "Expected MultiPeriodDiDResults or CallawaySantAnnaResults." 
+ "Expected MultiPeriodDiDResults, CallawaySantAnnaResults, " + "or ChaisemartinDHaultfoeuilleResults." ) @@ -2054,7 +2217,7 @@ def fit( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. M : float, optional Override the M parameter for this fit. @@ -2103,13 +2266,23 @@ def fit( "coefficient to compute bounds." ) - # Set up weighting vector + # Set up weighting vector and target label if self.l_vec is None: l_vec = np.ones(num_post) / num_post # Uniform weights + target_label = "Equal-weight avg over post horizons" else: l_vec = np.asarray(self.l_vec) if len(l_vec) != num_post: raise ValueError(f"l_vec must have length {num_post}, got {len(l_vec)}") + # Detect common patterns for a human-readable label + basis = np.zeros(num_post) + basis[0] = 1.0 + if np.allclose(l_vec, basis): + target_label = "First post-treatment effect (on-impact)" + elif np.allclose(l_vec, np.ones(num_post) / num_post): + target_label = "Equal-weight avg over post horizons" + else: + target_label = f"Custom l_vec ({l_vec.tolist()})" # Compute original estimate and SE original_estimate = np.dot(l_vec, beta_post) @@ -2169,6 +2342,9 @@ def fit( original_se=original_se, alpha=self.alpha, ci_method=ci_method, + target_label=target_label, + pre_periods_used=list(pre_periods), + post_periods_used=list(post_periods), original_results=results, survey_metadata=survey_metadata, df_survey=df_survey, @@ -2372,7 +2548,7 @@ def sensitivity_analysis( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. M_grid : list of float, optional Grid of M values to evaluate. 
If None, uses default grid @@ -2471,7 +2647,7 @@ def breakdown_value( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. tol : float Tolerance for binary search. @@ -2520,13 +2696,14 @@ def compute_honest_did( method: str = "relative_magnitude", M: float = 1.0, alpha: float = 0.05, + l_vec: Optional[np.ndarray] = None, ) -> HonestDiDResults: """ Convenience function for computing Honest DiD bounds. Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. method : str Type of restriction ("smoothness", "relative_magnitude", "combined"). @@ -2534,6 +2711,12 @@ def compute_honest_did( Restriction parameter. alpha : float Significance level. + l_vec : np.ndarray, optional + Weight vector defining the scalar target ``theta = l_vec' tau`` + over post-treatment horizons. Length must equal the number of + post-treatment periods. ``None`` (default) uses equal weights + (uniform average). To target the on-impact effect only (R's + default), pass ``np.array([1, 0, ..., 0])``. Returns ------- @@ -2545,7 +2728,7 @@ def compute_honest_did( >>> bounds = compute_honest_did(event_study_results, method='relative_magnitude', M=1.0) >>> print(f"Robust CI: [{bounds.ci_lb:.3f}, {bounds.ci_ub:.3f}]") """ - honest = HonestDiD(method=method, M=M, alpha=alpha) + honest = HonestDiD(method=method, M=M, alpha=alpha, l_vec=l_vec) return honest.fit(results) @@ -2562,7 +2745,7 @@ def sensitivity_plot( Parameters ---------- - results : MultiPeriodDiDResults or CallawaySantAnnaResults + results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults Results from event study estimation. method : str Type of restriction. 
diff --git a/docs/llms.txt b/docs/llms.txt index 8f6b5b02..cb59f16a 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -20,7 +20,7 @@ diagnostic steps produces unreliable results. 3. **Test parallel trends** — simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect CS event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds. 4. **Choose estimator** — staggered adoption → CS/SA/BJS (NOT plain TWFE); few treated units → SDiD; factor confounding → TROP; simple 2x2 → DiD. Run `BaconDecomposition` to diagnose TWFE bias. 5. **Estimate** — `estimator.fit(data, ...)`. Always print the cluster count first and choose inference method based on the result (cluster-robust if >= 50 clusters, wild bootstrap if fewer). -6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD/CS only), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs. +6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD, CS, or dCDH), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs. 7. **Heterogeneity** — CS: `aggregate='group'`/`'event_study'`; SA: `results.event_study_effects`/`to_dataframe(level='cohort')`; subgroup re-estimation. 8. **Robustness** — compare 2-3 estimators (CS vs SA vs BJS), MUST report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds. 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 645d577c..7601a8e9 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -617,6 +617,8 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. +- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). 
Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`, targeting the equal-weight average over all post-treatment horizons (`l_vec=None`). R's HonestDiD defaults to the first post/on-impact effect; use `compute_honest_did(results, ...)` with a custom `l_vec` to match that behavior. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning. + - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. 
**Reference implementation(s):** @@ -625,7 +627,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param **Requirements checklist:** - [x] Single class `ChaisemartinDHaultfoeuille` (alias `DCDH`); not a family -- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for remaining parameters (`aggregate`, `honest_did`, `survey_design`); Phase 3 gates lifted for `controls`, `trends_linear`, `trends_nonparam` +- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for remaining parameters (`aggregate`, `survey_design`); Phase 3 gates lifted for `controls`, `trends_linear`, `trends_nonparam`, `honest_did` - [x] `DID_M` point estimate with cohort-recentered analytical SE - [x] Joiners-only `DID_+` and leavers-only `DID_-` decompositions with their own inference - [x] Single-lag placebo `DID_M^pl` (point estimate; SE deferred to Phase 2) @@ -645,6 +647,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - [x] State-set-specific trends via control-pool restriction (Web Appendix Section 1.4) - [x] Heterogeneity testing via saturated OLS (Web Appendix Section 1.5, Lemma 7) - [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6) +- [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface --- diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index e8de230b..94022e78 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -374,8 +374,8 @@ def test_trends_nonparam_requires_lmax(self, data): trends_nonparam="state", ) - def test_honest_did_raises_not_implemented(self, data): - with pytest.raises(NotImplementedError, match="Phase 3"): + def test_honest_did_requires_lmax(self, data): + with pytest.raises(ValueError, match="honest_did=True requires L_max"): self._est().fit( data, outcome="outcome", @@ -2703,6 +2703,46 @@ def 
test_nonparam_with_covariates(self): assert np.isfinite(r.overall_att) assert r.covariate_residuals is not None + def test_trends_nonparam_unequal_support(self): + """Unequal switcher/control support across state sets. + + State A: 3 switchers + 5 controls -> finite effects. + State B: 2 switchers + 0 controls -> empty control pool, groups + excluded at horizons with empty pools (Assumption 14 support-trimming). + """ + rng = np.random.RandomState(99) + rows = [] + n_periods = 6 + # State A: groups 0-7 (0-2 switch at t=3, 3-7 never switch) + for g in range(8): + switches = g < 3 + for t in range(n_periods): + d = 1 if (switches and t >= 3) else 0 + y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "state": "A", + }) + # State B: groups 8-9 (both switch at t=3, NO controls in this set) + for g in range(8, 10): + for t in range(n_periods): + d = 1 if t >= 3 else 0 + y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5) + rows.append({ + "group": g, "period": t, "treatment": d, + "outcome": y, "state": "B", + }) + df = pd.DataFrame(rows) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, trends_nonparam="state", + ) + # Should not error; State A groups contribute, State B excluded + assert np.isfinite(r.overall_att) + assert r.event_study_effects is not None + class TestHeterogeneityTesting: """Heterogeneity testing beta^{het}_l (ROADMAP item 3d).""" @@ -3194,3 +3234,421 @@ def test_normalized_effects_general_formula(self): # For dose 0->2: denominator at l=1 should be ~2 (not 1) denom = r.normalized_effects[1]["denominator"] assert denom > 1.5, f"Denominator should reflect dose=2, got {denom}" + + +# ============================================================================= +# HonestDiD Integration +# ============================================================================= 
class TestHonestDiDIntegration:
    """HonestDiD (Rambachan-Roth 2023) integration on dCDH placebos."""

    @staticmethod
    def _make_data(n_groups=40, n_periods=6, seed=42):
        # Shared fixture: reversible-treatment panel with known effects.
        return generate_reversible_did_data(
            n_groups=n_groups, n_periods=n_periods, seed=seed
        )

    def test_honest_did_basic(self):
        """honest_did=True with L_max>=2 produces HonestDiDResults."""
        from diff_diff.honest_did import HonestDiDResults

        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )
        hd = res.honest_did_results
        assert hd is not None
        assert isinstance(hd, HonestDiDResults)
        assert np.isfinite(hd.ci_lb)
        assert np.isfinite(hd.ci_ub)

    def test_honest_did_requires_lmax(self):
        """honest_did=True with L_max=None raises ValueError."""
        data = self._make_data()
        with pytest.raises(ValueError, match="honest_did=True requires L_max"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                honest_did=True,
            )

    def test_honest_did_rejects_placebo_false(self):
        """honest_did=True with placebo=False raises ValueError."""
        data = self._make_data()
        with pytest.raises(ValueError, match="placebo=False"):
            ChaisemartinDHaultfoeuille(seed=1, placebo=False).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )

    def test_honest_did_standalone(self):
        """compute_honest_did() on dCDH results matches honest_did=True."""
        from diff_diff.honest_did import compute_honest_did

        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            auto = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )
            plain = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2,
            )
            manual = compute_honest_did(
                plain, method="relative_magnitude", M=1.0
            )
        # Deterministic - bitwise identical
        np.testing.assert_allclose(
            auto.honest_did_results.ci_lb, manual.ci_lb, rtol=0
        )
        np.testing.assert_allclose(
            auto.honest_did_results.ci_ub, manual.ci_ub, rtol=0
        )

    def test_honest_did_with_controls(self):
        """HonestDiD runs on DID^X placebos."""
        data = self._make_data(n_periods=6)
        data["X1"] = np.random.RandomState(77).normal(0, 1, len(data))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                controls=["X1"], L_max=2, honest_did=True,
            )
        assert res.honest_did_results is not None
        assert np.isfinite(res.honest_did_results.ci_lb)

    def test_honest_did_with_trends_linear(self):
        """HonestDiD on second-differenced DID^{fd} estimand."""
        data = self._make_data(n_periods=7)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                trends_linear=True, L_max=2, honest_did=True,
            )
        # Bounds should be computed on second-differenced estimand
        assert res.honest_did_results is not None
        assert np.isfinite(res.honest_did_results.ci_lb)

    def test_honest_did_sensitivity(self):
        """sensitivity_analysis() on dCDH results."""
        from diff_diff.honest_did import HonestDiD

        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2,
            )
        analyzer = HonestDiD(method="relative_magnitude")
        sens = analyzer.sensitivity_analysis(
            res, M_grid=list(np.linspace(0, 2, 5))
        )
        assert sens.breakdown_M is not None or len(sens.bounds) == 5

    def test_honest_did_smoothness(self):
        """Smoothness method gives different bounds than RM."""
        from diff_diff.honest_did import compute_honest_did

        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2,
            )
        rm_bounds = compute_honest_did(res, method="relative_magnitude", M=1.0)
        sd_bounds = compute_honest_did(res, method="smoothness", M=0.5)
        # Different methods should generally give different bounds
        differs = (
            rm_bounds.ci_lb != sd_bounds.ci_lb
            or rm_bounds.ci_ub != sd_bounds.ci_ub
        )
        assert differs

    def test_honest_did_original_estimate_is_post_average(self):
        """original_estimate targets equal-weight average over post horizons."""
        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )
        hd = res.honest_did_results
        assert hd is not None
        # Equal-weight average = mean of event_study_effects[1..L_max]
        es = res.event_study_effects
        expected = np.mean([es[h]["effect"] for h in sorted(es.keys())])
        np.testing.assert_allclose(hd.original_estimate, expected, rtol=1e-10)

    def test_honest_did_custom_l_vec_on_impact(self):
        """compute_honest_did with l_vec=[1,0] targets on-impact effect."""
        from diff_diff.honest_did import compute_honest_did

        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2,
            )
        # l_vec=[1, 0] targets only DID_1 (on-impact, R's default)
        bounds = compute_honest_did(res, l_vec=np.array([1.0, 0.0]))
        np.testing.assert_allclose(
            bounds.original_estimate,
            res.event_study_effects[1]["effect"],
            rtol=1e-10,
        )

    def test_honest_did_respects_alpha(self):
        """honest_did=True propagates estimator alpha to HonestDiD."""
        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1, alpha=0.10).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )
        assert res.honest_did_results is not None
        assert res.honest_did_results.alpha == 0.10

    def test_honest_did_retains_period_metadata(self):
        """HonestDiDResults stores pre_periods_used and post_periods_used."""
        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )
        hd = res.honest_did_results
        assert hd.pre_periods_used is not None
        assert hd.post_periods_used is not None
        assert all(p < 0 for p in hd.pre_periods_used)
        assert all(p > 0 for p in hd.post_periods_used)
        # Summary renders the retained horizons
        text = res.summary()
        assert "Post horizons used:" in text

    def test_honest_did_custom_l_vec_summary_label(self):
        """summary() renders custom target label when l_vec is overridden."""
        from diff_diff.honest_did import compute_honest_did

        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2,
            )
        # Attach custom-target HonestDiD to results
        res.honest_did_results = compute_honest_did(
            res, l_vec=np.array([1.0, 0.0])
        )
        text = res.summary()
        assert "on-impact" in text.lower()
        assert "Equal-weight" not in text

    def test_honest_did_with_trends_nonparam(self):
        """End-to-end trends_nonparam + honest_did=True (balanced support)."""
        rng = np.random.RandomState(42)
        rows = []
        for g in range(40):
            for t in range(7):
                # Groups 0-19 switch at t=3; four balanced state sets.
                d = int(g < 20 and t >= 3)
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5),
                    "state": g % 4,
                })
        panel = pd.DataFrame(rows)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                panel, "outcome", "group", "period", "treatment",
                L_max=2, trends_nonparam="state", honest_did=True,
            )
        assert res.honest_did_results is not None
        assert np.isfinite(res.honest_did_results.ci_lb)

    def test_honest_did_trends_nonparam_trimming(self):
        """End-to-end: trends_nonparam causes NaN at far horizons, HonestDiD trims.

        State A: switches late (t=5), has never-switching controls.
        State B: switches early (t=2), "controls" switch at t=3 so
        control pool vanishes at h>=2. At L_max=3, h=3 and h=-3 have
        N_l=0 (NaN SE) because State A can't reach h=3 and State B
        has no controls there. HonestDiD extraction drops the NaN
        horizons and retains [-2, -1, 1, 2].
        """
        rng = np.random.RandomState(42)
        n_periods = 7
        rows = []
        # State A: 3 switch at t=5, 4 controls
        for g in range(7):
            for t in range(n_periods):
                d = int(g < 3 and t >= 5)
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.3),
                    "state": "A",
                })
        # State B: 4 switch at t=2, 2 "controls" switch at t=3
        for g in range(7, 13):
            cutoff = 2 if g < 11 else 3
            for t in range(n_periods):
                d = int(t >= cutoff)
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.3),
                    "state": "B",
                })
        panel = pd.DataFrame(rows)

        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                panel, "outcome", "group", "period", "treatment",
                L_max=3, trends_nonparam="state", honest_did=True,
            )
        # h=3 and h=-3 should be NaN (N_l=0 from support trimming)
        assert res.event_study_effects[3]["n_obs"] == 0
        assert res.placebo_event_study[-3]["n_obs"] == 0
        # HonestDiD should still compute on the retained block
        hd = res.honest_did_results
        assert hd is not None
        assert np.isfinite(hd.ci_lb)
        # Retained horizons should exclude the NaN endpoints
        assert -3 not in hd.pre_periods_used
        assert 3 not in hd.post_periods_used
        assert hd.post_periods_used == [1, 2]
        # The placebo-based pre-period warning should have been emitted
        placebo_warns = [
            w for w in caught
            if "placebo" in str(w.message).lower()
            and "pre-period" in str(w.message).lower()
        ]
        assert len(placebo_warns) >= 1

    def test_honest_did_with_bootstrap(self):
        """honest_did=True works with bootstrap-fitted results."""
        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1, n_bootstrap=49).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )
        assert res.honest_did_results is not None
        assert np.isfinite(res.honest_did_results.ci_lb)
        assert res.honest_did_results.post_periods_used == [1, 2]
class TestSummaryPhase3:
    """Verify summary() renders Phase 3 result blocks."""

    @staticmethod
    def _make_data(n_groups=40, n_periods=6, seed=42):
        # Shared fixture: reversible-treatment panel with known effects.
        return generate_reversible_did_data(
            n_groups=n_groups, n_periods=n_periods, seed=seed
        )

    def test_summary_renders_covariate_diagnostics(self):
        """Covariate Adjustment section appears in summary()."""
        data = self._make_data()
        data["X1"] = np.random.RandomState(77).normal(0, 1, len(data))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                controls=["X1"], L_max=1,
            )
        assert "Covariate Adjustment" in res.summary()

    def test_summary_renders_linear_trends(self):
        """Cumulated Level Effects section appears in summary()."""
        data = self._make_data(n_periods=7)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                trends_linear=True, L_max=2,
            )
        assert "Cumulated Level Effects" in res.summary()

    def test_summary_renders_heterogeneity(self):
        """Heterogeneity Test section appears in summary()."""
        rng = np.random.RandomState(42)
        rows = []
        for g in range(40):
            # Binary moderator: first half of groups carry x_g=1; the
            # treatment effect is 3.0 larger for them.
            x_g = 1 if g < 20 else 0
            for t in range(6):
                d = int(g < 30 and t >= 3)
                y = 10 + 2.0 * t + 5.0 * d + 3.0 * x_g * d + rng.normal(0, 0.5)
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": y, "het_x": x_g,
                })
        panel = pd.DataFrame(rows)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                panel, "outcome", "group", "period", "treatment",
                L_max=1, heterogeneity="het_x",
            )
        assert "Heterogeneity Test" in res.summary()

    def test_summary_renders_design2(self):
        """Design-2 section appears in summary()."""
        rng = np.random.RandomState(42)
        rows = []
        for g in range(30):
            for t in range(8):
                if g < 10:
                    d = 1 if 3 <= t < 6 else 0  # join then leave
                elif g < 20:
                    d = 1 if t >= 3 else 0  # join only
                else:
                    d = 0  # never switch
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": 10 + t + 5.0 * d + rng.normal(0, 0.5),
                })
        panel = pd.DataFrame(rows)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(
                seed=1, drop_larger_lower=False
            ).fit(
                panel, "outcome", "group", "period", "treatment",
                L_max=1, design2=True,
            )
        assert "Design-2" in res.summary()

    def test_summary_renders_honest_did(self):
        """HonestDiD Sensitivity section appears in summary()."""
        data = self._make_data()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1).fit(
                data, "outcome", "group", "period", "treatment",
                L_max=2, honest_did=True,
            )
        assert "HonestDiD Sensitivity" in res.summary()
class TestDCDHIntegration:
    """HonestDiD integration with ChaisemartinDHaultfoeuille results."""

    @staticmethod
    def _fit_dcdh(n_groups=40, n_periods=6, seed=42, L_max=2):
        # Local imports: this module (test_honest_did.py) does not import
        # the dCDH estimator or data generator at the top level.
        import warnings

        from diff_diff import ChaisemartinDHaultfoeuille
        from diff_diff.prep import generate_reversible_did_data

        panel = generate_reversible_did_data(
            n_groups=n_groups, n_periods=n_periods, seed=seed
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return ChaisemartinDHaultfoeuille(seed=1).fit(
                panel, "outcome", "group", "period", "treatment",
                L_max=L_max,
            )

    def test_dcdh_integration(self):
        """compute_honest_did works on dCDH results (mirrors CS pattern)."""
        results = self._fit_dcdh()
        bounds = compute_honest_did(results, method="relative_magnitude", M=1.0)
        assert isinstance(bounds, HonestDiDResults)
        assert np.isfinite(bounds.ci_lb)
        assert np.isfinite(bounds.ci_ub)
        assert bounds.method == "relative_magnitude"

    def test_dcdh_extraction(self):
        """_extract_event_study_params returns correct shapes for dCDH."""
        results = self._fit_dcdh()
        extracted = _extract_event_study_params(results)
        beta_hat, sigma, n_pre, n_post, pre_t, post_t, df_s = extracted
        n_total = n_pre + n_post
        assert n_pre >= 1
        assert n_post >= 1
        assert beta_hat.shape == (n_total,)
        assert sigma.shape == (n_total, n_total)
        assert all(t < 0 for t in pre_t)
        assert all(t > 0 for t in post_t)
        assert df_s is None  # dCDH has no survey support

    def test_dcdh_no_placebos_raises(self):
        """dCDH results without placebos raise ValueError."""
        import warnings

        from diff_diff import ChaisemartinDHaultfoeuille
        from diff_diff.prep import generate_reversible_did_data

        panel = generate_reversible_did_data(n_groups=20, n_periods=4, seed=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = ChaisemartinDHaultfoeuille(seed=1, placebo=False).fit(
                panel, "outcome", "group", "period", "treatment",
            )
        with pytest.raises(ValueError, match="placebo_event_study"):
            compute_honest_did(res)

    def test_dcdh_emits_placebo_warning(self):
        """compute_honest_did on dCDH emits warning about placebo-based pre-periods."""
        import warnings

        results = self._fit_dcdh()
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            compute_honest_did(results)
        placebo_warnings = [
            w for w in caught
            if "placebo" in str(w.message).lower()
            and "pre-period" in str(w.message).lower()
        ]
        assert len(placebo_warnings) >= 1, (
            "Expected a UserWarning about placebo-based pre-period inputs"
        )

    def test_dcdh_empty_consecutive_block_raises(self):
        """ValueError when all placebos have NaN SE (no valid pre-periods)."""
        import warnings

        # Fit real results, then corrupt placebo SEs to NaN
        results = self._fit_dcdh()
        for h in results.placebo_event_study:
            results.placebo_event_study[h]["se"] = float("nan")

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            with pytest.raises(ValueError, match="No placebo horizons with finite SEs"):
                compute_honest_did(results)

    def test_dcdh_standalone_surfaces_target_metadata(self):
        """Standalone HonestDiDResults summary/to_dict include target metadata."""
        results = self._fit_dcdh()
        bounds = compute_honest_did(results, l_vec=np.array([1.0, 0.0]))
        # summary() includes target and period metadata
        text = bounds.summary()
        assert "on-impact" in text.lower()
        assert "Post horizons used:" in text
        assert "Pre horizons used:" in text
        # to_dict() includes the fields
        payload = bounds.to_dict()
        assert "target_label" in payload
        assert "pre_periods_used" in payload
        assert "post_periods_used" in payload
        assert payload["post_periods_used"] == [1, 2]

    def test_dcdh_interior_gap_triggers_trimming_warning(self):
        """Non-consecutive horizons after SE filtering emit trimming warning."""
        import warnings

        # L_max=3 gives horizons [-3,-2,-1,1,2,3]. Corrupt h=-2 to create
        # interior gap [-3, -1], which triggers consecutive-block trimming
        # that drops -3 and keeps only [-1].
        results = self._fit_dcdh(n_periods=8, L_max=3)
        results.placebo_event_study[-2]["se"] = float("nan")

        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            bounds = compute_honest_did(results)
        trim_warns = [
            w for w in caught
            if "dropping non-consecutive" in str(w.message).lower()
        ]
        assert len(trim_warns) >= 1, (
            "Expected a warning about dropping non-consecutive horizons"
        )
        # Retained pre should be [-1] only (h=-3 dropped due to gap at -2)
        assert bounds.pre_periods_used == [-1]

    def test_dcdh_missing_boundary_minus1_raises(self):
        """ValueError when horizon -1 has NaN SE (boundary required)."""
        import warnings

        results = self._fit_dcdh()
        # Corrupt only horizon -1 SE; leave -2 intact
        results.placebo_event_study[-1]["se"] = float("nan")

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            with pytest.raises(ValueError, match="requires horizon -1"):
                compute_honest_did(results)

    def test_dcdh_missing_boundary_plus1_raises(self):
        """ValueError when horizon +1 has NaN SE (boundary required)."""
        import warnings

        results = self._fit_dcdh()
        # Corrupt only horizon +1 SE
        results.event_study_effects[1]["se"] = float("nan")

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # NOTE(review): match string reconstructed from a span cut at the
            # chunk boundary ("requires horizon" / "1") — confirm the exact
            # message ("horizon 1" vs "horizon +1") against honest_did.py.
            with pytest.raises(ValueError, match="requires horizon 1"):
                compute_honest_did(results)