From cb58478381d627b9a2c776380f5325b922a7d0cc Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 08:24:53 -0400
Subject: [PATCH 01/12] Add HonestDiD integration, summary() Phase 3 blocks,
 trends_nonparam regression test

- Add ChaisemartinDHaultfoeuilleResults extraction to _extract_event_study_params()
  in honest_did.py (maps placebo horizons to pre-periods, event study to post-periods)
- Lift honest_did gate in fit(), add early L_max>=1 validation, post-computation
  compute_honest_did() call with fallback warning on solver failures
- Add 5 new summary() sections: covariate diagnostics, cumulated level effects,
  heterogeneity test, design-2 descriptive, HonestDiD sensitivity bounds
- Update honest_did_results field type annotation and docstring
- 17 new tests: 7 HonestDiD integration, 5 summary rendering, 3 honest_did.py
  extraction/integration, 1 trends_nonparam unequal-support, 1 gate update
- REGISTRY.md: HonestDiD note, checklist update; ROADMAP: 3g shipped

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 ROADMAP.md                                    |   2 +-
 diff_diff/chaisemartin_dhaultfoeuille.py      |  36 ++-
 .../chaisemartin_dhaultfoeuille_results.py    | 137 ++++++++-
 diff_diff/honest_did.py                       | 131 ++++++++-
 docs/methodology/REGISTRY.md                  |   5 +-
 tests/test_chaisemartin_dhaultfoeuille.py     | 273 +++++++++++++++++-
 tests/test_honest_did.py                      |  65 +++++
 7 files changed, 634 insertions(+), 15 deletions(-)

diff --git a/ROADMAP.md b/ROADMAP.md
index db56947c..c24b33a0 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -173,7 +173,7 @@ The dynamic companion paper subsumes the AER 2020 paper: `DID_1 = DID_M`. The si
 | **3d.** Heterogeneity testing `beta^{het}_l` (Web Appendix Section 1.5) | LOW | Shipped (PR B) |
 | **3e.** Design-2 switch-in / switch-out separation (Web Appendix Section 1.6) | LOW | Shipped (PR B; convenience wrapper) |
 | **3f.** Non-binary treatment support (the formula already handles it; this row is documentation + tests) | MEDIUM | Shipped (PR #300; also ships placebo SE, L_max=1 per-group path, parity SE assertions) |
-| **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Not started |
+| **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Shipped (PR C) |
 | **3h.** **Single comprehensive tutorial notebook** covering all three phases — Favara-Imbs (2015) banking deregulation replication as the headline application, with comparison plots vs LP / TWFE | HIGH | Not started |
 | **3i.** Parity tests vs `did_multiplegt_dyn` for covariate and extension specifications | HIGH | Shipped (PR B; controls, trends_lin, combined) |
 
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
index 9959ad2d..9dfa0627 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -946,6 +946,13 @@ def fit(
                     f"is {n_post_baseline}."
                 )
 
+        if honest_did and L_max is None:
+            raise ValueError(
+                "honest_did=True requires L_max >= 1 for multi-horizon placebos. "
+                "Set L_max to compute DID^{pl}_l placebos that HonestDiD uses as "
+                "pre-period coefficients."
+            )
+
         # Pivot to (group x time) matrices for vectorized computations
         d_pivot = cell.pivot(index=group, columns=time, values="d_gt").reindex(
             index=all_groups, columns=all_periods
@@ -2394,6 +2401,27 @@ def fit(
             _estimator_ref=self,
         )
 
+        # ------------------------------------------------------------------
+        # HonestDiD integration (when honest_did=True)
+        # ------------------------------------------------------------------
+        if honest_did and results.placebo_event_study:
+            try:
+                from diff_diff.honest_did import compute_honest_did
+
+                results.honest_did_results = compute_honest_did(
+                    results, method="relative_magnitude", M=1.0
+                )
+            except (ValueError, np.linalg.LinAlgError) as exc:
+                warnings.warn(
+                    f"HonestDiD computation failed: {exc}. "
+                    f"results.honest_did_results will be None. "
+                    f"You can retry with compute_honest_did(results, ...) "
+                    f"using different parameters.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                results.honest_did_results = None
+
         self.results_ = results
         self.is_fitted_ = True
         return results
@@ -2432,12 +2460,8 @@ def _check_forward_compat_gates(
     # Validation (L_max >= 1, n_periods >= 3 required) is in fit().
     # trends_nonparam gate lifted - state-set trends implemented.
     # Validation (L_max >= 1, column exists, time-invariant) is in fit().
-    if honest_did:
-        raise NotImplementedError(
-            "HonestDiD integration for dCDH is reserved for Phase 3, applied to "
-            "the placebo DID^{pl}_l output. Phase 1 provides only the placebo "
-            "point estimate via results.placebo_effect. See ROADMAP.md Phase 3."
-        )
+    # honest_did gate lifted - integration implemented.
+    # Validation (L_max >= 1 required) is in fit() after L_max detection.
 
 
 def _drop_crossing_cells(
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
index 9c85b438..1a363e24 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille_results.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -18,8 +18,13 @@
   NBER Working Paper 29873.
 """
 
+from __future__ import annotations
+
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+
+if TYPE_CHECKING:
+    from diff_diff.honest_did import HonestDiDResults
 
 import numpy as np
 import pandas as pd
@@ -331,8 +336,11 @@ class ChaisemartinDHaultfoeuilleResults:
     design2_effects : dict, optional
         Design-2 switch-in/switch-out descriptive summary. Populated
         when ``design2=True``.
-    honest_did_results : Any, optional
-        Reserved for HonestDiD integration on placebos.
+    honest_did_results : HonestDiDResults, optional
+        HonestDiD sensitivity analysis bounds (Rambachan & Roth 2023).
+        Set by ``fit(honest_did=True)``; post-hoc, obtain an equivalent object
+        from the return value of ``compute_honest_did(results)``. Contains
+        identified set bounds, robust confidence intervals, and breakdown analysis.
     survey_metadata : Any, optional
         Always ``None`` in Phase 1 — survey integration is deferred to a
         separate effort after all phases ship.
@@ -415,7 +423,7 @@ class ChaisemartinDHaultfoeuilleResults:
     linear_trends_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False)
     heterogeneity_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False)
     design2_effects: Optional[Dict[str, Any]] = field(default=None, repr=False)
-    honest_did_results: Optional[Any] = field(default=None, repr=False)
+    honest_did_results: Optional["HonestDiDResults"] = field(default=None, repr=False)
 
     # --- Repr-suppressed metadata ---
     survey_metadata: Optional[Any] = field(default=None, repr=False)
@@ -798,6 +806,127 @@ def summary(self, alpha: Optional[float] = None) -> str:
 
             lines.extend([""])
 
+        # --- Covariate adjustment diagnostics (DID^X) ---
+        if self.covariate_residuals is not None:
+            cov_df = self.covariate_residuals
+            control_names = sorted(cov_df["covariate"].unique())
+            n_baselines = cov_df["baseline_treatment"].nunique()
+            failed = int((cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum())
+            lines.extend(
+                [
+                    thin,
+                    "Covariate Adjustment (DID^X) Diagnostics".center(width),
+                    thin,
+                    f"{'Controls:':<35} {', '.join(control_names):>10}",
+                    f"{'Baselines residualized:':<35} {n_baselines:>10}",
+                    f"{'Failed strata:':<35} {failed:>10}",
+                    thin,
+                    "",
+                ]
+            )
+
+        # --- Linear trends cumulated level effects ---
+        if self.linear_trends_effects is not None:
+            lines.extend(
+                [
+                    thin,
+                    "Cumulated Level Effects (DID^{fd}, trends_linear)".center(width),
+                    thin,
+                    header_row,
+                    thin,
+                ]
+            )
+            for l_h in sorted(self.linear_trends_effects.keys()):
+                entry = self.linear_trends_effects[l_h]
+                lines.append(
+                    _format_inference_row(
+                        f"Level_{l_h}",
+                        entry["effect"],
+                        entry["se"],
+                        entry["t_stat"],
+                        entry["p_value"],
+                    )
+                )
+            lines.extend([thin, ""])
+
+        # --- Heterogeneity test ---
+        if self.heterogeneity_effects is not None:
+            lines.extend(
+                [
+                    thin,
+                    "Heterogeneity Test (Section 1.5, partial)".center(width),
+                    thin,
+                    f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} "
+                    f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
+                    thin,
+                ]
+            )
+            for l_h in sorted(self.heterogeneity_effects.keys()):
+                entry = self.heterogeneity_effects[l_h]
+                lines.append(
+                    _format_inference_row(
+                        f"l={l_h}",
+                        entry["beta"],
+                        entry["se"],
+                        entry["t_stat"],
+                        entry["p_value"],
+                    )
+                )
+            lines.extend(
+                [
+                    thin,
+                    "Note: Post-treatment regressions only (no placebo/joint test).",
+                    "",
+                ]
+            )
+
+        # --- Design-2 switch-in / switch-out ---
+        if self.design2_effects is not None:
+            d2 = self.design2_effects
+            si = d2.get("switch_in", {})
+            so = d2.get("switch_out", {})
+            lines.extend(
+                [
+                    thin,
+                    "Design-2: Switch-In / Switch-Out (Section 1.6)".center(width),
+                    thin,
+                    f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}",
+                    f"{'Switch-in effect (mean):':<35} "
+                    f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}"
+                    f"  (N={si.get('n_groups', 0)})",
+                    f"{'Switch-out effect (mean):':<35} "
+                    f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}"
+                    f"  (N={so.get('n_groups', 0)})",
+                    thin,
+                    "",
+                ]
+            )
+
+        # --- HonestDiD sensitivity ---
+        if self.honest_did_results is not None:
+            hd = self.honest_did_results
+            method_label = hd.method.replace("_", " ").title()
+            m_val = hd.M
+            sig_label = "Yes" if hd.is_significant else "No"
+            conf_pct = int((1 - hd.alpha) * 100)
+            lines.extend(
+                [
+                    thin,
+                    "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width),
+                    thin,
+                    f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
+                    f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
+                    f"{'Identified set:':<35} "
+                    f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
+                    f"{'Robust ' + str(conf_pct) + '% CI:':<35} "
+                    f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]",
+                    f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} "
+                    f"{sig_label:>10}",
+                    thin,
+                    "",
+                ]
+            )
+
         # --- TWFE diagnostic ---
         if self.twfe_beta_fe is not None:
             lines.extend(
diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
index 02f84a83..e7de6a3c 100644
--- a/diff_diff/honest_did.py
+++ b/diff_diff/honest_did.py
@@ -817,9 +817,138 @@ def _extract_event_study_params(
         except ImportError:
             pass
 
+        # Try ChaisemartinDHaultfoeuilleResults (dCDH estimator)
+        try:
+            from diff_diff.chaisemartin_dhaultfoeuille_results import (
+                ChaisemartinDHaultfoeuilleResults,
+            )
+
+            if isinstance(results, ChaisemartinDHaultfoeuilleResults):
+                if results.placebo_event_study is None:
+                    raise ValueError(
+                        "ChaisemartinDHaultfoeuilleResults must have placebo_event_study "
+                        "for HonestDiD. Re-run ChaisemartinDHaultfoeuille.fit() with "
+                        "L_max >= 1 to compute multi-horizon placebos."
+                    )
+                if results.event_study_effects is None:
+                    raise ValueError(
+                        "ChaisemartinDHaultfoeuilleResults must have event_study_effects "
+                        "for HonestDiD."
+                    )
+
+                # Filter for finite SEs in both surfaces
+                placebo_finite = {
+                    h: data
+                    for h, data in results.placebo_event_study.items()
+                    if np.isfinite(data.get("se", np.nan))
+                }
+                effects_finite = {
+                    h: data
+                    for h, data in results.event_study_effects.items()
+                    if np.isfinite(data.get("se", np.nan))
+                }
+
+                pre_times = sorted(placebo_finite.keys())   # -P, ..., -1
+                post_times = sorted(effects_finite.keys())   # 1, ..., L_max
+
+                if len(pre_times) == 0:
+                    raise ValueError(
+                        "No placebo horizons with finite SEs found in dCDH results. "
+                        "HonestDiD requires at least one identified pre-period "
+                        "coefficient."
+                    )
+                if len(post_times) == 0:
+                    raise ValueError(
+                        "No event study horizons with finite SEs found in dCDH results. "
+                        "HonestDiD requires at least one post-period coefficient."
+                    )
+
+                # Consecutiveness check: more permissive than CS because
+                # trends_nonparam support-trimming can create legitimate gaps.
+                # Filter to the largest consecutive block spanning the -1/+1
+                # boundary; warn about dropped horizons.
+                def _largest_consecutive_block(times, boundary_val):
+                    """Return the consecutive run of ``times`` containing boundary_val.
+
+                    ``times`` must be sorted ascending. Returns ``[]`` when
+                    ``boundary_val`` is absent (including empty ``times``): no
+                    block can then be adjacent to the treatment boundary.
+                    """
+                    if boundary_val not in times:
+                        return []
+                    # Expand outward from boundary_val
+                    block = [boundary_val]
+                    idx = times.index(boundary_val)
+                    # Expand left while each earlier horizon is adjacent
+                    for i in range(idx - 1, -1, -1):
+                        if times[i] != block[0] - 1:
+                            break
+                        block.insert(0, times[i])
+                    # Expand right while each later horizon is adjacent
+                    for i in range(idx + 1, len(times)):
+                        if times[i] != block[-1] + 1:
+                            break
+                        block.append(times[i])
+                    return block
+
+                pre_consec = _largest_consecutive_block(pre_times, -1)
+                post_consec = _largest_consecutive_block(post_times, 1)
+
+                dropped_pre = set(pre_times) - set(pre_consec)
+                dropped_post = set(post_times) - set(post_consec)
+
+                if dropped_pre or dropped_post:
+                    import warnings
+
+                    dropped = sorted(dropped_pre | dropped_post)
+                    warnings.warn(
+                        f"HonestDiD requires a consecutive event-time grid. "
+                        f"Dropping non-consecutive horizons {dropped} from dCDH "
+                        f"results. This can happen when trends_nonparam "
+                        f"support-trimming removes horizons. Retained: "
+                        f"pre={pre_consec}, post={post_consec}.",
+                        UserWarning,
+                        stacklevel=3,
+                    )
+                    pre_times = pre_consec
+                    post_times = post_consec
+
+                if len(pre_times) == 0 or len(post_times) == 0:
+                    raise ValueError(
+                        "After filtering for consecutive horizons, no pre- or "
+                        "post-periods remain. Cannot compute HonestDiD bounds."
+                    )
+
+                # Build beta_hat and sigma (diagonal - no full VCV for dCDH)
+                all_times = pre_times + post_times
+                effects = []
+                ses = []
+                for h in pre_times:
+                    effects.append(placebo_finite[h]["effect"])
+                    ses.append(placebo_finite[h]["se"])
+                for h in post_times:
+                    effects.append(effects_finite[h]["effect"])
+                    ses.append(effects_finite[h]["se"])
+
+                beta_hat = np.array(effects)
+                sigma = np.diag(np.array(ses) ** 2)
+
+                return (
+                    beta_hat,
+                    sigma,
+                    len(pre_times),
+                    len(post_times),
+                    pre_times,
+                    post_times,
+                    None,  # df_survey: dCDH has no survey support
+                )
+        except ImportError:
+            pass
+
         raise TypeError(
             f"Unsupported results type: {type(results)}. "
-            "Expected MultiPeriodDiDResults or CallawaySantAnnaResults."
+            "Expected MultiPeriodDiDResults, CallawaySantAnnaResults, "
+            "or ChaisemartinDHaultfoeuilleResults."
         )
 
 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 645d577c..9d39725c 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -617,6 +617,8 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 
 - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`.
 
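+- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. Placebo horizons `DID^{pl}_l` are used as pre-period coefficients and event-study horizons as post-period coefficients; horizons with non-finite SEs are excluded. The covariance matrix is diagonal, built from the per-horizon SEs (no full VCV is available for dCDH). When called from `fit()`, the relative-magnitudes restriction (DeltaRM) with Mbar=1.0 is used; if the computation raises `ValueError` or `LinAlgError`, a `UserWarning` is emitted and `honest_did_results` is left as `None`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by keeping only the consecutive horizons adjacent to the treatment boundary (`-1` on the placebo side, `+1` on the event-study side) and emitting a `UserWarning` that lists the dropped horizons.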
+
 - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups.
 
 **Reference implementation(s):**
@@ -625,7 +627,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 
 **Requirements checklist:**
 - [x] Single class `ChaisemartinDHaultfoeuille` (alias `DCDH`); not a family
-- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for remaining parameters (`aggregate`, `honest_did`, `survey_design`); Phase 3 gates lifted for `controls`, `trends_linear`, `trends_nonparam`
+- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for remaining parameters (`aggregate`, `survey_design`); Phase 3 gates lifted for `controls`, `trends_linear`, `trends_nonparam`, `honest_did`
 - [x] `DID_M` point estimate with cohort-recentered analytical SE
 - [x] Joiners-only `DID_+` and leavers-only `DID_-` decompositions with their own inference
 - [x] Single-lag placebo `DID_M^pl` (point estimate; SE deferred to Phase 2)
@@ -645,6 +647,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 - [x] State-set-specific trends via control-pool restriction (Web Appendix Section 1.4)
 - [x] Heterogeneity testing via saturated OLS (Web Appendix Section 1.5, Lemma 7)
 - [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6)
+- [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface
 
 ---
 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index e8de230b..29101d20 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -374,8 +374,8 @@ def test_trends_nonparam_requires_lmax(self, data):
                 trends_nonparam="state",
             )
 
-    def test_honest_did_raises_not_implemented(self, data):
-        with pytest.raises(NotImplementedError, match="Phase 3"):
+    def test_honest_did_requires_lmax(self, data):
+        with pytest.raises(ValueError, match="honest_did=True requires L_max"):
             self._est().fit(
                 data,
                 outcome="outcome",
@@ -2703,6 +2703,46 @@ def test_nonparam_with_covariates(self):
         assert np.isfinite(r.overall_att)
         assert r.covariate_residuals is not None
 
+    def test_trends_nonparam_unequal_support(self):
+        """Smoke-test fit() when one state set has switchers but no controls.
+
+        State A: 3 switchers + 5 never-switchers. State B: 2 switchers and
+        no never-switchers, so its control pool is empty (Assumption 14
+        support-trimming is expected to drop it; that is not asserted here).
+        """
+        rng = np.random.RandomState(99)
+        rows = []
+        n_periods = 6
+        # State A: groups 0-7 (groups 0-2 switch on at t=3, groups 3-7 never switch)
+        for g in range(8):
+            switches = g < 3
+            for t in range(n_periods):
+                d = 1 if (switches and t >= 3) else 0
+                y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5)
+                rows.append({
+                    "group": g, "period": t, "treatment": d,
+                    "outcome": y, "state": "A",
+                })
+        # State B: groups 8-9 (both switch on at t=3, so no never-switchers here)
+        for g in range(8, 10):
+            for t in range(n_periods):
+                d = 1 if t >= 3 else 0
+                y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5)
+                rows.append({
+                    "group": g, "period": t, "treatment": d,
+                    "outcome": y, "state": "B",
+                })
+        df = pd.DataFrame(rows)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, trends_nonparam="state",
+            )
+        # Smoke check only: fit() succeeds with a finite ATT; State B exclusion is not asserted
+        assert np.isfinite(r.overall_att)
+        assert r.event_study_effects is not None
+
 
 class TestHeterogeneityTesting:
     """Heterogeneity testing beta^{het}_l (ROADMAP item 3d)."""
@@ -3194,3 +3234,232 @@ def test_normalized_effects_general_formula(self):
             # For dose 0->2: denominator at l=1 should be ~2 (not 1)
             denom = r.normalized_effects[1]["denominator"]
             assert denom > 1.5, f"Denominator should reflect dose=2, got {denom}"
+
+
+# =============================================================================
+# HonestDiD Integration
+# =============================================================================
+
+
+class TestHonestDiDIntegration:
+    """Checks fit(honest_did=True) and compute_honest_did() on dCDH results."""
+
+    @staticmethod
+    def _make_data(n_groups=40, n_periods=6, seed=42):
+        return generate_reversible_did_data(
+            n_groups=n_groups, n_periods=n_periods, seed=seed
+        )
+
+    def test_honest_did_basic(self):
+        """fit(L_max=2, honest_did=True) attaches HonestDiDResults with a finite CI."""
+        from diff_diff.honest_did import HonestDiDResults
+
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+        assert r.honest_did_results is not None
+        assert isinstance(r.honest_did_results, HonestDiDResults)
+        assert np.isfinite(r.honest_did_results.ci_lb)
+        assert np.isfinite(r.honest_did_results.ci_ub)
+
+    def test_honest_did_requires_lmax(self):
+        """honest_did=True without an L_max argument raises ValueError."""
+        df = self._make_data()
+        with pytest.raises(ValueError, match="honest_did=True requires L_max"):
+            ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                honest_did=True,
+            )
+
+    def test_honest_did_standalone(self):
+        """Post-hoc compute_honest_did() reproduces the CI from fit(honest_did=True)."""
+        from diff_diff.honest_did import compute_honest_did
+
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r_auto = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+            r_plain = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2,
+            )
+            r_manual = compute_honest_did(
+                r_plain, method="relative_magnitude", M=1.0
+            )
+        # rtol=0 (atol defaults to 0) requires exactly equal CI endpoints
+        np.testing.assert_allclose(
+            r_auto.honest_did_results.ci_lb, r_manual.ci_lb, rtol=0
+        )
+        np.testing.assert_allclose(
+            r_auto.honest_did_results.ci_ub, r_manual.ci_ub, rtol=0
+        )
+
+    def test_honest_did_with_controls(self):
+        """fit(controls=["X1"], honest_did=True) yields a finite lower CI bound."""
+        df = self._make_data(n_periods=6)
+        df["X1"] = np.random.RandomState(77).normal(0, 1, len(df))
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                controls=["X1"], L_max=2, honest_did=True,
+            )
+        assert r.honest_did_results is not None
+        assert np.isfinite(r.honest_did_results.ci_lb)
+
+    def test_honest_did_with_trends_linear(self):
+        """fit(trends_linear=True, honest_did=True) yields a finite lower CI bound."""
+        df = self._make_data(n_periods=7)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                trends_linear=True, L_max=2, honest_did=True,
+            )
+        # Only existence and finiteness are asserted, not which estimand is bounded
+        assert r.honest_did_results is not None
+        assert np.isfinite(r.honest_did_results.ci_lb)
+
+    def test_honest_did_sensitivity(self):
+        """HonestDiD.sensitivity_analysis() runs on dCDH results over a 5-point M grid."""
+        from diff_diff.honest_did import HonestDiD
+
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2,
+            )
+        honest = HonestDiD(method="relative_magnitude")
+        sens = honest.sensitivity_analysis(
+            r, M_grid=list(np.linspace(0, 2, 5))
+        )
+        assert sens.breakdown_M is not None or len(sens.bounds) == 5
+
+    def test_honest_did_smoothness(self):
+        """Smoothness (M=0.5) and relative-magnitude (M=1.0) CIs must not coincide."""
+        from diff_diff.honest_did import compute_honest_did
+
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2,
+            )
+        rm_bounds = compute_honest_did(r, method="relative_magnitude", M=1.0)
+        sd_bounds = compute_honest_did(r, method="smoothness", M=0.5)
+        # Passes if at least one CI endpoint differs between the two methods
+        assert rm_bounds.ci_lb != sd_bounds.ci_lb or rm_bounds.ci_ub != sd_bounds.ci_ub
+
+
+# =============================================================================
+# Summary Phase 3 Rendering
+# =============================================================================
+
+
+class TestSummaryPhase3:
+    """Each test asserts only that a Phase 3 section title appears in summary()."""
+
+    @staticmethod
+    def _make_data(n_groups=40, n_periods=6, seed=42):
+        return generate_reversible_did_data(
+            n_groups=n_groups, n_periods=n_periods, seed=seed
+        )
+
+    def test_summary_renders_covariate_diagnostics(self):
+        """With controls=["X1"], summary() contains "Covariate Adjustment"."""
+        df = self._make_data()
+        df["X1"] = np.random.RandomState(77).normal(0, 1, len(df))
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                controls=["X1"], L_max=1,
+            )
+        text = r.summary()
+        assert "Covariate Adjustment" in text
+
+    def test_summary_renders_linear_trends(self):
+        """With trends_linear=True, summary() contains "Cumulated Level Effects"."""
+        df = self._make_data(n_periods=7)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                trends_linear=True, L_max=2,
+            )
+        text = r.summary()
+        assert "Cumulated Level Effects" in text
+
+    def test_summary_renders_heterogeneity(self):
+        """With heterogeneity="het_x", summary() contains "Heterogeneity Test"."""
+        rng = np.random.RandomState(42)
+        rows = []
+        for g in range(40):
+            x_g = 1 if g < 20 else 0
+            switches = g < 30
+            for t in range(6):
+                d = 1 if (switches and t >= 3) else 0
+                y = 10 + 2.0 * t + 5.0 * d + 3.0 * x_g * d + rng.normal(0, 0.5)
+                rows.append({
+                    "group": g, "period": t, "treatment": d,
+                    "outcome": y, "het_x": x_g,
+                })
+        df = pd.DataFrame(rows)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=1, heterogeneity="het_x",
+            )
+        text = r.summary()
+        assert "Heterogeneity Test" in text
+
+    def test_summary_renders_design2(self):
+        """With design2=True and drop_larger_lower=False, summary() contains "Design-2"."""
+        rng = np.random.RandomState(42)
+        rows = []
+        for g in range(30):
+            for t in range(8):
+                if g < 10:
+                    d = 1 if 3 <= t < 6 else 0  # treated for t in [3, 6): join then leave
+                elif g < 20:
+                    d = 1 if t >= 3 else 0  # treated from t=3 onward: join only
+                else:
+                    d = 0  # never treated
+                y = 10 + t + 5.0 * d + rng.normal(0, 0.5)
+                rows.append({
+                    "group": g, "period": t, "treatment": d, "outcome": y,
+                })
+        df = pd.DataFrame(rows)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(
+                seed=1, drop_larger_lower=False
+            ).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=1, design2=True,
+            )
+        text = r.summary()
+        assert "Design-2" in text
+
+    def test_summary_renders_honest_did(self):
+        """With honest_did=True, summary() contains "HonestDiD Sensitivity"."""
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+        text = r.summary()
+        assert "HonestDiD Sensitivity" in text
diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py
index 74330897..7d7c5b84 100644
--- a/tests/test_honest_did.py
+++ b/tests/test_honest_did.py
@@ -1333,3 +1333,68 @@ def test_sensitivity_results_has_plot_method(self, mock_multiperiod_results):
 
         assert hasattr(sensitivity, "plot")
         assert callable(sensitivity.plot)
+
+
+# =============================================================================
+# dCDH Integration Tests
+# =============================================================================
+
+
+class TestDCDHIntegration:
+    """HonestDiD integration with ChaisemartinDHaultfoeuille results."""
+
+    @staticmethod
+    def _fit_dcdh(n_groups=40, n_periods=6, seed=42, L_max=2):
+        import warnings
+
+        from diff_diff import ChaisemartinDHaultfoeuille
+        from diff_diff.prep import generate_reversible_did_data
+
+        df = generate_reversible_did_data(
+            n_groups=n_groups, n_periods=n_periods, seed=seed
+        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            return ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=L_max,
+            )
+
+    def test_dcdh_integration(self):
+        """compute_honest_did works on dCDH results (mirrors CS pattern)."""
+        results = self._fit_dcdh()
+        bounds = compute_honest_did(results, method="relative_magnitude", M=1.0)
+        assert isinstance(bounds, HonestDiDResults)
+        assert np.isfinite(bounds.ci_lb)
+        assert np.isfinite(bounds.ci_ub)
+        assert bounds.method == "relative_magnitude"
+
+    def test_dcdh_extraction(self):
+        """_extract_event_study_params returns correct shapes for dCDH."""
+        results = self._fit_dcdh()
+        beta_hat, sigma, n_pre, n_post, pre_t, post_t, df_s = (
+            _extract_event_study_params(results)
+        )
+        assert n_pre >= 1
+        assert n_post >= 1
+        assert beta_hat.shape == (n_pre + n_post,)
+        assert sigma.shape == (n_pre + n_post, n_pre + n_post)
+        assert all(t < 0 for t in pre_t)
+        assert all(t > 0 for t in post_t)
+        assert df_s is None  # dCDH has no survey support
+
+    def test_dcdh_no_placebos_raises(self):
+        """dCDH results without placebos raise ValueError."""
+        import warnings
+
+        from diff_diff import ChaisemartinDHaultfoeuille
+        from diff_diff.prep import generate_reversible_did_data
+
+        df = generate_reversible_did_data(n_groups=20, n_periods=4, seed=1)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1, placebo=False).fit(
+                df, "outcome", "group", "period", "treatment",
+            )
+        with pytest.raises(ValueError, match="placebo_event_study"):
+            compute_honest_did(r)

From b732007b3e95e2f93d6afecbce94553519532536 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 08:40:28 -0400
Subject: [PATCH 02/12] Address AI review: runtime warning, docstring, summary
 refactor, edge-case test

- Add UserWarning in dCDH HonestDiD extraction about placebo-based pre-periods
- Update REGISTRY.md to explicitly document library extension semantics
- Update fit() docstring for honest_did (was "Reserved for Phase 3")
- Include exception class name in HonestDiD failure warning
- Factor summary() Phase 3 blocks into 5 private helper methods
- Add test_dcdh_emits_placebo_warning and test_dcdh_empty_consecutive_block_raises

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/chaisemartin_dhaultfoeuille.py      |  11 +-
 .../chaisemartin_dhaultfoeuille_results.py    | 268 ++++++++++--------
 diff_diff/honest_did.py                       |  14 +
 docs/methodology/REGISTRY.md                  |   2 +-
 tests/test_honest_did.py                      |  31 ++
 5 files changed, 202 insertions(+), 124 deletions(-)

diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
index 9dfa0627..38c9d111 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -538,7 +538,12 @@ def fit(
             pool to groups in the same set (Web Appendix Section 1.4).
             Requires ``L_max >= 1`` and time-invariant values per group.
         honest_did : bool, default=False
-            **Reserved for Phase 3** (HonestDiD integration on placebos).
+            Run HonestDiD sensitivity analysis (Rambachan & Roth 2023) on
+            the placebo + event study surface. Requires ``L_max >= 1``.
+            Default: relative magnitudes (DeltaRM, Mbar=1.0). Results
+            stored on ``results.honest_did_results``; ``None`` with a
+            warning if the solver fails. For custom parameters, call
+            ``compute_honest_did(results, ...)`` post-hoc instead.
         heterogeneity : str, optional
             Column name for a time-invariant covariate to test for
             heterogeneous effects (Web Appendix Section 1.5, Lemma 7).
@@ -2413,8 +2418,8 @@ def fit(
                 )
             except (ValueError, np.linalg.LinAlgError) as exc:
                 warnings.warn(
-                    f"HonestDiD computation failed: {exc}. "
-                    f"results.honest_did_results will be None. "
+                    f"HonestDiD computation failed ({type(exc).__name__}): "
+                    f"{exc}. results.honest_did_results will be None. "
                     f"You can retry with compute_honest_did(results, ...) "
                     f"using different parameters.",
                     UserWarning,
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
index 1a363e24..2c96136b 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille_results.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -806,126 +806,12 @@ def summary(self, alpha: Optional[float] = None) -> str:
 
             lines.extend([""])
 
-        # --- Covariate adjustment diagnostics (DID^X) ---
-        if self.covariate_residuals is not None:
-            cov_df = self.covariate_residuals
-            control_names = sorted(cov_df["covariate"].unique())
-            n_baselines = cov_df["baseline_treatment"].nunique()
-            failed = int((cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum())
-            lines.extend(
-                [
-                    thin,
-                    "Covariate Adjustment (DID^X) Diagnostics".center(width),
-                    thin,
-                    f"{'Controls:':<35} {', '.join(control_names):>10}",
-                    f"{'Baselines residualized:':<35} {n_baselines:>10}",
-                    f"{'Failed strata:':<35} {failed:>10}",
-                    thin,
-                    "",
-                ]
-            )
-
-        # --- Linear trends cumulated level effects ---
-        if self.linear_trends_effects is not None:
-            lines.extend(
-                [
-                    thin,
-                    "Cumulated Level Effects (DID^{fd}, trends_linear)".center(width),
-                    thin,
-                    header_row,
-                    thin,
-                ]
-            )
-            for l_h in sorted(self.linear_trends_effects.keys()):
-                entry = self.linear_trends_effects[l_h]
-                lines.append(
-                    _format_inference_row(
-                        f"Level_{l_h}",
-                        entry["effect"],
-                        entry["se"],
-                        entry["t_stat"],
-                        entry["p_value"],
-                    )
-                )
-            lines.extend([thin, ""])
-
-        # --- Heterogeneity test ---
-        if self.heterogeneity_effects is not None:
-            lines.extend(
-                [
-                    thin,
-                    "Heterogeneity Test (Section 1.5, partial)".center(width),
-                    thin,
-                    f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} "
-                    f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
-                    thin,
-                ]
-            )
-            for l_h in sorted(self.heterogeneity_effects.keys()):
-                entry = self.heterogeneity_effects[l_h]
-                lines.append(
-                    _format_inference_row(
-                        f"l={l_h}",
-                        entry["beta"],
-                        entry["se"],
-                        entry["t_stat"],
-                        entry["p_value"],
-                    )
-                )
-            lines.extend(
-                [
-                    thin,
-                    "Note: Post-treatment regressions only (no placebo/joint test).",
-                    "",
-                ]
-            )
-
-        # --- Design-2 switch-in / switch-out ---
-        if self.design2_effects is not None:
-            d2 = self.design2_effects
-            si = d2.get("switch_in", {})
-            so = d2.get("switch_out", {})
-            lines.extend(
-                [
-                    thin,
-                    "Design-2: Switch-In / Switch-Out (Section 1.6)".center(width),
-                    thin,
-                    f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}",
-                    f"{'Switch-in effect (mean):':<35} "
-                    f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}"
-                    f"  (N={si.get('n_groups', 0)})",
-                    f"{'Switch-out effect (mean):':<35} "
-                    f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}"
-                    f"  (N={so.get('n_groups', 0)})",
-                    thin,
-                    "",
-                ]
-            )
-
-        # --- HonestDiD sensitivity ---
-        if self.honest_did_results is not None:
-            hd = self.honest_did_results
-            method_label = hd.method.replace("_", " ").title()
-            m_val = hd.M
-            sig_label = "Yes" if hd.is_significant else "No"
-            conf_pct = int((1 - hd.alpha) * 100)
-            lines.extend(
-                [
-                    thin,
-                    "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width),
-                    thin,
-                    f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
-                    f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
-                    f"{'Identified set:':<35} "
-                    f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
-                    f"{'Robust ' + str(conf_pct) + '% CI:':<35} "
-                    f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]",
-                    f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} "
-                    f"{sig_label:>10}",
-                    thin,
-                    "",
-                ]
-            )
+        # --- Phase 3 extension blocks (factored into helpers) ---
+        self._render_covariate_section(lines, width, thin)
+        self._render_linear_trends_section(lines, width, thin, header_row)
+        self._render_heterogeneity_section(lines, width, thin)
+        self._render_design2_section(lines, width, thin)
+        self._render_honest_did_section(lines, width, thin)
 
         # --- TWFE diagnostic ---
         if self.twfe_beta_fe is not None:
@@ -971,6 +857,148 @@ def print_summary(self, alpha: Optional[float] = None) -> None:
         """Print the formatted summary to stdout."""
         print(self.summary(alpha))
 
+    # ------------------------------------------------------------------
+    # Summary section helpers (Phase 3 blocks)
+    # ------------------------------------------------------------------
+
+    def _render_covariate_section(
+        self, lines: List[str], width: int, thin: str
+    ) -> None:
+        if self.covariate_residuals is None:
+            return
+        cov_df = self.covariate_residuals
+        control_names = sorted(cov_df["covariate"].unique())
+        n_baselines = cov_df["baseline_treatment"].nunique()
+        failed = int(
+            (cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum()
+        )
+        lines.extend(
+            [
+                thin,
+                "Covariate Adjustment (DID^X) Diagnostics".center(width),
+                thin,
+                f"{'Controls:':<35} {', '.join(control_names):>10}",
+                f"{'Baselines residualized:':<35} {n_baselines:>10}",
+                f"{'Failed strata:':<35} {failed:>10}",
+                thin,
+                "",
+            ]
+        )
+
+    def _render_linear_trends_section(
+        self, lines: List[str], width: int, thin: str, header_row: str
+    ) -> None:
+        if self.linear_trends_effects is None:
+            return
+        lines.extend(
+            [
+                thin,
+                "Cumulated Level Effects (DID^{fd}, trends_linear)".center(width),
+                thin,
+                header_row,
+                thin,
+            ]
+        )
+        for l_h in sorted(self.linear_trends_effects.keys()):
+            entry = self.linear_trends_effects[l_h]
+            lines.append(
+                _format_inference_row(
+                    f"Level_{l_h}",
+                    entry["effect"],
+                    entry["se"],
+                    entry["t_stat"],
+                    entry["p_value"],
+                )
+            )
+        lines.extend([thin, ""])
+
+    def _render_heterogeneity_section(
+        self, lines: List[str], width: int, thin: str
+    ) -> None:
+        if self.heterogeneity_effects is None:
+            return
+        lines.extend(
+            [
+                thin,
+                "Heterogeneity Test (Section 1.5, partial)".center(width),
+                thin,
+                f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} "
+                f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
+                thin,
+            ]
+        )
+        for l_h in sorted(self.heterogeneity_effects.keys()):
+            entry = self.heterogeneity_effects[l_h]
+            lines.append(
+                _format_inference_row(
+                    f"l={l_h}",
+                    entry["beta"],
+                    entry["se"],
+                    entry["t_stat"],
+                    entry["p_value"],
+                )
+            )
+        lines.extend(
+            [
+                thin,
+                "Note: Post-treatment regressions only (no placebo/joint test).",
+                "",
+            ]
+        )
+
+    def _render_design2_section(
+        self, lines: List[str], width: int, thin: str
+    ) -> None:
+        if self.design2_effects is None:
+            return
+        d2 = self.design2_effects
+        si = d2.get("switch_in", {})
+        so = d2.get("switch_out", {})
+        lines.extend(
+            [
+                thin,
+                "Design-2: Switch-In / Switch-Out (Section 1.6)".center(width),
+                thin,
+                f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}",
+                f"{'Switch-in effect (mean):':<35} "
+                f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}"
+                f"  (N={si.get('n_groups', 0)})",
+                f"{'Switch-out effect (mean):':<35} "
+                f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}"
+                f"  (N={so.get('n_groups', 0)})",
+                thin,
+                "",
+            ]
+        )
+
+    def _render_honest_did_section(self, lines: List[str], width: int, thin: str) -> None:
+        """Append the HonestDiD block to ``lines`` (no-op when results are absent)."""
+        if self.honest_did_results is None:
+            return
+        hd = self.honest_did_results
+        method_label = hd.method.replace("_", " ").title()
+        m_val = hd.M
+        sig_label = "Yes" if hd.is_significant else "No"
+        # "g" drops float noise and keeps fractions: 97.5 -> "97.5" (int() truncated to 97).
+        conf_pct = format((1 - hd.alpha) * 100, "g")
+        lines.extend(
+            [
+                thin,
+                "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width),
+                thin,
+                f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
+                f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
+                f"{'Identified set:':<35} "
+                f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
+                f"{'Robust ' + str(conf_pct) + '% CI:':<35} "
+                f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]",
+                f"{'Significant at ' + format(hd.alpha * 100, 'g') + '%:':<35} "
+                f"{sig_label:>10}",
+                thin,
+                "",
+            ]
+        )
+
     # ------------------------------------------------------------------
     # to_dataframe
     # ------------------------------------------------------------------
diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
index e7de6a3c..3da0aba4 100644
--- a/diff_diff/honest_did.py
+++ b/diff_diff/honest_did.py
@@ -824,6 +824,20 @@ def _extract_event_study_params(
             )
 
             if isinstance(results, ChaisemartinDHaultfoeuilleResults):
+                import warnings
+
+                warnings.warn(
+                    "HonestDiD on dCDH results uses DID^{pl}_l placebo "
+                    "estimates as pre-period coefficients, not standard "
+                    "event-study pre-treatment coefficients. The Rambachan-"
+                    "Roth restrictions bound violations of the parallel "
+                    "trends assumption underlying the dCDH placebo "
+                    "estimand. This is a library extension; interpretation "
+                    "differs from canonical event-study HonestDiD.",
+                    UserWarning,
+                    stacklevel=3,
+                )
+
                 if results.placebo_event_study is None:
                     raise ValueError(
                         "ChaisemartinDHaultfoeuilleResults must have placebo_event_study "
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 9d39725c..1f94064b 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -617,7 +617,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 
 - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`.
 
-- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning.
+- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning.
 
 - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups.
 
diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py
index 7d7c5b84..9bd1f753 100644
--- a/tests/test_honest_did.py
+++ b/tests/test_honest_did.py
@@ -1398,3 +1398,34 @@ def test_dcdh_no_placebos_raises(self):
             )
         with pytest.raises(ValueError, match="placebo_event_study"):
             compute_honest_did(r)
+
+    def test_dcdh_emits_placebo_warning(self):
+        """compute_honest_did on dCDH emits warning about placebo-based pre-periods."""
+        import warnings
+
+        results = self._fit_dcdh()
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            compute_honest_did(results)
+        placebo_warnings = [
+            x for x in w
+            if "placebo" in str(x.message).lower()
+            and "pre-period" in str(x.message).lower()
+        ]
+        assert len(placebo_warnings) >= 1, (
+            "Expected a UserWarning about placebo-based pre-period inputs"
+        )
+
+    def test_dcdh_empty_consecutive_block_raises(self):
+        """ValueError when all placebos have NaN SE (no valid pre-periods)."""
+        import warnings
+
+        # Fit real results, then corrupt placebo SEs to NaN
+        results = self._fit_dcdh()
+        for h in results.placebo_event_study:
+            results.placebo_event_study[h]["se"] = float("nan")
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            with pytest.raises(ValueError, match="No placebo horizons with finite SEs"):
+                compute_honest_did(results)

From ee6ad2f9f037c16aa7c36a74f9ac8118d7aff2a0 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 10:35:41 -0400
Subject: [PATCH 03/12] Fix CI review R1: boundary-horizon validation,
 placebo=False guard, docstrings

P0: _largest_consecutive_block now raises ValueError when boundary horizon
    (-1 or +1) is missing after finite-SE filtering instead of silently
    returning the full list (would produce wrong HonestDiD bounds).
P1: honest_did=True now rejects placebo=False early instead of silently
    returning honest_did_results=None with no warning.
P2: Added 3 regression tests (boundary -1 missing, boundary +1 missing,
    placebo=False + honest_did).
P3: Updated docstrings in honest_did.py (6 locations) and docs/llms.txt
    to include ChaisemartinDHaultfoeuilleResults alongside MultiPeriodDiD/CS.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/chaisemartin_dhaultfoeuille.py  |  6 ++++++
 diff_diff/honest_did.py                   | 22 ++++++++++++-------
 docs/llms.txt                             |  2 +-
 tests/test_chaisemartin_dhaultfoeuille.py |  9 ++++++++
 tests/test_honest_did.py                  | 26 +++++++++++++++++++++++
 5 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
index 38c9d111..3ec12e25 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -957,6 +957,12 @@ def fit(
                 "Set L_max to compute DID^{pl}_l placebos that HonestDiD uses as "
                 "pre-period coefficients."
             )
+        if honest_did and not self.placebo:
+            raise ValueError(
+                "honest_did=True requires placebo computation. The estimator was "
+                "constructed with placebo=False. Use "
+                "ChaisemartinDHaultfoeuille(placebo=True) (the default)."
+            )
 
         # Pivot to (group x time) matrices for vectorized computations
         d_pivot = cell.pivot(index=group, columns=time, values="d_gt").reindex(
diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
index 3da0aba4..d5aa3e0f 100644
--- a/diff_diff/honest_did.py
+++ b/diff_diff/honest_did.py
@@ -559,7 +559,7 @@ def _extract_event_study_params(
 
     Parameters
     ----------
-    results : MultiPeriodDiDResults or CallawaySantAnnaResults
+    results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults
         Estimation results with event study structure.
 
     Returns
@@ -886,8 +886,14 @@ def _largest_consecutive_block(times, boundary_val):
                     if not times:
                         return []
                     if boundary_val not in times:
-                        # No boundary value - take the block closest to it
-                        return times
+                        raise ValueError(
+                            f"HonestDiD requires horizon {boundary_val} in "
+                            f"the dCDH "
+                            f"{'placebo' if boundary_val < 0 else 'event study'}"
+                            f" surface, but it was removed by finite-SE "
+                            f"filtering. Retained horizons: {times}. Ensure "
+                            f"horizon {boundary_val} has a finite SE."
+                        )
                     # Expand outward from boundary_val
                     block = [boundary_val]
                     idx = times.index(boundary_val)
@@ -2197,7 +2203,7 @@ def fit(
 
         Parameters
         ----------
-        results : MultiPeriodDiDResults or CallawaySantAnnaResults
+        results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults
             Results from event study estimation.
         M : float, optional
             Override the M parameter for this fit.
@@ -2515,7 +2521,7 @@ def sensitivity_analysis(
 
         Parameters
         ----------
-        results : MultiPeriodDiDResults or CallawaySantAnnaResults
+        results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults
             Results from event study estimation.
         M_grid : list of float, optional
             Grid of M values to evaluate. If None, uses default grid
@@ -2614,7 +2620,7 @@ def breakdown_value(
 
         Parameters
         ----------
-        results : MultiPeriodDiDResults or CallawaySantAnnaResults
+        results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults
             Results from event study estimation.
         tol : float
             Tolerance for binary search.
@@ -2669,7 +2675,7 @@ def compute_honest_did(
 
     Parameters
     ----------
-    results : MultiPeriodDiDResults or CallawaySantAnnaResults
+    results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults
         Results from event study estimation.
     method : str
         Type of restriction ("smoothness", "relative_magnitude", "combined").
@@ -2705,7 +2711,7 @@ def sensitivity_plot(
 
     Parameters
     ----------
-    results : MultiPeriodDiDResults or CallawaySantAnnaResults
+    results : MultiPeriodDiDResults, CallawaySantAnnaResults, or ChaisemartinDHaultfoeuilleResults
         Results from event study estimation.
     method : str
         Type of restriction.
diff --git a/docs/llms.txt b/docs/llms.txt
index 8f6b5b02..cb59f16a 100644
--- a/docs/llms.txt
+++ b/docs/llms.txt
@@ -20,7 +20,7 @@ diagnostic steps produces unreliable results.
 3. **Test parallel trends** — simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect CS event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds.
 4. **Choose estimator** — staggered adoption → CS/SA/BJS (NOT plain TWFE); few treated units → SDiD; factor confounding → TROP; simple 2x2 → DiD. Run `BaconDecomposition` to diagnose TWFE bias.
 5. **Estimate** — `estimator.fit(data, ...)`. Always print the cluster count first and choose inference method based on the result (cluster-robust if >= 50 clusters, wild bootstrap if fewer).
-6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD/CS only), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs.
+6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD, CS, or dCDH), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs.
 7. **Heterogeneity** — CS: `aggregate='group'`/`'event_study'`; SA: `results.event_study_effects`/`to_dataframe(level='cohort')`; subgroup re-estimation.
 8. **Robustness** — compare 2-3 estimators (CS vs SA vs BJS), MUST report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds.
 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index 29101d20..d968935c 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3275,6 +3275,15 @@ def test_honest_did_requires_lmax(self):
                 honest_did=True,
             )
 
+    def test_honest_did_rejects_placebo_false(self):
+        """honest_did=True with placebo=False raises ValueError."""
+        df = self._make_data()
+        with pytest.raises(ValueError, match="placebo=False"):
+            ChaisemartinDHaultfoeuille(seed=1, placebo=False).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+
     def test_honest_did_standalone(self):
         """compute_honest_did() on dCDH results matches honest_did=True."""
         from diff_diff.honest_did import compute_honest_did
diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py
index 9bd1f753..97213651 100644
--- a/tests/test_honest_did.py
+++ b/tests/test_honest_did.py
@@ -1429,3 +1429,29 @@ def test_dcdh_empty_consecutive_block_raises(self):
             warnings.simplefilter("ignore")
             with pytest.raises(ValueError, match="No placebo horizons with finite SEs"):
                 compute_honest_did(results)
+
+    def test_dcdh_missing_boundary_minus1_raises(self):
+        """ValueError when horizon -1 has NaN SE (boundary required)."""
+        import warnings
+
+        results = self._fit_dcdh()
+        # Corrupt only horizon -1 SE; leave -2 intact
+        results.placebo_event_study[-1]["se"] = float("nan")
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            with pytest.raises(ValueError, match="requires horizon -1"):
+                compute_honest_did(results)
+
+    def test_dcdh_missing_boundary_plus1_raises(self):
+        """ValueError when horizon +1 has NaN SE (boundary required)."""
+        import warnings
+
+        results = self._fit_dcdh()
+        # Corrupt only horizon +1 SE
+        results.event_study_effects[1]["se"] = float("nan")
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            with pytest.raises(ValueError, match="requires horizon 1"):
+                compute_honest_did(results)

From ad7698cd11a0291363dea21a8e266fa502f1e509 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 10:49:02 -0400
Subject: [PATCH 04/12] Fix CI review R2: document l_vec target, label in
 summary, pin in test

- Update fit() docstring to specify equal-weight average over post horizons
  (l_vec=None default) and note R's HonestDiD targets on-impact instead
- Update REGISTRY.md with l_vec deviation from R's default
- Add "Target: Equal-weight avg over post horizons" line to summary
- Add test_honest_did_original_estimate_is_post_average regression

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/chaisemartin_dhaultfoeuille.py         |  9 ++++++---
 diff_diff/chaisemartin_dhaultfoeuille_results.py |  1 +
 docs/methodology/REGISTRY.md                     |  2 +-
 tests/test_chaisemartin_dhaultfoeuille.py        | 16 ++++++++++++++++
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
index 3ec12e25..2acee121 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -540,9 +540,12 @@ def fit(
         honest_did : bool, default=False
             Run HonestDiD sensitivity analysis (Rambachan & Roth 2023) on
             the placebo + event study surface. Requires ``L_max >= 1``.
-            Default: relative magnitudes (DeltaRM, Mbar=1.0). Results
-            stored on ``results.honest_did_results``; ``None`` with a
-            warning if the solver fails. For custom parameters, call
+            Default: relative magnitudes (DeltaRM, Mbar=1.0), targeting
+            the equal-weight average over all post-treatment horizons
+            (``l_vec=None``). Results stored on
+            ``results.honest_did_results``; ``None`` with a warning if
+            the solver fails. For custom parameters (e.g., targeting
+            the on-impact effect only via ``l_vec``), call
             ``compute_honest_did(results, ...)`` post-hoc instead.
         heterogeneity : str, optional
             Column name for a time-invariant covariate to test for
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
index 2c96136b..1d07813c 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille_results.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -987,6 +987,7 @@ def _render_honest_did_section(
                 "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width),
                 thin,
                 f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
+                f"{'Target:':<35} {'Equal-weight avg over post horizons'}",
                 f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
                 f"{'Identified set:':<35} "
                 f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 1f94064b..7601a8e9 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -617,7 +617,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 
 - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`.
 
-- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning.
+- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`, targeting the equal-weight average over all post-treatment horizons (`l_vec=None`). R's HonestDiD defaults to the first post/on-impact effect; use `compute_honest_did(results, ...)` with a custom `l_vec` to match that behavior. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning.
 
 - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups.
 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index d968935c..7381f762 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3369,6 +3369,22 @@ def test_honest_did_smoothness(self):
         # Different methods should generally give different bounds
         assert rm_bounds.ci_lb != sd_bounds.ci_lb or rm_bounds.ci_ub != sd_bounds.ci_ub
 
+    def test_honest_did_original_estimate_is_post_average(self):
+        """original_estimate targets equal-weight average over post horizons."""
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+        hd = r.honest_did_results
+        assert hd is not None
+        # Equal-weight average = mean of event_study_effects[1..L_max]
+        es = r.event_study_effects
+        avg = np.mean([es[h]["effect"] for h in sorted(es.keys())])
+        np.testing.assert_allclose(hd.original_estimate, avg, rtol=1e-10)
+
 
 # =============================================================================
 # Summary Phase 3 Rendering

From a351e2e2f68a1bb60c8a3584107dc56fb56491dd Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 12:09:32 -0400
Subject: [PATCH 05/12] Fix CI review R3: add l_vec to compute_honest_did,
 end-to-end tests

- Add l_vec parameter to compute_honest_did() so the advertised
  custom-target path actually works (was missing from wrapper)
- Add test_honest_did_custom_l_vec_on_impact: l_vec=[1,0] targets
  on-impact effect, asserts original_estimate matches DID_1
- Add test_honest_did_with_trends_nonparam: end-to-end
  trends_nonparam + honest_did=True integration

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/honest_did.py                   |  9 ++++-
 tests/test_chaisemartin_dhaultfoeuille.py | 43 +++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
index d5aa3e0f..6fa1f376 100644
--- a/diff_diff/honest_did.py
+++ b/diff_diff/honest_did.py
@@ -2669,6 +2669,7 @@ def compute_honest_did(
     method: str = "relative_magnitude",
     M: float = 1.0,
     alpha: float = 0.05,
+    l_vec: Optional[np.ndarray] = None,
 ) -> HonestDiDResults:
     """
     Convenience function for computing Honest DiD bounds.
@@ -2683,6 +2684,12 @@ def compute_honest_did(
         Restriction parameter.
     alpha : float
         Significance level.
+    l_vec : np.ndarray, optional
+        Weight vector defining the scalar target ``theta = l_vec' tau``
+        over post-treatment horizons. Length must equal the number of
+        post-treatment periods. ``None`` (default) uses equal weights
+        (uniform average). To target the on-impact effect only (R's
+        default), pass ``np.array([1, 0, ..., 0])``.
 
     Returns
     -------
@@ -2694,7 +2701,7 @@ def compute_honest_did(
     >>> bounds = compute_honest_did(event_study_results, method='relative_magnitude', M=1.0)
     >>> print(f"Robust CI: [{bounds.ci_lb:.3f}, {bounds.ci_ub:.3f}]")
     """
-    honest = HonestDiD(method=method, M=M, alpha=alpha)
+    honest = HonestDiD(method=method, M=M, alpha=alpha, l_vec=l_vec)
     return honest.fit(results)
 
 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index 7381f762..c7f04643 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3385,6 +3385,49 @@ def test_honest_did_original_estimate_is_post_average(self):
         avg = np.mean([es[h]["effect"] for h in sorted(es.keys())])
         np.testing.assert_allclose(hd.original_estimate, avg, rtol=1e-10)
 
+    def test_honest_did_custom_l_vec_on_impact(self):
+        """compute_honest_did with l_vec=[1,0] targets on-impact effect."""
+        from diff_diff.honest_did import compute_honest_did
+
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2,
+            )
+        # l_vec=[1, 0] targets only DID_1 (on-impact, R's default)
+        bounds = compute_honest_did(r, l_vec=np.array([1.0, 0.0]))
+        np.testing.assert_allclose(
+            bounds.original_estimate,
+            r.event_study_effects[1]["effect"],
+            rtol=1e-10,
+        )
+
+    def test_honest_did_with_trends_nonparam(self):
+        """End-to-end trends_nonparam + honest_did=True."""
+        rng = np.random.RandomState(42)
+        rows = []
+        for g in range(40):
+            state = g % 4
+            switches = g < 20
+            for t in range(7):
+                d = 1 if (switches and t >= 3) else 0
+                y = 10 + 2.0 * t + 5.0 * d + rng.normal(0, 0.5)
+                rows.append({
+                    "group": g, "period": t, "treatment": d,
+                    "outcome": y, "state": state,
+                })
+        df = pd.DataFrame(rows)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, trends_nonparam="state", honest_did=True,
+            )
+        assert r.honest_did_results is not None
+        assert np.isfinite(r.honest_did_results.ci_lb)
+
 
 # =============================================================================
 # Summary Phase 3 Rendering

From 419d872c2da379516949f91ef22837dd072daccb Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 12:45:02 -0400
Subject: [PATCH 06/12] Fix CI review R4: persist target_label on
 HonestDiDResults, render dynamically

- Add target_label field to HonestDiDResults (default: equal-weight avg)
- HonestDiD.fit() detects common l_vec patterns and sets human-readable label
  (on-impact, equal-weight, or custom with vector)
- Summary renders hd.target_label instead of hard-coded string
- Add test_honest_did_custom_l_vec_summary_label: attaches custom-target
  results and asserts summary shows "on-impact" not "Equal-weight"

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../chaisemartin_dhaultfoeuille_results.py    |  2 +-
 diff_diff/honest_did.py                       | 14 +++++++++++++-
 tests/test_chaisemartin_dhaultfoeuille.py     | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
index 1d07813c..cadc1008 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille_results.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -987,7 +987,7 @@ def _render_honest_did_section(
                 "HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width),
                 thin,
                 f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
-                f"{'Target:':<35} {'Equal-weight avg over post horizons'}",
+                f"{'Target:':<35} {hd.target_label}",
                 f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
                 f"{'Identified set:':<35} "
                 f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
index 6fa1f376..f46ad58c 100644
--- a/diff_diff/honest_did.py
+++ b/diff_diff/honest_did.py
@@ -191,6 +191,7 @@ class HonestDiDResults:
     original_se: float
     alpha: float = 0.05
     ci_method: str = "FLCI"
+    target_label: str = "Equal-weight avg over post horizons"
     original_results: Optional[Any] = field(default=None, repr=False)
     # Event study bounds (optional)
     event_study_bounds: Optional[Dict[Any, Dict[str, float]]] = field(default=None, repr=False)
@@ -2252,13 +2253,23 @@ def fit(
                 "coefficient to compute bounds."
             )
 
-        # Set up weighting vector
+        # Set up weighting vector and target label
         if self.l_vec is None:
             l_vec = np.ones(num_post) / num_post  # Uniform weights
+            target_label = "Equal-weight avg over post horizons"
         else:
             l_vec = np.asarray(self.l_vec)
             if len(l_vec) != num_post:
                 raise ValueError(f"l_vec must have length {num_post}, got {len(l_vec)}")
+            # Detect common patterns for a human-readable label
+            basis = np.zeros(num_post)
+            basis[0] = 1.0
+            if np.allclose(l_vec, basis):
+                target_label = "First post-treatment effect (on-impact)"
+            elif np.allclose(l_vec, np.ones(num_post) / num_post):
+                target_label = "Equal-weight avg over post horizons"
+            else:
+                target_label = f"Custom l_vec ({l_vec.tolist()})"
 
         # Compute original estimate and SE
         original_estimate = np.dot(l_vec, beta_post)
@@ -2318,6 +2329,7 @@ def fit(
             original_se=original_se,
             alpha=self.alpha,
             ci_method=ci_method,
+            target_label=target_label,
             original_results=results,
             survey_metadata=survey_metadata,
             df_survey=df_survey,
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index c7f04643..4b2fa168 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3404,6 +3404,25 @@ def test_honest_did_custom_l_vec_on_impact(self):
             rtol=1e-10,
         )
 
+    def test_honest_did_custom_l_vec_summary_label(self):
+        """summary() renders custom target label when l_vec is overridden."""
+        from diff_diff.honest_did import compute_honest_did
+
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2,
+            )
+        # Attach custom-target HonestDiD to results
+        r.honest_did_results = compute_honest_did(
+            r, l_vec=np.array([1.0, 0.0])
+        )
+        text = r.summary()
+        assert "on-impact" in text.lower()
+        assert "Equal-weight" not in text
+
     def test_honest_did_with_trends_nonparam(self):
         """End-to-end trends_nonparam + honest_did=True."""
         rng = np.random.RandomState(42)

From 33e9c678dc2754341843a75a9df7865e8c534922 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 13:36:06 -0400
Subject: [PATCH 07/12] Fix CI review R5: persist pre/post_periods_used on
 HonestDiDResults

- Add pre_periods_used and post_periods_used fields to HonestDiDResults
  so the retained horizon set is always available on the results object
- HonestDiD.fit() populates both fields from the extracted period lists
- Summary renders retained horizons below the target label
- Add test_honest_did_retains_period_metadata asserting fields populated
  and summary shows "Post horizons used:"

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../chaisemartin_dhaultfoeuille_results.py     | 12 ++++++++++++
 diff_diff/honest_did.py                        |  4 ++++
 tests/test_chaisemartin_dhaultfoeuille.py      | 18 ++++++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
index cadc1008..153cc7fc 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille_results.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -988,6 +988,18 @@ def _render_honest_did_section(
                 thin,
                 f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
                 f"{'Target:':<35} {hd.target_label}",
+            ]
+        )
+        if hd.post_periods_used is not None:
+            lines.append(
+                f"{'Post horizons used:':<35} {hd.post_periods_used}"
+            )
+        if hd.pre_periods_used is not None:
+            lines.append(
+                f"{'Pre horizons used:':<35} {hd.pre_periods_used}"
+            )
+        lines.extend(
+            [
                 f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
                 f"{'Identified set:':<35} "
                 f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
index f46ad58c..87d944d0 100644
--- a/diff_diff/honest_did.py
+++ b/diff_diff/honest_did.py
@@ -192,6 +192,8 @@ class HonestDiDResults:
     alpha: float = 0.05
     ci_method: str = "FLCI"
     target_label: str = "Equal-weight avg over post horizons"
+    pre_periods_used: Optional[List[Any]] = field(default=None, repr=False)
+    post_periods_used: Optional[List[Any]] = field(default=None, repr=False)
     original_results: Optional[Any] = field(default=None, repr=False)
     # Event study bounds (optional)
     event_study_bounds: Optional[Dict[Any, Dict[str, float]]] = field(default=None, repr=False)
@@ -2330,6 +2332,8 @@ def fit(
             alpha=self.alpha,
             ci_method=ci_method,
             target_label=target_label,
+            pre_periods_used=list(pre_periods),
+            post_periods_used=list(post_periods),
             original_results=results,
             survey_metadata=survey_metadata,
             df_survey=df_survey,
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index 4b2fa168..143c462e 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3404,6 +3404,24 @@ def test_honest_did_custom_l_vec_on_impact(self):
             rtol=1e-10,
         )
 
+    def test_honest_did_retains_period_metadata(self):
+        """HonestDiDResults stores pre_periods_used and post_periods_used."""
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+        hd = r.honest_did_results
+        assert hd.pre_periods_used is not None
+        assert hd.post_periods_used is not None
+        assert all(p < 0 for p in hd.pre_periods_used)
+        assert all(p > 0 for p in hd.post_periods_used)
+        # Summary renders the retained horizons
+        text = r.summary()
+        assert "Post horizons used:" in text
+
     def test_honest_did_custom_l_vec_summary_label(self):
         """summary() renders custom target label when l_vec is overridden."""
         from diff_diff.honest_did import compute_honest_did

From dd06b3bbfd2d68bfc356caba1cc704a662cbcec0 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 14:04:04 -0400
Subject: [PATCH 08/12] Fix CI review R6: surface target metadata in standalone
 HonestDiD summary/export

- HonestDiDResults.summary() now renders target_label, pre/post_periods_used
- HonestDiDResults.to_dict() includes target_label, pre/post_periods_used
- to_dataframe() inherits from to_dict() automatically
- Add test_dcdh_standalone_surfaces_target_metadata verifying all three
  surfaces include target metadata for custom l_vec

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/honest_did.py  | 11 +++++++++++
 tests/test_honest_did.py | 16 ++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
index 87d944d0..9e535fa9 100644
--- a/diff_diff/honest_did.py
+++ b/diff_diff/honest_did.py
@@ -276,6 +276,7 @@ def summary(self) -> str:
             "=" * 70,
             "",
             f"{'Method:':<30} {method_display}",
+            f"{'Target:':<30} {self.target_label}",
             f"{'Restriction parameter (M):':<30} {self.M:.4f}",
             f"{'CI method:':<30} {self.ci_method}",
             "",
@@ -296,6 +297,13 @@ def summary(self) -> str:
         ]
 
+        if self.pre_periods_used is not None:
+            lines.append(f"{'Pre horizons used:':<30} {self.pre_periods_used}")
+        if self.post_periods_used is not None:
+            lines.append(f"{'Post horizons used:':<30} {self.post_periods_used}")
+        if self.pre_periods_used is not None or self.post_periods_used is not None:
+            lines.append("")
+
         # Interpretation
         lines.extend(
             [
                 "-" * 70,
@@ -343,6 +351,9 @@ def to_dict(self) -> Dict[str, Any]:
             "ci_ub": self.ci_ub,
             "M": self.M,
             "method": self.method,
+            "target_label": self.target_label,
+            "pre_periods_used": self.pre_periods_used,
+            "post_periods_used": self.post_periods_used,
             "original_estimate": self.original_estimate,
             "original_se": self.original_se,
             "alpha": self.alpha,
diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py
index 97213651..d0e99dd4 100644
--- a/tests/test_honest_did.py
+++ b/tests/test_honest_did.py
@@ -1430,6 +1430,22 @@ def test_dcdh_empty_consecutive_block_raises(self):
             with pytest.raises(ValueError, match="No placebo horizons with finite SEs"):
                 compute_honest_did(results)
 
+    def test_dcdh_standalone_surfaces_target_metadata(self):
+        """Standalone HonestDiDResults summary/to_dict include target metadata."""
+        results = self._fit_dcdh()
+        bounds = compute_honest_did(results, l_vec=np.array([1.0, 0.0]))
+        # summary() includes target and period metadata
+        text = bounds.summary()
+        assert "on-impact" in text.lower()
+        assert "Post horizons used:" in text
+        assert "Pre horizons used:" in text
+        # to_dict() includes the fields
+        d = bounds.to_dict()
+        assert "target_label" in d
+        assert "pre_periods_used" in d
+        assert "post_periods_used" in d
+        assert d["post_periods_used"] == [1, 2]
+
     def test_dcdh_missing_boundary_minus1_raises(self):
         """ValueError when horizon -1 has NaN SE (boundary required)."""
         import warnings

From 4943910e66b1d878d169e74b63d8ebc8da9fd753 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 14:22:55 -0400
Subject: [PATCH 09/12] Add end-to-end trends_nonparam support-trimming +
 HonestDiD test

test_honest_did_trends_nonparam_trimming: State B's early-switching
controls vanish at far horizons, causing N_l=0 at h=3 and h=-3.
HonestDiD extraction drops NaN-SE horizons and retains [-2,-1,1,2].
Asserts n_obs=0 at trimmed horizons, finite bounds on retained block,
and post_periods_used excludes h=3.

This exercises the real trends_nonparam support-trimming path through
HonestDiD (not SE mutation), addressing the recurring P2 finding.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_chaisemartin_dhaultfoeuille.py | 55 ++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index 143c462e..ec966b33 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3442,7 +3442,7 @@ def test_honest_did_custom_l_vec_summary_label(self):
         assert "Equal-weight" not in text
 
     def test_honest_did_with_trends_nonparam(self):
-        """End-to-end trends_nonparam + honest_did=True."""
+        """End-to-end trends_nonparam + honest_did=True (balanced support)."""
         rng = np.random.RandomState(42)
         rows = []
         for g in range(40):
@@ -3465,6 +3465,59 @@ def test_honest_did_with_trends_nonparam(self):
         assert r.honest_did_results is not None
         assert np.isfinite(r.honest_did_results.ci_lb)
 
+    def test_honest_did_trends_nonparam_trimming(self):
+        """End-to-end: trends_nonparam causes NaN at far horizons, HonestDiD trims.
+
+        State A: switches late (t=5), has never-switching controls.
+        State B: switches early (t=2), "controls" switch at t=3 so
+        control pool vanishes at h>=2. At L_max=3, h=3 and h=-3 have
+        N_l=0 (NaN SE) because State A can't reach h=3 and State B
+        has no controls there. HonestDiD extraction drops the NaN
+        horizons and retains [-2, -1, 1, 2].
+        """
+        rng = np.random.RandomState(42)
+        rows = []
+        n_periods = 7
+        # State A: 3 switch at t=5, 4 controls
+        for g in range(7):
+            switches = g < 3
+            for t in range(n_periods):
+                d = 1 if (switches and t >= 5) else 0
+                y = 10 + 2.0*t + 5.0*d + rng.normal(0, 0.3)
+                rows.append({
+                    "group": g, "period": t, "treatment": d,
+                    "outcome": y, "state": "A",
+                })
+        # State B: 4 switch at t=2, 2 "controls" switch at t=3
+        for g in range(7, 13):
+            switch_t = 2 if g < 11 else 3
+            for t in range(n_periods):
+                d = 1 if t >= switch_t else 0
+                y = 10 + 2.0*t + 5.0*d + rng.normal(0, 0.3)
+                rows.append({
+                    "group": g, "period": t, "treatment": d,
+                    "outcome": y, "state": "B",
+                })
+        df = pd.DataFrame(rows)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=3, trends_nonparam="state", honest_did=True,
+            )
+        # h=3 and h=-3 should be NaN (N_l=0 from support trimming)
+        assert r.event_study_effects[3]["n_obs"] == 0
+        assert r.placebo_event_study[-3]["n_obs"] == 0
+        # HonestDiD should still compute on the retained block
+        hd = r.honest_did_results
+        assert hd is not None
+        assert np.isfinite(hd.ci_lb)
+        # Retained horizons should exclude the NaN endpoints
+        assert -3 not in hd.pre_periods_used
+        assert 3 not in hd.post_periods_used
+        assert hd.post_periods_used == [1, 2]
+
 
 # =============================================================================
 # Summary Phase 3 Rendering

From 46e6dc50428135cca7dda8f9199b726106bc4b5b Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 14:56:26 -0400
Subject: [PATCH 10/12] Fix CI review R8 P0: propagate estimator alpha to
 HonestDiD

compute_honest_did() call in fit() now passes alpha=self.alpha
instead of inheriting the default 0.05. Added regression test
asserting honest_did_results.alpha matches estimator alpha.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/chaisemartin_dhaultfoeuille.py  |  3 ++-
 tests/test_chaisemartin_dhaultfoeuille.py | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
index 2acee121..75d75d37 100644
--- a/diff_diff/chaisemartin_dhaultfoeuille.py
+++ b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -2423,7 +2423,8 @@ def fit(
                 from diff_diff.honest_did import compute_honest_did
 
                 results.honest_did_results = compute_honest_did(
-                    results, method="relative_magnitude", M=1.0
+                    results, method="relative_magnitude", M=1.0,
+                    alpha=self.alpha,
                 )
             except (ValueError, np.linalg.LinAlgError) as exc:
                 warnings.warn(
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index ec966b33..88cca1a6 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3404,6 +3404,18 @@ def test_honest_did_custom_l_vec_on_impact(self):
             rtol=1e-10,
         )
 
+    def test_honest_did_respects_alpha(self):
+        """honest_did=True propagates estimator alpha to HonestDiD."""
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1, alpha=0.10).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+        assert r.honest_did_results is not None
+        assert r.honest_did_results.alpha == 0.10
+
     def test_honest_did_retains_period_metadata(self):
         """HonestDiDResults stores pre_periods_used and post_periods_used."""
         df = self._make_data()

From 820c9dd0a1302590153bf091434956511ffec6b5 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 15:11:03 -0400
Subject: [PATCH 11/12] Fix CI review R9: bootstrap interaction test, assert
 trimming warning

P1: Add test_honest_did_with_bootstrap - fits with n_bootstrap=49 and
    honest_did=True, asserts a finite ci_lb and post_periods_used == [1, 2].
P2: test_honest_did_trends_nonparam_trimming now captures warnings and
    asserts the placebo-based pre-period warning is emitted (was
    suppressing all warnings before).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_chaisemartin_dhaultfoeuille.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
index 88cca1a6..94022e78 100644
--- a/tests/test_chaisemartin_dhaultfoeuille.py
+++ b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -3512,8 +3512,8 @@ def test_honest_did_trends_nonparam_trimming(self):
                 })
         df = pd.DataFrame(rows)
 
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
             r = ChaisemartinDHaultfoeuille(seed=1).fit(
                 df, "outcome", "group", "period", "treatment",
                 L_max=3, trends_nonparam="state", honest_did=True,
@@ -3529,6 +3529,25 @@ def test_honest_did_trends_nonparam_trimming(self):
         assert -3 not in hd.pre_periods_used
         assert 3 not in hd.post_periods_used
         assert hd.post_periods_used == [1, 2]
+        # The placebo-based pre-period warning should have been emitted
+        placebo_warns = [
+            x for x in w if "placebo" in str(x.message).lower()
+            and "pre-period" in str(x.message).lower()
+        ]
+        assert len(placebo_warns) >= 1
+
+    def test_honest_did_with_bootstrap(self):
+        """honest_did=True works with bootstrap-fitted results."""
+        df = self._make_data()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = ChaisemartinDHaultfoeuille(seed=1, n_bootstrap=49).fit(
+                df, "outcome", "group", "period", "treatment",
+                L_max=2, honest_did=True,
+            )
+        assert r.honest_did_results is not None
+        assert np.isfinite(r.honest_did_results.ci_lb)
+        assert r.honest_did_results.post_periods_used == [1, 2]
 
 
 # =============================================================================

From b88a97e7d39bb75a3cc8ed00ee24ea96aa48e970 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Tue, 14 Apr 2026 15:52:57 -0400
Subject: [PATCH 12/12] Fix CI review R10 P2: assert non-consecutive-horizon
 trimming warning

Add test_dcdh_interior_gap_triggers_trimming_warning: corrupts h=-2 SE
to create an interior gap [-3, -1], asserts the "Dropping non-consecutive
horizons" warning is emitted and pre_periods_used == [-1].

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_honest_did.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py
index d0e99dd4..5ca20efd 100644
--- a/tests/test_honest_did.py
+++ b/tests/test_honest_did.py
@@ -1446,6 +1446,29 @@ def test_dcdh_standalone_surfaces_target_metadata(self):
         assert "post_periods_used" in d
         assert d["post_periods_used"] == [1, 2]
 
+    def test_dcdh_interior_gap_triggers_trimming_warning(self):
+        """Non-consecutive horizons after SE filtering emit trimming warning."""
+        import warnings
+
+        # L_max=3 gives horizons [-3,-2,-1,1,2,3]. Setting the h=-2 SE to NaN
+        # leaves pre-horizons [-3, -1] with a gap at -2, so consecutive-block
+        # trimming drops -3 and keeps only [-1].
+        results = self._fit_dcdh(n_periods=8, L_max=3)
+        results.placebo_event_study[-2]["se"] = float("nan")
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            bounds = compute_honest_did(results)
+        trim_warns = [
+            x for x in w
+            if "dropping non-consecutive" in str(x.message).lower()
+        ]
+        assert len(trim_warns) >= 1, (
+            "Expected a warning about dropping non-consecutive horizons"
+        )
+        # Retained pre should be [-1] only (h=-3 dropped due to gap at -2)
+        assert bounds.pre_periods_used == [-1]
+
     def test_dcdh_missing_boundary_minus1_raises(self):
         """ValueError when horizon -1 has NaN SE (boundary required)."""
         import warnings