From 4cb0c1104cb5f41c46b3b5f5dbef07be8c6d7837 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 12:35:33 -0400 Subject: [PATCH 01/17] Add Phase 3 PR B: covariates, trends, and extensions for dCDH Implements ROADMAP items 3a-3e and 3i: - DID^X covariate residualization (Web Appendix Section 1.2) - DID^{fd} group-specific linear trends (Section 1.3, Lemma 6) - State-set-specific trends (Section 1.4) - Heterogeneity testing (Section 1.5, Lemma 7) - Design-2 switch-in/switch-out convenience wrapper (Section 1.6) - R parity tests for controls and trends_lin scenarios Co-Authored-By: Claude Opus 4.6 (1M context) --- ROADMAP.md | 12 +- benchmarks/R/generate_dcdh_dynr_test_values.R | 94 +++ benchmarks/data/dcdh_dynr_golden_values.json | 135 ++++ diff_diff/chaisemartin_dhaultfoeuille.py | 739 +++++++++++++++++- .../chaisemartin_dhaultfoeuille_results.py | 77 +- docs/methodology/REGISTRY.md | 19 +- tests/test_chaisemartin_dhaultfoeuille.py | 469 ++++++++++- ...test_chaisemartin_dhaultfoeuille_parity.py | 122 +++ 8 files changed, 1604 insertions(+), 63 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 326e2e46..66b7aada 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -167,12 +167,12 @@ The dynamic companion paper subsumes the AER 2020 paper: `DID_1 = DID_M`. The si | Item | Priority | Status | |------|----------|--------| -| **3a.** Residualization-style covariate adjustment `DID^X` (Web Appendix Section 1.2 of dynamic paper). **Note:** NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. 
| HIGH | Not started | -| **3b.** Group-specific linear trends `DID^{fd}` (Web Appendix Section 1.3, Lemma 6) — second-difference estimator with cumulation for level effects | MEDIUM | Not started | -| **3c.** State-set-specific trends (`trends_nonparam` option, Web Appendix Section 1.4) | MEDIUM | Not started | -| **3d.** Heterogeneity testing `beta^{het}_l` (Web Appendix Section 1.5) | LOW | Not started | -| **3e.** Design-2 switch-in / switch-out separation (Web Appendix Section 1.6) | LOW | Not started | -| **3f.** Non-binary treatment support (the formula already handles it; this row is documentation + tests) | MEDIUM | Not started | +| **3a.** Residualization-style covariate adjustment `DID^X` (Web Appendix Section 1.2 of dynamic paper). **Note:** NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. | HIGH | Shipped (PR B) | +| **3b.** Group-specific linear trends `DID^{fd}` (Web Appendix Section 1.3, Lemma 6) — second-difference estimator with cumulation for level effects | MEDIUM | Shipped (PR B) | +| **3c.** State-set-specific trends (`trends_nonparam` option, Web Appendix Section 1.4) | MEDIUM | Shipped (PR B) | +| **3d.** Heterogeneity testing `beta^{het}_l` (Web Appendix Section 1.5) | LOW | Shipped (PR B) | +| **3e.** Design-2 switch-in / switch-out separation (Web Appendix Section 1.6) | LOW | Shipped (PR B; convenience wrapper) | +| **3f.** Non-binary treatment support (the formula already handles it; this row is documentation + tests) | MEDIUM | Shipped (PR #300; also ships placebo SE, L_max=1 per-group path, parity SE assertions) | | **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Not started | | **3h.** **Single comprehensive tutorial notebook** covering all three phases — Favara-Imbs (2015) banking deregulation replication as the headline application, with comparison plots vs LP / TWFE | HIGH | Not started | | **3i.** Parity tests vs `did_multiplegt_dyn` for covariate and extension specifications | HIGH | 
Not started | diff --git a/benchmarks/R/generate_dcdh_dynr_test_values.R b/benchmarks/R/generate_dcdh_dynr_test_values.R index eeb68fee..9a0c0e3e 100644 --- a/benchmarks/R/generate_dcdh_dynr_test_values.R +++ b/benchmarks/R/generate_dcdh_dynr_test_values.R @@ -388,6 +388,100 @@ scenarios$joiners_only_long_multi_horizon <- list( results = extract_dcdh_multi(res9, n_effects = 5, n_placebos = 5) ) +# --------------------------------------------------------------------------- +# Phase 3: Covariate and linear-trends scenarios +# --------------------------------------------------------------------------- + +# Helper: add a covariate column to a panel. The covariate is correlated with +# switch timing (confounding) but the true effect is constant. +add_covariate <- function(df, seed = 42, x_effect = 1.5) { + set.seed(seed) + n <- nrow(df) + groups <- unique(df$group) + # Group-level base value (correlated with which groups switch) + x_base <- setNames(rnorm(length(groups), 0, 1), groups) + # Time-varying component + df$X1 <- x_base[as.character(df$group)] + 0.3 * df$period + rnorm(n, 0, 0.2) + # Add covariate effect to outcome + df$outcome <- df$outcome + x_effect * df$X1 + df +} + +# Scenario 10: joiners_only with controls (L_max=2) +cat(" Scenario 10: joiners_only_controls\n") +d10 <- gen_reversible(n_groups = N_GOLDEN, n_periods = 8, + pattern = "joiners_only", seed = 110) +d10 <- add_covariate(d10, seed = 210, x_effect = 1.5) +res10 <- did_multiplegt_dyn( + df = d10, outcome = "outcome", group = "group", time = "period", + treatment = "treatment", effects = 2, placebo = 1, ci_level = 95, + controls = "X1" +) +scenarios$joiners_only_controls <- list( + data = list( + group = as.numeric(d10$group), + period = as.numeric(d10$period), + treatment = as.numeric(d10$treatment), + outcome = as.numeric(d10$outcome), + X1 = as.numeric(d10$X1) + ), + params = list(pattern = "joiners_only", n_groups = N_GOLDEN, n_periods = 8, + seed = 110, effects = 2, placebo = 1, ci_level = 95, 
+ controls = "X1"), + results = extract_dcdh_multi(res10, n_effects = 2, n_placebos = 1) +) + +# Scenario 11: joiners_only with trends_lin (L_max=2) +cat(" Scenario 11: joiners_only_trends_lin\n") +d11 <- gen_reversible(n_groups = N_GOLDEN, n_periods = 8, + pattern = "joiners_only", seed = 111) +# Add group-specific linear trends to outcome +set.seed(311) +groups11 <- unique(d11$group) +g_trends <- setNames(rnorm(length(groups11), 0, 0.5), groups11) +d11$outcome <- d11$outcome + g_trends[as.character(d11$group)] * d11$period +res11 <- did_multiplegt_dyn( + df = d11, outcome = "outcome", group = "group", time = "period", + treatment = "treatment", effects = 2, placebo = 1, ci_level = 95, + trends_lin = TRUE +) +scenarios$joiners_only_trends_lin <- list( + data = export_data(d11), + params = list(pattern = "joiners_only", n_groups = N_GOLDEN, n_periods = 8, + seed = 111, effects = 2, placebo = 1, ci_level = 95, + trends_lin = TRUE), + results = extract_dcdh_multi(res11, n_effects = 2, n_placebos = 1) +) + +# Scenario 12: joiners_only with both controls and trends_lin (L_max=2) +cat(" Scenario 12: joiners_only_controls_trends_lin\n") +d12 <- gen_reversible(n_groups = N_GOLDEN, n_periods = 8, + pattern = "joiners_only", seed = 112) +d12 <- add_covariate(d12, seed = 212, x_effect = 1.5) +# Add group-specific linear trends +set.seed(312) +groups12 <- unique(d12$group) +g_trends12 <- setNames(rnorm(length(groups12), 0, 0.5), groups12) +d12$outcome <- d12$outcome + g_trends12[as.character(d12$group)] * d12$period +res12 <- did_multiplegt_dyn( + df = d12, outcome = "outcome", group = "group", time = "period", + treatment = "treatment", effects = 2, placebo = 1, ci_level = 95, + controls = "X1", trends_lin = TRUE +) +scenarios$joiners_only_controls_trends_lin <- list( + data = list( + group = as.numeric(d12$group), + period = as.numeric(d12$period), + treatment = as.numeric(d12$treatment), + outcome = as.numeric(d12$outcome), + X1 = as.numeric(d12$X1) + ), + params = 
list(pattern = "joiners_only", n_groups = N_GOLDEN, n_periods = 8, + seed = 112, effects = 2, placebo = 1, ci_level = 95, + controls = "X1", trends_lin = TRUE), + results = extract_dcdh_multi(res12, n_effects = 2, n_placebos = 1) +) + # --------------------------------------------------------------------------- # Write output # --------------------------------------------------------------------------- diff --git a/benchmarks/data/dcdh_dynr_golden_values.json b/benchmarks/data/dcdh_dynr_golden_values.json index f640761c..e5055728 100644 --- a/benchmarks/data/dcdh_dynr_golden_values.json +++ b/benchmarks/data/dcdh_dynr_golden_values.json @@ -429,6 +429,141 @@ } } } + }, + "joiners_only_controls": { + "data": { + "group": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 42, 
42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86, 86, 86, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, 95, 95, 95, 95, 95, 95, 95, 95, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 
103, 103, 103, 104, 104, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, 105, 105, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, 107, 107, 108, 108, 108, 108, 108, 108, 108, 108, 109, 109, 109, 109, 109, 109, 109, 109, 110, 110, 110, 110, 110, 110, 110, 110, 111, 111, 111, 111, 111, 111, 111, 111, 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, 115, 115, 115, 115, 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 118, 118, 118, 119, 119, 119, 119, 119, 119, 119, 119], + "period": [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 
3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7], + "treatment": [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + "outcome": [6.8842389362, 8.1310934422, 8.4054839405, 9.0173911152, 10.4008891075, 12.7028533425, 11.0391691157, 13.9566891664, 12.0852737495, 12.9420124908, 13.6558371829, 13.3974293134, 14.7043958506, 14.7481434034, 17.1463326554, 18.1467078212, 7.5620860005, 9.1898045026, 9.5579968932, 12.1658706759, 12.8227248059, 12.5745478126, 13.5050872027, 13.2423092376, 7.8195823971, 9.8794888103, 10.0632801148, 10.8276343913, 11.0898146653, 12.093730201, 13.3715440928, 13.8751887986, 7.8349849078, 8.3753981338, 7.5070817891, 8.2092311168, 9.4613395331, 9.5390316199, 9.7501762158, 14.0073007917, 5.8027339235, 5.9101345886, 6.3621627918, 9.9137982368, 10.7013476798, 10.9535376924, 10.7693698175, 12.6434965087, 15.9450248498, 17.7014805635, 17.8987951215, 20.0443782304, 19.1690269873, 20.4500785385, 20.4790805531, 21.2540116337, 7.4478951611, 8.8345228357, 8.5243848161, 11.1892919524, 12.4629966183, 12.0234425059, 13.524619326, 13.3736986596, 6.6591918196, 6.802989882, 7.8009394461, 8.4274021533, 12.2849704887, 12.2938434735, 12.9765959292, 14.7226813598, 12.4058223669, 13.3988284976, 13.806318277, 16.4303141859, 17.2290572897, 18.2925692608, 17.7405495561, 19.0318702748, 14.6040640414, 14.9615048837, 13.4069268317, 17.0707478307, 17.7789186167, 19.4445877285, 18.7427722822, 19.7230308299, 7.8946374917, 7.9108113735, 9.2485539939, 9.0865149719, 10.3246878256, 10.7825164262, 10.778330538, 14.0440664025, 15.4900748664, 17.1403557757, 19.1172068788, 19.656289296, 20.5727488583, 20.7144870963, 21.2909618365, 20.8014447135, 
7.7629494684, 7.5537378622, 10.8797806038, 11.8137073683, 12.1286438203, 12.6871645385, 12.8712374204, 13.8645980285, 6.3937706788, 9.8320791868, 10.2909920963, 9.8471535628, 10.8653926292, 12.4058988826, 13.0063827731, 13.3256024288, 7.8254626293, 8.6933523886, 9.7058836732, 9.7685439087, 11.1285399123, 11.7137600983, 12.8530239706, 13.0825405679, 9.7182058139, 12.6741328643, 13.1835611466, 14.0040718989, 14.4701523524, 15.4021153808, 14.4533308234, 16.4118921401, 11.5982820024, 12.0950235315, 13.3477438746, 13.2421236095, 14.4389181627, 16.9421365665, 17.3617308043, 17.9927267796, 11.0523600327, 10.9449698179, 10.9920086921, 11.9372748276, 15.3384854302, 16.3028351239, 15.9881550217, 16.7293274288, 14.2937600848, 15.2656073812, 15.5331681008, 15.9015803822, 18.8232406786, 19.8165549894, 19.5737095957, 20.6964706537, 9.7763239127, 9.7951363782, 9.8951803592, 10.476063912, 11.6187385468, 12.14046785, 11.9303189864, 14.8276379573, 9.4573674609, 12.5119749368, 12.85274816, 12.1089499498, 13.9182337025, 14.2147337763, 15.1275927836, 15.045341142, 7.7446754739, 8.7353771982, 9.4223276001, 8.6537898733, 9.2776131584, 11.8841744084, 13.8711471536, 13.3433623802, 12.828108782, 15.142961644, 15.3983541531, 14.7039704304, 15.5773501516, 15.7441970841, 16.8989562507, 16.3113373602, 9.1773623508, 8.9328987896, 9.9275266277, 10.8909468003, 11.9014501261, 12.7607479431, 14.7549133364, 14.4370287878, 10.0430085586, 10.8326618919, 12.0120362516, 13.7655681764, 14.079514159, 15.1707080986, 15.9390090863, 16.7674164454, 9.7735998525, 10.562256469, 10.5899839365, 11.2392060953, 13.5881352717, 13.5546197295, 15.3460799852, 15.6716696372, 6.9885761884, 6.0733759708, 8.4311111541, 10.5317883537, 10.8375104598, 10.1201730916, 12.6275115745, 10.981072067, 10.0647485964, 11.1902446962, 12.2298414073, 12.6483150107, 15.2051649435, 16.3454893662, 16.3409864869, 17.7582865345, 9.7286675069, 10.1386068925, 10.3374814206, 13.0180738865, 12.499300124, 12.6435492534, 13.7750101315, 
16.5817907228, 8.0852037153, 7.7952829908, 7.1944166723, 8.1999920405, 9.1430953109, 10.280182672, 10.8176901583, 12.218809478, 9.8324724434, 9.44678252, 10.6345030283, 11.0127205948, 10.6147741133, 12.4725234979, 15.4928408225, 14.9003857098, 9.3613230648, 11.1960097507, 11.5853232386, 13.3772445145, 12.875501071, 14.9556722665, 14.10448141, 14.8270084976, 15.4307367565, 16.0688462999, 16.1908288033, 18.1034197228, 18.9488766886, 19.4557999966, 19.9422400324, 19.9241671817, 12.3839633498, 12.4958313099, 12.4398176016, 14.2851685672, 14.9238564651, 15.2962479863, 16.9945542008, 17.4808891092, 9.5323694068, 9.6565504035, 10.8913178061, 13.8843571064, 13.4185978605, 16.2141961343, 15.3411182627, 15.995230831, 12.1993240659, 11.2981134306, 12.1456521912, 12.2339147031, 14.4187020359, 13.7324005632, 17.6077823036, 18.7166086958, 5.4155298184, 5.3232957134, 6.959737071, 7.2695909426, 7.7561919659, 11.1135267327, 10.1937321937, 11.7029290419, 9.3500768599, 12.5616524385, 12.5418152066, 14.4603600497, 14.4393607806, 13.5387627612, 16.1462607475, 15.661078365, 14.4448949912, 14.6641750144, 14.2592573671, 17.1955987622, 17.0055878514, 19.0591872722, 18.1780649315, 19.0912898325, 11.2101628181, 11.6351074526, 12.4064565617, 13.2994224156, 15.3610291094, 15.2122938667, 16.8426398084, 17.1308750232, 14.0303990182, 12.384154147, 13.6636254788, 14.9157426717, 14.1561511568, 17.4414948796, 18.35226336, 18.1993336702, 11.9294731055, 11.732847356, 13.9302671371, 13.8973209971, 14.6710485579, 15.0479623664, 16.0770933241, 18.061117341, 11.8509267972, 12.2245349847, 13.2572981838, 16.5729439413, 17.3634706635, 16.7997182561, 17.4817253549, 18.4600472962, 9.346181853, 9.842110863, 13.2043432876, 13.3722312349, 15.0513712641, 13.0175342376, 14.7731518399, 15.3771158374, 12.8427364561, 12.4403089819, 13.6619405488, 13.5977076804, 17.0057109615, 17.1868880791, 18.2388091385, 18.0561650016, 13.1500909438, 16.2152600091, 16.3330622084, 17.0075480179, 16.9982633397, 18.6860071111, 
19.1623860794, 20.3538135366, 5.3030116118, 8.5735971176, 8.6642749009, 9.6928632307, 9.5113763197, 10.25460108, 11.7867864793, 11.7597848288, 11.7020089106, 14.8418402824, 13.9303360712, 14.9732662182, 14.6410751234, 14.7989463201, 17.6726045876, 16.3819401046, 11.5092698984, 11.3691936716, 12.6477398916, 13.1491275041, 13.020526113, 15.6922909849, 15.3269553229, 16.2028171459, 10.6264043936, 12.3615051517, 14.1191899675, 15.4878697166, 14.4811221682, 15.4175644178, 15.6617815504, 17.2630949875, 7.7633658504, 9.0606806898, 9.5692064442, 10.2419733349, 11.0502477408, 10.6893240151, 14.0230277412, 13.1249145507, 8.0838885042, 8.9203185945, 9.2506035152, 9.7915859322, 9.5741386652, 11.1928004044, 13.7164554309, 13.8394417085, 10.9697240893, 13.4449268398, 12.8357304992, 13.737989957, 15.1600637501, 13.5683725198, 17.7201258765, 17.2726056136, 5.1607172181, 5.821302277, 8.2129027066, 7.4554631993, 9.1735866553, 8.2978787326, 10.6114892908, 9.9704683435, 7.5298049552, 6.7168159846, 7.0213625215, 8.1527755051, 10.5673049267, 10.3952361241, 11.1081842925, 11.138869664, 10.347509022, 10.4407629169, 13.3654791859, 14.1292716148, 15.0192200547, 14.6258469869, 15.8454921665, 15.7718435084, 12.3996206959, 13.0517368911, 14.3549686322, 15.1594932779, 16.1654457831, 15.1289485568, 17.5637312473, 18.552869093, 10.3309647041, 11.2322542166, 9.8544166127, 10.8360819433, 14.1046186057, 14.8881795978, 15.9265673462, 16.2421784294, 8.6133796121, 8.434890378, 9.639433149, 10.3209739862, 10.4921216791, 12.4718185059, 13.507362618, 14.2858215484, 12.8707104124, 14.9453422091, 14.1859343469, 14.6739165251, 15.1677487115, 15.6875056903, 18.4125545711, 19.0230681882, 9.9067218547, 11.9125796461, 13.8860134314, 13.5921219457, 13.8153391, 14.0087269063, 15.7903103838, 16.0544805196, 10.1859860106, 12.9976783737, 13.3578199065, 13.5060363775, 13.9549439992, 14.3236173782, 14.6128356722, 15.1471660033, 7.6100814232, 7.2525654763, 7.275895049, 8.0746497085, 9.2655847935, 9.5231419051, 
9.6426822853, 12.7787173225, 5.068003664, 7.2826534128, 7.4567561839, 8.7915544121, 9.1546864053, 11.0085602252, 12.524067912, 12.3068008427, 5.0600117349, 6.104025244, 4.9826285591, 5.2830370659, 7.2276431621, 10.0785698382, 9.956425294, 10.9126804238, 9.063421048, 10.5021334638, 9.8242104844, 10.9056330581, 13.1972496065, 14.1064931795, 15.0175783283, 14.4593530381, 9.8617383743, 9.6675251717, 9.9607833096, 13.6065866017, 13.229376444, 14.0347440756, 15.3695368132, 14.0830957681, 13.6319370975, 12.8988586451, 14.3274033592, 17.8921982836, 17.8946430287, 17.684301477, 18.3907750006, 19.0846378737, 3.5606121541, 5.7012840961, 6.2094835755, 5.4953827552, 7.1313906601, 7.7313424961, 9.4524761597, 10.5713071193, 7.7587625254, 8.3020445134, 9.2242461992, 12.5387694253, 12.564704784, 12.649205109, 12.7492769118, 15.6726735866, 9.5974048104, 11.7445965439, 11.1477111161, 12.1811151667, 13.7797900171, 14.1422884699, 14.5089253385, 15.5971461796, 11.3191527536, 11.8477189118, 10.258185961, 12.3117603205, 11.5536074452, 12.7339075885, 15.5513081489, 16.6749116347, 8.6555102738, 8.6069456997, 8.9260722454, 11.1017209185, 12.9499986711, 13.0481261275, 12.9599588886, 14.7960569749, 9.9170119896, 11.6377536581, 10.4326577506, 11.2611994679, 12.7101557012, 12.520530732, 11.7095892963, 15.5413427585, 11.4549892484, 11.3705788153, 12.8845671351, 13.4338262343, 13.8721965291, 15.1208605983, 17.4158166186, 17.6207831345, 9.3359945237, 10.3846194555, 13.3180738094, 15.2159817522, 14.7692655135, 15.5914121427, 14.9637347547, 15.7816831361, 10.1272411956, 11.4922075692, 10.9492894008, 14.4760480965, 14.9899585357, 14.7728799997, 16.7158673054, 17.1775943937, 13.2484071271, 15.7886135958, 15.70175575, 15.866734709, 16.2707936273, 16.6014516552, 18.9277177649, 20.1896885661, 9.1061486844, 9.7017006685, 10.1724514284, 9.0484269045, 10.5067550685, 11.2381675231, 10.5281503017, 13.9633878342, 7.9035602596, 7.0455517097, 7.9430109803, 8.9659136888, 9.0757108314, 9.8613911563, 9.9203402787, 
10.9898265211, 11.4063756544, 11.8454190602, 11.6929037154, 12.6530367462, 12.6250371216, 13.4926532693, 14.6273065862, 13.5096115887, 8.0380368151, 8.628728056, 9.5606330947, 9.4525149797, 9.2193360005, 10.069161983, 9.7313011776, 11.81883017, 7.1309724911, 7.232683351, 8.6679700837, 7.9412518364, 9.6736477694, 10.7741585796, 10.7877879166, 12.3524589952, 9.1510484559, 9.947323734, 10.1749737191, 11.026919124, 11.1165404016, 11.3976542821, 13.3031392619, 13.1124928827, 5.2648308873, 7.7118141317, 7.8361362526, 7.6309525279, 8.2795311985, 10.8998655455, 9.8723010852, 10.6859375983, 9.5345522217, 9.2995996195, 10.33131953, 10.6466776551, 10.6909285504, 12.126466595, 14.0416817218, 12.1602226808, 9.0314663256, 8.2959111433, 11.1633783623, 10.4361688964, 12.3119120188, 11.804678283, 12.0861219845, 12.3928742764, 5.854654049, 5.8743450122, 8.0536893806, 7.1426609324, 8.0665525138, 8.2870396433, 8.7919243697, 9.0018079073, 9.9799553828, 12.078512993, 11.4553655195, 11.9151335417, 12.6893655665, 14.170720672, 13.9420318076, 14.5780867477, 11.3121216748, 11.2955526294, 12.70937442, 13.3057241925, 13.6801707359, 14.3564385183, 14.1728235337, 15.8562442377, 6.8066420755, 6.6357688941, 6.4824237906, 8.4098463665, 8.058130176, 9.3745286287, 9.2059622909, 8.6566034283, 9.2750704042, 10.8683245184, 9.7536178836, 12.1536836226, 11.534483723, 13.3518955042, 13.7074695721, 13.1767293259, 9.8798861753, 10.3580077266, 11.1561825064, 11.3948224271, 11.0522727904, 12.4629396745, 13.0756488889, 13.6080905294, 10.5284515797, 11.0589216967, 11.025984127, 11.7335458685, 13.3082838884, 12.8878298451, 13.7874145738, 13.6559296664, 8.5254489699, 10.492150117, 9.6651307165, 10.0877436034, 10.3094010281, 12.0442156183, 14.1360688031, 13.199710462, 14.5672673755, 13.5790327818, 15.2328660281, 15.6337603974, 16.8592217231, 17.0346934607, 17.0914500669, 18.1549272026, 4.9983137129, 6.5544505455, 7.1122169154, 6.0880375526, 8.8119699672, 8.9063196042, 8.7658130161, 8.7523001685, 8.5800519922, 
9.1938939308, 9.4920325217, 9.4362224028, 11.4508532734, 11.2053272837, 10.9696756478, 12.9649281391, 12.1876935557, 14.8152386328, 14.2435174145, 14.8232755046, 15.7610658887, 15.594696545, 17.7980042213, 17.0182304195, 15.018756412, 16.0297573246, 17.7419538651, 16.6664858826, 17.2234459394, 18.8885298911, 18.3827455654, 18.9611230485, 13.0582858397, 13.7810618652, 14.5160204198, 15.2356399456, 15.4088090399, 14.7913392665, 15.2613897519, 16.3688145698, 13.9239982538, 13.289214565, 15.1837962736, 15.0589034785, 14.6717150368, 15.8747736399, 17.4453552001, 17.8892418868, 9.053207891, 8.3257775051, 10.458077643, 10.0761946768, 10.0045353256, 10.7933574283, 10.2529712474, 12.1828829605, 12.5296340811, 12.5417974528, 12.7893817677, 14.4870408732, 15.071359546, 15.2219441176, 15.8401194425, 16.7993849378, 6.8477384718, 6.831036603, 8.2760525568, 9.1797704835, 10.1564526677, 10.4676449629, 10.6875842157, 10.4929328094, 12.2253401017, 14.3393035059, 14.829285019, 14.3995373021, 16.1108512616, 15.7282856383, 16.8009399801, 17.2313618798, 15.6703731471, 16.4609325758, 16.2936620079, 17.0010800528, 17.6162934854, 17.4376647172, 18.9346238671, 18.5248943039, 7.4981496012, 7.8582390193, 7.9413390946, 8.0232715938, 8.6564945462, 9.5264259313, 10.2957190963, 11.0668745734, 14.0346572493, 14.6818441755, 14.3284598866, 15.9901642876, 16.1516504965, 17.3743806027, 18.2578165534, 17.4373730908, 12.2518459701, 13.3664124053, 13.2309732986, 13.597983859, 14.9513669991, 15.1671734621, 15.8551198179, 16.7336354023, 17.4990792793, 16.9814365944, 17.7324295728, 18.7477046039, 17.3199605311, 20.5542621557, 21.2954559437, 21.4128681575, 13.1674338624, 13.9990076383, 14.822847947, 14.2998750256, 14.0419129869, 16.3181937559, 17.3137755382, 17.2945478104, 14.6012028778, 15.4494851178, 15.0360724131, 16.7304248499, 15.7294658736, 18.0691373339, 16.9519596229, 17.4579969679, 12.5620787271, 13.0007941207, 13.8109912358, 13.914929913, 14.7782614176, 15.4114448244, 15.9746652944, 16.8254060003, 
11.5262224298, 10.4502469675, 11.8166653635, 11.2730906214, 11.8265475672, 12.9318725459, 13.4339881552, 14.7697197944, 11.4374256359, 12.4453432255, 12.4315054706, 14.1898243369, 13.9595698507, 14.6711950605, 14.2668082794, 15.3491763628, 12.9231840499, 13.3185000703, 13.7293576343, 13.6106887538, 14.1051229408, 15.2870914475, 15.2675520727, 15.9962190445, 14.0524881298, 15.3566803111, 16.7182081521, 17.4443389204, 16.5583967921, 16.9864171952, 18.5578747708, 17.5194349311, 9.7121125766, 9.9074981956, 11.0370449334, 10.5665780793, 11.5765037828, 13.148033223, 13.6970014655, 13.8646863495], + "X1": [-0.13571077274, 0.31453290885, 0.70639486847, 0.73475953825, 1.1820468223, 1.3640149541, 1.3419034301, 2.2254857826, 1.4337992814, 1.6040776863, 1.7338548829, 1.9345651397, 2.8428401036, 2.4840144124, 3.078978898, 3.4917834385, -0.72190748241, -0.025605459358, 0.30325263639, 0.43175876552, 0.68828693041, 0.79188995868, 1.3928930433, 1.2655497402, -0.26818111803, -0.34018492893, 0.034936053684, 0.49219489766, 0.83038296293, 0.72036629226, 1.5424535312, 1.5415131916, 0.21890953765, 0.33083993653, 0.48641155601, 0.93648788127, 1.2627516217, 1.0944231839, 1.6683723806, 2.1725367733, -0.61952758224, -0.8455188238, -0.17526211928, 0.24012730937, 0.68002213721, 0.56646874501, 1.1638421862, 1.3718219485, 1.8316127188, 1.6223558413, 2.2675818772, 2.8115157093, 2.8028819556, 3.4850286084, 3.2431571415, 3.3653564711, -1.2979062783, -0.70960585645, -0.55369602101, -0.49770188582, 0.038549208003, 0.048193974616, 0.89557310239, 1.1306962818, -0.40734967314, -0.38183850854, -0.14868704109, 0.49118924366, 0.90722155514, 1.224772941, 1.4394538112, 1.927001228, 1.3400432638, 1.7555590947, 2.2354344264, 2.2882771389, 2.8339905779, 3.164452943, 3.3906009839, 3.8213294274, 1.8992609842, 1.7676705463, 2.3748172655, 2.4446819494, 2.672756108, 3.2477361946, 3.6760369983, 4.0588666013, -0.63965033608, -0.44492680401, 0.4754493977, 0.088448532588, 0.89910780664, 0.77511950988, 1.0963994833, 
1.8217770308, 1.9305684819, 2.5375911668, 2.8209661358, 3.2033957074, 3.4048417151, 3.7237516054, 3.7031435555, 4.2028143281, -0.18604883063, 0.25501392277, 0.33433185808, 0.8599316237, 1.0648111132, 1.5511634674, 1.9349058045, 2.1635610429, -2.3789469105, -1.7446165571, -1.7830545419, -1.3958557206, -1.3679530991, -0.68098354568, -0.28358618075, -0.14509437665, -0.061483295454, 0.045004124696, 0.21920576653, 0.42869411144, 0.85608051974, 1.0444138869, 1.7473671538, 1.899837966, -0.25366055078, 0.042672487392, 0.50682538249, 0.70718142038, 1.0851802651, 1.2686006754, 1.1389126299, 1.8813954669, -0.27076508163, 0.1023256887, 0.42113345143, 0.51050336367, 0.7570793472, 1.3983627576, 1.9547451692, 1.9564497734, -0.29285493745, -0.023985863434, 0.096120076422, 0.5212953875, 1.3418514832, 1.3561311292, 1.3212373961, 1.690096654, 3.0019271431, 3.3307268038, 3.6833567183, 4.2492521869, 4.050681918, 4.5973624084, 4.8002005568, 5.5141980932, -0.7786189111, -0.94131641545, -0.26806921758, -0.33048361638, 0.61131022263, 0.41098722746, 0.7540934332, 1.2656034819, 0.22356623394, 0.74208242013, 0.96223503978, 1.2136078983, 1.5953497077, 1.8013532803, 2.0348482811, 2.2432526015, 0.39492063005, 0.62804808226, 0.82759705717, 1.1269400173, 1.2811175606, 1.4697523753, 2.1810913685, 2.1659763846, 1.8908827507, 2.3447646399, 2.734298987, 2.5861575607, 2.7633829353, 3.2688806211, 3.7956024329, 3.5589779503, -0.25308079938, -0.52205684461, 0.28075633196, 0.2972981924, 0.58332733752, 0.90474910265, 1.3927052305, 1.7939565911, -1.2078085234, -0.89521811284, -0.60868850352, -0.023622356017, 0.0099056634786, 0.78587842416, 1.1304562519, 1.2736451896, -0.057495003279, 0.32746650483, 0.64407740143, 0.61387725504, 1.5323816125, 1.4960883136, 1.709475577, 1.8498783443, -1.4905426405, -1.5518639735, -1.3110031297, -0.7826041248, -0.34169436963, -0.19987589596, 0.15225229474, 0.43451843526, 0.010490522102, 0.31695994045, 0.75879187966, 1.0701192066, 1.3410897325, 1.8894687302, 2.0538729769, 
2.5080902473, 0.33031411597, 0.47349158785, 0.50844886032, 1.0946372996, 1.4631320165, 1.3432314138, 1.6976158965, 2.5140775726, 0.20299925436, -0.081643602698, 0.28615571793, 0.81199484079, 0.82821382003, 0.71562360929, 1.6000869146, 1.8083058339, -0.85961544682, -0.99456698167, -0.32705860762, -0.052156772155, -0.12368930693, 0.36476167388, 0.64806166953, 0.85194042229, 1.3129307137, 1.8482514838, 1.6165574755, 2.2086344914, 2.1398918345, 2.8806102762, 2.9364354134, 3.2227635707, 1.1505797227, 1.7609277953, 1.5312630673, 1.6066807821, 2.3878939919, 2.3881849892, 3.1060349147, 3.2563979731, -0.093728367253, -0.066967400101, -0.021626967427, 0.85753804557, 1.2853511888, 1.5502148421, 1.4998930567, 1.7733060909, 0.20027457093, 0.56069010075, 0.80550940861, 1.3652542223, 1.8991589264, 1.9831936756, 2.3778471584, 2.3232775748, -0.19520094178, 0.21951577421, 0.20366682423, 0.51220378949, 1.3874830515, 1.363976505, 2.16069307, 2.343192003, -0.036609439747, 0.19553223087, 0.49611972777, 0.88781723274, 1.1995929739, 1.4747716539, 1.8246144419, 2.5871627735, -1.4812911808, -1.1427729842, -0.84124736554, -0.40643001473, -0.10204186214, -0.14772004419, 0.57708041858, 0.90497288089, 1.4774626083, 1.6983316006, 1.7599391809, 2.0758483033, 2.1780625267, 2.7482289931, 3.0610072944, 3.2426736639, 0.034689379748, 0.28674266318, 0.82482601421, 1.0201255455, 1.3545731856, 1.6151053663, 1.9292680975, 1.7320305979, 2.4974781314, 2.6675242046, 3.0981507865, 3.3160959037, 3.438160318, 3.7599673751, 4.1001610007, 4.5327804184, -0.32664037213, -0.069407229151, 0.41373803145, 0.58421932029, 1.2064946839, 1.2918420613, 1.251666007, 2.2636147462, -0.30912127721, -0.18087284333, 0.19191527624, 0.58418975179, 0.91048289549, 1.2083589061, 1.5004360397, 1.7338028604, -1.0015473402, -0.56447833858, -0.11066960972, 0.22764492455, 0.32124567089, 0.98828906328, 1.212035446, 1.2614777948, 0.64515391152, 0.79959795207, 1.0906573193, 1.4581269238, 1.9012144633, 2.1368580944, 2.3442764089, 2.4989270449, 
2.1777420626, 2.4184859212, 2.5774929329, 3.0256128105, 3.2372296393, 3.9421283502, 3.9738454466, 4.6994395639, -0.72342475922, -0.67970086997, -0.20684192528, 0.51170350844, 0.2792851744, 0.77908474978, 1.493800586, 1.6062030822, 1.9939009326, 1.9813964066, 2.1809100241, 2.7477438031, 2.1402799517, 2.787984075, 3.8055377562, 3.5229370685, -0.50491675046, -0.25370701833, 0.058745051643, 0.57596611694, 0.69358398088, 1.1263012288, 0.90850548242, 1.1756717414, 0.50332267592, 0.98260917003, 1.3109014234, 1.7412945738, 1.7994332681, 2.0850226647, 2.256804522, 2.7571223164, -0.2320689838, -0.11207995038, 0.7685260903, 1.0191453503, 0.91718843037, 1.2714027866, 1.5042946327, 1.982779114, -0.75974298198, -0.53481284532, -0.29722578748, 0.10331582662, 0.48555096144, 0.37463553998, 1.0430524587, 1.5716539422, -1.0789535595, 0.14405376661, 0.14845083627, 0.41234229379, 0.95099626456, 1.0359449305, 1.3811477957, 1.5581673862, -1.1444864064, -0.61127875772, -0.63479521814, -0.4012548778, -0.020483504568, -0.029464464348, 0.6566295865, 0.87154545465, -0.85085721203, -0.9688879514, -0.38076395975, -0.62590942845, -0.33037273917, 0.26846878745, 0.43058654136, 0.58622416378, 2.1222543313, 2.3086554192, 2.1503275305, 2.9175131096, 3.1638976666, 3.1835399304, 3.9474807245, 3.9816858712, -0.25333850934, 0.12568779412, 0.86635301318, 0.49222424275, 0.81324263189, 1.3516733205, 1.6958157773, 1.626496815, -0.05193300542, 0.17178326259, 0.24942917647, 0.67231112293, 0.95352182544, 1.4403998914, 1.8842209754, 1.7154908207, -0.97533311426, -0.75712701634, -0.73979506066, -0.29598574572, 0.33634814265, 0.74449753642, 0.70713415799, 0.96673933474, 0.91087830573, 1.2356393383, 1.5160272011, 1.7939057456, 1.883041283, 2.5636610894, 2.6636607397, 3.0126485261, -1.0854557326, -0.68867908107, -0.13626049618, -0.011289439323, 0.32142652901, 0.89313033584, 0.95775251798, 1.1842435383, -0.56265203286, 0.12611010156, 0.3608312151, 0.5705350827, 0.92689132871, 1.0686884139, 1.2798276227, 1.6900293816, 
0.012849950047, -0.082432147465, 0.30325898964, 0.54048412071, 1.0317279997, 1.2166614326, 1.4153141902, 1.9566219476, -0.40819127133, 0.52202757979, 0.67338065293, 1.1622739459, 1.3355011947, 1.5753588446, 2.0898431517, 2.1933992953, -2.0338859893, -1.5211556684, -1.8480910849, -1.5183064614, -1.0339464155, -0.56023623702, 0.0095850353003, -0.036203575993, -0.83604632712, 0.14094260641, -0.33857004381, 0.094313514614, -0.0069521370851, 0.44690563995, 0.8879591241, 1.5831473574, -0.45129024938, -0.31302426185, -0.37143259708, -0.0057757381946, 0.20946557656, 0.55370987241, 1.0888130776, 1.1597574256, 1.4508265395, 1.537718982, 2.1678771708, 2.7852782409, 2.5586332308, 2.7824570887, 3.1908098905, 3.2713525499, -1.537933, -0.87651798912, -0.99019545608, -0.6045027748, -0.12935839514, 0.32655093999, 0.22086698252, 0.89207623889, 0.067910456953, 0.37695616449, 0.56449604486, 1.1125842707, 1.3962078183, 1.6879702989, 1.9922743221, 2.5232116454, 0.65424883396, 0.81763131316, 1.0366833889, 1.2175568687, 1.6349118167, 1.9158020405, 2.1515238677, 2.5746460346, 0.057085277721, 0.66309836279, 0.12797359006, 0.74204273005, 1.0911341259, 1.3481734599, 1.6006267812, 2.2730205871, 0.20431906643, 0.32136925156, 0.97302286455, 1.2482999446, 1.8351406793, 2.1753170459, 2.0160147455, 2.5517192785, -1.222771249, -0.78843526634, -0.82437153972, -0.021889326266, 0.09311365149, 0.22054734342, 0.22667678098, 0.51841024451, 0.069378157656, 0.38495014284, 1.142702728, 0.96292363798, 1.7625535837, 1.5699717394, 2.1452392222, 2.3476956907, -0.14401187776, 0.40426343337, 1.0048966498, 1.0494307655, 1.4032234512, 1.7543155345, 1.6109125635, 2.0743018375, 1.7581085748, 1.9090932691, 2.3426334931, 2.7623576519, 2.8129662509, 3.2969585334, 3.4034485727, 4.0601079179, 1.0470567214, 1.6973395002, 2.1578555986, 2.3626974838, 2.5145634128, 2.8662076873, 3.1341312494, 3.5197547498, 0.523746675, 0.81975787371, 0.74362117656, 0.63477404827, 1.4305856001, 1.8359396054, 1.6837272722, 2.4866747809, 
0.56291345683, 0.50155848161, 0.66823827553, 1.2573953577, 1.4002901462, 1.8628884743, 1.9173854404, 2.0936024204, -0.63864952936, -0.37818585489, -0.23729241341, -0.098538699773, 0.15901383662, 0.73950804596, 0.99227528042, 0.75830666988, 0.037874350786, 0.32406575009, 0.66521806248, 0.78657832428, 1.1501760211, 1.2298816187, 1.5757868427, 1.9670568839, 0.027477214834, 0.14037520819, 0.62979808942, 0.9173955133, 1.035985942, 1.5751300701, 1.9250775278, 2.406452138, -1.6254895843, -1.1085243136, -0.66601196976, -0.66412476249, -0.43183128923, -0.092303072299, 0.4204080875, 0.75944089687, 1.0769061077, 1.4679829717, 1.7589489829, 1.8500202191, 2.6648487812, 3.1050681338, 2.939670207, 3.4857280034, -0.66816088385, -0.63247563734, 0.12448307186, -0.056115210797, 0.41791323297, 1.0626840457, 1.3418194299, 1.3736375017, -0.60752699278, -0.42025717081, 0.016482694212, 0.0050400916578, 0.42211216015, 1.0029728264, 0.92073767291, 1.463151044, -1.462111932, -1.376486762, -0.89589156815, -0.60991223496, -0.55588009176, -0.082573780456, 0.22559861659, 0.086745062703, 0.18040872797, 0.12149857911, 0.40959911726, 1.0764782735, 1.2711120818, 1.7730571784, 1.9946247896, 2.272822169, 1.4644378943, 1.5855008402, 2.4204386165, 2.8680754306, 2.4996765187, 3.1529797976, 3.1539176817, 3.8510637332, -0.35684935162, -0.079169796739, 0.19364043993, 0.69065383693, 1.0523572056, 1.33884242, 1.4059118857, 1.3295826232, 0.085041929583, 0.66791640797, 0.41473003691, 1.1489790494, 1.1179047936, 1.5251000545, 2.1382433405, 2.181152785, 1.6803276312, 1.7974727297, 2.2137324272, 2.1571440454, 2.3617730356, 2.9021726604, 2.666287764, 3.7314976176, -0.55687319534, -0.56978618342, -0.235443517, 0.25899425662, 0.50170923357, 0.85317600166, 1.1998175808, 1.3396395533, -0.53993230859, -0.098667493563, 0.050635495836, 0.74191565796, 0.56606599619, 0.96764305471, 1.7296537065, 1.6824633904, 0.91111071526, 1.0753152617, 1.4810004613, 1.9317100813, 2.196368199, 2.5734843642, 2.6566768868, 3.1071424739, 
-0.57960490813, 0.18670425404, 0.31806833186, 0.21423015572, 0.99518317361, 1.1509066703, 1.3206504908, 1.7008099962, -1.1030156298, -0.78390846487, -0.49658151805, -0.19129988623, 0.31290213513, 0.43096807305, 1.0305401925, 1.4601106466, -1.0262682449, -0.28187433857, -0.15320336169, 0.2517468552, 0.88255976646, 0.56273990831, 1.399845074, 1.299525534, 0.41803001908, 0.35730359201, 1.4279602, 1.0710786516, 1.5446576404, 1.9715617244, 2.0419539107, 2.5228805423, -0.26032935004, -0.25635481941, -0.017824924807, 0.30295177779, 0.69675319785, 1.1923292894, 0.97502168948, 1.6306842755, 1.0748412779, 1.5804325162, 1.9219242467, 2.0622757103, 2.1130706785, 2.4495354468, 2.8503514205, 3.4245552859, -0.15743709427, -0.24622973235, 0.43489147677, 0.043345025281, 1.0659390546, 1.0028597812, 1.4521034625, 1.6707460388, -0.077478584092, 0.035722209889, 0.43197939023, 0.87987484226, 1.1276669238, 1.2429491791, 1.3434193254, 1.8869338852, -0.5225963044, -0.37246841231, -0.16358666298, 0.41190119455, 0.83315366327, 1.0390908706, 1.0011812035, 1.3473823404, 0.39350005951, 0.61882628215, 1.2884716909, 1.384793739, 1.9408206083, 1.7010845555, 1.816141293, 2.7001379274, 0.33891686774, 0.58487378599, 0.37856871203, 1.1506701738, 1.5524420506, 1.2932232521, 1.9538393463, 1.8922287993, -2.2757993857, -1.8268115935, -1.8721799723, -1.2190472445, -1.2502095958, -0.73979208334, -0.459914892, -0.11376396691, 2.0340008117, 1.6603646404, 1.7879229533, 2.485260026, 2.7402447617, 3.3491777212, 3.3119743228, 3.8031296662, 0.020555462942, 0.67623126464, 0.54349500938, 0.84345590557, 1.4164978432, 1.8437321354, 1.8768342778, 2.3829773089, 2.0453614347, 2.4039166192, 2.7005551298, 3.1383449786, 2.9372143404, 3.7732968751, 4.0484061664, 4.4274749113, -0.27555417722, 0.20015410469, 0.35102291073, 0.6985070458, 0.80638981094, 1.6520851767, 1.7691910367, 1.9163545423, 0.94453984677, 0.97702546566, 1.4108285446, 1.6634544205, 1.726589722, 2.6689988109, 2.2137124257, 2.8443334821, -0.41327698288, 
-0.32925314047, 0.25064867147, 0.45171819969, 0.50752368576, 1.0218544055, 1.3737399535, 1.2118084444, 1.002265735, 1.1291539197, 1.8342658374, 1.7677839584, 2.0775774721, 2.5846120384, 3.1394200043, 3.3653877948, -1.2034726897, -0.90649522144, -0.44735692921, 0.29427438749, 0.15427115607, 0.57948457431, 0.43565643821, 0.98800571292, 0.49984071656, 0.79058431726, 0.98177141649, 1.0058351677, 1.4669106327, 1.8092263863, 2.3136543247, 2.3853735085, 0.36178123181, 0.95251441058, 0.97608590435, 1.5671941686, 1.4196350965, 1.6820637147, 2.1171180546, 2.5501484196, -1.0846909162, -0.72211266503, -0.51020588434, -0.39222959002, -0.011975296238, 0.60420129469, 0.77601700749, 0.9973348007] + }, + "params": { + "pattern": "joiners_only", + "n_groups": 80, + "n_periods": 8, + "seed": 110, + "effects": 2, + "placebo": 1, + "ci_level": 95, + "controls": "X1" + }, + "results": { + "effects": { + "1": { + "overall_att": 2.1309207288, + "overall_se": 0.094744993463, + "overall_ci_lo": 1.9452239539, + "overall_ci_hi": 2.3166175037, + "n_switchers": 448 + }, + "2": { + "overall_att": 2.0722303619, + "overall_se": 0.10646899325, + "overall_ci_lo": 1.8635549697, + "overall_ci_hi": 2.2809057542, + "n_switchers": 353 + } + }, + "placebos": { + "1": { + "effect": 0.10460219364, + "se": 0.11311896791, + "ci_lo": -0.11710690942, + "ci_hi": 0.32631129671 + } + } + } + }, + "joiners_only_trends_lin": { + "data": { + "group": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 
20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 82, 
83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86, 86, 86, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, 95, 95, 95, 95, 95, 95, 95, 95, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103, 104, 104, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, 105, 105, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, 107, 107, 108, 108, 108, 108, 108, 108, 108, 108, 109, 109, 109, 109, 109, 109, 109, 109, 110, 110, 110, 110, 110, 110, 110, 110, 111, 111, 111, 111, 111, 111, 111, 111, 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, 115, 115, 115, 115, 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 118, 118, 118, 119, 119, 119, 119, 119, 119, 119, 119], + "period": [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7], + "treatment": [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + "outcome": [10.9237295553, 11.2576768018, 13.041527547, 12.0178303822, 12.7619436096, 12.8490768731, 15.5175379844, 15.1681759004, 11.7404546839, 11.9549246192, 11.9798942609, 14.3641132268, 14.5503133901, 15.7866170713, 15.3381214078, 16.1037102683, 15.8197185861, 14.72506281, 14.2802337723, 13.391802151, 16.3613170386, 16.5220788267, 16.1136110343, 15.2813641605, 13.4777946977, 13.0736137253, 12.524834617, 14.3323952717, 12.2895900132, 12.1974793342, 11.0324418975, 10.9732073356, 10.8115716205, 10.1536129175, 9.8603356878, 10.5900703356, 10.9199074149, 11.0662826375, 11.1956581998, 13.5714451031, 13.1925597695, 16.115593592, 16.4464744789, 18.023097534, 18.7569404335, 19.6941713667, 21.0174308504, 21.2433393584, 11.6292320928, 11.8533813268, 12.4837534367, 14.3788541587, 14.6544586527, 13.8032997131, 13.9292332559, 14.024458441, 10.8548520308, 
10.0631090671, 9.7591429471, 9.8769606534, 9.2651974217, 11.7613185005, 11.6649531473, 11.3170827075, 9.7158318726, 11.5764022733, 11.6819564167, 14.6404148372, 14.8087887683, 15.1393562984, 17.055538724, 17.2892884951, 8.3004237976, 8.9582426329, 10.703019033, 11.4030675312, 13.444345889, 15.3447722439, 15.9603739408, 16.1995965209, 12.2831641771, 11.2857978667, 13.3444405781, 12.7739754314, 12.2328734373, 11.5145102712, 11.7872207426, 11.58298802, 13.661026508, 15.2882354175, 16.0750384557, 15.3517711475, 14.5299753604, 14.91344802, 15.4514153959, 13.9284450447, 10.6971380754, 10.0865729151, 9.3200040703, 9.9234192104, 10.7420087905, 12.0735868321, 12.1353596913, 11.0926979647, 9.4805071995, 10.2558166565, 9.3556832655, 10.7631839576, 11.8221511147, 14.4940363498, 15.1567905555, 15.4906907715, 8.9203971918, 9.1863389617, 10.5649355315, 10.6851146851, 10.6180027119, 9.7840865305, 8.7070298985, 8.4590485996, 12.8695074795, 13.5188597364, 13.9038613739, 14.3235743797, 15.5154190142, 16.5278784189, 17.544956194, 17.8211654035, 10.5019214702, 11.9020297309, 13.7849172308, 14.453678371, 15.0883014187, 17.0425655132, 18.7384402679, 21.006793562, 11.712982008, 10.4113221732, 11.2630557462, 9.9649750171, 9.8583964555, 8.8110610683, 10.9990651298, 10.1439828133, 12.0732955016, 13.1087546335, 15.2010409041, 15.4575613653, 16.3005468193, 16.0184870662, 16.5591024843, 16.9066343987, 9.0986645128, 10.9498231254, 9.9111637379, 8.7401571236, 8.6001736336, 8.5536467097, 7.8975501428, 8.506524977, 10.6797604575, 11.0338309604, 10.6823006462, 12.808687938, 14.5449039651, 14.1656264175, 15.1032065405, 14.6879273172, 8.0821283033, 10.2652990531, 9.621530262, 10.3277630763, 9.6248271722, 10.7690229871, 11.0081818579, 9.9255082231, 9.5218356416, 9.8523883749, 9.8499815664, 12.3773681534, 13.1527374847, 12.8523995835, 11.7564199893, 14.8284584602, 13.1594048852, 13.406947531, 13.5669953814, 13.0628041503, 13.778291015, 13.5693323384, 13.9084206109, 15.0514064292, 12.5861316764, 
11.8265432289, 10.383308213, 8.9454445689, 8.9171539046, 9.2321815328, 9.3160260237, 7.169084111, 11.3862112076, 9.6087737375, 11.2890460136, 10.1489461612, 10.0886323569, 10.4433822201, 9.2694477272, 11.2859488396, 8.2639696509, 11.4561731162, 11.7054709989, 11.9374001801, 13.8657987599, 14.2058311117, 15.4485037304, 15.3028237301, 10.2638035556, 12.0464651941, 12.7137810142, 13.0316002319, 12.48783671, 13.6138026713, 12.7838524798, 12.7394204543, 8.7693942032, 9.7533054643, 10.8271269637, 10.7694355432, 12.0792184243, 12.3604765278, 13.1141480772, 15.6357932912, 11.6879645542, 15.2341496265, 15.9093849305, 17.0637999424, 17.573765477, 19.0445261525, 20.2786357246, 20.4485520689, 8.2904783101, 8.9151793095, 9.4611837017, 11.4891919672, 11.8529220288, 12.4065304535, 12.4559121856, 13.478776775, 9.624501441, 10.6963015362, 9.6119268324, 12.0907733988, 11.9412369474, 12.3569921626, 15.0741237044, 16.8019494069, 11.9302389043, 12.810968132, 12.9767650624, 14.1883513276, 16.6705317315, 16.6257019938, 17.3280017591, 17.0992062695, 10.6196571097, 10.6899326576, 10.7605347897, 10.9678092816, 11.5786346178, 12.9144058712, 12.8253086634, 11.4570374133, 7.3383915071, 7.5998786546, 6.7527780117, 7.7486880417, 9.2108108711, 9.4591947298, 9.0428707235, 10.3016406505, 8.1293267327, 9.1702183761, 10.7683226496, 11.2759230437, 11.9181240947, 14.1068441841, 15.5044217561, 15.6466630519, 11.2729750169, 13.1766296823, 14.1046242699, 14.5637174383, 15.3129486983, 15.6878447689, 16.6178180195, 17.5478085277, 8.1664643536, 8.1011183416, 9.140706061, 9.4661706195, 8.8191959309, 9.8825235069, 9.7659225774, 9.7093555358, 7.1070206209, 7.9745188082, 8.2432495004, 8.5872227411, 11.706691113, 10.9019622442, 10.8995654046, 11.8163979549, 12.3171385632, 11.714926203, 13.2455544173, 13.604595365, 17.2557258809, 17.3815027522, 17.0999937237, 17.478111775, 10.9792256921, 10.2953195495, 9.5938483663, 10.8975431045, 8.9900912381, 8.3204827357, 8.4026862978, 9.9242743938, 7.1554133476, 7.6175842458, 
6.0680036553, 5.8478980404, 5.464320083, 5.6204678747, 7.0082683101, 6.6690315774, 9.2040123565, 8.625003185, 8.6863023392, 7.8233512052, 7.3934112518, 7.3091720477, 9.6551542492, 8.9933449274, 11.3751543095, 10.4727100365, 10.6824136584, 10.9712729229, 10.733510236, 12.3502921846, 12.0316855446, 12.4416272611, 13.7577687954, 14.7816347457, 14.6390407363, 15.5830510307, 15.4699196111, 19.8251653612, 18.7225301844, 19.8546992501, 11.7719176286, 10.9641582815, 10.7198612708, 10.483665625, 9.9009800618, 12.0433607456, 12.1063301927, 10.8029340548, 6.7091182207, 6.8242790012, 6.325869284, 7.5602850616, 8.6554649645, 8.8601965425, 11.5623582481, 11.9168826439, 12.2770059509, 11.0645358404, 10.6455655926, 11.7323964741, 13.6468194526, 13.9147323589, 13.6141774776, 13.7041569374, 9.9722848386, 11.8358450448, 12.6807365524, 11.9981433142, 12.1690420591, 11.4485662379, 11.8047108646, 12.7454223932, 7.9737398721, 7.5646170602, 8.1522458168, 6.9166983458, 9.3088966752, 10.3426347256, 11.4407148225, 11.572723414, 12.7925281482, 13.994781207, 15.461888201, 16.9047197939, 17.8490475664, 18.0157240185, 19.0719219573, 19.640725525, 10.4235371785, 9.9646421027, 11.3941527817, 11.3621897534, 9.7988851641, 9.7519310485, 7.7073558907, 8.4391916336, 8.3361692218, 8.7280472942, 10.1826819654, 10.7778450343, 12.2299192326, 11.3886717605, 10.512580247, 11.5883303815, 7.630373898, 7.4274947768, 8.8183604799, 8.9088737842, 8.0757825213, 10.7437722687, 10.6036397575, 11.4082654115, 11.3433294957, 10.3170449344, 9.5484357662, 9.8737634289, 8.8573299317, 9.995509373, 10.2567790855, 8.3073147003, 7.5307426895, 8.1267936687, 8.5136368706, 12.2909443829, 11.3886892436, 12.144970157, 13.3600729649, 13.8411881219, 10.7969797185, 11.8803986054, 11.8224901341, 12.6066212899, 12.6504522632, 14.837310847, 16.666201891, 17.2827002143, 9.5748858452, 9.9734472449, 9.5816856098, 9.2044103209, 8.9633656641, 8.9183263201, 8.4147343373, 10.5400412533, 8.3302446887, 8.5248265596, 7.8583562723, 7.9673442952, 
6.7252860566, 9.5269330317, 9.2489695108, 10.616433804, 8.1811696569, 8.617533489, 8.8619610455, 8.8310429211, 8.8984607576, 11.0813212994, 11.2456644282, 11.3543988439, 7.7446177067, 9.3694601861, 8.839454128, 9.6554857071, 9.130270443, 8.3689063017, 9.1224454265, 7.8775160638, 10.6667149842, 11.7524978515, 11.9991833755, 11.767510029, 14.4003015625, 15.2041185498, 15.4099137635, 15.6976819741, 13.1280793604, 12.9799566551, 14.5724121123, 14.8747659514, 14.5828506519, 15.0932300083, 13.2814630994, 13.2032205809, 9.1951194936, 9.23994538, 10.953535461, 10.9561942424, 10.3657263776, 10.5575977652, 10.6214303917, 10.4571823019, 12.6439247156, 11.9973559598, 12.9659158915, 12.4501045017, 13.1439555453, 13.1031571431, 12.7092985837, 15.5613660394, 12.4568352725, 12.41466524, 11.8910446209, 12.9493701706, 14.6347833793, 15.715608575, 15.7851527174, 15.3710702025, 9.9569478414, 9.2144722677, 10.6092188085, 9.9451149355, 13.0545739098, 14.1355021847, 14.1167972239, 14.3654913388, 14.039683902, 14.2231036038, 14.1909372247, 14.4802439484, 14.4817258472, 15.4089254621, 13.8725274844, 14.6702860951, 6.033382986, 7.3609552483, 8.0319453622, 7.8980840036, 7.9009938754, 7.3098753215, 8.3176286886, 8.6615569783, 9.6435268723, 10.2621622224, 10.0665334483, 12.0112567977, 11.779530167, 11.8926182088, 12.4361865391, 12.1648466094, 7.5242773338, 6.7078964743, 6.8406461276, 6.8341476022, 5.7915352384, 5.6509126082, 8.0082763754, 6.9804697573, 7.5558356073, 7.8011113194, 7.8850168925, 8.8512499363, 8.6399826294, 9.3353183353, 9.1485268414, 12.3004956375, 11.9726958841, 11.50009296, 11.6989613023, 11.5230758993, 10.9418647823, 12.5798763632, 13.06729843, 13.5005135583, 11.2454317837, 11.3405985529, 10.9439531741, 11.9683587098, 11.7965273101, 14.2473867908, 14.8669462225, 15.1161250943, 11.8526621713, 11.1850848914, 12.1906183558, 13.4061323814, 14.2612439064, 13.902737306, 14.9444614473, 16.8723713991, 14.7192292392, 15.1583969509, 14.7028464032, 15.7995721102, 15.504552888, 
14.9757567579, 15.8684026454, 18.4742235313, 8.8429950947, 7.2531363071, 5.7462643244, 8.8873155184, 8.4093220074, 6.9303257106, 6.9119297866, 5.9740294755, 10.9261594818, 9.3197891964, 8.8548652061, 7.8434504684, 6.9336048536, 7.365581842, 7.3962519291, 5.6335322596, 13.2982729498, 13.012100292, 15.0647252651, 14.0988192453, 13.2087289506, 12.6066059049, 11.6981267334, 11.6372062379, 8.2124075206, 8.2960668471, 7.806835666, 7.9746121881, 7.9004163469, 7.7495776815, 9.1554903889, 9.7199052113, 12.6686655409, 12.4463516012, 12.3869106816, 13.1379071874, 12.4843681949, 13.1985758658, 12.6742028271, 13.6378508161, 10.6282772372, 11.0897867597, 11.7500071802, 10.8300467226, 12.5386573476, 12.3288375783, 12.8957730574, 13.0577826353, 10.1104935044, 10.9501778189, 10.7502811438, 12.1068747989, 12.1181014473, 12.2510618804, 12.9782034463, 13.217815272, 10.768704846, 11.4272628347, 10.242781341, 11.2049655996, 11.2016010881, 10.9219026634, 12.6843820436, 11.6215508027, 14.7002000185, 16.0355260022, 17.175916499, 17.1059423298, 18.5376238968, 19.0639875897, 19.8020780895, 20.7138408761, 12.6046605556, 14.2227480238, 13.3536628305, 13.9863482874, 15.1601425, 15.8996995455, 16.0536695292, 16.3105895338, 6.852612514, 5.5537418152, 6.4579604353, 6.6099265729, 6.0286968203, 6.1105429911, 5.8150604548, 6.2182451116, 9.5583573585, 10.050222475, 8.7165460375, 8.8301562689, 9.6821036615, 9.0362796029, 9.676324101, 9.7521695607, 9.8880824231, 11.6765654698, 10.8653617976, 10.3978451066, 10.3895584918, 9.7835080022, 9.6701209569, 9.6146865122, 7.952907292, 7.3942576808, 8.8095538297, 8.1219246234, 8.5181236445, 6.9666591055, 7.5953722891, 6.8037222551, 3.7258366348, 3.340094584, 3.9826377451, 4.721253209, 3.7045447625, 4.5346131213, 5.3079627365, 5.0806091214, 9.4277639079, 9.682745681, 9.4192369629, 9.5526897074, 9.6290907922, 10.1336236941, 8.7115486275, 10.565871485, 10.4699300155, 10.5318578461, 10.7267976701, 11.9504382665, 11.2061334112, 10.924300232, 11.5546177272, 
10.4815763192, 8.025575144, 9.1604032513, 8.3417212029, 8.4910423073, 7.6990390267, 8.8142706491, 8.684382202, 8.7116911022, 11.7383499484, 10.1720068125, 9.3666662058, 7.8883439644, 6.5192020651, 4.7680162002, 3.4139213423, 2.378775244, 10.865738194, 12.8074422601, 12.0208162357, 13.5150545642, 13.5575242516, 13.637737221, 14.2007718829, 14.99206057, 8.3354371208, 8.9907705297, 9.329958488, 8.5232837782, 8.9941254706, 8.5139928982, 8.520984639, 9.7766354048, 10.7907471552, 11.0537127237, 11.2651014866, 10.8012799453, 10.370674142, 10.1403693071, 10.6921531608, 10.581791447, 12.3580126149, 11.803656932, 12.0342240322, 12.8678676511, 13.2072679804, 12.7280907655, 13.0342109311, 12.9443211535, 10.0348636166, 10.0013058263, 9.7100928682, 9.2362625773, 9.1945500347, 8.9654458388, 9.1499813363, 8.835746129, 15.1330536638, 15.5070880777, 14.0745644114, 13.083286055, 13.9385439898, 13.1413713707, 13.1955023064, 11.8434437853, 11.3389671375, 11.7054318169, 11.3011380463, 13.0708965859, 12.6001686374, 13.8321465135, 14.0402274687, 13.3514982566, 11.938816562, 11.3177930841, 10.9683255555, 10.7588737612, 9.2945207275, 7.769602469, 9.2048325808, 7.6670216429, 13.4614968931, 13.4129026773, 12.157600992, 11.1669237492, 11.7021058282, 10.5057474816, 11.269747544, 10.0645827103, 12.4570175716, 13.4778136168, 13.3247212202, 14.7944949455, 15.3427961485, 14.8742007863, 15.9997663947, 17.2701430547, 9.5619749372, 7.8418717627, 6.2692118116, 6.8156057605, 6.052686193, 4.8293523365, 4.4156230136, 3.5710576138, 11.0514788425, 10.5011574149, 10.3990284705, 10.1353219714, 10.361165405, 10.390683789, 9.492572467, 10.5207289034, 12.9512322152, 12.6559164419, 14.6946145522, 14.071107259, 15.8674628731, 16.1141072381, 16.6457680744, 16.8795760366, 6.6600048944, 6.6769729916, 7.5290444269, 9.2385496542, 9.1864119854, 9.2966907109, 10.0573952165, 9.6989430058, 13.1198450045, 13.237760818, 13.5323154753, 13.4012221016, 14.7876846672, 15.4214632925, 15.5970723166, 17.5421036377, 11.50529246, 
10.3907120294, 10.1216074994, 8.4997942017, 8.712465998, 8.2960246335, 8.2427368819, 7.8037866174, 12.4310295309, 12.6191567678, 13.1653854364, 14.5685644728, 13.3121153783, 13.2408026448, 14.5701706104, 14.1089662576, 10.1691517436, 10.3050873525, 10.2469682799, 11.0158631866, 12.3134384249, 13.1001235832, 13.1022830475, 13.2311538283, 13.4344484621, 12.8656694897, 11.7672142189, 10.9671199966, 9.1021291475, 8.0508527386, 7.5070177603, 6.5856481283, 12.9208104468, 11.6174432663, 11.1535878294, 11.0361856991, 8.8334786678, 7.6482540962, 6.9207848868, 6.3317262271, 10.4043690947, 10.4802402287, 11.5918546118, 11.2422356007, 11.5769206202, 11.141891392, 12.1749194843, 11.44195357, 14.7877371119, 14.7906963545, 15.1476036441, 16.1533893702, 17.4611556305, 18.4186629566, 19.1239817769, 20.7600372733, 13.5144636511, 13.6825856533, 14.0273119152, 14.0021459325, 13.2678259093, 14.2171553625, 12.9854209032, 14.8957880496, 13.6222847267, 12.0410330687, 11.6496057075, 11.9356407065, 12.2733486332, 11.8775121668, 11.5277154868, 10.9948998805, 12.3236867457, 12.97267767, 13.6381223668, 13.7336686848, 13.4373099372, 13.2571677019, 14.1615828559, 14.228296956] + }, + "params": { + "pattern": "joiners_only", + "n_groups": 80, + "n_periods": 8, + "seed": 111, + "effects": 2, + "placebo": 1, + "ci_level": 95, + "trends_lin": true + }, + "results": { + "effects": { + "1": { + "overall_att": 2.0365596049, + "overall_se": 0.16457819901, + "overall_ci_lo": 1.7139922622, + "overall_ci_hi": 2.3591269476, + "n_switchers": 360 + }, + "2": { + "overall_att": 2.146532354, + "overall_se": 0.25251930262, + "overall_ci_lo": 1.6516036155, + "overall_ci_hi": 2.6414610926, + "n_switchers": 270 + } + }, + "placebos": { + "1": { + "effect": 0.2022104185, + "se": 0.14861354394, + "ci_lo": -0.089066775234, + "ci_hi": 0.49348761224 + } + } + } + }, + "joiners_only_controls_trends_lin": { + "data": { + "group": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 
3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 
67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86, 86, 86, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, 95, 95, 95, 95, 95, 95, 95, 95, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103, 104, 104, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, 105, 105, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, 107, 107, 108, 108, 108, 108, 108, 108, 108, 108, 109, 109, 109, 109, 109, 109, 109, 109, 110, 110, 110, 110, 110, 110, 110, 110, 111, 111, 111, 111, 111, 111, 111, 111, 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, 115, 115, 115, 115, 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 118, 118, 118, 119, 119, 119, 119, 119, 119, 119, 119], + "period": [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7], + "treatment": [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + "outcome": [10.0692743508, 10.522460632, 12.0279975, 13.0939562203, 12.0737605408, 14.087450257, 17.3475221984, 18.9607507922, 16.4209721391, 17.0154143929, 19.2757750068, 22.2233766247, 22.541942821, 24.6545185011, 24.5893349581, 26.983396303, 7.0897333564, 6.7459126894, 9.1550403979, 9.9966285331, 12.4761518758, 13.6828339919, 13.4979359087, 15.4252963952, 
13.2614447124, 13.1820255966, 14.4451644096, 13.5200151574, 14.7445372965, 16.6583202572, 15.926845616, 17.4197848555, 6.2997034686, 7.7350525494, 7.5161572143, 8.0155205942, 8.9386782892, 9.0043008069, 10.5583845442, 12.4133036184, 13.3403019064, 15.4384900048, 14.6809784475, 14.5518143624, 15.0815269183, 14.6781394385, 14.8847023897, 15.3206057847, 14.298735269, 14.9340172764, 15.5080657628, 16.2058720301, 17.1797876294, 17.2449850576, 20.2904508939, 21.2807279992, 8.4273493649, 9.1197354458, 11.0887558738, 10.4766274928, 13.2829951641, 13.6163509679, 14.1672679413, 15.7407243466, 11.865527704, 11.9430451619, 12.9941300555, 12.2758488621, 12.8948154369, 14.9232925523, 15.003629, 15.4753676586, 9.5540251245, 9.747897128, 9.5827582306, 11.4034898277, 8.9833196359, 9.7996834169, 10.1785857802, 9.2516709249, 10.6636478021, 12.601214779, 13.6983453413, 14.9959053698, 15.4335108932, 17.4141610197, 18.9378079456, 20.5827292631, 7.5585853145, 10.7754959988, 10.8458753136, 10.3310336178, 10.6815939074, 11.4188876561, 10.8653357409, 10.9780882902, 7.2242812351, 9.1693277354, 9.8984044213, 12.571353631, 13.5631536269, 14.2466809192, 16.7068898345, 18.5602827102, 9.9591093562, 11.5081642343, 11.6532804352, 12.0184755261, 14.9628464523, 13.4187206801, 14.3749698066, 14.6708755333, 11.3196500802, 13.8215294708, 14.1377233035, 13.852088248, 14.9619516931, 14.6749631208, 14.7406130575, 15.7404664933, 4.9176482427, 5.4984455533, 7.116442679, 6.9281794905, 8.6894456678, 10.1392652478, 12.3171036386, 13.7915104501, 6.1905052465, 7.2583526256, 11.4361621894, 12.4342006161, 12.5018408653, 13.4677455971, 14.3855182353, 16.3391085903, 7.5792853816, 7.9497192789, 7.2131956103, 7.2905039072, 7.0298520186, 7.2773026015, 8.0254788308, 9.5658802723, 6.2719570666, 10.2547135056, 11.816484468, 11.0951389704, 11.5679507278, 14.5098140575, 14.1978491848, 16.4439641432, 8.4561825917, 8.7981909214, 8.3782848299, 7.7662018132, 6.8792407338, 6.4320156008, 6.5037545101, 8.4631081008, 12.9912573389, 
12.8757943759, 13.911353756, 14.1221486852, 14.9396123664, 15.5465102974, 13.5164869296, 16.5692067891, 13.8160253251, 13.7154170988, 16.6050450379, 16.0257012231, 17.8659362865, 20.3769389881, 22.0446077558, 22.7553133954, 10.2055228865, 11.4135053944, 10.5558923525, 10.5964706165, 11.8871826096, 12.8521474638, 14.5158039391, 14.0007410062, 11.054895397, 10.9521009892, 12.5144633143, 13.3520711722, 14.6118515234, 13.4098328585, 13.9986311849, 15.1063859461, 6.6339307625, 7.3990769426, 8.9002115597, 9.3311209801, 9.4746413215, 9.8869519588, 11.0260586618, 14.9101738241, 7.9498271017, 9.115635002, 9.4664549571, 10.6860816767, 9.0365710516, 10.2104837119, 12.0432364966, 14.2500553618, 10.8937835742, 9.9771341672, 11.0459167218, 14.0084489402, 14.5001275776, 16.4743834394, 16.6831656619, 17.5919574581, 11.7932178911, 12.5883273145, 13.164740579, 15.3235113169, 16.0394164903, 16.2828457391, 16.4851309165, 18.4776389943, 11.9982113639, 12.1927463446, 11.3296279826, 12.235894366, 10.953500878, 11.5486013972, 10.3141271307, 12.4640116571, 13.1178273227, 13.2916079106, 14.0685171544, 14.4889917956, 15.7534014407, 16.0853241934, 16.0802007044, 17.7446312935, 6.3123031414, 9.0618455889, 8.4890459625, 8.7673475028, 11.4235516119, 11.1634481897, 10.2889072181, 11.8814571364, 12.4320183209, 16.0826858908, 18.8065757567, 19.9679050299, 21.8974638365, 24.6283722378, 27.3286882412, 29.3129095467, 9.2846127341, 11.3813670381, 10.9334199561, 16.0071918549, 16.0362096016, 16.3491214879, 16.8521022256, 16.4048967784, 8.736395642, 9.8418755657, 11.0118789068, 12.3762163949, 14.4951409819, 15.3323653001, 15.7848483617, 16.5667505006, 11.5910628801, 11.2550014194, 11.3297876215, 14.4041275407, 16.6521356663, 15.8355681983, 16.8087898807, 17.1533328773, 11.1779302791, 14.5946835957, 15.0866601083, 16.1945169, 16.7384363825, 17.965448131, 18.7328335626, 19.1795321456, 8.6890419805, 9.9919414005, 12.4055964465, 12.5439211441, 13.5984081389, 14.6380143056, 17.002481246, 20.0144689367, 
6.4713675568, 6.6986992503, 8.6355467907, 8.7647228137, 8.6151139941, 9.4553428521, 10.1354767221, 10.2918897376, 8.3449611605, 8.3988115414, 11.1716794664, 11.4472560086, 12.2450965658, 13.8802134724, 13.6834973453, 13.1253702641, 8.8334195989, 9.1890605304, 9.0116929841, 9.2659857533, 8.7663660988, 8.7413963597, 13.2558692432, 13.0724291749, 8.9949257942, 10.3410760093, 9.5947759856, 10.7955735586, 13.0219199246, 14.81064855, 14.7280691591, 16.2071994637, 8.4230978014, 8.7654542111, 9.5345488504, 10.7009723472, 12.0783749035, 13.3342703451, 17.1025579583, 18.1166574552, 10.8032330355, 11.0165021059, 14.9285139552, 15.0634266267, 16.2771564009, 17.0820750827, 18.0135825612, 19.2280358221, 8.5121033201, 8.747183006, 8.6076667838, 8.1816517024, 8.7798800356, 8.4967769478, 10.1037297349, 9.5756189336, 16.4979732232, 17.2516780411, 20.0427189961, 21.2417081996, 20.8306459205, 20.7491789693, 22.2424006917, 21.5826930988, 11.1304636904, 12.8138734183, 13.5393801943, 13.9327475485, 15.2151248971, 15.0720282738, 18.3753624692, 18.8790120074, 12.2130247648, 11.6215064541, 16.3166450072, 15.7859100327, 15.259516622, 16.5670587022, 16.0047090408, 17.5322125186, 9.7620342836, 11.3097070673, 11.0762438265, 10.489265019, 9.9746872887, 9.2544195572, 8.9608100347, 8.1482595076, 7.5760675309, 8.2045312311, 7.3593979307, 10.0839864855, 10.5264114213, 10.1826369406, 11.4155606516, 10.7326962801, 10.3327190529, 11.2522491279, 12.486269439, 11.8906617466, 14.0397722282, 14.9005922516, 16.3660345708, 16.7692696566, 9.0345494794, 9.7813145386, 9.3653304093, 10.8702470388, 13.7961892703, 12.9060523284, 14.0523573461, 14.3966968479, 9.228543928, 9.1741011406, 10.9426129708, 11.9970760392, 10.6244082783, 10.6289960854, 10.0517354647, 10.5635354533, 11.5648429896, 11.6914356827, 13.5623775399, 13.2499341118, 16.3536629588, 17.0194869302, 20.9259037273, 21.4140051324, 9.7066882337, 9.9667018825, 10.6927912512, 10.5851999774, 10.9315416523, 12.4107973318, 13.8461866, 13.4739610781, 
10.2342539055, 10.1427919542, 10.6654708625, 10.5845804204, 11.3935663813, 11.6738310542, 11.8148824645, 13.9909068099, 5.6275533887, 6.6938213258, 6.2513767504, 7.3986690403, 9.1972964877, 9.1294941263, 9.0099256276, 11.6733978699, 11.7965695855, 10.9683674708, 13.0296829837, 12.9209329084, 16.037047302, 17.469761324, 16.5132127355, 17.6789396345, 10.5757609669, 10.9889839624, 11.8597970356, 11.3065875768, 15.8061796942, 15.4743031015, 16.8195638842, 16.6600030183, 8.4756599105, 9.3006069677, 11.4236554778, 13.2114522115, 12.8589426308, 14.0334620329, 14.4402684805, 14.9018138625, 12.9663782903, 13.1338759273, 13.3510711776, 15.8075170155, 14.8327823443, 16.2472748305, 16.6698903699, 16.3436607106, 3.7015617927, 3.2225983091, 2.9102106157, 3.1912908477, 5.8660080945, 4.2286302194, 5.4533591609, 4.9447324665, 14.0988466057, 15.9995192152, 17.1975646201, 18.2345032959, 18.6008969001, 20.1278669724, 20.8079178646, 21.7694281738, 11.8101878027, 11.2777201118, 10.9670870206, 14.3444797357, 14.8505194093, 14.3518302866, 15.6685180642, 14.8571684268, 7.8745035152, 11.9651629619, 12.219596979, 12.1617504056, 13.7023471757, 14.5239936553, 16.8707043298, 16.988021307, 10.3592836652, 10.6244703249, 11.2752205628, 11.9529945078, 12.3776808071, 14.6122075573, 14.6050292041, 14.7306142615, 12.1643115223, 11.3441542217, 12.3166169706, 13.6194335999, 14.8422594314, 12.8337561143, 13.9873271263, 14.0607191163, 9.2181382059, 10.8459442004, 13.7708364886, 15.6160367458, 15.9335900914, 18.8588208442, 20.9582383405, 22.8022377601, 12.75557063, 12.4817130292, 13.1916767063, 13.1765664785, 13.7514675922, 12.5364749523, 16.4750637466, 16.6963890911, 8.8231347535, 11.5767767051, 11.9926177769, 11.8780291054, 13.381616724, 13.3892968203, 13.578215075, 13.8621020978, 10.20537618, 10.239635452, 11.2332669446, 11.0376541451, 11.1259638777, 13.591254349, 12.8731692004, 14.5718905677, 10.1885482901, 10.9138080334, 11.4793926392, 12.5987048438, 12.9505879533, 13.6662008614, 15.9522129411, 
16.847376523, 10.0163917521, 10.5419121195, 12.3823521885, 13.7539875292, 12.6046961461, 14.070578303, 12.6785463544, 15.3719348708, 8.1001069487, 8.7833337289, 7.9868345847, 8.3232671621, 9.8990829693, 10.4186558479, 10.5411522599, 11.4609305659, 9.8048886249, 11.4400577154, 12.5376084505, 14.6548891734, 18.6790003571, 19.2488725815, 21.5470522324, 23.8212633206, 13.079106597, 13.3556034155, 14.6879225574, 17.3945452463, 18.1425166482, 17.782448645, 18.5041554874, 19.8560415083, 7.3587564786, 8.6445532704, 12.3923591672, 12.8057216314, 13.7231982865, 15.9369465755, 16.7782671124, 17.4610422488, 7.4952590804, 8.8949073353, 11.1143933734, 12.6535722152, 14.3835637962, 15.669880098, 19.9203639697, 21.1002153864, 8.6520061685, 11.7478822578, 12.672762339, 13.0280367345, 15.3255823273, 16.7430916163, 16.0056025434, 17.9471725137, 16.117927634, 17.4246442574, 17.2841528898, 17.7208543229, 20.0351726974, 21.1139535279, 21.0645470823, 21.1195604003, 11.1033435039, 11.2572648327, 12.0782318327, 14.5648161521, 15.1343729801, 16.2480163506, 16.5489150665, 18.2181225947, 12.6596548933, 12.8096155488, 11.9387127366, 12.3833065557, 12.2972631418, 13.3555505283, 14.3041554979, 14.0505390718, 9.5637363407, 10.0215220433, 9.7167182464, 11.6773599738, 12.7778109947, 13.2667904677, 14.4270692538, 15.6249325608, 8.2045439105, 9.1137673734, 9.2494595427, 9.5915805107, 11.3492472555, 11.3848867238, 11.0086655759, 12.7697113845, 7.9198772756, 7.8583118536, 7.8108575624, 7.77116044, 8.242319185, 7.6486110298, 7.6623844192, 7.9998671954, 5.4534247456, 7.7240140718, 7.5229794077, 7.5027895161, 8.3085392651, 8.6810178247, 9.3902773443, 12.2320067671, 8.9629064514, 11.0203645973, 10.7478162141, 12.0876509081, 12.8507025559, 14.8368450803, 16.1453800557, 16.1418260649, 8.5891990427, 9.4095981928, 11.7811814408, 12.7651011258, 13.1343313594, 16.1251273086, 17.4437298838, 18.0253600587, 13.9854379951, 14.7639757183, 14.3775978505, 15.1894367325, 16.4652883037, 17.1437174187, 16.9825172495, 
17.6534149658, 10.0321650927, 10.2420676158, 11.2335858571, 11.7878036745, 12.5394809989, 12.844158345, 12.48247687, 13.1571167492, 8.024335597, 8.6411294432, 10.091972887, 11.1234740567, 10.5056613962, 11.8985425852, 11.1547566118, 12.6938399034, 9.9345955968, 11.1198334324, 12.9230836454, 13.8425460393, 16.1067317008, 17.4744352879, 17.9757394976, 19.6884535538, 11.6571886915, 11.7598431143, 11.9586296453, 13.0102892769, 15.3323764726, 14.9957600999, 15.0765730658, 16.9557918406, 10.0466860215, 9.860239409, 10.7659642377, 10.0409029778, 11.5683616444, 12.5254109624, 9.9821850542, 11.4973994886, 6.4017428428, 9.1643730761, 8.8354533194, 11.1413522742, 11.4891043837, 13.3119428231, 14.9731654588, 17.2719914609, 11.1581292908, 10.5104144373, 10.9473783518, 8.8275239879, 9.2126546364, 8.7913224095, 10.0102072827, 9.4682920337, 10.7376906277, 12.0120658428, 11.8178851493, 12.9978836975, 14.2980056577, 14.0079393523, 14.7070942853, 15.7698666256, 11.3498489601, 12.2018650499, 13.4217978532, 14.4781580664, 15.8245730227, 16.5924573006, 16.8935951601, 17.5482451142, 15.0354823918, 16.2411917575, 17.7660621187, 17.8999552061, 19.9443907966, 19.7573042725, 19.5954989927, 21.3265814891, 8.7533713341, 9.9063949751, 9.2931667164, 11.7512305426, 10.7036208651, 11.5043878771, 11.8724066438, 12.6130423382, 10.6822215851, 14.1209385356, 14.6168887123, 14.2092221818, 14.3785162159, 16.5398470763, 16.5245330842, 17.7115868081, 15.8744829206, 17.2296257444, 17.548417576, 18.2182142644, 18.2537355174, 19.0750439375, 20.951101569, 21.8724328735, 13.7491988212, 12.6217297473, 13.0962845423, 13.5784765476, 14.1267298649, 14.1374649981, 14.989558247, 15.1201202175, 10.3305672511, 11.5952483563, 11.9573466431, 13.2624056957, 14.3595664464, 15.8929371706, 15.0312084096, 16.6732654317, 11.9988143095, 14.2031511151, 14.5398990256, 15.2304426147, 16.7140442533, 16.562456425, 17.8062950922, 19.3568892633, 12.2567770878, 13.1697080619, 13.9400685651, 14.402355725, 14.3658125781, 14.9153179402, 
15.5228556481, 15.2016342961, 10.21895483, 10.2697777777, 9.3584682616, 7.7884187415, 6.8575785811, 6.5889429581, 6.1431925639, 4.7384024895, 10.1919770819, 10.0544926491, 9.4378779177, 8.2066753, 7.6863678226, 6.9493657814, 5.4549458726, 5.6178158901, 14.4224668076, 14.365000323, 15.3204210562, 15.7892783651, 16.1980386411, 16.6571508963, 16.9435952841, 16.8871202417, 12.5830243761, 14.5388322476, 15.0377721044, 16.4800623959, 18.1744270251, 18.1421885087, 19.4621299931, 19.7874171042, 11.4708973041, 10.3558936289, 10.0038039921, 10.1746416028, 8.6942505187, 10.0554520471, 9.7019666274, 8.5914653591, 13.7179325414, 15.1758421593, 15.1269898642, 16.4013892131, 15.9828209057, 17.029536539, 18.8116233526, 17.8661771566, 12.5684546544, 15.6129474794, 15.5465683992, 17.5499052354, 17.5702188597, 19.3425144932, 20.7046895593, 21.7469452985, 13.8622395714, 13.6849257263, 13.7466130141, 14.3055722157, 14.6039291776, 14.6045267684, 13.8785112768, 15.6310943536, 13.6008197106, 13.9263269151, 15.1451380768, 15.6477831171, 16.7102708262, 17.8386300597, 19.0934902546, 20.1197035175, 12.9785681979, 14.0815632403, 14.895724642, 15.9196722536, 17.5715587839, 17.5894994253, 19.9224850938, 21.1739931185, 12.68661227, 12.8578698411, 12.0370973619, 13.5791167657, 14.5781083916, 13.8575523145, 15.401569001, 15.4744017208, 14.4022923757, 13.3177713362, 15.7404883756, 15.9646198123, 15.9586586708, 16.3209700076, 18.5517174971, 18.3145884693, 15.0097419715, 15.9343980817, 15.3319961973, 16.5602326005, 17.5593596307, 17.7098923647, 18.7043459397, 19.5642610408, 13.3945302066, 12.3381246343, 12.9847646573, 13.6329592504, 15.9648635042, 15.5706066794, 17.2156338893, 18.7373742924, 11.1610862204, 10.8873590248, 11.9472903405, 13.1632595208, 14.6595462083, 15.899871116, 15.2389595405, 16.4153326588], + "X1": [0.15400852931, 0.11415233602, 0.57709390603, 0.85318855789, 0.55590497383, 1.0157454721, 1.3783482577, 2.3435960245, 0.93847492271, 1.0378067651, 1.3064938414, 1.871887172, 1.5655389992, 
2.0039977402, 2.204580646, 2.4557226681, -2.417568529, -2.2179346771, -1.8414189478, -1.4492290674, -1.5648614304, -0.86070550174, -0.67586710864, -0.62954171254, 1.0860476496, 1.1808631312, 1.9105887167, 2.0377831109, 2.4321671278, 2.7007725184, 3.0000658028, 3.5124552386, -0.35540460267, 0.27460209794, 0.11884069875, 0.58428208602, 0.8519941451, 1.0778613623, 1.3164027151, 1.9282850783, 0.28279001329, 0.73281941264, 0.61339726768, 0.88809470965, 1.2464878204, 1.7094052812, 1.994318726, 2.0632645107, 1.0685160442, 1.5967783533, 1.3839599002, 2.2052350073, 2.365929338, 2.4973347321, 3.17123498, 3.3269363877, -0.73109461859, -0.3069301318, -0.24174546941, 0.27240945293, 0.47275338968, 0.77174250391, 1.0136158512, 1.2315088622, 0.82101979489, 1.1420745286, 1.0296343076, 1.3311585215, 1.6189025295, 1.7600298266, 2.1832767347, 2.5623784171, -0.0384630446, 0.52009940023, 1.3051259571, 1.1016737447, 1.1295328428, 2.1441490753, 2.1498415006, 2.6685845839, 0.34601329878, 0.2609382346, 0.85129673413, 1.1257230263, 1.1944632465, 1.5281953892, 1.989829146, 2.4387410668, 0.30377224339, 0.8282027899, 0.95391081074, 1.0326008451, 1.529805293, 2.134797451, 2.2857490923, 2.3037685011, -0.63660385308, -0.18210957582, -0.16868101181, 0.71261630418, 0.68155028153, 0.74633201172, 0.74337247824, 1.6217315675, 1.8376290416, 1.9532580082, 2.5144792368, 3.0647187255, 3.3756246963, 3.1741013193, 3.9466651598, 4.2675469893, 0.68345632478, 1.3563114101, 1.5906678311, 1.4988436621, 1.8435188253, 2.1536357416, 2.5491115613, 2.9237967073, -2.1779765866, -2.1286843767, -1.3071481988, -1.2403852184, -0.75647564566, -0.42023647903, -0.45833163024, -0.32694480677, -1.2173273199, -0.94767711342, -0.6516461419, -0.38300141284, -0.28153845517, 0.094935874206, 0.39514103274, 0.73841724966, -0.74404768999, -0.41625410403, -0.42864821887, -0.40294084264, 0.074216770983, 0.59134069301, 0.70752211952, 1.0029183905, -1.5594873739, -0.809819221, -0.61850332589, -0.52105189888, -0.31912776348, -0.114503424, 
0.75916879344, 0.97623846997, 0.23443883819, 0.66836144033, 0.7775055598, 0.98162309329, 1.3625091521, 1.6004425002, 1.8337904336, 2.48751441, 0.3395369079, 0.43991929533, 1.5463305093, 1.6940456252, 1.7656152123, 2.0067298953, 1.9837294709, 2.3083762415, 1.4818035381, 1.1845304682, 2.2327476042, 1.9606420024, 2.5722370765, 2.6403837793, 2.9832861806, 3.5809974348, -0.73589926099, -0.23913006973, -0.29920038345, 0.34438429609, 0.59303786529, 0.78950427617, 1.1992072769, 1.6383395965, 1.7001679735, 1.5045022589, 1.9434666613, 2.3539710967, 3.0651161542, 2.2000967322, 2.7205965035, 3.6744119976, -1.175747208, -0.99673871011, -0.76720432151, -0.57197231547, -0.28638795951, 0.2100107533, 0.68727300595, 1.0574024543, -1.0475841961, -0.51303357657, -0.35103196536, -0.070747801221, 0.2354375157, 0.48470756671, 0.92116047791, 1.0132317972, -0.82983770989, -0.81460052212, -0.52551125798, -0.13571266531, -0.010984117405, 0.62167137111, 0.94378566692, 1.0810402668, 0.036191509354, 0.24980586351, 0.30705650566, 1.0994846506, 1.3299588565, 1.6495412143, 1.852797026, 1.785312367, -0.42297479474, 0.023577810896, -0.059182055667, 0.56699590848, 0.92004878879, 0.81801537545, 1.3382520322, 1.2209813434, 0.47704089862, 0.15235962707, 0.80454752055, 0.88099470664, 1.716604062, 1.8166767144, 2.1122943568, 2.1975936235, 0.24493811235, 0.49797592366, 0.43537911777, 0.79890572422, 1.4862408453, 1.2616066488, 1.5254068247, 2.2866616751, 0.12379052513, 0.1434780841, 0.49582612489, 1.1204186047, 1.3628439652, 1.5657786947, 1.9995355075, 2.0680500015, 0.94117953238, 1.5399261415, 1.5112797717, 2.4384862581, 2.2701816975, 2.8132872051, 3.1268317819, 3.1490452699, 1.0385981192, 1.0408405425, 1.4192287118, 1.6068366185, 1.8567671749, 1.9301235555, 2.7127301229, 2.9552690142, 0.49362698831, 0.6302917365, 0.82687986282, 1.3505913865, 1.6109402799, 1.443675347, 2.111729705, 2.3745725308, -0.63119987956, -0.12726163743, 0.031251472444, 0.53715760163, 0.96885310216, 0.78693777839, 1.4163227532, 
1.5520488266, -1.4074908621, -0.97751144141, -0.81658749143, -0.35073934913, -0.26784912099, 0.09620519599, 0.49048621029, 0.96779916551, -2.2315975812, -1.8632595784, -1.6876738061, -0.96669073235, -0.92314612201, -0.61802859006, -0.10403172538, 0.1320281929, -0.037495560575, 0.074268276845, 0.39396534576, 0.77225585343, 1.1053119562, 1.7408362785, 2.0551715032, 1.8307768964, -0.47108450344, -0.27077231676, 0.066908397226, 0.25588858331, 0.4801736319, 0.61894446785, 1.7292307287, 1.6605328413, -1.0748595937, -0.63891104976, -0.49682059654, -0.019358158021, 0.1545085371, 0.3097524347, 0.63584900021, 1.5605181181, -0.56748347061, -0.23980712274, 0.07738539788, 0.40182348483, 0.5543361312, 0.58053691565, 1.3205244417, 1.4577994367, -1.2503759154, -0.99218849584, -0.66732487774, -0.20134722653, -0.012641996853, 0.6431271323, 0.44586710025, 0.8525100494, -0.49309408917, 0.22784415291, 0.30887614916, 0.3930345575, 1.1806417375, 1.2490824121, 1.3247740871, 1.9622608241, 1.7429535437, 1.7704502128, 2.7430010143, 2.6921359455, 2.8378956586, 3.0407853219, 3.4400385995, 3.3758853332, -0.17165322711, 0.32682097947, 0.54019550239, 1.0379239064, 1.2664127229, 1.6970726529, 1.8147901907, 2.0731866947, -0.38712449843, -0.3490014193, 0.17289531051, 0.25489466443, 0.49585416384, 1.4135189934, 1.3034421106, 1.6677381105, -0.79482285655, -0.58895894848, -0.4064922381, -0.38695006685, -0.014456593378, 0.41518832007, 1.0187113363, 1.2980841924, -1.4661152271, -1.0173574763, -0.77560607629, -0.75719941255, -0.4570545043, 0.28200209134, 0.48860073713, 0.84000087005, 1.7097062225, 2.1128918967, 2.3271187899, 2.5724273637, 2.7129115988, 3.3323049975, 3.6134044117, 3.9225156671, -1.0054167351, -0.6180353033, -0.86970400841, -0.16938879739, 0.56026310631, 0.21583285647, 0.87992344803, 1.0964428497, -0.69018789675, -0.59437964394, -0.42231119008, 0.45592135655, 0.58857190026, 0.42662381785, 1.2260166171, 1.3478524159, -0.19309889945, 0.32519694586, 0.793548828, 0.65131842682, 1.1363766217, 
1.9353335238, 1.5561947094, 2.230302318, 1.0191076833, 1.5582829082, 1.9552685605, 1.9922642114, 2.3179610759, 2.5990860105, 3.1828573942, 3.2261259316, 1.0200755213, 1.1269920226, 1.6499336604, 2.0451746681, 2.3998029294, 2.4741937647, 2.5925415694, 2.8421665986, -1.2622282792, -0.89070348837, -0.84763773479, -0.24933573072, -0.1073282734, 0.31097520517, 0.34543532913, 1.2137762603, 0.8521470155, 1.0294537364, 1.5561786401, 2.1211575136, 2.5034397728, 2.2937212131, 2.9257958845, 3.0195732036, -0.55355845786, -0.38200061031, -0.07097877558, -0.087760577332, 0.45680561018, 1.0016800038, 1.5009191323, 1.3228637081, -1.0318412766, -0.6090015445, -0.44635119598, 0.095648334147, -0.28657999209, 0.26900150442, 0.58731769304, 0.9576778458, 0.41256220901, 0.72714165467, 0.77040563469, 1.3150350163, 1.2456410225, 1.8336807005, 1.8447725178, 1.9212858042, -2.9067290971, -2.9006632995, -2.4353505931, -2.0168512538, -1.5243810899, -1.210501856, -1.1232876571, -0.73823445962, 1.9288376389, 2.0754503616, 2.0873442465, 2.4916137961, 2.6529525206, 2.9728699412, 3.4757036292, 3.6474277975, 0.5193415454, 0.53557012465, 1.0085715273, 1.4056060147, 1.7841775469, 1.9284798348, 2.689604456, 2.5998841916, -0.69127107793, -0.72345834642, -0.43390251113, -0.13622137355, 0.22652497584, 0.31103624757, 0.7685639245, 1.0123258704, 1.1034245416, 1.1847076211, 1.660221564, 1.9005794551, 2.428280544, 2.6225268936, 2.7275486806, 3.1042343752, 0.031649475424, -0.16786063606, 0.31476507378, 0.2626085475, 1.350637716, 1.0781354586, 1.5768016321, 1.7305715141, -0.1354008153, 0.043590257448, 0.50408040774, 0.73789291877, 0.58543762486, 1.1946296809, 1.6229265831, 2.2745966939, 0.4814759717, 0.65878692804, 1.0875638837, 1.3566307935, 1.9055181936, 2.0482657687, 2.5853054595, 2.6163535987, -0.87844252702, -0.30310140924, -0.0666723733, 0.037077724591, 0.61516505432, 0.50356868356, 0.97129359299, 1.1229796598, -1.3754168706, -1.4140263834, -1.0313120665, -0.67275440477, -0.16774298063, -0.16591616247, 
-0.059810288253, 1.0147839795, 0.58090752131, 1.1746605659, 1.122914448, 1.599869968, 2.0976461481, 2.0586777768, 2.7275854246, 2.9534730275, 0.8203540983, 1.4422978258, 1.5855905695, 2.3828752315, 2.4564663386, 2.6431538448, 2.9090952027, 3.5515769935, -0.11793873412, 0.073328824538, 0.22686749265, 0.74863336471, 0.73366051164, 1.4494077534, 1.3972291282, 2.1516230888, -1.0551794909, -0.84949657754, -0.59442055497, -0.018397876617, 0.49176402769, 0.3855740774, 0.70851332225, 1.1786551769, 0.088985409101, 0.12974457762, 0.72157908697, 0.71428346736, 1.4089091405, 1.3884546986, 2.1031206209, 2.1380341349, 1.04569154, 1.4043899764, 1.7902682108, 1.730731888, 2.2579717384, 2.8798785158, 2.9923714999, 3.1692580401, -1.820683193, -1.208398891, -1.1154061529, -0.67779608677, -0.55986730657, -0.50525945791, 0.18424716195, 0.56303591811, 0.93546271763, 1.6147347393, 2.2039015021, 2.311948997, 2.5337877754, 3.0815594219, 3.0897413197, 3.2252417196, 1.3699952096, 1.7655401919, 1.9003954955, 2.1263920992, 2.7808209677, 3.0239482169, 3.3959692223, 3.6587322359, 0.60404410922, 0.56622647686, 1.0877068379, 0.96261584911, 1.6884626643, 1.8610897685, 2.0099887601, 2.4117138641, 0.12058178859, 0.18753765243, 0.65398269427, 0.64260591215, 0.96712509953, 1.0239750554, 1.8439313705, 2.0690602387, 0.022234871383, 0.48542154298, 0.29875554177, 0.98914294484, 0.92787284774, 1.4619618897, 1.6692895725, 2.1571472637, 0.61677254851, 0.85472271241, 1.6161910735, 1.6360499655, 2.0136157539, 2.2090768723, 2.4567604237, 2.9211146713, -1.5330398386, -0.76489807201, -0.8175959004, -0.42272755865, -0.24513100473, 0.2895223967, 0.37002047542, 0.57677609852, -0.12843969189, 0.42784978482, 0.46249702731, 0.77033869152, 0.99018214117, 1.469833896, 1.95106429, 2.2076050468, 0.46424447489, 0.48108523619, 0.77854197103, 1.1863288992, 1.2978250346, 1.6724338351, 2.3928615422, 2.7507162526, -0.12454345118, -0.20337696592, 0.40918134854, 0.60905835729, 0.59075228414, 1.5406065471, 1.4794380644, 
1.9900223252, 1.8402405985, 1.8771176227, 2.1782551142, 2.3543867103, 2.8959714902, 3.4447183696, 3.4030323308, 3.5874139976, 0.55835847831, 0.45054688659, 0.98000221393, 1.247792334, 2.0693781355, 1.9762064249, 1.6900869488, 2.5285055957, 0.42476621243, 0.53615718734, 1.1420157606, 1.4534580831, 1.591443807, 1.9946262109, 2.2806691879, 2.6245684979, -0.96601272852, -0.3047486098, -0.18789313565, 0.44353161794, 0.76121231974, 0.81275394632, 1.042290257, 1.725352778, 0.76742598106, 1.1611759378, 1.6108817205, 1.9370833995, 2.6732850668, 2.5945685393, 2.5684600079, 3.1596846412, -0.29743048142, -0.073560496455, 0.21638021438, 0.55294480831, 0.91213046964, 1.3550773632, 0.91131475257, 1.3478322122, 0.77760261059, 1.466219538, 1.4981017603, 2.016727301, 1.7971686489, 2.0521107346, 2.564950188, 3.0582241127, 1.187399522, 1.3071628945, 1.6772352374, 1.5900054636, 2.0186630806, 2.0653164468, 2.6762481939, 2.5724744557, -0.46144490294, 0.25987081855, 0.20542759418, 0.87385856235, 1.2863426302, 1.1941111052, 1.718730943, 1.8465617732, -0.44614964677, -0.039352978633, 0.4626353528, 0.67752392296, 1.1248159622, 1.1062074857, 1.420332464, 1.6857588038, 1.4412238398, 1.8152649348, 2.2002828688, 2.3915487447, 2.6402552075, 2.8060534633, 3.153049997, 3.6963094211, -1.3017627378, -0.54015247191, -0.70178161266, 0.070304681334, 0.43318401498, 0.23637848761, 0.84234244941, 1.1234036335, -0.2235726474, 0.059182292437, 0.0065953677133, 0.44035184876, 0.4578312075, 1.3522783078, 1.3998797436, 1.5387305296, 0.40977372263, 0.67017052453, 0.95634437107, 1.1614443482, 1.4670650831, 1.4290089076, 1.7601816589, 2.806071891, -0.39808449478, -0.1478521334, 0.081407833911, 0.17134301054, 0.44557052712, 0.9429498303, 1.8205799706, 1.7374070275, 0.19778080791, 0.75280622719, 0.83782149091, 1.3998551219, 1.9107252809, 2.5405228694, 2.5578578961, 2.6322970112, 0.22051906714, 0.53733872791, 1.1445217162, 1.308793636, 1.7496248968, 1.9164985729, 2.530460169, 2.7266870766, 0.83437863713, 1.1109138937, 
1.6807814239, 1.6445038396, 1.6905128797, 2.0652605968, 2.4779644804, 2.6500848446, -1.182571717, -1.5309102926, -1.0415484516, -0.77636519894, -0.091739297359, 0.078328551117, 0.21686326666, 0.38488578686, -0.067074298549, 0.24838763366, 0.73233326572, 0.68809606312, 1.2700157545, 1.4735910576, 1.8020467073, 2.4312923957, 1.8457637126, 2.3058706849, 2.6788131934, 3.1357706666, 3.1797789515, 3.7913390226, 3.9991842888, 4.4192672464, 0.34188991798, 0.306041105, 0.87728705962, 0.83605614635, 1.8003649389, 1.7425931947, 2.2717237259, 2.4908689295, 0.12457035207, 0.14467418698, 0.47605394527, 0.66811618695, 0.92798020086, 1.2483031895, 1.75449723, 1.9159042541, -0.48704484687, 0.27908970143, 0.46067806713, 0.27768492038, 0.59184098795, 1.2964123722, 1.8094638876, 1.8610563576, 0.54099049253, 1.1026684871, 1.0604457896, 1.5242110266, 1.6901476751, 2.1392429073, 2.2753588497, 2.9607643124, 0.91901153498, 1.0266339435, 1.4911249187, 1.7428510491, 1.9216822835, 2.1649792556, 2.2838747608, 3.1221718036, 0.57670059103, 1.0384699809, 1.1018173162, 1.2802959628, 1.7129933251, 1.9075513152, 2.196568468, 2.629413161, 0.42407606808, 0.64474312992, 0.86070282087, 1.2343760882, 1.4551983842, 1.6374980413, 2.204430878, 2.3590438042, 0.29795511467, 0.036968002476, 0.15754905075, 1.2015471908, 1.3905955595, 1.6199082929, 2.0997399824, 2.069596813, 1.4761004945, 1.4262692429, 2.2089352118, 2.4443351254, 2.6294728161, 2.7945932376, 3.420143589, 4.0158839706, 0.41032286026, 0.87423261984, 0.84619816924, 1.1384803898, 1.6190825771, 1.8421170282, 1.9380704927, 2.236120542, -0.22266162633, -0.62101834689, 0.022961216377, 0.27787046758, 0.7799166492, 0.59436999474, 1.4653481434, 1.4854937954, -0.67903181114, -0.73909963609, -0.13944637483, -0.044626041928, 0.51831919075, 0.97380655141, 0.82965286966, 1.2969524661] + }, + "params": { + "pattern": "joiners_only", + "n_groups": 80, + "n_periods": 8, + "seed": 112, + "effects": 2, + "placebo": 1, + "ci_level": 95, + "controls": "X1", + 
"trends_lin": true + }, + "results": { + "effects": { + "1": { + "overall_att": 2.1226216982, + "overall_se": 0.17639240757, + "overall_ci_lo": 1.7768989322, + "overall_ci_hi": 2.4683444642, + "n_switchers": 360 + }, + "2": { + "overall_att": 2.3255975503, + "overall_se": 0.26311972108, + "overall_ci_lo": 1.8098923733, + "overall_ci_hi": 2.8413027272, + "n_switchers": 271 + } + }, + "placebos": { + "1": { + "effect": 0.073967677172, + "se": 0.18518629863, + "ci_lo": -0.28899079857, + "ci_hi": 0.43692615292 + } + } + } } }, "generator": "generate_reversible_did_data v1", diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index f153e1eb..fd979087 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -483,6 +483,9 @@ def fit( trends_linear: Optional[bool] = None, trends_nonparam: Optional[Any] = None, honest_did: bool = False, + # ---------- Phase 3 extensions ---------- + heterogeneity: Optional[str] = None, + design2: bool = False, # ---------- deferred (separate effort) ---------- survey_design: Any = None, ) -> ChaisemartinDHaultfoeuilleResults: @@ -598,6 +601,40 @@ def fit( treatment=treatment, ) + # ------------------------------------------------------------------ + # Step 4b: Covariate aggregation (DID^X, Web Appendix Section 1.2) + # ------------------------------------------------------------------ + if controls is not None: + if L_max is None: + raise ValueError( + "Covariate adjustment (DID^X) requires L_max >= 1. The " + "per-period DID path does not support covariate " + "residualization. Set L_max to use the per-group " + "DID_{g,l} path with covariate adjustment." + ) + missing_controls = [c for c in controls if c not in data.columns] + if missing_controls: + raise ValueError( + f"Control column(s) {missing_controls!r} not found in " + f"data. 
Available columns: {list(data.columns)}" + ) + for c in controls: + try: + data[c] = pd.to_numeric(data[c]) + except (ValueError, TypeError) as exc: + raise ValueError( + f"Could not coerce control column {c!r} to numeric: {exc}" + ) from exc + n_nan = int(data[c].isna().sum()) + if n_nan > 0: + raise ValueError( + f"Control column {c!r} contains {n_nan} NaN value(s). " + "Drop or impute missing covariates before fitting." + ) + # Aggregate covariates to cell means (same groupby as treatment/outcome) + x_cell_agg = data.groupby([group, time], as_index=False)[controls].mean() + cell = cell.merge(x_cell_agg, on=[group, time], how="left") + # ------------------------------------------------------------------ # Step 5a: Compute the TWFE diagnostic on the FULL pre-filter cell # dataset, so the diagnostic reflects the data the user @@ -883,6 +920,130 @@ def fit( Y_mat = y_pivot.to_numpy() N_mat = n_pivot.to_numpy() + # ------------------------------------------------------------------ + # Step 7b: Covariate residualization (DID^X) + # + # When controls are specified, residualize Y_mat by partialling + # out covariate effects per baseline treatment group. This + # transforms Y_mat in-place so ALL downstream DID computations + # (per-period and per-group multi-horizon) automatically produce + # covariate-adjusted estimates. See Web Appendix Section 1.2. 
+ # ------------------------------------------------------------------ + covariate_diagnostics: Optional[Dict[str, Any]] = None + _switch_metadata_computed = False + + if controls is not None: + # Pivot covariates to (n_groups, n_periods, n_covariates) + X_pivots = [] + for c in controls: + x_piv = cell.pivot( + index=group, columns=time, values=c + ).reindex(index=all_groups, columns=all_periods) + X_pivots.append(x_piv.to_numpy()) + X_cell = np.stack(X_pivots, axis=2) + + # Need switch metadata for residualization (baselines, F_g) + baselines, first_switch_idx_arr, switch_direction_arr, T_g_arr = ( + _compute_group_switch_metadata(D_mat, N_mat) + ) + _switch_metadata_computed = True + + Y_mat, covariate_diagnostics = _compute_covariate_residualization( + Y_mat=Y_mat, + X_cell=X_cell, + N_mat=N_mat, + baselines=baselines, + first_switch_idx=first_switch_idx_arr, + ) + + # ------------------------------------------------------------------ + # Step 7c: First-differencing for linear trends (DID^{fd}) + # + # When trends_linear=True, replace Y_mat with Z_mat (first- + # differenced outcomes) so that DID_{g,l}(Z) = DID^{fd}_{g,l}. + # N_mat is also adjusted: N_mat_fd marks which Z values are valid. + # IMPORTANT: _compute_group_switch_metadata uses the ORIGINAL + # N_mat (treatment path metadata), not N_mat_fd. + # ------------------------------------------------------------------ + _is_trends_linear = trends_linear is True + linear_trends_effects: Optional[Dict[int, Dict[str, Any]]] = None + # N_mat_orig preserves observation counts for switch-metadata and + # cohort-identification code that must NOT see the first-differenced + # N_mat_fd. When trends_linear=False, N_mat_orig == N_mat. + N_mat_orig = N_mat + + if _is_trends_linear: + if L_max is None: + raise ValueError( + "Group-specific linear trends (DID^{fd}) requires " + "L_max >= 1. Set L_max to use the per-group " + "DID_{g,l} path with trend adjustment." 
+ ) + if len(all_periods) < 3: + raise ValueError( + "Group-specific linear trends (DID^{fd}) requires " + "at least 3 time periods (F_g >= 3 in the paper). " + f"Got {len(all_periods)} period(s)." + ) + # Compute switch metadata on original N_mat if not done yet + if not _switch_metadata_computed: + baselines, first_switch_idx_arr, switch_direction_arr, T_g_arr = ( + _compute_group_switch_metadata(D_mat, N_mat) + ) + _switch_metadata_computed = True + # Count and warn about excluded groups (F_g < 3 -> f_g < 2) + n_excluded_fd = int( + ((first_switch_idx_arr >= 0) & (first_switch_idx_arr < 2)).sum() + ) + if n_excluded_fd > 0: + warnings.warn( + f"DID^{{fd}} (trends_linear=True): {n_excluded_fd} " + f"switching group(s) have F_g < 3 (fewer than 2 " + f"pre-switch periods) and are excluded from the " + f"trend-adjusted estimation.", + UserWarning, + stacklevel=2, + ) + N_mat_orig = N_mat.copy() + Y_mat, N_mat = _compute_first_differenced_matrix(Y_mat, N_mat) + + # ------------------------------------------------------------------ + # Step 7d: State-set trends validation (trends_nonparam) + # + # When trends_nonparam is set (a column name), restrict the + # control pool for each switcher to groups in the same set. + # ------------------------------------------------------------------ + set_ids_arr: Optional[np.ndarray] = None + + if trends_nonparam is not None: + if L_max is None: + raise ValueError( + "State-set-specific trends (trends_nonparam) requires " + "L_max >= 1. Set L_max to use the per-group " + "DID_{g,l} path with state-set trends." + ) + set_col = str(trends_nonparam) + if set_col not in data.columns: + raise ValueError( + f"trends_nonparam column {set_col!r} not found in " + f"data. 
Available columns: {list(data.columns)}" + ) + # Aggregate set membership per group (must be time-invariant) + set_per_group = data.groupby(group)[set_col].nunique() + time_varying = set_per_group[set_per_group > 1] + if len(time_varying) > 0: + raise ValueError( + f"trends_nonparam column {set_col!r} must be " + f"time-invariant within each group. " + f"{len(time_varying)} group(s) have varying values. " + f"Examples: {time_varying.index.tolist()[:5]}" + ) + # Extract set membership per group aligned with all_groups + set_map = data.groupby(group)[set_col].first() + set_ids_arr = np.array( + [set_map.loc[g] for g in all_groups], dtype=object + ) + # ------------------------------------------------------------------ # Step 8-9: Switching-cell counts and per-period DIDs (Theorem 3) # with explicit A11 zero-retention pseudocode @@ -1041,11 +1202,13 @@ def fit( # ------------------------------------------------------------------ # Step 12b: Per-group switch metadata (shared by Phase 1 IF and - # Phase 2 multi-horizon) + # Phase 2 multi-horizon). May already be computed by + # Step 7b (covariate residualization). 
# ------------------------------------------------------------------ - baselines, first_switch_idx_arr, switch_direction_arr, T_g_arr = ( - _compute_group_switch_metadata(D_mat, N_mat) - ) + if not _switch_metadata_computed: + baselines, first_switch_idx_arr, switch_direction_arr, T_g_arr = ( + _compute_group_switch_metadata(D_mat, N_mat_orig) + ) # ------------------------------------------------------------------ # Step 12c: Multi-horizon per-group computation (L_max >= 1) @@ -1065,6 +1228,7 @@ def fit( switch_direction=switch_direction_arr, T_g=T_g_arr, L_max=L_max, + set_ids=set_ids_arr, ) # Surface A11 warnings from multi-horizon computation mh_a11 = multi_horizon_dids.pop("_a11_warnings", None) @@ -1098,6 +1262,7 @@ def fit( switch_direction=switch_direction_arr, T_g=T_g_arr, L_max=L_max, + set_ids=set_ids_arr, ) # Per-horizon analytical SE via cohort recentering. @@ -1325,7 +1490,7 @@ def fit( ) = _compute_cohort_recentered_inputs( D_mat=D_mat, Y_mat=Y_mat, - N_mat=N_mat, + N_mat=N_mat_orig, n_10_t_arr=n_10_t_arr, n_00_t_arr=n_00_t_arr, n_01_t_arr=n_01_t_arr, @@ -1857,6 +2022,99 @@ def fit( "denominator": denom, } + # ------------------------------------------------------------------ + # DID^{fd} cumulation: recover level effects from second-differences + # + # DID^{fd}_l identifies delta_{g,l} - delta_{g,l-1} (Lemma 6). + # Cumulate per-group: for each group eligible at horizon l, + # sum DID^{fd}_{g,l'} for l'=1..l, then average over that + # eligible set. This matches R's did_multiplegt_dyn which + # cumulates per-group then aggregates (NOT sum-of-aggregates, + # which mixes different eligible populations). 
+ # ------------------------------------------------------------------ + if _is_trends_linear and multi_horizon_dids is not None: + cumulated = {} + n_groups_total = D_mat.shape[0] + # Accumulate per-group running sum of DID^{fd}_{g,l'} + running_per_group = np.zeros(n_groups_total) + for l_h in range(1, (L_max or 0) + 1): + if l_h not in multi_horizon_dids: + continue + mh = multi_horizon_dids[l_h] + did_g_l = mh["did_g_l"] # (n_groups,) per-group DID + eligible = mh["eligible_mask"] # (n_groups,) bool + N_l = mh["N_l"] + if N_l == 0: + continue + # Add this horizon's per-group DID to running sum + # (NaN for ineligible groups; use 0 for accumulation) + increment = np.where(np.isfinite(did_g_l), did_g_l, 0.0) + running_per_group += increment + # Average the cumulated sum over groups eligible at THIS horizon + # Weight by S_g (switch direction) and divide by N_l + S_arr = switch_direction_arr.astype(float) + cum_effect = float( + np.sum(S_arr[eligible] * running_per_group[eligible]) / N_l + ) + # SE: conservative upper bound (sum of per-horizon SEs) + running_se_ub = sum( + event_study_effects.get(ll, {}).get("se", 0.0) + for ll in range(1, l_h + 1) + if np.isfinite(event_study_effects.get(ll, {}).get("se", np.nan)) + ) if event_study_effects is not None else float("nan") + cum_t, cum_p, cum_ci = safe_inference( + cum_effect, running_se_ub, alpha=self.alpha, df=None + ) + cumulated[l_h] = { + "effect": cum_effect, + "se": running_se_ub, + "t_stat": cum_t, + "p_value": cum_p, + "conf_int": cum_ci, + } + linear_trends_effects = cumulated if cumulated else None + + # ------------------------------------------------------------------ + # Heterogeneity testing (Web Appendix Section 1.5, Lemma 7) + # ------------------------------------------------------------------ + heterogeneity_effects: Optional[Dict[int, Dict[str, Any]]] = None + if heterogeneity is not None and L_max is not None and L_max >= 1: + het_col = str(heterogeneity) + if het_col not in data.columns: + 
raise ValueError( + f"heterogeneity column {het_col!r} not found in data." + ) + # Extract per-group covariate (must be time-invariant) + het_per_group = data.groupby(group)[het_col].nunique() + het_varying = het_per_group[het_per_group > 1] + if len(het_varying) > 0: + raise ValueError( + f"heterogeneity column {het_col!r} must be " + f"time-invariant within each group. " + f"{len(het_varying)} group(s) have varying values." + ) + het_map = data.groupby(group)[het_col].first() + X_het = np.array( + [float(het_map.loc[g]) for g in all_groups] + ) + # Use original Y_mat (not first-differenced) for heterogeneity + # test, since it operates on level differences Y[out] - Y[ref]. + # When trends_linear, the DID^{fd} second-differences are in + # event_study_effects but the het test uses level outcomes. + Y_het = Y_mat if not _is_trends_linear else y_pivot.to_numpy() + N_het = N_mat_orig + heterogeneity_effects = _compute_heterogeneity_test( + Y_mat=Y_het, + N_mat=N_het, + baselines=baselines, + first_switch_idx=first_switch_idx_arr, + switch_direction=switch_direction_arr, + T_g=T_g_arr, + X_het=X_het, + L_max=L_max, + alpha=self.alpha, + ) + twfe_weights_df = None twfe_fraction_negative = None twfe_sigma_fe = None @@ -1966,6 +2224,27 @@ def fit( else None ), bootstrap_results=bootstrap_results, + covariate_residuals=( + _build_covariate_diagnostics_df(covariate_diagnostics, controls) + if covariate_diagnostics is not None + else None + ), + linear_trends_effects=linear_trends_effects, + heterogeneity_effects=heterogeneity_effects, + design2_effects=( + _compute_design2_effects( + D_mat=D_mat, + Y_mat=Y_mat if not _is_trends_linear else y_pivot.to_numpy(), + N_mat=N_mat_orig, + baselines=baselines, + first_switch_idx=first_switch_idx_arr, + switch_direction=switch_direction_arr, + T_g=T_g_arr, + L_max=L_max if L_max is not None else 1, + ) + if design2 + else None + ), _estimator_ref=self, ) @@ -2001,25 +2280,12 @@ def _check_forward_compat_gates( ) # L_max is validated 
inline in fit() after period detection (needs # the period count). Not gated here. - if controls is not None: - raise NotImplementedError( - "Covariate adjustment (DID^X) is reserved for Phase 3 of dCDH, which " - "implements the residualization-style covariate adjustment from Web " - "Appendix Section 1.2 of the dynamic companion paper. Note: this is " - "NOT doubly-robust, NOT IPW, and NOT Callaway-Sant'Anna-style. " - "See ROADMAP.md Phase 3." - ) - if trends_linear is not None: - raise NotImplementedError( - "Group-specific linear trends (DID^{fd}) are reserved for Phase 3 of " - "dCDH (Web Appendix Section 1.3, Lemma 6 of the dynamic companion " - "paper). See ROADMAP.md Phase 3." - ) - if trends_nonparam is not None: - raise NotImplementedError( - "State-set-specific trends (trends_nonparam) are reserved for Phase 3 " - "of dCDH (Web Appendix Section 1.4). See ROADMAP.md Phase 3." - ) + # controls gate lifted — DID^X covariate residualization implemented. + # Validation (L_max >= 1 required) is in fit() after L_max detection. + # trends_linear gate lifted - DID^{fd} linear trends implemented. + # Validation (L_max >= 1, n_periods >= 3 required) is in fit(). + # trends_nonparam gate lifted - state-set trends implemented. + # Validation (L_max >= 1, column exists, time-invariant) is in fit(). 
if honest_did: raise NotImplementedError( "HonestDiD integration for dCDH is reserved for Phase 3, applied to " @@ -2340,6 +2606,423 @@ def _compute_placebo( return placebo_effect, True, placebo_a11_warnings +# ====================================================================== +# Phase 3: Covariate residualization helpers +# ====================================================================== + + +def _compute_covariate_residualization( + Y_mat: np.ndarray, + X_cell: np.ndarray, + N_mat: np.ndarray, + baselines: np.ndarray, + first_switch_idx: np.ndarray, +) -> Tuple[np.ndarray, Dict[str, Any]]: + """Residualize outcomes by partialling out covariates per baseline treatment. + + Implements ``DID^X`` from Web Appendix Section 1.2 of de Chaisemartin & + D'Haultfoeuille (2024). For each baseline treatment value *d*, estimates + ``theta_hat_d`` via OLS of first-differenced outcomes on first-differenced + covariates with time FEs, restricted to not-yet-treated observations. + Then residualizes at levels: ``Y_tilde[g,t] = Y[g,t] - X[g,t] @ theta_hat_d``. + + The level-residualization is equivalent to difference-residualization by + the Frisch-Waugh-Lovell theorem, so all downstream DID computations + (which use ``Y[g, out] - Y[g, ref]``) automatically produce the correct + covariate-adjusted estimates. + + Parameters + ---------- + Y_mat : np.ndarray, shape (n_groups, n_periods) + Cell-level outcome means. + X_cell : np.ndarray, shape (n_groups, n_periods, n_covariates) + Cell-level covariate means. + N_mat : np.ndarray, shape (n_groups, n_periods) + Observation counts per cell (>0 if observed). + baselines : np.ndarray, shape (n_groups,) + ``D_{g,1}`` baseline treatment values (float). + first_switch_idx : np.ndarray, shape (n_groups,) + Column index of first treatment change (-1 if never-switching). + + Returns + ------- + Y_residualized : np.ndarray, shape (n_groups, n_periods) + Outcome matrix with covariate effects removed. 
+ diagnostics : dict + Keyed by baseline value (float). Each entry has ``theta_hat`` + (covariate coefficients), ``n_obs`` (OLS sample size), and + ``r_squared`` (first-stage R-squared). + """ + from diff_diff.linalg import solve_ols + + n_groups, n_periods = Y_mat.shape + n_covariates = X_cell.shape[2] + Y_resid = Y_mat.copy() + diagnostics: Dict[str, Any] = {} + + # Pre-compute observation validity masks for first-differencing. + # both_observed[g, t] = True iff N_mat[g, t] > 0 AND N_mat[g, t-1] > 0 + both_observed = np.zeros((n_groups, n_periods), dtype=bool) + both_observed[:, 1:] = (N_mat[:, 1:] > 0) & (N_mat[:, :-1] > 0) + + # not_yet_switched[g, t] = True iff group g has not switched by period t + # (first_switch_idx[g] == -1 means never-switcher -> always True) + t_indices = np.arange(n_periods)[np.newaxis, :] # (1, n_periods) + f_g_col = first_switch_idx[:, np.newaxis] # (n_groups, 1) + not_yet_switched = (f_g_col == -1) | (f_g_col > t_indices) + + for d_val in np.unique(baselines): + d_mask = baselines == d_val # (n_groups,) + + # Valid OLS observations: baseline matches, not-yet-treated, both + # periods observed, t >= 1 (first-differencing needs t and t-1). + valid = d_mask[:, np.newaxis] & not_yet_switched & both_observed + valid_g, valid_t = np.where(valid) + + n_obs = len(valid_g) + if n_obs == 0: + diagnostics[float(d_val)] = { + "theta_hat": np.full(n_covariates, np.nan), + "n_obs": 0, + "r_squared": np.nan, + } + warnings.warn( + f"No not-yet-treated observations for baseline treatment " + f"d={d_val}. Cannot estimate covariate slope theta_hat " + f"for this baseline. 
Outcomes for these groups are not " + f"residualized.", + UserWarning, + stacklevel=3, + ) + continue + + # First-differenced outcomes and covariates + dY = Y_mat[valid_g, valid_t] - Y_mat[valid_g, valid_t - 1] # (n_obs,) + dX = X_cell[valid_g, valid_t] - X_cell[valid_g, valid_t - 1] # (n_obs, K) + + # Check for non-finite values (NaN from missing covariates/outcomes) + finite_mask = np.isfinite(dY) & np.all(np.isfinite(dX), axis=1) + if not finite_mask.all(): + dY = dY[finite_mask] + dX = dX[finite_mask] + n_obs = len(dY) + if n_obs == 0: + diagnostics[float(d_val)] = { + "theta_hat": np.full(n_covariates, np.nan), + "n_obs": 0, + "r_squared": np.nan, + } + continue + valid_t_finite = valid_t[finite_mask] + else: + valid_t_finite = valid_t + + # Build time FE dummies (drop first unique period as reference) + unique_t = np.unique(valid_t_finite) + n_time_fe = len(unique_t) - 1 + if n_time_fe > 0: + time_dummies = np.zeros((n_obs, n_time_fe)) + for i, t_val in enumerate(unique_t[1:]): + time_dummies[:, i] = (valid_t_finite == t_val).astype(float) + design = np.hstack([dX, time_dummies]) + else: + design = dX + + # OLS: dY = [dX, time_FE] @ beta + epsilon + coefs, residuals, _vcov = solve_ols( + design, + dY, + return_vcov=True, + rank_deficient_action="warn", + ) + + # Extract covariate coefficients (first n_covariates entries) + theta_hat = coefs[:n_covariates] + + # R-squared of first-stage regression + ss_res = float(np.sum(residuals**2)) + ss_tot = float(np.sum((dY - dY.mean()) ** 2)) + r_squared = 1.0 - ss_res / ss_tot if ss_tot > 0 else np.nan + + diagnostics[float(d_val)] = { + "theta_hat": theta_hat.copy(), + "n_obs": n_obs, + "r_squared": r_squared, + } + + # Residualize Y at levels for all groups with this baseline. 
+ # Y_tilde[g, t] = Y[g, t] - X[g, t] @ theta_hat + group_indices = np.where(d_mask)[0] + for g in group_indices: + for t in range(n_periods): + if N_mat[g, t] > 0 and np.all(np.isfinite(X_cell[g, t])): + Y_resid[g, t] = Y_mat[g, t] - float(X_cell[g, t] @ theta_hat) + + return Y_resid, diagnostics + + +def _compute_first_differenced_matrix( + Y_mat: np.ndarray, + N_mat: np.ndarray, +) -> Tuple[np.ndarray, np.ndarray]: + """First-difference the outcome matrix for ``DID^{fd}`` estimation. + + Transforms ``Y_mat`` into first-differences for the group-specific + linear trends estimator (Web Appendix Section 1.3, Lemma 6). When + passed to ``_compute_multi_horizon_dids()`` and the IF function, + the standard ``DID_{g,l}`` formula on ``Z_mat`` produces + ``DID^{fd}_{g,l}`` exactly. + + The ``F_g >= 3`` constraint (paper, 1-indexed) maps to + ``first_switch_idx >= 2`` (0-indexed). This is enforced + automatically: ``N_mat_fd[:, 0] = 0`` causes groups with + ``first_switch_idx = 1`` to fail the ``N_mat > 0`` eligibility + check at their reference period. + + Parameters + ---------- + Y_mat : np.ndarray, shape (n_groups, n_periods) + Cell-level outcome means (possibly already residualized). + N_mat : np.ndarray, shape (n_groups, n_periods) + Observation counts per cell. + + Returns + ------- + Z_mat : np.ndarray, shape (n_groups, n_periods) + First-differenced outcomes. ``Z[:, 0] = NaN``, + ``Z[:, t] = Y[:, t] - Y[:, t-1]`` for ``t >= 1``. + N_mat_fd : np.ndarray, shape (n_groups, n_periods) + Adjusted observation counts. ``N_fd[:, 0] = 0``, + ``N_fd[:, t] = min(N[:, t], N[:, t-1])`` for ``t >= 1``. 
+ """ + n_groups, n_periods = Y_mat.shape + Z_mat = np.full((n_groups, n_periods), np.nan) + Z_mat[:, 1:] = Y_mat[:, 1:] - Y_mat[:, :-1] + + N_mat_fd = np.zeros_like(N_mat) + N_mat_fd[:, 1:] = np.minimum(N_mat[:, 1:], N_mat[:, :-1]) + + return Z_mat, N_mat_fd + + +def _compute_heterogeneity_test( + Y_mat: np.ndarray, + N_mat: np.ndarray, + baselines: np.ndarray, + first_switch_idx: np.ndarray, + switch_direction: np.ndarray, + T_g: np.ndarray, + X_het: np.ndarray, + L_max: int, + alpha: float = 0.05, +) -> Dict[int, Dict[str, Any]]: + """Test for heterogeneous treatment effects (Web Appendix Section 1.5). + + Regresses ``S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})`` on ``X_g`` plus + cohort indicator dummies ``(D_{g,1}, F_g, S_g)``. Under Assumption 15 + (Lemma 7), the coefficient on ``X_g`` is an unbiased estimator of the + variance-weighted average of effect differences. Standard OLS inference + is valid - no need to account for DID estimation error. + + Parameters + ---------- + Y_mat : np.ndarray, shape (n_groups, n_periods) + N_mat : np.ndarray, shape (n_groups, n_periods) + baselines, first_switch_idx, switch_direction, T_g : np.ndarray + X_het : np.ndarray, shape (n_groups,) + Time-invariant covariate to test for heterogeneity. + L_max : int + alpha : float + + Returns + ------- + dict + ``{l: {beta, se, t_stat, p_value, conf_int, n_obs}}`` per horizon. 
+ """ + from diff_diff.linalg import solve_ols + from diff_diff.utils import safe_inference + + n_groups, n_periods = Y_mat.shape + results: Dict[int, Dict[str, Any]] = {} + + for l_h in range(1, L_max + 1): + # Eligible switchers at this horizon (same logic as multi-horizon DID) + eligible = [] + dep_var = [] + x_vals = [] + cohort_keys = [] + + for g in range(n_groups): + f_g = first_switch_idx[g] + if f_g < 0: + continue # never-switcher + ref_idx = f_g - 1 + out_idx = f_g - 1 + l_h + if out_idx >= n_periods: + continue + if ref_idx < 0: + continue + if N_mat[g, ref_idx] <= 0 or N_mat[g, out_idx] <= 0: + continue + if T_g[g] < out_idx: + continue + S_g = float(switch_direction[g]) + y_diff = Y_mat[g, out_idx] - Y_mat[g, ref_idx] + eligible.append(g) + dep_var.append(S_g * y_diff) + x_vals.append(X_het[g]) + cohort_keys.append( + (float(baselines[g]), int(f_g), int(switch_direction[g])) + ) + + n_obs = len(eligible) + if n_obs < 3: + results[l_h] = { + "beta": float("nan"), "se": float("nan"), + "t_stat": float("nan"), "p_value": float("nan"), + "conf_int": (float("nan"), float("nan")), + "n_obs": n_obs, + } + continue + + dep_arr = np.array(dep_var) + x_arr = np.array(x_vals).reshape(-1, 1) + + # Cohort dummies (drop one as reference) + unique_cohorts = sorted(set(cohort_keys)) + n_cohort_dummies = len(unique_cohorts) - 1 + if n_cohort_dummies > 0: + cohort_map = {c: i for i, c in enumerate(unique_cohorts)} + cohort_idx = np.array([cohort_map[c] for c in cohort_keys]) + cohort_dummies = np.zeros((n_obs, len(unique_cohorts))) + cohort_dummies[np.arange(n_obs), cohort_idx] = 1.0 + # Drop first cohort as reference + cohort_dummies = cohort_dummies[:, 1:] + design = np.hstack([x_arr, cohort_dummies]) + else: + design = x_arr + + coefs, _residuals, vcov = solve_ols( + design, dep_arr, + return_vcov=True, + rank_deficient_action="warn", + ) + + beta_het = float(coefs[0]) + se_het = float(np.sqrt(vcov[0, 0])) if vcov is not None else float("nan") + t_stat, p_val, ci = 
safe_inference(beta_het, se_het, alpha=alpha, df=None) + + results[l_h] = { + "beta": beta_het, + "se": se_het, + "t_stat": t_stat, + "p_value": p_val, + "conf_int": ci, + "n_obs": n_obs, + } + + return results + + +def _compute_design2_effects( + D_mat: np.ndarray, + Y_mat: np.ndarray, + N_mat: np.ndarray, + baselines: np.ndarray, + first_switch_idx: np.ndarray, + switch_direction: np.ndarray, + T_g: np.ndarray, + L_max: int, +) -> Optional[Dict[str, Any]]: + """Compute Design-2 switch-in/switch-out effects (Web Appendix Section 1.6). + + Identifies groups with exactly 2 treatment changes (join then leave), + computes the exit period E_g, and provides delta^+ (post-join) and + delta^- (post-leave) summaries. + + This is a convenience wrapper that reports descriptive statistics about + the switch-in and switch-out subpopulations rather than a full + re-estimation (which would require specialized control pools as + described in the paper). See REGISTRY.md for documentation. + + Returns None if no join-then-leave groups exist. 
+ """ + n_groups, n_periods = D_mat.shape + + # Identify join-then-leave groups: exactly 2 treatment changes where + # the first is a join (D increases) and the second is a leave (D decreases) + design2_groups = [] + exit_periods = [] + + for g in range(n_groups): + changes = [] + for t in range(1, n_periods): + if N_mat[g, t] <= 0 or N_mat[g, t - 1] <= 0: + continue + if D_mat[g, t] != D_mat[g, t - 1]: + direction = 1 if D_mat[g, t] > D_mat[g, t - 1] else -1 + changes.append((t, direction)) + if len(changes) == 2 and changes[0][1] == 1 and changes[1][1] == -1: + design2_groups.append(g) + exit_periods.append(changes[1][0]) + + if len(design2_groups) == 0: + return None + + # Compute summary statistics for the switch-in/switch-out subpopulation + switch_in_effects = [] + switch_out_effects = [] + + for i, g in enumerate(design2_groups): + f_g = first_switch_idx[g] + e_g = exit_periods[i] + ref_idx = f_g - 1 + + # Switch-in: Y[g, f_g] - Y[g, f_g-1] (effect of joining) + if ref_idx >= 0 and N_mat[g, f_g] > 0 and N_mat[g, ref_idx] > 0: + switch_in = float(Y_mat[g, f_g] - Y_mat[g, ref_idx]) + switch_in_effects.append(switch_in) + + # Switch-out: Y[g, e_g] - Y[g, e_g-1] (effect of leaving) + if e_g - 1 >= 0 and N_mat[g, e_g] > 0 and N_mat[g, e_g - 1] > 0: + switch_out = float(Y_mat[g, e_g] - Y_mat[g, e_g - 1]) + switch_out_effects.append(switch_out) + + result: Dict[str, Any] = { + "n_design2_groups": len(design2_groups), + "switch_in": { + "n_groups": len(switch_in_effects), + "mean_effect": float(np.mean(switch_in_effects)) if switch_in_effects else np.nan, + }, + "switch_out": { + "n_groups": len(switch_out_effects), + "mean_effect": float(np.mean(switch_out_effects)) if switch_out_effects else np.nan, + }, + } + return result + + +def _build_covariate_diagnostics_df( + diagnostics: Dict[str, Any], + control_names: List[str], +) -> pd.DataFrame: + """Build a tidy DataFrame from the per-baseline residualization diagnostics.""" + rows = [] + for d_val, diag in 
sorted(diagnostics.items()): + theta = diag["theta_hat"] + for k, name in enumerate(control_names): + rows.append( + { + "baseline_treatment": d_val, + "covariate": name, + "theta_hat": float(theta[k]) if np.isfinite(theta[k]) else np.nan, + "n_obs": diag["n_obs"], + "r_squared": diag["r_squared"], + } + ) + return pd.DataFrame(rows) + + # ====================================================================== # Phase 2: Multi-horizon helpers # ====================================================================== @@ -2453,6 +3136,7 @@ def _compute_multi_horizon_dids( switch_direction: np.ndarray, T_g: np.ndarray, L_max: int, + set_ids: Optional[np.ndarray] = None, ) -> Dict[int, Dict[str, Any]]: """ Compute the per-group building block ``DID_{g,l}`` and its aggregate @@ -2563,6 +3247,9 @@ def _compute_multi_horizon_dids( & (N_mat[ctrl_indices, ref_idx] > 0) & (N_mat[ctrl_indices, out_idx] > 0) ) + # State-set trends: restrict controls to same set as switcher + if set_ids is not None: + ctrl_mask &= set_ids[ctrl_indices] == set_ids[g] ctrl_pool = ctrl_indices[ctrl_mask] if ctrl_pool.size == 0: @@ -2624,6 +3311,7 @@ def _compute_per_group_if_multi_horizon( switch_direction: np.ndarray, T_g: np.ndarray, L_max: int, + set_ids: Optional[np.ndarray] = None, ) -> Dict[int, np.ndarray]: """ Compute per-group influence function ``U^G_{g,l}`` for ``l = 1..L_max``. 
@@ -2694,6 +3382,9 @@ def _compute_per_group_if_multi_horizon( & (N_mat[ctrl_indices, ref_idx] > 0) & (N_mat[ctrl_indices, out_idx] > 0) ) + # State-set trends: restrict controls to same set as switcher + if set_ids is not None: + ctrl_mask &= set_ids[ctrl_indices] == set_ids[g] ctrl_pool = ctrl_indices[ctrl_mask] n_ctrl = ctrl_pool.size diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 397d4893..633cff3e 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -405,6 +405,8 @@ class ChaisemartinDHaultfoeuilleResults: sup_t_bands: Optional[Dict[str, Any]] = field(default=None, repr=False) covariate_residuals: Optional[pd.DataFrame] = field(default=None, repr=False) linear_trends_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False) + heterogeneity_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None, repr=False) + design2_effects: Optional[Dict[str, Any]] = field(default=None, repr=False) honest_did_results: Optional[Any] = field(default=None, repr=False) # --- Repr-suppressed metadata --- @@ -416,15 +418,41 @@ class ChaisemartinDHaultfoeuilleResults: # Repr / properties # ------------------------------------------------------------------ - def __repr__(self) -> str: - """Concise string representation.""" - sig = _get_significance_stars(self.overall_p_value) + def _estimand_label(self) -> str: + """Return the estimand label based on active features.""" + has_controls = self.covariate_residuals is not None + has_trends = self.linear_trends_effects is not None + if self.L_max is not None and self.L_max >= 2: - label = "delta" + base = "delta" elif self.L_max is not None and self.L_max == 1: - label = "DID_1" + base = "DID_1" + else: + base = "DID_M" + + if has_controls and has_trends: + suffix = "^{X,fd}" + elif has_controls: + suffix = "^X" + elif has_trends: + suffix = "^{fd}" else: - label = 
"DID_M" + suffix = "" + + # For delta, suffix goes after: delta^X, delta^{fd} + if base == "delta" and suffix: + return f"delta{suffix}" + # For DID variants, suffix goes on DID: DID^X_1, DID^{fd}_M + if suffix: + did_part = base.split("_")[0] # "DID" + sub_part = base.split("_")[1] if "_" in base else "" + return f"{did_part}{suffix}_{sub_part}" if sub_part else f"{did_part}{suffix}" + return base + + def __repr__(self) -> str: + """Concise string representation.""" + sig = _get_significance_stars(self.overall_p_value) + label = self._estimand_label() return ( f"ChaisemartinDHaultfoeuilleResults(" f"{label}={self.overall_att:.4f}{sig}, " @@ -522,15 +550,25 @@ def summary(self, alpha: Optional[float] = None) -> str: ) # --- Overall --- + has_controls = self.covariate_residuals is not None + has_trends = self.linear_trends_effects is not None + adj_tag = "" + if has_controls and has_trends: + adj_tag = " (Covariate-and-Trend-Adjusted)" + elif has_controls: + adj_tag = " (Covariate-Adjusted)" + elif has_trends: + adj_tag = " (Trend-Adjusted)" + if self.L_max is not None and self.L_max >= 2: - overall_label = "Cost-Benefit Delta" - overall_row_label = "delta" + overall_label = f"Cost-Benefit Delta{adj_tag}" + overall_row_label = self._estimand_label() elif self.L_max is not None and self.L_max == 1: - overall_label = "DID_1 (Per-Group ATT at Horizon 1)" - overall_row_label = "DID_1" + overall_label = f"Per-Group ATT at Horizon 1{adj_tag}" + overall_row_label = self._estimand_label() else: - overall_label = "DID_M (Contemporaneous-Switch ATT)" - overall_row_label = "DID_M" + overall_label = f"DID_M (Contemporaneous-Switch ATT){adj_tag}" + overall_row_label = self._estimand_label() lines.extend( [ thin, @@ -812,13 +850,7 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: return pd.DataFrame( [ { - "estimand": ( - "delta" - if self.L_max is not None and self.L_max >= 2 - else "DID_1" - if self.L_max is not None and self.L_max == 1 - else "DID_M" - ), + 
"estimand": self._estimand_label(), "effect": self.overall_att, "se": self.overall_se, "t_stat": self.overall_t_stat, @@ -840,12 +872,7 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: # For the DID_M row, both quantities use the overall switching # cell set: n_cells = sum of joiner + leaver cells, and n_obs # is the same sum of raw observation counts. - if self.L_max is not None and self.L_max >= 2: - overall_est_label = "delta" - elif self.L_max is not None and self.L_max == 1: - overall_est_label = "DID_1" - else: - overall_est_label = "DID_M" + overall_est_label = self._estimand_label() rows = [ { "estimand": overall_est_label, diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index c8cda6f6..0c183501 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -583,6 +583,8 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note:** The analytical CI is **conservative** under Assumption 8 (independent groups) of the dynamic companion paper, and exact only under iid sampling. This is documented as a deliberate deviation from "default nominal coverage". The bootstrap CI uses the same conservative weighting and is provided for users who want a non-asymptotic alternative. +- **Note (deviation from R DIDmultiplegtDYN - SE normalization):** The analytical SE is ~4% smaller than R `did_multiplegt_dyn` on identical data. This is a normalization difference, not a bug. Python implements the paper's Section 3.7.3 plug-in formula verbatim: `SE = sigma-hat / sqrt(N_l)` where `sigma-hat^2 = (1/N_l) * sum_g U^{G,2}_{g,l} - sum_k (#C_k^G / N_l) * U-bar_k^2` and `N_l` is the number of eligible switcher groups at horizon `l`. R normalizes the influence function by `G` (total number of groups including never-switchers and stable controls) and computes `SE = sqrt(sum(U_R^2)) / G`. Both converge to the same asymptotic variance as `G -> infinity`. 
In finite samples R's formula produces slightly larger (more conservative) SEs because the `G`-normalization interacts with cohort recentering differently than the paper's `N_l`-normalization. Since the paper's formula is already an upper bound on the true variance (Eq 54, Jensen's inequality under Assumption 8), Python's tighter SE remains conservative. The observed gap is consistent across horizons and scenarios (~3.5-5.1%), deterministic on identical data, and does not involve any randomization. + - **Note:** Placebo SE is `NaN` for the single-period `DID_M^pl` (`L_max=None`). Multi-horizon placebos (`L_max >= 1`) have valid analytical SE and bootstrap SE via the placebo IF (see the dynamic placebo SE Note above). - **Note:** When every variance-eligible group forms its own `(D_{g,1}, F_g, S_g)` cohort (a degenerate small-panel case where the cohort framework has zero degrees of freedom), the cohort-recentered plug-in formula is unidentified: cohort recentering subtracts the cohort mean from each group's `U^G_g`, and for singleton cohorts the centered value is exactly zero, so the centered influence function vector collapses to all zeros. The estimator returns `overall_se = NaN` with a `UserWarning` rather than silently collapsing to `0.0` (which would falsely imply infinite precision). The `DID_M` point estimate remains well-defined. The bootstrap path inherits the same degeneracy on these panels — the multiplier weights act on an all-zero vector, so the bootstrap distribution is also degenerate. **Deviation from R `DIDmultiplegtDYN`:** R returns a non-zero SE on the canonical 4-group worked example via small-sample sandwich machinery that Python does not implement. Both responses are valid for a degenerate case; Python's `NaN`+warning is the safer default. To get a non-degenerate SE, include more groups so cohorts have peers (real-world panels typically have `G >> K`). 
@@ -607,13 +609,23 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (deviation from R DIDmultiplegtDYN):** Phase 1 requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. The Step 5b validation in `fit()` enforces this contract: groups missing the baseline raise `ValueError`; groups with interior gaps are dropped with a `UserWarning`; groups with **terminal missingness** (early exit / right-censoring — observed at the baseline but missing one or more later periods) are retained and contribute from their observed periods only. R `DIDmultiplegtDYN` accepts unbalanced panels with documented missing-treatment-before-first-switch handling. Python's restriction is a Phase 1 limitation: the cohort enumeration uses `D_{g,1}` as the canonical baseline (so the baseline observation must exist) and the first-switch detection walks adjacent observed periods (so interior gaps create ambiguous transition counts). Terminal missingness is supported because the per-period `present = (N_mat[:, t] > 0) & (N_mat[:, t-1] > 0)` guard appears at three sites in the variance computation (`_compute_per_period_dids`, `_compute_full_per_group_contributions`, `_compute_cohort_recentered_inputs`) and cleanly masks out missing transitions without propagating NaN into the arithmetic. **Workaround for unbalanced panels:** pre-process your data to back-fill the baseline (or drop late-entry groups before fitting), or use R `DIDmultiplegtDYN` until a future phase lifts the restriction. The Step 5b `ValueError` and `UserWarning` messages name the offending group IDs so you can locate them quickly. +- **Note (Phase 3 DID^X covariate adjustment):** Implements the residualization-style covariate adjustment from Web Appendix Section 1.2 (Assumption 11). 
For each baseline treatment value `d`, estimates `theta_hat_d` via OLS of first-differenced outcomes on first-differenced covariates with time FEs, restricted to not-yet-treated observations. Residualizes at levels: `Y_tilde[g,t] = Y[g,t] - X[g,t] @ theta_hat_d`. All downstream DID computations use residualized outcomes. This is NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. Plug-in IF (treating `theta_hat` as fixed) is valid by FWL theorem. Requires `L_max >= 1`. Activated via `controls=["col1", "col2"]` in `fit()`. + +- **Note (Phase 3 DID^{fd} linear trends):** Implements group-specific linear trends from Web Appendix Section 1.3 (Assumption 12, Lemma 6). Uses the Z_mat transformation: `Z[g,t] = Y[g,t] - Y[g,t-1]` (first-differenced outcomes). Since `DID_{g,l}(Z) = DID^{fd}_{g,l}` algebraically, the existing multi-horizon DID code produces trend-adjusted estimates when fed Z_mat. Requires F_g >= 3 (at least 2 pre-switch periods); groups with F_g < 3 are excluded with a `UserWarning`. Cumulated level effects `delta^{fd}_l = sum_{l'=1}^l DID^{fd}_{l'}` stored in `results.linear_trends_effects`. Cumulated SE uses conservative upper bound (sum of per-horizon SEs); cross-horizon covariance from IF vectors is a library extension (paper proves Theorem 1 per-horizon, not cross-horizon). When combined with DID^X, residualization is applied first, then first-differencing (per paper assumption ordering). Activated via `trends_linear=True` in `fit()`. + +- **Note (Phase 3 state-set trends):** Implements state-set-specific trends from Web Appendix Section 1.4 (Assumptions 13-14). Restricts the control pool for each switcher to groups in the same set (e.g., same state in county-level data). The restriction applies in BOTH `_compute_multi_horizon_dids()` (point estimates) and `_compute_per_group_if_multi_horizon()` (influence functions) to ensure IF consistency. Cohort structure stays as `(D_{g,1}, F_g, S_g)` triples (does not incorporate set membership). 
Set membership must be time-invariant per group. Activated via `trends_nonparam="state_column"` in `fit()`. + +- **Note (Phase 3 heterogeneity testing):** Implements the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Saturated OLS regression of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid - the paper shows no need to account for DID estimation error. Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. + +- **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. The paper notes Design-2 can be implemented by "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. + **Reference implementation(s):** - R: [`DIDmultiplegtDYN`](https://cran.r-project.org/package=DIDmultiplegtDYN) (CRAN, maintained by the paper authors). The Python implementation matches `did_multiplegt_dyn(..., effects=1)` at horizon `l = 1`. Parity tests live in `tests/test_chaisemartin_dhaultfoeuille_parity.py`. - Stata: `did_multiplegt_dyn` (SSC, also maintained by the paper authors). 
**Requirements checklist:** - [x] Single class `ChaisemartinDHaultfoeuille` (alias `DCDH`); not a family -- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for Phase 2/3 parameters (`aggregate`, `L_max`, `controls`, `trends_linear`, `trends_nonparam`, `honest_did`, `survey_design`) +- [x] Forward-compat `fit()` signature with `NotImplementedError` gates for remaining parameters (`aggregate`, `honest_did`, `survey_design`); Phase 3 gates lifted for `controls`, `trends_linear`, `trends_nonparam` - [x] `DID_M` point estimate with cohort-recentered analytical SE - [x] Joiners-only `DID_+` and leavers-only `DID_-` decompositions with their own inference - [x] Single-lag placebo `DID_M^pl` (point estimate; SE deferred to Phase 2) @@ -628,6 +640,11 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - [x] No silent failures: every drop / round / fallback emits a `warnings.warn()` or `ValueError` - [x] Hand-calculable 4-group worked example: `DID_M = 2.5`, `DID_+ = 2.0`, `DID_- = 3.0` exactly - [x] R `DIDmultiplegtDYN` parity tests at `l = 1` (fixture skips cleanly when R or `DIDmultiplegtDYN` is unavailable) +- [x] DID^X covariate residualization via per-baseline OLS (Web Appendix Section 1.2) +- [x] DID^{fd} group-specific linear trends via Z_mat first-differencing (Web Appendix Section 1.3) +- [x] State-set-specific trends via control-pool restriction (Web Appendix Section 1.4) +- [x] Heterogeneity testing via saturated OLS (Web Appendix Section 1.5, Lemma 7) +- [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6) --- diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 82ef0035..a9f646ad 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -338,19 +338,21 @@ def test_L_max_validation(self, data): ) assert 1 in results.event_study_effects - def test_controls_raises_not_implemented(self, 
class TestCovariateAdjustment:
    """DID^X covariate residualization (ROADMAP item 3a)."""

    @staticmethod
    def _make_panel_with_covariates(seed=42, n_groups=40, n_periods=6):
        """Build a seeded panel whose outcome loads on a trending covariate.

        The first half of the groups switch to treatment at period 3; the
        rest never switch. ``X1`` is a group-level draw plus ``0.5 * t`` plus
        noise, and enters the outcome with coefficient 3.0 next to a
        treatment effect of 5.0. The order of the RNG draws determines the
        data, so statements must not be reordered.
        """
        rng = np.random.RandomState(seed)
        rows = []
        for g in range(n_groups):
            group_fe = rng.normal(0, 2)
            # Covariate: group-level value plus time variation
            x_base = rng.normal(0, 1)
            # Treatment: first half switch at period 3, rest never
            switches = g < n_groups // 2
            for t in range(n_periods):
                d = 1 if (switches and t >= 3) else 0
                x = x_base + 0.5 * t + rng.normal(0, 0.1)
                # Outcome depends on group FE, time trend, covariate,
                # and treatment effect
                y = group_fe + 2.0 * t + 3.0 * x + 5.0 * d + rng.normal(0, 0.5)
                rows.append(
                    {"group": g, "period": t, "treatment": d, "outcome": y, "X1": x}
                )
        return pd.DataFrame(rows)

    def test_controls_requires_lmax(self):
        """controls without L_max raises ValueError."""
        df = self._make_panel_with_covariates()
        with pytest.raises(ValueError, match="requires L_max >= 1"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment", controls=["X1"]
            )

    def test_controls_missing_column(self):
        """controls with nonexistent column raises ValueError."""
        df = self._make_panel_with_covariates()
        with pytest.raises(ValueError, match="not found in data"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment",
                controls=["nonexistent"], L_max=1,
            )

    def test_covariate_residualization_basic(self):
        """DID^X produces different results from unadjusted DID."""
        df = self._make_panel_with_covariates()
        est = ChaisemartinDHaultfoeuille(seed=1)

        # Unadjusted
        r_plain = est.fit(df, "outcome", "group", "period", "treatment", L_max=1)
        # Covariate-adjusted
        r_x = est.fit(
            df, "outcome", "group", "period", "treatment",
            controls=["X1"], L_max=1,
        )

        # Residualizing on X1 should move the point estimate
        assert r_x.overall_att != r_plain.overall_att
        # Covariate diagnostics should be populated
        assert r_x.covariate_residuals is not None
        assert len(r_x.covariate_residuals) > 0
        assert "theta_hat" in r_x.covariate_residuals.columns
        # SE should be finite
        assert np.isfinite(r_x.overall_se)

    def test_multiple_covariates(self):
        """Multiple covariates are accepted and produce diagnostics."""
        df = self._make_panel_with_covariates()
        # Add a second covariate
        df["X2"] = np.random.RandomState(99).normal(0, 1, len(df))
        est = ChaisemartinDHaultfoeuille(seed=1)
        r = est.fit(
            df, "outcome", "group", "period", "treatment",
            controls=["X1", "X2"], L_max=1,
        )
        assert r.covariate_residuals is not None
        # Should have rows for each (baseline, covariate) combination
        assert set(r.covariate_residuals["covariate"].unique()) == {"X1", "X2"}

    def test_covariate_residuals_diagnostics(self):
        """Diagnostics DataFrame has expected structure."""
        df = self._make_panel_with_covariates()
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            controls=["X1"], L_max=2,
        )
        diag = r.covariate_residuals
        assert diag is not None
        expected_cols = {"baseline_treatment", "covariate", "theta_hat", "n_obs", "r_squared"}
        assert expected_cols.issubset(set(diag.columns))
        # All baselines should have positive n_obs
        assert (diag["n_obs"] > 0).all()
        # theta_hat should be finite (not NaN)
        theta = diag.loc[diag["covariate"] == "X1", "theta_hat"].values[0]
        assert np.isfinite(theta), f"theta_hat is not finite: {theta}"

    def test_controls_with_nonbinary_treatment(self):
        """Covariates work with non-binary treatment and L_max >= 1."""
        rng = np.random.RandomState(123)
        rows = []
        for g in range(30):
            x_base = rng.normal(0, 1)
            for t in range(5):
                # Ordinal treatment: 0 -> 2 for first 10, 0 -> 1 for next 10, never for rest
                if g < 10:
                    d = 2.0 if t >= 2 else 0.0
                elif g < 20:
                    d = 1.0 if t >= 3 else 0.0
                else:
                    d = 0.0
                x = x_base + 0.1 * t
                y = 10 + 2 * t + 1.5 * x + 3 * d + rng.normal(0, 0.5)
                rows.append({"group": g, "period": t, "treatment": d, "outcome": y, "X1": x})
        df = pd.DataFrame(rows)
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            controls=["X1"], L_max=1,
        )
        assert np.isfinite(r.overall_att)
        assert np.isfinite(r.overall_se)

    def test_controls_with_multi_horizon(self):
        """Covariates work with L_max > 1 event study."""
        df = self._make_panel_with_covariates()
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            controls=["X1"], L_max=2,
        )
        assert r.event_study_effects is not None
        assert 1 in r.event_study_effects
        assert 2 in r.event_study_effects
        # Both horizons should have finite effects and SEs
        for h in [1, 2]:
            assert np.isfinite(r.event_study_effects[h]["effect"])
            assert np.isfinite(r.event_study_effects[h]["se"])


class TestLinearTrends:
    """DID^{fd} group-specific linear trends (ROADMAP item 3b)."""

    @staticmethod
    def _make_panel_with_trends(seed=42, n_groups=40, n_periods=8):
        """Build a seeded panel with a group-specific linear trend in the outcome.

        The first half of the groups switch at period 4; for the rest the
        switch period is set past the end of the panel, so they never switch.
        The order of the RNG draws determines the data.
        """
        rng = np.random.RandomState(seed)
        rows = []
        for g in range(n_groups):
            group_fe = rng.normal(0, 2)
            group_trend = rng.normal(0, 0.5)  # group-specific linear trend
            switches = g < n_groups // 2
            switch_period = 4 if switches else n_periods + 1
            for t in range(n_periods):
                d = 1 if t >= switch_period else 0
                y = (
                    group_fe
                    + 2.0 * t
                    + group_trend * t  # group-specific trend
                    + 5.0 * d
                    + rng.normal(0, 0.3)
                )
                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
        return pd.DataFrame(rows)

    def test_trends_linear_requires_lmax(self):
        """trends_linear without L_max raises ValueError."""
        df = self._make_panel_with_trends()
        with pytest.raises(ValueError, match="requires L_max >= 1"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment",
                trends_linear=True,
            )

    def test_trends_linear_basic(self):
        """DID^{fd} produces different results from unadjusted DID."""
        df = self._make_panel_with_trends()
        est = ChaisemartinDHaultfoeuille(seed=1)
        r_plain = est.fit(df, "outcome", "group", "period", "treatment", L_max=2)
        r_fd = est.fit(
            df, "outcome", "group", "period", "treatment",
            L_max=2, trends_linear=True,
        )
        # Removing the group-specific trends should move the point estimate
        assert r_fd.overall_att != r_plain.overall_att
        # Event study should have horizons
        assert r_fd.event_study_effects is not None
        assert 1 in r_fd.event_study_effects

    def test_cumulated_level_effects(self):
        """Cumulated delta^{fd}_l = sum DID^{fd}_{l'} for l'=1..l."""
        df = self._make_panel_with_trends()
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            L_max=3, trends_linear=True,
        )
        assert r.linear_trends_effects is not None
        # Check cumulation: delta^{fd}_1 = DID^{fd}_1
        es = r.event_study_effects
        lt = r.linear_trends_effects
        assert abs(lt[1]["effect"] - es[1]["effect"]) < 1e-12
        # delta^{fd}_2 = DID^{fd}_1 + DID^{fd}_2
        assert abs(lt[2]["effect"] - (es[1]["effect"] + es[2]["effect"])) < 1e-12

    def test_fg_less_than_3_warning(self):
        """Groups with F_g < 3 produce a UserWarning."""
        rng = np.random.RandomState(99)
        rows = []
        for g in range(20):
            for t in range(6):
                # Group 0-4: switch at period 1 (F_g=2, 0-indexed f_g=1 < 2)
                if g < 5:
                    d = 1 if t >= 1 else 0
                elif g < 10:
                    d = 1 if t >= 3 else 0
                else:
                    d = 0
                y = 10 + 2 * t + 3 * d + rng.normal(0, 0.5)
                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
        df = pd.DataFrame(rows)
        with pytest.warns(UserWarning, match="F_g < 3"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment",
                L_max=2, trends_linear=True,
            )

    def test_trends_with_covariates(self):
        """Combined DID^{X,fd}: covariates + linear trends."""
        df = self._make_panel_with_trends()
        df["X1"] = np.random.RandomState(77).normal(0, 1, len(df))
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            controls=["X1"], L_max=2, trends_linear=True,
        )
        assert np.isfinite(r.overall_att)
        assert r.covariate_residuals is not None
        assert r.linear_trends_effects is not None


class TestStateSetTrends:
    """State-set-specific trends (ROADMAP item 3c)."""

    @staticmethod
    def _make_panel_with_sets(seed=42, n_groups=40, n_periods=6):
        """Build a seeded panel where each group belongs to one of 4 states.

        State is ``g % 4`` and the first half of the groups switch at
        period 3, so with the defaults every state holds 5 switchers and
        5 never-switchers. The order of the RNG draws determines the data.
        """
        rng = np.random.RandomState(seed)
        rows = []
        for g in range(n_groups):
            state = g % 4  # 4 states
            group_fe = rng.normal(0, 2)
            switches = g < n_groups // 2
            for t in range(n_periods):
                d = 1 if (switches and t >= 3) else 0
                y = group_fe + 2.0 * t + 5.0 * d + rng.normal(0, 0.5)
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": y, "state": state,
                })
        return pd.DataFrame(rows)

    def test_trends_nonparam_requires_lmax(self):
        """trends_nonparam without L_max raises ValueError."""
        df = self._make_panel_with_sets()
        with pytest.raises(ValueError, match="requires L_max >= 1"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment",
                trends_nonparam="state",
            )

    def test_trends_nonparam_basic(self):
        """State-set restriction fits cleanly next to the unrestricted fit."""
        df = self._make_panel_with_sets()
        est = ChaisemartinDHaultfoeuille(seed=1)
        r_plain = est.fit(df, "outcome", "group", "period", "treatment", L_max=1)
        r_set = est.fit(
            df, "outcome", "group", "period", "treatment",
            L_max=1, trends_nonparam="state",
        )
        # The fixture is balanced across states, so the restricted and
        # unrestricted point estimates are not guaranteed to differ; only
        # well-formedness of both fits is asserted.
        assert np.isfinite(r_plain.overall_att)
        assert np.isfinite(r_set.overall_att)
        assert np.isfinite(r_set.overall_se)

    def test_time_varying_set_raises(self):
        """Set membership that varies over time raises ValueError."""
        df = self._make_panel_with_sets()
        # Make state vary over time for some groups
        df.loc[(df["group"] == 0) & (df["period"] == 3), "state"] = 99
        with pytest.raises(ValueError, match="time-invariant"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment",
                L_max=1, trends_nonparam="state",
            )

    def test_missing_set_column_raises(self):
        """trends_nonparam naming a nonexistent column raises ValueError."""
        df = self._make_panel_with_sets()
        with pytest.raises(ValueError, match="not found in data"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment",
                L_max=1, trends_nonparam="nonexistent",
            )

    def test_nonparam_with_covariates(self):
        """Combined state-set trends + covariates."""
        df = self._make_panel_with_sets()
        df["X1"] = np.random.RandomState(77).normal(0, 1, len(df))
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            controls=["X1"], L_max=1, trends_nonparam="state",
        )
        assert np.isfinite(r.overall_att)
        assert r.covariate_residuals is not None


class TestHeterogeneityTesting:
    """Heterogeneity testing beta^{het}_l (ROADMAP item 3d)."""

    @staticmethod
    def _make_panel_with_het(seed=42, n_groups=40, n_periods=6):
        """Build a seeded panel whose treatment effect depends on ``het_x``.

        ``het_x`` is 1 for the first half of the groups and 0 otherwise; the
        first three quarters of the groups switch at period 3 with effect
        ``5.0 + 3.0 * het_x``. The order of the RNG draws determines the data.
        """
        rng = np.random.RandomState(seed)
        rows = []
        for g in range(n_groups):
            x_g = 1 if g < n_groups // 2 else 0  # binary het covariate
            group_fe = rng.normal(0, 2)
            switches = g < (3 * n_groups) // 4
            effect = 5.0 + 3.0 * x_g  # heterogeneous effect
            for t in range(n_periods):
                d = 1 if (switches and t >= 3) else 0
                y = group_fe + 2.0 * t + effect * d + rng.normal(0, 0.5)
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": y, "het_x": x_g,
                })
        return pd.DataFrame(rows)

    def test_heterogeneity_basic(self):
        """Detect heterogeneous effects with binary covariate."""
        df = self._make_panel_with_het()
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            L_max=1, heterogeneity="het_x",
        )
        assert r.heterogeneity_effects is not None
        assert 1 in r.heterogeneity_effects
        het = r.heterogeneity_effects[1]
        assert np.isfinite(het["beta"])
        assert np.isfinite(het["se"])
        # True het effect is ~3.0 (effect difference between x=1 and x=0)
        assert het["beta"] > 0, f"Expected positive beta, got {het['beta']}"

    def test_heterogeneity_null(self):
        """With a homogeneous effect, beta stays within a loose bound around zero."""
        rng = np.random.RandomState(123)
        rows = []
        for g in range(40):
            x_g = rng.normal(0, 1)  # random covariate, uncorrelated with effect
            switches = g < 20
            for t in range(6):
                d = 1 if (switches and t >= 3) else 0
                y = 10 + 2 * t + 5 * d + rng.normal(0, 0.5)
                rows.append({
                    "group": g, "period": t, "treatment": d,
                    "outcome": y, "het_x": x_g,
                })
        df = pd.DataFrame(rows)
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            L_max=1, heterogeneity="het_x",
        )
        het = r.heterogeneity_effects[1]
        # Loose sanity bound only; this does not test statistical significance.
        # NOTE(review): 5.0 equals the full treatment effect - confirm the
        # expected spread of beta before tightening.
        assert abs(het["beta"]) < 5.0

    def test_heterogeneity_multi_horizon(self):
        """Heterogeneity test at multiple horizons."""
        df = self._make_panel_with_het()
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            L_max=2, heterogeneity="het_x",
        )
        assert 1 in r.heterogeneity_effects
        assert 2 in r.heterogeneity_effects

    def test_heterogeneity_missing_column(self):
        """heterogeneity naming a nonexistent column raises ValueError."""
        df = self._make_panel_with_het()
        with pytest.raises(ValueError, match="not found"):
            ChaisemartinDHaultfoeuille(seed=1).fit(
                df, "outcome", "group", "period", "treatment",
                L_max=1, heterogeneity="nonexistent",
            )


class TestDesign2:
    """Design-2 switch-in/switch-out separation (ROADMAP item 3e)."""

    @staticmethod
    def _make_join_then_leave_panel(seed=42, n_groups=30, n_periods=8):
        """Build a seeded panel containing join-then-leave groups.

        Groups 0-9 are treated for periods 2-4 only, groups 10-19 join at
        period 3 and stay treated, and the remaining groups never switch.
        The order of the RNG draws determines the data.
        """
        rng = np.random.RandomState(seed)
        rows = []
        for g in range(n_groups):
            group_fe = rng.normal(0, 2)
            for t in range(n_periods):
                # Groups 0-9: join at t=2, leave at t=5 (design 2)
                if g < 10:
                    d = 1 if 2 <= t < 5 else 0
                # Groups 10-19: join at t=3, never leave
                elif g < 20:
                    d = 1 if t >= 3 else 0
                # Groups 20-29: never switch
                else:
                    d = 0
                y = group_fe + 2.0 * t + 5.0 * d + rng.normal(0, 0.3)
                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
        return pd.DataFrame(rows)

    def test_design2_basic(self):
        """Design-2 identifies join-then-leave groups."""
        df = self._make_join_then_leave_panel()
        # drop_larger_lower=False to keep the 2-switch groups
        r = ChaisemartinDHaultfoeuille(seed=1, drop_larger_lower=False).fit(
            df, "outcome", "group", "period", "treatment",
            L_max=1, design2=True,
        )
        assert r.design2_effects is not None
        assert r.design2_effects["n_design2_groups"] == 10
        # Switch-in should show positive effect (joining treatment)
        assert r.design2_effects["switch_in"]["mean_effect"] > 0
        # Switch-out should show negative effect (leaving treatment)
        assert r.design2_effects["switch_out"]["mean_effect"] < 0

    def test_design2_no_eligible(self):
        """No join-then-leave groups returns None."""
        rng = np.random.RandomState(99)
        rows = []
        for g in range(20):
            for t in range(6):
                d = 1 if (g < 10 and t >= 3) else 0
                y = 10 + 2 * t + 5 * d + rng.normal(0, 0.5)
                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
        df = pd.DataFrame(rows)
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df, "outcome", "group", "period", "treatment",
            L_max=1, design2=True,
        )
        assert r.design2_effects is None

    def test_design2_disabled_by_default(self):
        """design2=False (default) produces no design2_effects."""
        df = self._make_join_then_leave_panel()
        r = ChaisemartinDHaultfoeuille(seed=1, drop_larger_lower=False).fit(
            df, "outcome", "group", "period", "treatment", L_max=1,
        )
        assert r.design2_effects is None
dict) -> pd.DataFrame: + """Reconstruct a panel DataFrame including covariate columns.""" + cols = { + "group": data_dict["group"], + "period": data_dict["period"], + "treatment": data_dict["treatment"], + "outcome": data_dict["outcome"], + } + if "X1" in data_dict: + cols["X1"] = data_dict["X1"] + return pd.DataFrame(cols) + + +class TestDCDHDynRParityPhase3: + """ + Phase 3 parity tests: covariates (DID^X) and linear trends (DID^{fd}). + + Tests that the Python implementation matches R ``did_multiplegt_dyn`` + with ``controls`` and ``trends_lin`` options on identical data. + + Tolerances are wider than Phase 1/2 because the covariate and trend + adjustments involve additional OLS steps that may amplify the + cell-count vs obs-count weighting deviation documented in REGISTRY.md. + """ + + # Controls-only: observed gap 0.15%-0.26% (from OLS residualization). + # Trends-only: exact (0.0000%) at both horizons after cumulation fix. + # Combined: observed gap 0.30%-0.59% (from OLS residualization only). + # SE: 3-5% from cell-count weighting; 12-18% for cumulated SEs. + POINT_RTOL = 0.01 # 1% for controls (observed: 0.26%) + SE_RTOL = 0.20 # 20% for SE (cell-count weighting + cumulation) + + def _check_phase3_scenario( + self, golden_values, scenario_name, L_max, controls=None, + trends_linear=None, point_rtol=None, se_rtol=None, + ): + scenario = golden_values.get(scenario_name) + if scenario is None: + pytest.skip(f"scenario {scenario_name!r} not in golden values") + + df = _golden_to_df_with_covariates(scenario["data"]) + est = ChaisemartinDHaultfoeuille() + results = est.fit( + df, outcome="outcome", group="group", time="period", + treatment="treatment", L_max=L_max, + controls=controls, trends_linear=trends_linear, + ) + r_results = scenario["results"] + rtol = point_rtol or self.POINT_RTOL + se_tol = se_rtol or self.SE_RTOL + + # When trends_linear is active, R returns cumulated level effects + # (delta^{fd}_l), not second-differences (DID^{fd}_l). 
Compare + # against linear_trends_effects (cumulated) instead of + # event_study_effects (second-differences). + if trends_linear: + py_effects = results.linear_trends_effects + assert py_effects is not None, "linear_trends_effects is None" + else: + py_effects = results.event_study_effects + + # Check per-horizon effects + for h_str, r_eff in r_results.get("effects", {}).items(): + h = int(h_str) + assert h in py_effects, ( + f"Horizon {h} missing from Python results" + ) + py_eff = py_effects[h]["effect"] + assert py_eff == pytest.approx( + r_eff["overall_att"], rel=rtol + ), f"h={h}: Python={py_eff:.4f} vs R={r_eff['overall_att']:.4f}" + + # SE comparison (wider tolerance) + py_se = py_effects[h]["se"] + r_se = r_eff["overall_se"] + if py_se > 0 and r_se > 0: + assert py_se == pytest.approx( + r_se, rel=se_tol + ), f"h={h} SE: Python={py_se:.4f} vs R={r_se:.4f}" + + def test_parity_joiners_only_controls(self, golden_values): + """DID^X with controls vs R did_multiplegt_dyn(..., controls='X1'). + + Observed gap: 0.15% at h=1, 0.26% at h=2. Deterministic on + identical data - the small gap is from the documented cell-count + vs obs-count weighting deviation in REGISTRY.md. + """ + self._check_phase3_scenario( + golden_values, "joiners_only_controls", L_max=2, + controls=["X1"], + point_rtol=self.POINT_RTOL, + ) + + def test_parity_joiners_only_trends_lin(self, golden_values): + """DID^{fd} with trends_linear vs R did_multiplegt_dyn(..., trends_lin=TRUE). + + Exact match (0.0000%) at both horizons after per-group cumulation fix. + """ + self._check_phase3_scenario( + golden_values, "joiners_only_trends_lin", L_max=2, + trends_linear=True, + point_rtol=1e-4, # exact match + ) + + def test_parity_joiners_only_controls_trends_lin(self, golden_values): + """DID^{X,fd} with controls + trends vs R. + + Observed gap: 0.30%-0.59% (from OLS residualization step only; + the trends cumulation is now exact after per-group cumulation fix). 
+ """ + self._check_phase3_scenario( + golden_values, "joiners_only_controls_trends_lin", L_max=2, + controls=["X1"], trends_linear=True, + point_rtol=self.POINT_RTOL, + ) From f580dae010062c3ec9282c51b4999dc3b8cfcb84 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 12:43:08 -0400 Subject: [PATCH 02/17] Address AI review P1/P2 findings for Phase 3 PR B P1 fixes: - DID^X residualization no longer leaks into per-period path: per_period_effects uses raw Y_mat, only multi-horizon path sees residualized outcomes - Added to_dataframe levels for heterogeneity and linear_trends P2 fixes: - Covariate coercion no longer mutates caller's DataFrame - Vectorized residualization (einsum replaces nested loop) - Heterogeneity test guards against rank-deficient OLS - Added estimand contract test for controls + L_max=1 - REGISTRY note clarifies per_period_effects stays unadjusted Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 65 +++++++++++++++---- .../chaisemartin_dhaultfoeuille_results.py | 22 +++++++ docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 30 +++++++++ 4 files changed, 104 insertions(+), 15 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index fd979087..27bdaf4f 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -618,21 +618,26 @@ def fit( f"Control column(s) {missing_controls!r} not found in " f"data. 
Available columns: {list(data.columns)}" ) + # Work on a copy to avoid mutating the caller's DataFrame + data_controls = data[controls].copy() for c in controls: try: - data[c] = pd.to_numeric(data[c]) + data_controls[c] = pd.to_numeric(data_controls[c]) except (ValueError, TypeError) as exc: raise ValueError( f"Could not coerce control column {c!r} to numeric: {exc}" ) from exc - n_nan = int(data[c].isna().sum()) + n_nan = int(data_controls[c].isna().sum()) if n_nan > 0: raise ValueError( f"Control column {c!r} contains {n_nan} NaN value(s). " "Drop or impute missing covariates before fitting." ) - # Aggregate covariates to cell means (same groupby as treatment/outcome) - x_cell_agg = data.groupby([group, time], as_index=False)[controls].mean() + # Aggregate covariates to cell means (same groupby as treatment/outcome). + # Use the coerced copy joined with group/time from original data. + x_agg_input = data[[group, time]].copy() + x_agg_input[controls] = data_controls[controls].values + x_cell_agg = x_agg_input.groupby([group, time], as_index=False)[controls].mean() cell = cell.merge(x_cell_agg, on=[group, time], how="left") # ------------------------------------------------------------------ @@ -948,13 +953,19 @@ def fit( ) _switch_metadata_computed = True - Y_mat, covariate_diagnostics = _compute_covariate_residualization( + Y_mat_residualized, covariate_diagnostics = _compute_covariate_residualization( Y_mat=Y_mat, X_cell=X_cell, N_mat=N_mat, baselines=baselines, first_switch_idx=first_switch_idx_arr, ) + # Keep raw Y_mat for the per-period DID path (which does not + # support covariate residualization - it uses binary joiner/leaver + # categorization). The residualized matrix is used only by the + # per-group multi-horizon path (L_max >= 1). 
+ Y_mat_raw = Y_mat + Y_mat = Y_mat_residualized # ------------------------------------------------------------------ # Step 7c: First-differencing for linear trends (DID^{fd}) @@ -1061,8 +1072,13 @@ def fit( a11_minus_zeroed_arr, ) = _compute_per_period_dids( D_mat=D_mat, - Y_mat=Y_mat, - N_mat=N_mat, + # Use raw (unadjusted) outcomes for per-period DID. Covariate + # residualization applies only to the per-group multi-horizon + # path (L_max >= 1). The per-period path uses binary + # joiner/leaver categorization and is not part of the DID^X + # contract (Web Appendix Section 1.2). + Y_mat=Y_mat_raw if controls is not None else Y_mat, + N_mat=N_mat_orig, periods=all_periods, ) if a11_warnings: @@ -1489,7 +1505,8 @@ def fit( U_centered_leavers, ) = _compute_cohort_recentered_inputs( D_mat=D_mat, - Y_mat=Y_mat, + # Phase 1 IF uses per-period structure: use raw outcomes + Y_mat=Y_mat_raw if controls is not None else Y_mat, N_mat=N_mat_orig, n_10_t_arr=n_10_t_arr, n_00_t_arr=n_00_t_arr, @@ -2751,12 +2768,17 @@ def _compute_covariate_residualization( } # Residualize Y at levels for all groups with this baseline. 
- # Y_tilde[g, t] = Y[g, t] - X[g, t] @ theta_hat + # Vectorized level residualization: Y_tilde[g, t] = Y[g, t] - X[g, t] @ theta_hat group_indices = np.where(d_mask)[0] - for g in group_indices: - for t in range(n_periods): - if N_mat[g, t] > 0 and np.all(np.isfinite(X_cell[g, t])): - Y_resid[g, t] = Y_mat[g, t] - float(X_cell[g, t] @ theta_hat) + if len(group_indices) > 0: + # X_sub: (n_d_groups, n_periods, n_covariates), theta: (n_covariates,) + X_sub = X_cell[group_indices] # (n_d, T, K) + adjustment = np.einsum("gtk,k->gt", X_sub, theta_hat) # (n_d, T) + # Mask: only adjust cells that are observed and have finite covariates + valid = (N_mat[group_indices] > 0) & np.all(np.isfinite(X_sub), axis=2) + Y_resid[group_indices] = np.where( + valid, Y_mat[group_indices] - adjustment, Y_mat[group_indices] + ) return Y_resid, diagnostics @@ -2902,6 +2924,17 @@ def _compute_heterogeneity_test( else: design = x_arr + # Guard: need more observations than parameters + n_params = design.shape[1] + if n_obs <= n_params: + results[l_h] = { + "beta": float("nan"), "se": float("nan"), + "t_stat": float("nan"), "p_value": float("nan"), + "conf_int": (float("nan"), float("nan")), + "n_obs": n_obs, + } + continue + coefs, _residuals, vcov = solve_ols( design, dep_arr, return_vcov=True, @@ -2909,7 +2942,11 @@ def _compute_heterogeneity_test( ) beta_het = float(coefs[0]) - se_het = float(np.sqrt(vcov[0, 0])) if vcov is not None else float("nan") + # NaN-safe: if vcov is None or target coefficient variance is NaN + # (rank-deficient), all inference fields are NaN. 
+ se_het = float("nan") + if vcov is not None and np.isfinite(vcov[0, 0]) and vcov[0, 0] > 0: + se_het = float(np.sqrt(vcov[0, 0])) t_stat, p_val, ci = safe_inference(beta_het, se_het, alpha=alpha, df=None) results[l_h] = { diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 633cff3e..183fe2b3 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -1029,6 +1029,28 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: ) return self.twfe_weights.copy() + elif level == "heterogeneity": + if self.heterogeneity_effects is None: + raise ValueError( + "Heterogeneity test results not available. Pass " + "heterogeneity='column_name' to fit()." + ) + rows = [] + for h, data in sorted(self.heterogeneity_effects.items()): + rows.append({"horizon": h, **data}) + return pd.DataFrame(rows) + + elif level == "linear_trends": + if self.linear_trends_effects is None: + raise ValueError( + "Linear trends effects not available. Pass " + "trends_linear=True to fit()." + ) + rows = [] + for h, data in sorted(self.linear_trends_effects.items()): + rows.append({"horizon": h, **data}) + return pd.DataFrame(rows) + else: raise ValueError( f"Unknown level: {level!r}. Use 'overall', 'joiners_leavers', " diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 0c183501..aa9f4fda 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -609,7 +609,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (deviation from R DIDmultiplegtDYN):** Phase 1 requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. 
The Step 5b validation in `fit()` enforces this contract: groups missing the baseline raise `ValueError`; groups with interior gaps are dropped with a `UserWarning`; groups with **terminal missingness** (early exit / right-censoring — observed at the baseline but missing one or more later periods) are retained and contribute from their observed periods only. R `DIDmultiplegtDYN` accepts unbalanced panels with documented missing-treatment-before-first-switch handling. Python's restriction is a Phase 1 limitation: the cohort enumeration uses `D_{g,1}` as the canonical baseline (so the baseline observation must exist) and the first-switch detection walks adjacent observed periods (so interior gaps create ambiguous transition counts). Terminal missingness is supported because the per-period `present = (N_mat[:, t] > 0) & (N_mat[:, t-1] > 0)` guard appears at three sites in the variance computation (`_compute_per_period_dids`, `_compute_full_per_group_contributions`, `_compute_cohort_recentered_inputs`) and cleanly masks out missing transitions without propagating NaN into the arithmetic. **Workaround for unbalanced panels:** pre-process your data to back-fill the baseline (or drop late-entry groups before fitting), or use R `DIDmultiplegtDYN` until a future phase lifts the restriction. The Step 5b `ValueError` and `UserWarning` messages name the offending group IDs so you can locate them quickly. -- **Note (Phase 3 DID^X covariate adjustment):** Implements the residualization-style covariate adjustment from Web Appendix Section 1.2 (Assumption 11). For each baseline treatment value `d`, estimates `theta_hat_d` via OLS of first-differenced outcomes on first-differenced covariates with time FEs, restricted to not-yet-treated observations. Residualizes at levels: `Y_tilde[g,t] = Y[g,t] - X[g,t] @ theta_hat_d`. All downstream DID computations use residualized outcomes. This is NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. 
Plug-in IF (treating `theta_hat` as fixed) is valid by FWL theorem. Requires `L_max >= 1`. Activated via `controls=["col1", "col2"]` in `fit()`. +- **Note (Phase 3 DID^X covariate adjustment):** When `controls` is set, `per_period_effects` (the Phase 1 per-period DID_M decomposition) remains **unadjusted** (computed on raw outcomes). The covariate residualization applies only to the per-group `DID_{g,l}` path (`L_max >= 1`), which produces `event_study_effects` and `overall_att`. This means `per_period_effects` and `event_study_effects[1]` may diverge when controls are active - by design (the per-period path uses binary joiner/leaver categorization and is not part of the DID^X contract). Implements the residualization-style covariate adjustment from Web Appendix Section 1.2 (Assumption 11). For each baseline treatment value `d`, estimates `theta_hat_d` via OLS of first-differenced outcomes on first-differenced covariates with time FEs, restricted to not-yet-treated observations. Residualizes at levels: `Y_tilde[g,t] = Y[g,t] - X[g,t] @ theta_hat_d`. All downstream per-group `DID_{g,l}` computations use residualized outcomes; the per-period path does not. This is NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. Plug-in IF (treating `theta_hat` as fixed) is valid by FWL theorem. Requires `L_max >= 1`. Activated via `controls=["col1", "col2"]` in `fit()`. - **Note (Phase 3 DID^{fd} linear trends):** Implements group-specific linear trends from Web Appendix Section 1.3 (Assumption 12, Lemma 6). Uses the Z_mat transformation: `Z[g,t] = Y[g,t] - Y[g,t-1]` (first-differenced outcomes). Since `DID_{g,l}(Z) = DID^{fd}_{g,l}` algebraically, the existing multi-horizon DID code produces trend-adjusted estimates when fed Z_mat. Requires F_g >= 3 (at least 2 pre-switch periods); groups with F_g < 3 are excluded with a `UserWarning`. Cumulated level effects `delta^{fd}_l = sum_{l'=1}^l DID^{fd}_{l'}` stored in `results.linear_trends_effects`. 
Cumulated SE uses conservative upper bound (sum of per-horizon SEs); cross-horizon covariance from IF vectors is a library extension (paper proves Theorem 1 per-horizon, not cross-horizon). When combined with DID^X, residualization is applied first, then first-differencing (per paper assumption ordering). Activated via `trends_linear=True` in `fit()`. diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index a9f646ad..d5c4214b 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -2421,6 +2421,36 @@ def test_controls_with_multi_horizon(self): assert np.isfinite(r.event_study_effects[h]["effect"]) assert np.isfinite(r.event_study_effects[h]["se"]) + def test_controls_lmax1_estimand_contract(self): + """DID^X with L_max=1: per_period_effects stay raw, overall uses DID^X_1.""" + df = self._make_panel_with_covariates() + est = ChaisemartinDHaultfoeuille(seed=1) + + # Fit without controls for raw per-period baseline + r_raw = est.fit(df, "outcome", "group", "period", "treatment") + # Fit with controls + r_x = est.fit( + df, "outcome", "group", "period", "treatment", + controls=["X1"], L_max=1, + ) + + # per_period_effects should be UNADJUSTED (raw Phase 1 DID_M) + # because the per-period path does not support covariate adjustment + for period_key in r_raw.per_period_effects: + if period_key in r_x.per_period_effects: + raw_eff = r_raw.per_period_effects[period_key] + x_eff = r_x.per_period_effects[period_key] + assert raw_eff["did_plus_t"] == pytest.approx( + x_eff["did_plus_t"], abs=1e-10 + ), f"per_period_effects should be unadjusted at period {period_key}" + + # overall_att should come from event_study_effects[1] (DID^X_1) + assert r_x.overall_att == pytest.approx( + r_x.event_study_effects[1]["effect"], abs=1e-10 + ) + # and should differ from the raw overall_att (covariate effect) + assert r_x.overall_att != r_raw.overall_att + class TestLinearTrends: """DID^{fd} 
group-specific linear trends (ROADMAP item 3b).""" From 2c6cabfd5922da4358ea627b4ff837825a5fd852 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 13:06:24 -0400 Subject: [PATCH 03/17] Fix CI review Round 1: NaN-consistency, overall surface, rank_deficient_action P0: Cumulated DID^{fd} SE now requires ALL component SEs to be finite; non-finite SE at any horizon propagates NaN (was silently dropped). P1: trends_linear + L_max>=2 overall surface now reports cumulated level effects from linear_trends_effects (was second-difference delta). cost_benefit_delta suppressed under trends_linear (meaningless on second-differences). P2: rank_deficient_action threaded through _compute_covariate_residualization and _compute_heterogeneity_test (was hardcoded "warn"). P3: fit() docstrings updated for controls, trends_linear, trends_nonparam (were stale "Reserved for Phase 3" text). Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 62 +++++++++++++++++------ tests/test_chaisemartin_dhaultfoeuille.py | 52 +++++++++++++++++++ 2 files changed, 99 insertions(+), 15 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 27bdaf4f..a1923f76 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -525,14 +525,18 @@ def fit( Must be a positive integer not exceeding the number of post-baseline periods in the panel. controls : list of str, optional - **Reserved for Phase 3** (covariate adjustment via the - residualization-style ``DID^X`` from Web Appendix Section 1.2 - of the dynamic paper). + Column names for covariate adjustment via residualization-style + ``DID^X`` (Web Appendix Section 1.2). Requires ``L_max >= 1``. + One ``theta_hat`` per baseline treatment value, estimated by + OLS on not-yet-treated observations. NOT doubly-robust. 
trends_linear : bool, optional - **Reserved for Phase 3** (group-specific linear trends via - ``DID^{fd}``). - trends_nonparam : Any, optional - **Reserved for Phase 3** (state-set-specific trends). + If ``True``, estimate group-specific linear trends via + ``DID^{fd}`` (Web Appendix Section 1.3, Lemma 6). Requires + ``L_max >= 1`` and at least 3 time periods. + trends_nonparam : str, optional + Column name for state-set membership. Restricts the control + pool to groups in the same set (Web Appendix Section 1.4). + Requires ``L_max >= 1`` and time-invariant values per group. honest_did : bool, default=False **Reserved for Phase 3** (HonestDiD integration on placebos). survey_design : Any, optional @@ -959,6 +963,7 @@ def fit( N_mat=N_mat, baselines=baselines, first_switch_idx=first_switch_idx_arr, + rank_deficient_action=self.rank_deficient_action, ) # Keep raw Y_mat for the per-period DID path (which does not # support covariate residualization - it uses binary joiner/leaver @@ -2073,12 +2078,20 @@ def fit( cum_effect = float( np.sum(S_arr[eligible] * running_per_group[eligible]) / N_l ) - # SE: conservative upper bound (sum of per-horizon SEs) - running_se_ub = sum( - event_study_effects.get(ll, {}).get("se", 0.0) - for ll in range(1, l_h + 1) - if np.isfinite(event_study_effects.get(ll, {}).get("se", np.nan)) - ) if event_study_effects is not None else float("nan") + # SE: conservative upper bound (sum of per-horizon SEs). + # NaN-consistency: if ANY component SE up to horizon l is + # non-finite, the cumulated SE is NaN (not 0.0). 
+ if event_study_effects is not None: + component_ses = [ + event_study_effects.get(ll, {}).get("se", np.nan) + for ll in range(1, l_h + 1) + ] + if all(np.isfinite(s) for s in component_ses): + running_se_ub = sum(component_ses) + else: + running_se_ub = float("nan") + else: + running_se_ub = float("nan") cum_t, cum_p, cum_ci = safe_inference( cum_effect, running_se_ub, alpha=self.alpha, df=None ) @@ -2091,6 +2104,22 @@ def fit( } linear_trends_effects = cumulated if cumulated else None + # When trends_linear=True and L_max>=2, suppress cost_benefit_delta + # (which is computed on second-differences) and set overall_* from + # the cumulated level effects instead. This prevents the results + # surface from labeling a second-difference aggregate as delta^{fd} + # (a level-effect estimand). + if _is_trends_linear and L_max is not None and L_max >= 2: + cost_benefit_result = None + if linear_trends_effects: + max_h = max(linear_trends_effects.keys()) + lt = linear_trends_effects[max_h] + effective_overall_att = lt["effect"] + effective_overall_se = lt["se"] + effective_overall_t = lt["t_stat"] + effective_overall_p = lt["p_value"] + effective_overall_ci = lt["conf_int"] + # ------------------------------------------------------------------ # Heterogeneity testing (Web Appendix Section 1.5, Lemma 7) # ------------------------------------------------------------------ @@ -2130,6 +2159,7 @@ def fit( X_het=X_het, L_max=L_max, alpha=self.alpha, + rank_deficient_action=self.rank_deficient_action, ) twfe_weights_df = None @@ -2634,6 +2664,7 @@ def _compute_covariate_residualization( N_mat: np.ndarray, baselines: np.ndarray, first_switch_idx: np.ndarray, + rank_deficient_action: str = "warn", ) -> Tuple[np.ndarray, Dict[str, Any]]: """Residualize outcomes by partialling out covariates per baseline treatment. 
@@ -2750,7 +2781,7 @@ def _compute_covariate_residualization( design, dY, return_vcov=True, - rank_deficient_action="warn", + rank_deficient_action=rank_deficient_action, ) # Extract covariate coefficients (first n_covariates entries) @@ -2837,6 +2868,7 @@ def _compute_heterogeneity_test( X_het: np.ndarray, L_max: int, alpha: float = 0.05, + rank_deficient_action: str = "warn", ) -> Dict[int, Dict[str, Any]]: """Test for heterogeneous treatment effects (Web Appendix Section 1.5). @@ -2938,7 +2970,7 @@ def _compute_heterogeneity_test( coefs, _residuals, vcov = solve_ols( design, dep_arr, return_vcov=True, - rank_deficient_action="warn", + rank_deficient_action=rank_deficient_action, ) beta_het = float(coefs[0]) diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index d5c4214b..b41cd879 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -2550,6 +2550,58 @@ def test_trends_with_covariates(self): assert r.covariate_residuals is not None assert r.linear_trends_effects is not None + def test_trends_linear_lmax2_overall_surface(self): + """Overall surface under trends_linear + L_max>=2 uses cumulated level effects.""" + df = self._make_panel_with_trends() + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=3, trends_linear=True, + ) + # overall_att should equal the cumulated level effect at max horizon + assert r.linear_trends_effects is not None + max_h = max(r.linear_trends_effects.keys()) + cum_effect = r.linear_trends_effects[max_h]["effect"] + assert r.overall_att == pytest.approx(cum_effect, abs=1e-10) + # cost_benefit_delta should be suppressed (not computed on second-diffs) + assert r.cost_benefit_delta is None + + def test_cumulated_se_nan_propagation(self): + """Cumulated SE is NaN when a component horizon has NaN SE.""" + # Create a panel where horizon 2 has no eligible switchers (NaN SE) + # but horizon 1 
does. The cumulated effect at h=2 should have NaN SE. + rng = np.random.RandomState(77) + rows = [] + for g in range(30): + group_fe = rng.normal(0, 1) + # Groups 0-9: switch at period 3 (enough pre-switch for trends) + # Groups 10-19: never switch (controls) + # Groups 20-29: switch at period 4 (only 1 post-switch period) + if g < 10: + switch_t = 3 + elif g < 20: + switch_t = 99 + else: + switch_t = 4 + for t in range(5): + d = 1 if t >= switch_t else 0 + y = group_fe + t + 3 * d + rng.normal(0, 0.3) + rows.append({"group": g, "period": t, "treatment": d, "outcome": y}) + df = pd.DataFrame(rows) + r = ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, trends_linear=True, + ) + # If SE at horizon 1 is finite but horizon 2 is NaN, + # cumulated h=2 SE must be NaN (not 0.0) + if r.linear_trends_effects is not None and 2 in r.linear_trends_effects: + cum_se = r.linear_trends_effects[2]["se"] + es = r.event_study_effects + if es and 2 in es and not np.isfinite(es[2]["se"]): + assert not np.isfinite(cum_se), ( + f"Cumulated SE should be NaN when component h=2 SE is NaN, " + f"got {cum_se}" + ) + class TestStateSetTrends: """State-set-specific trends (ROADMAP item 3c).""" From 6f1c99fa5bb81c2cf04146a6f6206fc48cca037f Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 13:21:31 -0400 Subject: [PATCH 04/17] Fix CI review Round 2: NaN overall for trends, reject controls+heterogeneity P1: trends_linear + L_max>=2 overall_* is now NaN (R does not compute an aggregate in trends_lin mode). Cumulated effects available via results.linear_trends_effects[l]. P1: heterogeneity + controls now raises ValueError (matching R's predict_het which disallows controls). REGISTRY documents heterogeneity as partial implementation (post-treatment only, no placebo regressions or joint null test). P3: Added fit() docstrings for heterogeneity and design2 parameters. P3: Updated to_dataframe() error text with new level names. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 40 +++++++++++++------ .../chaisemartin_dhaultfoeuille_results.py | 3 +- docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 33 +++++++++++---- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index a1923f76..a3e2c2a2 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -539,6 +539,17 @@ def fit( Requires ``L_max >= 1`` and time-invariant values per group. honest_did : bool, default=False **Reserved for Phase 3** (HonestDiD integration on placebos). + heterogeneity : str, optional + Column name for a time-invariant covariate to test for + heterogeneous effects (Web Appendix Section 1.5, Lemma 7). + Partial implementation: post-treatment regressions only + (no placebo regressions or joint null test). Cannot be + combined with ``controls``. Requires ``L_max >= 1``. + design2 : bool, default=False + If ``True``, identify and report switch-in/switch-out + (Design-2) groups. Convenience wrapper (descriptive summary, + not full paper re-estimation). Requires + ``drop_larger_lower=False`` to retain 2-switch groups. survey_design : Any, optional **Not supported in any phase.** Survey design integration is handled as a separate effort after all three phases ship. @@ -2105,20 +2116,17 @@ def fit( linear_trends_effects = cumulated if cumulated else None # When trends_linear=True and L_max>=2, suppress cost_benefit_delta - # (which is computed on second-differences) and set overall_* from - # the cumulated level effects instead. This prevents the results - # surface from labeling a second-difference aggregate as delta^{fd} - # (a level-effect estimand). + # and NaN out the overall_* surface. 
R's did_multiplegt_dyn with + # trends_lin=TRUE does not compute an aggregate "average total + # effect" - users should access cumulated level effects via + # results.linear_trends_effects[l] instead. if _is_trends_linear and L_max is not None and L_max >= 2: cost_benefit_result = None - if linear_trends_effects: - max_h = max(linear_trends_effects.keys()) - lt = linear_trends_effects[max_h] - effective_overall_att = lt["effect"] - effective_overall_se = lt["se"] - effective_overall_t = lt["t_stat"] - effective_overall_p = lt["p_value"] - effective_overall_ci = lt["conf_int"] + effective_overall_att = float("nan") + effective_overall_se = float("nan") + effective_overall_t = float("nan") + effective_overall_p = float("nan") + effective_overall_ci = (float("nan"), float("nan")) # ------------------------------------------------------------------ # Heterogeneity testing (Web Appendix Section 1.5, Lemma 7) @@ -2130,6 +2138,14 @@ def fit( raise ValueError( f"heterogeneity column {het_col!r} not found in data." ) + # R's predict_het disallows controls; our partial implementation + # follows this restriction to avoid inconsistent behavior. + if controls is not None: + raise ValueError( + "heterogeneity cannot be combined with controls. " + "R's did_multiplegt_dyn disallows predict_het with " + "controls; remove one of the two options." + ) # Extract per-group covariate (must be time-invariant) het_per_group = data.groupby(group)[het_col].nunique() het_varying = het_per_group[het_per_group > 1] diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 183fe2b3..c29e31a5 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -1054,7 +1054,8 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: else: raise ValueError( f"Unknown level: {level!r}. 
Use 'overall', 'joiners_leavers', " - f"'per_period', 'event_study', 'normalized', or 'twfe_weights'." + f"'per_period', 'event_study', 'normalized', 'twfe_weights', " + f"'heterogeneity', or 'linear_trends'." ) diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index aa9f4fda..13b3922f 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -615,7 +615,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 state-set trends):** Implements state-set-specific trends from Web Appendix Section 1.4 (Assumptions 13-14). Restricts the control pool for each switcher to groups in the same set (e.g., same state in county-level data). The restriction applies in BOTH `_compute_multi_horizon_dids()` (point estimates) and `_compute_per_group_if_multi_horizon()` (influence functions) to ensure IF consistency. Cohort structure stays as `(D_{g,1}, F_g, S_g)` triples (does not incorporate set membership). Set membership must be time-invariant per group. Activated via `trends_nonparam="state_column"` in `fit()`. -- **Note (Phase 3 heterogeneity testing):** Implements the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Saturated OLS regression of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid - the paper shows no need to account for DID estimation error. Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. +- **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). 
**Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. Combination with `controls` is rejected (matching R). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. The paper notes Design-2 can be implemented by "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index b41cd879..0d6929f0 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -2546,24 +2546,31 @@ def test_trends_with_covariates(self): df, "outcome", "group", "period", "treatment", controls=["X1"], L_max=2, trends_linear=True, ) - assert np.isfinite(r.overall_att) + # overall_att is NaN for trends + L_max>=2 (no aggregate) + assert np.isnan(r.overall_att) assert r.covariate_residuals is not None assert r.linear_trends_effects is not None def test_trends_linear_lmax2_overall_surface(self): - """Overall surface under trends_linear + L_max>=2 uses cumulated level effects.""" + """Under trends_linear + L_max>=2, overall_* is NaN (no aggregate). + + R's did_multiplegt_dyn with trends_lin=TRUE does not compute an + aggregate average total effect. Cumulated level effects are + available via results.linear_trends_effects[l]. 
+ """ df = self._make_panel_with_trends() r = ChaisemartinDHaultfoeuille(seed=1).fit( df, "outcome", "group", "period", "treatment", L_max=3, trends_linear=True, ) - # overall_att should equal the cumulated level effect at max horizon - assert r.linear_trends_effects is not None - max_h = max(r.linear_trends_effects.keys()) - cum_effect = r.linear_trends_effects[max_h]["effect"] - assert r.overall_att == pytest.approx(cum_effect, abs=1e-10) - # cost_benefit_delta should be suppressed (not computed on second-diffs) + # overall_* should be NaN (not computed in trends mode) + assert np.isnan(r.overall_att) + assert np.isnan(r.overall_se) + # cost_benefit_delta suppressed assert r.cost_benefit_delta is None + # Cumulated effects still available + assert r.linear_trends_effects is not None + assert len(r.linear_trends_effects) >= 1 def test_cumulated_se_nan_propagation(self): """Cumulated SE is NaN when a component horizon has NaN SE.""" @@ -2755,6 +2762,16 @@ def test_heterogeneity_missing_column(self): L_max=1, heterogeneity="nonexistent", ) + def test_heterogeneity_rejects_controls(self): + """heterogeneity + controls raises ValueError (matching R predict_het).""" + df = self._make_panel_with_het() + df["X1"] = np.random.RandomState(42).normal(0, 1, len(df)) + with pytest.raises(ValueError, match="cannot be combined with controls"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=1, heterogeneity="het_x", controls=["X1"], + ) + class TestDesign2: """Design-2 switch-in/switch-out separation (ROADMAP item 3e).""" From 3f7e72c0fac612bdb456e2506372930acb1599f2 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 13:35:17 -0400 Subject: [PATCH 05/17] Fix CI review Round 3: validation guards for het/design2 interactions P1: heterogeneity without L_max now raises ValueError (was silent no-op). 
P1: heterogeneity rejects trends_linear and trends_nonparam (would produce coefficients for wrong estimand since het test uses raw level changes). P1: design2=True with drop_larger_lower=True now raises ValueError (the 2-switch groups Design-2 needs are dropped by the default filter). P3: NaN overall row under trends_linear+L_max>=2 now labeled as "DID^{fd}_l (see linear_trends_effects)" instead of "Cost-Benefit Delta". Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 31 +++++++++++++- .../chaisemartin_dhaultfoeuille_results.py | 5 +++ tests/test_chaisemartin_dhaultfoeuille.py | 40 ++++++++++++++++++- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index a3e2c2a2..79584f56 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -601,6 +601,16 @@ def fit( "CallawaySantAnna which supports survey_design." ) + # Design-2 precondition: requires drop_larger_lower=False + if design2 and self.drop_larger_lower: + raise ValueError( + "design2=True requires drop_larger_lower=False because " + "Design-2 groups have exactly 2 treatment changes (join " + "then leave), which are dropped by the default " + "drop_larger_lower=True filter. Construct the estimator " + "with ChaisemartinDHaultfoeuille(drop_larger_lower=False)." + ) + # ------------------------------------------------------------------ # Step 4-5: Validate input + aggregate to (g, t) cells via the # shared helper used by both fit() and twowayfeweights(). 
The @@ -2132,7 +2142,12 @@ def fit( # Heterogeneity testing (Web Appendix Section 1.5, Lemma 7) # ------------------------------------------------------------------ heterogeneity_effects: Optional[Dict[int, Dict[str, Any]]] = None - if heterogeneity is not None and L_max is not None and L_max >= 1: + if heterogeneity is not None: + if L_max is None: + raise ValueError( + "heterogeneity testing requires L_max >= 1. Set L_max " + "to use the per-group DID_{g,l} path." + ) het_col = str(heterogeneity) if het_col not in data.columns: raise ValueError( @@ -2146,6 +2161,20 @@ def fit( "R's did_multiplegt_dyn disallows predict_het with " "controls; remove one of the two options." ) + if _is_trends_linear: + raise ValueError( + "heterogeneity cannot be combined with trends_linear. " + "The heterogeneity test operates on level outcome " + "changes but trends_linear uses second-differenced " + "outcomes; the results would be inconsistent." + ) + if trends_nonparam is not None: + raise ValueError( + "heterogeneity cannot be combined with trends_nonparam. " + "The heterogeneity test does not thread state-set " + "control-pool restrictions; the results would be " + "inconsistent with the fitted estimator." + ) # Extract per-group covariate (must be time-invariant) het_per_group = data.groupby(group)[het_col].nunique() het_varying = het_per_group[het_per_group > 1] diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index c29e31a5..dfd5c78a 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -423,6 +423,11 @@ def _estimand_label(self) -> str: has_controls = self.covariate_residuals is not None has_trends = self.linear_trends_effects is not None + # When trends_linear + L_max>=2, overall is NaN (no aggregate). + # Label reflects that per-horizon effects are in linear_trends_effects. 
+ if has_trends and self.L_max is not None and self.L_max >= 2: + return "DID^{fd}_l (see linear_trends_effects)" + if self.L_max is not None and self.L_max >= 2: base = "delta" elif self.L_max is not None and self.L_max == 1: diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 0d6929f0..5e1c59be 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -2772,6 +2772,34 @@ def test_heterogeneity_rejects_controls(self): L_max=1, heterogeneity="het_x", controls=["X1"], ) + def test_heterogeneity_requires_lmax(self): + """heterogeneity without L_max raises ValueError.""" + df = self._make_panel_with_het() + with pytest.raises(ValueError, match="requires L_max >= 1"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + heterogeneity="het_x", + ) + + def test_heterogeneity_rejects_trends_linear(self): + """heterogeneity + trends_linear raises ValueError.""" + df = self._make_panel_with_het() + with pytest.raises(ValueError, match="cannot be combined with trends_linear"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=2, heterogeneity="het_x", trends_linear=True, + ) + + def test_heterogeneity_rejects_trends_nonparam(self): + """heterogeneity + trends_nonparam raises ValueError.""" + df = self._make_panel_with_het() + df["state"] = df["group"] % 3 + with pytest.raises(ValueError, match="cannot be combined with trends_nonparam"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=1, heterogeneity="het_x", trends_nonparam="state", + ) + class TestDesign2: """Design-2 switch-in/switch-out separation (ROADMAP item 3e).""" @@ -2822,7 +2850,8 @@ def test_design2_no_eligible(self): y = 10 + 2 * t + 5 * d + rng.normal(0, 0.5) rows.append({"group": g, "period": t, "treatment": d, "outcome": y}) df = pd.DataFrame(rows) - r = 
ChaisemartinDHaultfoeuille(seed=1).fit( + # drop_larger_lower=False required for design2=True + r = ChaisemartinDHaultfoeuille(seed=1, drop_larger_lower=False).fit( df, "outcome", "group", "period", "treatment", L_max=1, design2=True, ) @@ -2836,6 +2865,15 @@ def test_design2_disabled_by_default(self): ) assert r.design2_effects is None + def test_design2_rejects_drop_larger_lower(self): + """design2=True with default drop_larger_lower=True raises ValueError.""" + df = self._make_join_then_leave_panel() + with pytest.raises(ValueError, match="drop_larger_lower=False"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=1, design2=True, + ) + class TestNonBinaryTreatment: """Non-binary treatment support (ROADMAP item 3f).""" From 357a551a5a3df0478512a40beb887efe5d88e48e Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 13:49:47 -0400 Subject: [PATCH 06/17] Fix CI review Round 4: add intercepts to OLS regressions, NaN guard P0: DID^X first-stage OLS now includes intercept when dropping one time dummy as reference (was forcing omitted period FE to zero). theta_hat extraction updated to skip intercept at index 0. P0: Heterogeneity regression now includes intercept when dropping one cohort dummy as reference (was forcing omitted cohort mean to zero). beta_het extraction updated to read index 1 instead of 0. P1: DID^X now guards against NaN control coefficients from rank-deficient OLS. If any theta_hat entry is NaN, residualization is skipped for that baseline with a UserWarning. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 45 ++++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 79584f56..f6614959 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -2810,16 +2810,20 @@ def _compute_covariate_residualization( else: valid_t_finite = valid_t - # Build time FE dummies (drop first unique period as reference) + # Build design: [intercept, dX, time_dummies (reference dropped)] + # The intercept is required when dropping one time dummy as + # reference category; without it the omitted period's FE is + # forced to zero, biasing theta_hat. + intercept = np.ones((n_obs, 1)) unique_t = np.unique(valid_t_finite) n_time_fe = len(unique_t) - 1 if n_time_fe > 0: time_dummies = np.zeros((n_obs, n_time_fe)) for i, t_val in enumerate(unique_t[1:]): time_dummies[:, i] = (valid_t_finite == t_val).astype(float) - design = np.hstack([dX, time_dummies]) + design = np.hstack([intercept, dX, time_dummies]) else: - design = dX + design = np.hstack([intercept, dX]) # OLS: dY = [dX, time_FE] @ beta + epsilon coefs, residuals, _vcov = solve_ols( @@ -2829,8 +2833,9 @@ def _compute_covariate_residualization( rank_deficient_action=rank_deficient_action, ) - # Extract covariate coefficients (first n_covariates entries) - theta_hat = coefs[:n_covariates] + # Extract covariate coefficients (indices 1..n_covariates; + # index 0 is the intercept) + theta_hat = coefs[1:1 + n_covariates] # R-squared of first-stage regression ss_res = float(np.sum(residuals**2)) @@ -2843,6 +2848,19 @@ def _compute_covariate_residualization( "r_squared": r_squared, } + # Guard: if any control coefficient is NaN (rank-deficient OLS + # dropped a collinear control), skip residualization for this + # baseline to prevent NaN propagation through Y_resid. 
+ if not np.all(np.isfinite(theta_hat)): + warnings.warn( + f"DID^X: rank-deficient first-stage OLS for baseline " + f"d={d_val} produced NaN coefficients. Outcomes for " + f"groups with this baseline are not residualized.", + UserWarning, + stacklevel=3, + ) + continue + # Residualize Y at levels for all groups with this baseline. # Vectorized level residualization: Y_tilde[g, t] = Y[g, t] - X[g, t] @ theta_hat group_indices = np.where(d_mask)[0] @@ -2987,7 +3005,11 @@ def _compute_heterogeneity_test( dep_arr = np.array(dep_var) x_arr = np.array(x_vals).reshape(-1, 1) - # Cohort dummies (drop one as reference) + # Design: [intercept, X_g, cohort_dummies (reference dropped)] + # The intercept is required when dropping one cohort dummy as + # reference; without it the omitted cohort's mean is forced to + # zero, which biases beta^{het}_l. + intercept = np.ones((n_obs, 1)) unique_cohorts = sorted(set(cohort_keys)) n_cohort_dummies = len(unique_cohorts) - 1 if n_cohort_dummies > 0: @@ -2997,9 +3019,9 @@ def _compute_heterogeneity_test( cohort_dummies[np.arange(n_obs), cohort_idx] = 1.0 # Drop first cohort as reference cohort_dummies = cohort_dummies[:, 1:] - design = np.hstack([x_arr, cohort_dummies]) + design = np.hstack([intercept, x_arr, cohort_dummies]) else: - design = x_arr + design = np.hstack([intercept, x_arr]) # Guard: need more observations than parameters n_params = design.shape[1] @@ -3018,12 +3040,13 @@ def _compute_heterogeneity_test( rank_deficient_action=rank_deficient_action, ) - beta_het = float(coefs[0]) + # beta_het is at index 1 (index 0 is intercept) + beta_het = float(coefs[1]) # NaN-safe: if vcov is None or target coefficient variance is NaN # (rank-deficient), all inference fields are NaN. 
se_het = float("nan") - if vcov is not None and np.isfinite(vcov[0, 0]) and vcov[0, 0] > 0: - se_het = float(np.sqrt(vcov[0, 0])) + if vcov is not None and np.isfinite(vcov[1, 1]) and vcov[1, 1] > 0: + se_het = float(np.sqrt(vcov[1, 1])) t_stat, p_val, ci = safe_inference(beta_het, se_het, alpha=alpha, df=None) results[l_h] = { From bc1dab76d3175c18152776658a22a427d5b4a98d Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 14:28:44 -0400 Subject: [PATCH 07/17] Fix CI review Round 5: partial theta_hat, coarser-than-group, het docs P1: DID^X rank-deficiency now residualizes with finite subset of theta_hat (zeroing NaN coefficients) instead of skipping entirely. P1: trends_nonparam now rejects set definitions that are not coarser than group (singleton sets have no within-set controls). P1: heterogeneity restrictions on trends_linear and trends_nonparam now documented in REGISTRY.md and fit() docstring. Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 35 +++++++++++++++++------ docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 10 +++++++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index f6614959..2b916305 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -544,7 +544,8 @@ def fit( heterogeneous effects (Web Appendix Section 1.5, Lemma 7). Partial implementation: post-treatment regressions only (no placebo regressions or joint null test). Cannot be - combined with ``controls``. Requires ``L_max >= 1``. + combined with ``controls``, ``trends_linear``, or + ``trends_nonparam``. Requires ``L_max >= 1``. design2 : bool, default=False If ``True``, identify and report switch-in/switch-out (Design-2) groups. Convenience wrapper (descriptive summary, @@ -1075,6 +1076,20 @@ def fit( f"{len(time_varying)} group(s) have varying values. 
" f"Examples: {time_varying.index.tolist()[:5]}" ) + # Set partition must be coarser than group (multiple groups + # per set). A group-level partition creates singleton sets + # with no within-set controls available. + set_map_check = data.groupby(group)[set_col].first() + n_sets = set_map_check.nunique() + n_groups_total = len(set_map_check) + if n_sets >= n_groups_total: + raise ValueError( + f"trends_nonparam column {set_col!r} defines " + f"{n_sets} distinct sets for {n_groups_total} " + f"groups. The set partition must be coarser than " + f"group (multiple groups per set) to provide " + f"within-set controls." + ) # Extract set membership per group aligned with all_groups set_map = data.groupby(group)[set_col].first() set_ids_arr = np.array( @@ -2848,18 +2863,22 @@ def _compute_covariate_residualization( "r_squared": r_squared, } - # Guard: if any control coefficient is NaN (rank-deficient OLS - # dropped a collinear control), skip residualization for this - # baseline to prevent NaN propagation through Y_resid. - if not np.all(np.isfinite(theta_hat)): + # Guard: if some control coefficients are NaN (rank-deficient + # OLS dropped collinear controls), residualize with only the + # finite subset. Replace NaN coefficients with 0 so einsum + # only uses the identified controls. + nan_mask = ~np.isfinite(theta_hat) + if nan_mask.any(): + n_dropped = int(nan_mask.sum()) warnings.warn( f"DID^X: rank-deficient first-stage OLS for baseline " - f"d={d_val} produced NaN coefficients. Outcomes for " - f"groups with this baseline are not residualized.", + f"d={d_val} dropped {n_dropped} collinear control(s). " + f"Residualization uses the {n_covariates - n_dropped} " + f"identified control(s).", UserWarning, stacklevel=3, ) - continue + theta_hat = np.where(np.isfinite(theta_hat), theta_hat, 0.0) # Residualize Y at levels for all groups with this baseline. 
# Vectorized level residualization: Y_tilde[g, t] = Y[g, t] - X[g, t] @ theta_hat diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 13b3922f..11ecb599 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -615,7 +615,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 state-set trends):** Implements state-set-specific trends from Web Appendix Section 1.4 (Assumptions 13-14). Restricts the control pool for each switcher to groups in the same set (e.g., same state in county-level data). The restriction applies in BOTH `_compute_multi_horizon_dids()` (point estimates) and `_compute_per_group_if_multi_horizon()` (influence functions) to ensure IF consistency. Cohort structure stays as `(D_{g,1}, F_g, S_g)` triples (does not incorporate set membership). Set membership must be time-invariant per group. Activated via `trends_nonparam="state_column"` in `fit()`. -- **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. Combination with `controls` is rejected (matching R). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. +- **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). 
Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. - **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. The paper notes Design-2 can be implemented by "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index 5e1c59be..e5e4e6be 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -2672,6 +2672,16 @@ def test_missing_set_column_raises(self): L_max=1, trends_nonparam="nonexistent", ) + def test_group_level_set_rejected(self): + """Set partition at group level (not coarser) raises ValueError.""" + df = self._make_panel_with_sets() + # Use group column itself as set (each group is its own set) + with pytest.raises(ValueError, match="coarser than group"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=1, trends_nonparam="group", + ) + def test_nonparam_with_covariates(self): """Combined state-set trends + covariates.""" df = self._make_panel_with_sets() From 000dc46f63ade3e2cc74e6a5b659504f0ba34ac3 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 14:57:00 -0400 Subject: [PATCH 08/17] Fix CI review Round 6: thread set_ids into placebos, small-sample guard P0: trends_nonparam set_ids now threaded into both placebo functions (_compute_multi_horizon_placebos and _compute_per_group_if_placebo_horizon) so placebo diagnostics use same-set controls when state-set trends active. P1: DID^X first-stage now guards n_obs < n_params before calling solve_ols. Skips residualization for that baseline with warning instead of crashing. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 2b916305..c3157361 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -1422,6 +1422,7 @@ def fit( switch_direction=switch_direction_arr, T_g=T_g_arr, L_max=L_max, + set_ids=set_ids_arr, ) # Surface placebo A11 warnings pl_a11 = multi_horizon_placebos.pop("_a11_warnings", None) @@ -1448,6 +1449,7 @@ def fit( switch_direction=switch_direction_arr, T_g=T_g_arr, L_max=L_max, + set_ids=set_ids_arr, ) # Per-placebo-horizon analytical SE via cohort recentering # (same pattern as positive-horizon SE at Step 12c). @@ -2840,6 +2842,24 @@ def _compute_covariate_residualization( else: design = np.hstack([intercept, dX]) + # Small-sample guard: skip if fewer obs than parameters + n_params = design.shape[1] + if n_obs < n_params: + diagnostics[float(d_val)] = { + "theta_hat": np.full(n_covariates, np.nan), + "n_obs": n_obs, + "r_squared": np.nan, + } + warnings.warn( + f"DID^X: baseline d={d_val} has {n_obs} not-yet-treated " + f"observations but {n_params} regressors. Cannot estimate " + f"covariate slopes. Outcomes for these groups are not " + f"residualized.", + UserWarning, + stacklevel=3, + ) + continue + # OLS: dY = [dX, time_FE] @ beta + epsilon coefs, residuals, _vcov = solve_ols( design, @@ -3572,6 +3592,7 @@ def _compute_per_group_if_placebo_horizon( switch_direction: np.ndarray, T_g: np.ndarray, L_max: int, + set_ids: Optional[np.ndarray] = None, ) -> Dict[int, np.ndarray]: """ Compute per-group influence function for placebo horizons. 
@@ -3639,6 +3660,9 @@ def _compute_per_group_if_placebo_horizon( & (N_mat[ctrl_indices, backward_idx] > 0) & (N_mat[ctrl_indices, forward_idx] > 0) ) + # State-set trends: restrict controls to same set + if set_ids is not None: + ctrl_mask &= set_ids[ctrl_indices] == set_ids[g] ctrl_pool = ctrl_indices[ctrl_mask] n_ctrl = ctrl_pool.size @@ -3667,6 +3691,7 @@ def _compute_multi_horizon_placebos( switch_direction: np.ndarray, T_g: np.ndarray, L_max: int, + set_ids: Optional[np.ndarray] = None, ) -> Dict[int, Dict[str, Any]]: """ Compute dynamic placebo estimators ``DID^{pl}_l`` for ``l = 1..L_pl_max``. @@ -3758,6 +3783,9 @@ def _compute_multi_horizon_placebos( & (N_mat[ctrl_indices, backward_idx] > 0) & (N_mat[ctrl_indices, forward_idx] > 0) ) + # State-set trends: restrict controls to same set + if set_ids is not None: + ctrl_mask &= set_ids[ctrl_indices] == set_ids[g] ctrl_pool = ctrl_indices[ctrl_mask] if ctrl_pool.size == 0: From 8fd2e604a92e930bd5cfed3243a7c844811c6c66 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 15:09:36 -0400 Subject: [PATCH 09/17] Fix CI review Round 7: failed DID^X strata excluded, event study labels P1: Failed DID^X first-stage strata (n_obs=0 or n_obs < n_params) now have outcomes set to NaN so they're excluded from downstream DID computation. Previously left unadjusted, mixing raw + adjusted. P1: Cell-weight deviation documented in REGISTRY (equal cell weights vs R's N_gt observation-count weights - same Phase 1 convention). P3: Event study rows in to_dataframe("event_study") now labeled as DID^X_h, DID^{fd}_h, DID^{X,fd}_h when adjustments active. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 19 +++++++++++++------ .../chaisemartin_dhaultfoeuille_results.py | 14 +++++++++++++- docs/methodology/REGISTRY.md | 2 +- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index c3157361..bf71c1c0 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -2796,11 +2796,15 @@ def _compute_covariate_residualization( "n_obs": 0, "r_squared": np.nan, } + # NaN out outcomes for failed strata so they're excluded + # from downstream DID computation (don't mix raw + adjusted). + group_indices = np.where(d_mask)[0] + Y_resid[group_indices, :] = np.nan warnings.warn( f"No not-yet-treated observations for baseline treatment " - f"d={d_val}. Cannot estimate covariate slope theta_hat " - f"for this baseline. Outcomes for these groups are not " - f"residualized.", + f"d={d_val}. Cannot estimate covariate slope theta_hat. " + f"Groups with this baseline are excluded from the " + f"covariate-adjusted estimation.", UserWarning, stacklevel=3, ) @@ -2850,11 +2854,14 @@ def _compute_covariate_residualization( "n_obs": n_obs, "r_squared": np.nan, } + # NaN out outcomes for failed strata (don't mix raw + adjusted) + group_indices_fail = np.where(d_mask)[0] + Y_resid[group_indices_fail, :] = np.nan warnings.warn( f"DID^X: baseline d={d_val} has {n_obs} not-yet-treated " - f"observations but {n_params} regressors. Cannot estimate " - f"covariate slopes. Outcomes for these groups are not " - f"residualized.", + f"observations but {n_params} regressors. 
Groups with " + f"this baseline are excluded from covariate-adjusted " + f"estimation.", UserWarning, stacklevel=3, ) diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index dfd5c78a..15b7b4a3 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -418,6 +418,18 @@ class ChaisemartinDHaultfoeuilleResults: # Repr / properties # ------------------------------------------------------------------ + def _horizon_label(self, h: int) -> str: + """Return per-horizon estimand label for event study rows.""" + has_controls = self.covariate_residuals is not None + has_trends = self.linear_trends_effects is not None + if has_controls and has_trends: + return f"DID^{{X,fd}}_{h}" + elif has_controls: + return f"DID^X_{h}" + elif has_trends: + return f"DID^{{fd}}_{h}" + return f"DID_{h}" + def _estimand_label(self) -> str: """Return the estimand label based on active features.""" has_controls = self.covariate_residuals is not None @@ -991,7 +1003,7 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: rows.append( { "horizon": h, - "estimand": f"DID_{h}", + "estimand": self._horizon_label(h), "effect": entry["effect"], "se": entry["se"], "t_stat": entry["t_stat"], diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 11ecb599..b4f79e07 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -609,7 +609,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (deviation from R DIDmultiplegtDYN):** Phase 1 requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. 
The Step 5b validation in `fit()` enforces this contract: groups missing the baseline raise `ValueError`; groups with interior gaps are dropped with a `UserWarning`; groups with **terminal missingness** (early exit / right-censoring — observed at the baseline but missing one or more later periods) are retained and contribute from their observed periods only. R `DIDmultiplegtDYN` accepts unbalanced panels with documented missing-treatment-before-first-switch handling. Python's restriction is a Phase 1 limitation: the cohort enumeration uses `D_{g,1}` as the canonical baseline (so the baseline observation must exist) and the first-switch detection walks adjacent observed periods (so interior gaps create ambiguous transition counts). Terminal missingness is supported because the per-period `present = (N_mat[:, t] > 0) & (N_mat[:, t-1] > 0)` guard appears at three sites in the variance computation (`_compute_per_period_dids`, `_compute_full_per_group_contributions`, `_compute_cohort_recentered_inputs`) and cleanly masks out missing transitions without propagating NaN into the arithmetic. **Workaround for unbalanced panels:** pre-process your data to back-fill the baseline (or drop late-entry groups before fitting), or use R `DIDmultiplegtDYN` until a future phase lifts the restriction. The Step 5b `ValueError` and `UserWarning` messages name the offending group IDs so you can locate them quickly. -- **Note (Phase 3 DID^X covariate adjustment):** When `controls` is set, `per_period_effects` (the Phase 1 per-period DID_M decomposition) remains **unadjusted** (computed on raw outcomes). The covariate residualization applies only to the per-group `DID_{g,l}` path (`L_max >= 1`), which produces `event_study_effects` and `overall_att`. This means `per_period_effects` and `event_study_effects[1]` may diverge when controls are active - by design (the per-period path uses binary joiner/leaver categorization and is not part of the DID^X contract). 
Implements the residualization-style covariate adjustment from Web Appendix Section 1.2 (Assumption 11). For each baseline treatment value `d`, estimates `theta_hat_d` via OLS of first-differenced outcomes on first-differenced covariates with time FEs, restricted to not-yet-treated observations. Residualizes at levels: `Y_tilde[g,t] = Y[g,t] - X[g,t] @ theta_hat_d`. All downstream DID computations use residualized outcomes. This is NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. Plug-in IF (treating `theta_hat` as fixed) is valid by FWL theorem. Requires `L_max >= 1`. Activated via `controls=["col1", "col2"]` in `fit()`. +- **Note (Phase 3 DID^X covariate adjustment):** When `controls` is set, `per_period_effects` (the Phase 1 per-period DID_M decomposition) remains **unadjusted** (computed on raw outcomes). The covariate residualization applies only to the per-group `DID_{g,l}` path (`L_max >= 1`), which produces `event_study_effects` and `overall_att`. This means `per_period_effects` and `event_study_effects[1]` may diverge when controls are active - by design (the per-period path uses binary joiner/leaver categorization and is not part of the DID^X contract). Implements the residualization-style covariate adjustment from Web Appendix Section 1.2 (Assumption 11). For each baseline treatment value `d`, estimates `theta_hat_d` via OLS of first-differenced outcomes on first-differenced covariates with time FEs, restricted to not-yet-treated observations. Residualizes at levels: `Y_tilde[g,t] = Y[g,t] - X[g,t] @ theta_hat_d`. All downstream DID computations use residualized outcomes. This is NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. Plug-in IF (treating `theta_hat` as fixed) is valid by FWL theorem. **Deviation from R `DIDmultiplegtDYN`:** The first-stage OLS uses equal cell weights (one observation per `(g,t)` cell), consistent with the library's cell-count weighting convention documented in Phase 1. 
R weights by `N_gt` (observation count per cell). On panels with 1 observation per cell (the common case), results are identical. When baseline-specific first stages fail (`n_obs = 0` or `n_obs < n_params`), the affected strata are excluded from the estimation (outcomes set to NaN) rather than retained unadjusted - matching R's "drop failed strata" behavior. Requires `L_max >= 1`. Activated via `controls=["col1", "col2"]` in `fit()`. - **Note (Phase 3 DID^{fd} linear trends):** Implements group-specific linear trends from Web Appendix Section 1.3 (Assumption 12, Lemma 6). Uses the Z_mat transformation: `Z[g,t] = Y[g,t] - Y[g,t-1]` (first-differenced outcomes). Since `DID_{g,l}(Z) = DID^{fd}_{g,l}` algebraically, the existing multi-horizon DID code produces trend-adjusted estimates when fed Z_mat. Requires F_g >= 3 (at least 2 pre-switch periods); groups with F_g < 3 are excluded with a `UserWarning`. Cumulated level effects `delta^{fd}_l = sum_{l'=1}^l DID^{fd}_{l'}` stored in `results.linear_trends_effects`. Cumulated SE uses conservative upper bound (sum of per-horizon SEs); cross-horizon covariance from IF vectors is a library extension (paper proves Theorem 1 per-horizon, not cross-horizon). When combined with DID^X, residualization is applied first, then first-differencing (per paper assumption ordering). Activated via `trends_linear=True` in `fit()`. From d087f2152706b0d2e14718b5ccaab50d21640f31 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 15:29:03 -0400 Subject: [PATCH 10/17] Fix CI review Round 8: exclude failed DID^X strata from N_mat, fix labels P1: Failed DID^X first-stage strata now have N_mat zeroed out (not just Y_mat NaN'd). The downstream eligibility checks (N_mat[g,idx] > 0) in all DID/IF/placebo functions naturally exclude these groups from N_l/N_pl_l, preventing NaN poisoning of otherwise estimable horizons. P3: _estimand_label() now returns DID^{X,fd}_l (not DID^{fd}_l) when both controls and trends_linear active with L_max>=2. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 28 +++++++++++++------ .../chaisemartin_dhaultfoeuille_results.py | 2 ++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index bf71c1c0..c33c7eaf 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -979,14 +979,23 @@ def fit( ) _switch_metadata_computed = True - Y_mat_residualized, covariate_diagnostics = _compute_covariate_residualization( - Y_mat=Y_mat, - X_cell=X_cell, - N_mat=N_mat, - baselines=baselines, - first_switch_idx=first_switch_idx_arr, - rank_deficient_action=self.rank_deficient_action, + Y_mat_residualized, covariate_diagnostics, _failed_baselines = ( + _compute_covariate_residualization( + Y_mat=Y_mat, + X_cell=X_cell, + N_mat=N_mat, + baselines=baselines, + first_switch_idx=first_switch_idx_arr, + rank_deficient_action=self.rank_deficient_action, + ) ) + # Zero out N_mat for failed-stratum groups so the downstream + # eligibility checks (N_mat[g, idx] > 0) naturally exclude + # them from all DID/IF/placebo computation. + if _failed_baselines: + for g_idx in range(len(baselines)): + if float(baselines[g_idx]) in _failed_baselines: + N_mat[g_idx, :] = 0 # Keep raw Y_mat for the per-period DID path (which does not # support covariate residualization - it uses binary joiner/leaver # categorization). The residualized matrix is used only by the @@ -2769,6 +2778,7 @@ def _compute_covariate_residualization( n_covariates = X_cell.shape[2] Y_resid = Y_mat.copy() diagnostics: Dict[str, Any] = {} + failed_baselines: set = set() # Pre-compute observation validity masks for first-differencing. # both_observed[g, t] = True iff N_mat[g, t] > 0 AND N_mat[g, t-1] > 0 @@ -2800,6 +2810,7 @@ def _compute_covariate_residualization( # from downstream DID computation (don't mix raw + adjusted). 
group_indices = np.where(d_mask)[0] Y_resid[group_indices, :] = np.nan + failed_baselines.add(float(d_val)) warnings.warn( f"No not-yet-treated observations for baseline treatment " f"d={d_val}. Cannot estimate covariate slope theta_hat. " @@ -2857,6 +2868,7 @@ def _compute_covariate_residualization( # NaN out outcomes for failed strata (don't mix raw + adjusted) group_indices_fail = np.where(d_mask)[0] Y_resid[group_indices_fail, :] = np.nan + failed_baselines.add(float(d_val)) warnings.warn( f"DID^X: baseline d={d_val} has {n_obs} not-yet-treated " f"observations but {n_params} regressors. Groups with " @@ -2920,7 +2932,7 @@ def _compute_covariate_residualization( valid, Y_mat[group_indices] - adjustment, Y_mat[group_indices] ) - return Y_resid, diagnostics + return Y_resid, diagnostics, failed_baselines def _compute_first_differenced_matrix( diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 15b7b4a3..c8234153 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -438,6 +438,8 @@ def _estimand_label(self) -> str: # When trends_linear + L_max>=2, overall is NaN (no aggregate). # Label reflects that per-horizon effects are in linear_trends_effects. if has_trends and self.L_max is not None and self.L_max >= 2: + if has_controls: + return "DID^{X,fd}_l (see linear_trends_effects)" return "DID^{fd}_l (see linear_trends_effects)" if self.L_max is not None and self.L_max >= 2: From e3d51db3b187db18be0176a69ce3f2180df78942 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 15:49:05 -0400 Subject: [PATCH 11/17] Fix CI review Round 9: suppress normalized_effects under trends, Inf guard P0: normalized_effects suppressed under trends_linear (was normalizing second-differences DID^{fd}_l instead of level effects). REGISTRY documents that normalized_effects and cost_benefit_delta are both unavailable under trends_linear. 
P1: Non-finite (Inf) control values now rejected with ValueError during DID^X validation (was silently collapsing first-stage OLS). P3: summary() event study header and row labels now use _horizon_label() (DID^X_l, DID^{fd}_l, DID^{X,fd}_l matching to_dataframe). Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 25 +++++++++++++------ .../chaisemartin_dhaultfoeuille_results.py | 6 ++--- docs/methodology/REGISTRY.md | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index c33c7eaf..c93fd8a8 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -659,6 +659,12 @@ def fit( f"Control column {c!r} contains {n_nan} NaN value(s). " "Drop or impute missing covariates before fitting." ) + n_inf = int(np.isinf(data_controls[c].to_numpy()).sum()) + if n_inf > 0: + raise ValueError( + f"Control column {c!r} contains {n_inf} Inf value(s). " + "Remove or replace non-finite covariates before fitting." + ) # Aggregate covariates to cell means (same groupby as treatment/outcome). # Use the coerced copy joined with group/time from original data. 
x_agg_input = data[[group, time]].copy() @@ -1514,14 +1520,17 @@ def fit( "n_obs": pl_data["N_pl_l"], } - # Normalized effects DID^n_l - normalized_effects_dict = _compute_normalized_effects( - multi_horizon_dids=multi_horizon_dids, - D_mat=D_mat, - baselines=baselines, - first_switch_idx=first_switch_idx_arr, - L_max=L_max, - ) + # Normalized effects DID^n_l (suppressed under trends_linear + # because event_study_effects holds second-differences DID^{fd}_l, + # not level effects - normalizing second-differences is wrong) + if not _is_trends_linear: + normalized_effects_dict = _compute_normalized_effects( + multi_horizon_dids=multi_horizon_dids, + D_mat=D_mat, + baselines=baselines, + first_switch_idx=first_switch_idx_arr, + L_max=L_max, + ) # Cost-benefit delta (only meaningful when L_max >= 2) if L_max >= 2: diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index c8234153..fd8ea6f8 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -418,7 +418,7 @@ class ChaisemartinDHaultfoeuilleResults: # Repr / properties # ------------------------------------------------------------------ - def _horizon_label(self, h: int) -> str: + def _horizon_label(self, h) -> str: """Return per-horizon estimand label for event study rows.""" has_controls = self.covariate_residuals is not None has_trends = self.linear_trends_effects is not None @@ -728,7 +728,7 @@ def summary(self, alpha: Optional[float] = None) -> str: lines.extend( [ thin, - f"Event Study (DID_l, l = 1..{self.L_max})".center(width), + f"Event Study ({self._horizon_label('l')}, l = 1..{self.L_max})".center(width), thin, header_row, thin, @@ -738,7 +738,7 @@ def summary(self, alpha: Optional[float] = None) -> str: entry = self.event_study_effects[l_h] lines.append( _format_inference_row( - f"DID_{l_h}", + self._horizon_label(l_h), entry["effect"], entry["se"], entry["t_stat"], diff --git 
a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index b4f79e07..b3548e7a 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -611,7 +611,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 DID^X covariate adjustment):** When `controls` is set, `per_period_effects` (the Phase 1 per-period DID_M decomposition) remains **unadjusted** (computed on raw outcomes). The covariate residualization applies only to the per-group `DID_{g,l}` path (`L_max >= 1`), which produces `event_study_effects` and `overall_att`. This means `per_period_effects` and `event_study_effects[1]` may diverge when controls are active - by design (the per-period path uses binary joiner/leaver categorization and is not part of the DID^X contract). Implements the residualization-style covariate adjustment from Web Appendix Section 1.2 (Assumption 11). For each baseline treatment value `d`, estimates `theta_hat_d` via OLS of first-differenced outcomes on first-differenced covariates with time FEs, restricted to not-yet-treated observations. Residualizes at levels: `Y_tilde[g,t] = Y[g,t] - X[g,t] @ theta_hat_d`. All downstream DID computations use residualized outcomes. This is NOT doubly-robust, NOT IPW, NOT Callaway-Sant'Anna-style. Plug-in IF (treating `theta_hat` as fixed) is valid by FWL theorem. **Deviation from R `DIDmultiplegtDYN`:** The first-stage OLS uses equal cell weights (one observation per `(g,t)` cell), consistent with the library's cell-count weighting convention documented in Phase 1. R weights by `N_gt` (observation count per cell). On panels with 1 observation per cell (the common case), results are identical. When baseline-specific first stages fail (`n_obs = 0` or `n_obs < n_params`), the affected strata are excluded from the estimation (outcomes set to NaN) rather than retained unadjusted - matching R's "drop failed strata" behavior. Requires `L_max >= 1`. 
Activated via `controls=["col1", "col2"]` in `fit()`. -- **Note (Phase 3 DID^{fd} linear trends):** Implements group-specific linear trends from Web Appendix Section 1.3 (Assumption 12, Lemma 6). Uses the Z_mat transformation: `Z[g,t] = Y[g,t] - Y[g,t-1]` (first-differenced outcomes). Since `DID_{g,l}(Z) = DID^{fd}_{g,l}` algebraically, the existing multi-horizon DID code produces trend-adjusted estimates when fed Z_mat. Requires F_g >= 3 (at least 2 pre-switch periods); groups with F_g < 3 are excluded with a `UserWarning`. Cumulated level effects `delta^{fd}_l = sum_{l'=1}^l DID^{fd}_{l'}` stored in `results.linear_trends_effects`. Cumulated SE uses conservative upper bound (sum of per-horizon SEs); cross-horizon covariance from IF vectors is a library extension (paper proves Theorem 1 per-horizon, not cross-horizon). When combined with DID^X, residualization is applied first, then first-differencing (per paper assumption ordering). Activated via `trends_linear=True` in `fit()`. +- **Note (Phase 3 DID^{fd} linear trends):** Implements group-specific linear trends from Web Appendix Section 1.3 (Assumption 12, Lemma 6). Uses the Z_mat transformation: `Z[g,t] = Y[g,t] - Y[g,t-1]` (first-differenced outcomes). Since `DID_{g,l}(Z) = DID^{fd}_{g,l}` algebraically, the existing multi-horizon DID code produces trend-adjusted estimates when fed Z_mat. Requires F_g >= 3 (at least 2 pre-switch periods); groups with F_g < 3 are excluded with a `UserWarning`. Cumulated level effects `delta^{fd}_l = sum_{l'=1}^l DID^{fd}_{l'}` stored in `results.linear_trends_effects`. Cumulated SE uses conservative upper bound (sum of per-horizon SEs); cross-horizon covariance from IF vectors is a library extension (paper proves Theorem 1 per-horizon, not cross-horizon). When combined with DID^X, residualization is applied first, then first-differencing (per paper assumption ordering). 
**Suppressed surfaces under `trends_linear`:** `normalized_effects` (`DID^n_l`) and `cost_benefit_delta` are suppressed because they would operate on second-differences rather than level effects. Users should access cumulated level effects via `linear_trends_effects`. Activated via `trends_linear=True` in `fit()`. - **Note (Phase 3 state-set trends):** Implements state-set-specific trends from Web Appendix Section 1.4 (Assumptions 13-14). Restricts the control pool for each switcher to groups in the same set (e.g., same state in county-level data). The restriction applies in BOTH `_compute_multi_horizon_dids()` (point estimates) and `_compute_per_group_if_multi_horizon()` (influence functions) to ensure IF consistency. Cohort structure stays as `(D_{g,1}, F_g, S_g)` triples (does not incorporate set membership). Set membership must be time-invariant per group. Activated via `trends_nonparam="state_column"` in `fit()`. From 8a57c5df34a85cce09c71863187801dac8c6a028 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 16:37:56 -0400 Subject: [PATCH 12/17] Fix CI review Round 10: NaN set validation, design2 raw Y, controls=[] P1: trends_nonparam now rejects NaN/missing set assignments with ValueError. P1: design2_effects always uses raw level outcomes from y_pivot (not residualized or first-differenced Y_mat). P2: controls=[] now raises ValueError instead of crashing on np.stack([]). P3: summary() overall block labeled "N/A under trends_linear" when trends + L_max>=2 (was "Cost-Benefit Delta" with NaN value). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 18 +++++++++++++++++- .../chaisemartin_dhaultfoeuille_results.py | 5 ++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index c93fd8a8..fca7e6fc 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -631,6 +631,12 @@ def fit( # Step 4b: Covariate aggregation (DID^X, Web Appendix Section 1.2) # ------------------------------------------------------------------ if controls is not None: + if not controls: + raise ValueError( + "controls must be a non-empty list of column names, " + "got an empty list. Pass controls=None to disable " + "covariate adjustment." + ) if L_max is None: raise ValueError( "Covariate adjustment (DID^X) requires L_max >= 1. The " @@ -1081,6 +1087,14 @@ def fit( f"trends_nonparam column {set_col!r} not found in " f"data. Available columns: {list(data.columns)}" ) + # Reject NaN/missing set assignments + n_na_set = int(data[set_col].isna().sum()) + if n_na_set > 0: + raise ValueError( + f"trends_nonparam column {set_col!r} contains " + f"{n_na_set} NaN/missing value(s). All groups must " + f"have a valid set assignment." + ) # Aggregate set membership per group (must be time-invariant) set_per_group = data.groupby(group)[set_col].nunique() time_varying = set_per_group[set_per_group > 1] @@ -2361,7 +2375,9 @@ def fit( design2_effects=( _compute_design2_effects( D_mat=D_mat, - Y_mat=Y_mat if not _is_trends_linear else y_pivot.to_numpy(), + # Design-2 always uses raw level outcomes (not residualized, + # not first-differenced). Use y_pivot as the canonical raw source. 
+ Y_mat=y_pivot.to_numpy(), N_mat=N_mat_orig, baselines=baselines, first_switch_idx=first_switch_idx_arr, diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index fd8ea6f8..a305c497 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -580,7 +580,10 @@ def summary(self, alpha: Optional[float] = None) -> str: adj_tag = " (Trend-Adjusted)" if self.L_max is not None and self.L_max >= 2: - overall_label = f"Cost-Benefit Delta{adj_tag}" + if has_trends: + overall_label = f"Overall (N/A under trends_linear){adj_tag}" + else: + overall_label = f"Cost-Benefit Delta{adj_tag}" overall_row_label = self._estimand_label() elif self.L_max is not None and self.L_max == 1: overall_label = f"Per-Group ATT at Horizon 1{adj_tag}" From 4da0841a6f00352035465b657be78784a2cbbf2d Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 16:56:42 -0400 Subject: [PATCH 13/17] Fix CI review Round 11: document Design-2 raw-outcome contract P1: REGISTRY now explicitly documents that design2_effects always uses raw (unadjusted) outcomes regardless of active controls/trends/nonparam. For full adjusted Design-2 estimation, the paper recommends running the standard estimator on a restricted subsample with trends_nonparam. P3: Added test_nan_set_membership_rejected for trends_nonparam NaN guard. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/methodology/REGISTRY.md | 2 +- tests/test_chaisemartin_dhaultfoeuille.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index b3548e7a..76b59938 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -617,7 +617,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. -- **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. The paper notes Design-2 can be implemented by "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." 
Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. +- **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups. **Reference implementation(s):** - R: [`DIDmultiplegtDYN`](https://cran.r-project.org/package=DIDmultiplegtDYN) (CRAN, maintained by the paper authors). The Python implementation matches `did_multiplegt_dyn(..., effects=1)` at horizon `l = 1`. Parity tests live in `tests/test_chaisemartin_dhaultfoeuille_parity.py`. 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py index e5e4e6be..e8de230b 100644 --- a/tests/test_chaisemartin_dhaultfoeuille.py +++ b/tests/test_chaisemartin_dhaultfoeuille.py @@ -2682,6 +2682,16 @@ def test_group_level_set_rejected(self): L_max=1, trends_nonparam="group", ) + def test_nan_set_membership_rejected(self): + """NaN in trends_nonparam column raises ValueError.""" + df = self._make_panel_with_sets() + df.loc[df["group"] == 0, "state"] = np.nan + with pytest.raises(ValueError, match="NaN/missing"): + ChaisemartinDHaultfoeuille(seed=1).fit( + df, "outcome", "group", "period", "treatment", + L_max=1, trends_nonparam="state", + ) + def test_nonparam_with_covariates(self): """Combined state-set trends + covariates.""" df = self._make_panel_with_sets() From d20efde1c20dab8f41dc23dc490321aeb9b7afe1 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 17:24:28 -0400 Subject: [PATCH 14/17] Fix CI review Round 12: document Assumption 14 support-trimming behavior P1: REGISTRY now documents that trends_nonparam does not enforce Assumption 14 (common last-untreated period across sets) up front. When within-set controls are exhausted at a given horizon, affected switcher/horizon pairs are excluded via the existing empty-control-pool mechanism. The effective estimand is trimmed to within-set support. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/methodology/REGISTRY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 76b59938..645d577c 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -613,7 +613,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param - **Note (Phase 3 DID^{fd} linear trends):** Implements group-specific linear trends from Web Appendix Section 1.3 (Assumption 12, Lemma 6). 
Uses the Z_mat transformation: `Z[g,t] = Y[g,t] - Y[g,t-1]` (first-differenced outcomes). Since `DID_{g,l}(Z) = DID^{fd}_{g,l}` algebraically, the existing multi-horizon DID code produces trend-adjusted estimates when fed Z_mat. Requires F_g >= 3 (at least 2 pre-switch periods); groups with F_g < 3 are excluded with a `UserWarning`. Cumulated level effects `delta^{fd}_l = sum_{l'=1}^l DID^{fd}_{l'}` stored in `results.linear_trends_effects`. Cumulated SE uses conservative upper bound (sum of per-horizon SEs); cross-horizon covariance from IF vectors is a library extension (paper proves Theorem 1 per-horizon, not cross-horizon). When combined with DID^X, residualization is applied first, then first-differencing (per paper assumption ordering). **Suppressed surfaces under `trends_linear`:** `normalized_effects` (`DID^n_l`) and `cost_benefit_delta` are suppressed because they would operate on second-differences rather than level effects. Users should access cumulated level effects via `linear_trends_effects`. Activated via `trends_linear=True` in `fit()`. -- **Note (Phase 3 state-set trends):** Implements state-set-specific trends from Web Appendix Section 1.4 (Assumptions 13-14). Restricts the control pool for each switcher to groups in the same set (e.g., same state in county-level data). The restriction applies in BOTH `_compute_multi_horizon_dids()` (point estimates) and `_compute_per_group_if_multi_horizon()` (influence functions) to ensure IF consistency. Cohort structure stays as `(D_{g,1}, F_g, S_g)` triples (does not incorporate set membership). Set membership must be time-invariant per group. Activated via `trends_nonparam="state_column"` in `fit()`. +- **Note (Phase 3 state-set trends):** Implements state-set-specific trends from Web Appendix Section 1.4 (Assumptions 13-14). Restricts the control pool for each switcher to groups in the same set (e.g., same state in county-level data). 
The restriction applies in all four DID/IF paths: `_compute_multi_horizon_dids()`, `_compute_per_group_if_multi_horizon()`, `_compute_multi_horizon_placebos()`, and `_compute_per_group_if_placebo_horizon()`. Cohort structure stays as `(D_{g,1}, F_g, S_g)` triples (does not incorporate set membership). Set membership must be time-invariant per group. **Note on Assumption 14 (common support):** The paper requires a common last-untreated period across sets (`T_u^s` equal for all `s`). This implementation does NOT enforce Assumption 14 up front. Instead, when within-set controls are exhausted at a given horizon (because a set has shorter untreated support than others), the affected switcher/horizon pairs are excluded via the existing empty-control-pool mechanism. This means `N_l` may be smaller under `trends_nonparam` than without it, and the effective estimand is trimmed to the within-set support at each horizon. The existing multi-horizon A11 warning fires when exclusions occur. Activated via `trends_nonparam="state_column"` in `fit()`. - **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). 
Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`. From cd73917dea80c62f7f90d84697ce4c7e0cd25213 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 17:37:44 -0400 Subject: [PATCH 15/17] Fix CI review Round 13: per-period path uses raw Y under trends_linear P1: When trends_linear=True without controls, _compute_per_period_dids and _compute_cohort_recentered_inputs now use raw y_pivot outcomes (not the first-differenced Z_mat). Previously the per-period path would double-difference the already-differenced outcomes. Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index fca7e6fc..8238d268 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -1147,7 +1147,9 @@ def fit( # path (L_max >= 1). The per-period path uses binary # joiner/leaver categorization and is not part of the DID^X # contract (Web Appendix Section 1.2). - Y_mat=Y_mat_raw if controls is not None else Y_mat, + # Use raw outcomes for per-period DID when controls or + # trends_linear is active (both transform Y_mat). + Y_mat=Y_mat_raw if controls is not None else (y_pivot.to_numpy() if _is_trends_linear else Y_mat), N_mat=N_mat_orig, periods=all_periods, ) @@ -1581,7 +1583,8 @@ def fit( ) = _compute_cohort_recentered_inputs( D_mat=D_mat, # Phase 1 IF uses per-period structure: use raw outcomes - Y_mat=Y_mat_raw if controls is not None else Y_mat, + # when controls or trends_linear transform Y_mat. 
+ Y_mat=Y_mat_raw if controls is not None else (y_pivot.to_numpy() if _is_trends_linear else Y_mat), N_mat=N_mat_orig, n_10_t_arr=n_10_t_arr, n_00_t_arr=n_00_t_arr, From bfed3e649218f13263d56cdb3471451962e19ce7 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 17:56:33 -0400 Subject: [PATCH 16/17] Address green review P3s: return type, design2 to_dataframe, docstrings - Fix _compute_covariate_residualization return type annotation to include failed_baselines set (was 2-tuple, now 3-tuple) - Add to_dataframe("design2") level for Design-2 results - Update to_dataframe() docstring with all new levels - Update results dataclass docstring: replace Phase 3 placeholder text with actual field descriptions for covariate_residuals, linear_trends_effects, heterogeneity_effects, design2_effects Co-Authored-By: Claude Opus 4.6 (1M context) --- diff_diff/chaisemartin_dhaultfoeuille.py | 2 +- .../chaisemartin_dhaultfoeuille_results.py | 32 ++++++++++++++++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 8238d268..9959ad2d 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -2764,7 +2764,7 @@ def _compute_covariate_residualization( baselines: np.ndarray, first_switch_idx: np.ndarray, rank_deficient_action: str = "warn", -) -> Tuple[np.ndarray, Dict[str, Any]]: +) -> Tuple[np.ndarray, Dict[str, Any], set]: """Residualize outcomes by partialling out covariates per baseline treatment. 
Implements ``DID^X`` from Web Appendix Section 1.2 of de Chaisemartin & diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index a305c497..9c85b438 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -320,11 +320,19 @@ class ChaisemartinDHaultfoeuilleResults: sup_t_bands : dict, optional Phase 2 placeholder (sup-t simultaneous confidence bands). covariate_residuals : pd.DataFrame, optional - Phase 3 placeholder (``DID^X`` residuals). + ``DID^X`` first-stage diagnostics: per-baseline ``theta_hat``, + ``n_obs``, and ``r_squared``. Populated when ``controls`` is set. linear_trends_effects : dict, optional - Phase 3 placeholder (``DID^{fd}`` group-specific linear trends). + Cumulated ``DID^{fd}`` level effects ``delta^{fd}_l``. Keyed by + horizon. Populated when ``trends_linear=True``. + heterogeneity_effects : dict, optional + Per-horizon heterogeneity test results ``beta^{het}_l``. + Populated when ``heterogeneity`` is set. + design2_effects : dict, optional + Design-2 switch-in/switch-out descriptive summary. Populated + when ``design2=True`` is passed with ``drop_larger_lower=False``. honest_did_results : Any, optional - Phase 3 placeholder (HonestDiD integration on placebos). + Reserved for HonestDiD integration on placebos. survey_metadata : Any, optional Always ``None`` in Phase 1 — survey integration is deferred to a separate effort after all phases ship. @@ -863,6 +871,14 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: - ``"twfe_weights"``: per-(group, time) TWFE decomposition weights table. Only available when ``twfe_diagnostic=True`` was passed to ``fit()``. + - ``"heterogeneity"``: one row per horizon for the + heterogeneity test ``beta^{het}_l``. Available when + ``heterogeneity`` is passed to ``fit()``. + - ``"linear_trends"``: one row per horizon for the + cumulated trend-adjusted level effects ``delta^{fd}_l``. 
+ Available when ``trends_linear=True``. + - ``"design2"``: Design-2 switch-in/switch-out descriptive + summary. Available when ``design2=True`` is passed with + ``drop_larger_lower=False``. Returns ------- @@ -1073,11 +1089,19 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame: rows.append({"horizon": h, **data}) return pd.DataFrame(rows) + elif level == "design2": + if self.design2_effects is None: + raise ValueError( + "Design-2 effects not available. Pass " + "design2=True with drop_larger_lower=False to fit()." + ) + return pd.DataFrame([self.design2_effects]) + else: raise ValueError( f"Unknown level: {level!r}. Use 'overall', 'joiners_leavers', " f"'per_period', 'event_study', 'normalized', 'twfe_weights', " - f"'heterogeneity', or 'linear_trends'." + f"'heterogeneity', 'linear_trends', or 'design2'." ) From 3e6611fe3b838b0f7781599aeb7ef9c5fc69fab0 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 13 Apr 2026 18:24:23 -0400 Subject: [PATCH 17/17] Update ROADMAP: mark 3i parity tests as shipped Co-Authored-By: Claude Opus 4.6 (1M context) --- ROADMAP.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ROADMAP.md b/ROADMAP.md index 66b7aada..db56947c 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -175,7 +175,7 @@ The dynamic companion paper subsumes the AER 2020 paper: `DID_1 = DID_M`. 
The si | **3f.** Non-binary treatment support (the formula already handles it; this row is documentation + tests) | MEDIUM | Shipped (PR #300; also ships placebo SE, L_max=1 per-group path, parity SE assertions) | | **3g.** HonestDiD (Rambachan-Roth) integration on `DID^{pl}_l` placebos | MEDIUM | Not started | | **3h.** **Single comprehensive tutorial notebook** covering all three phases — Favara-Imbs (2015) banking deregulation replication as the headline application, with comparison plots vs LP / TWFE | HIGH | Not started | -| **3i.** Parity tests vs `did_multiplegt_dyn` for covariate and extension specifications | HIGH | Not started | +| **3i.** Parity tests vs `did_multiplegt_dyn` for covariate and extension specifications | HIGH | Shipped (PR B; controls, trends_lin, combined) | ### Out of scope for the dCDH single-class evolution