Skip to content

Commit 080ac0b

Browse files
authored
Merge pull request #169 from igerber/method-review-triple-d
Rewrite TripleDifference to match R's triplediff::ddd()
2 parents 7f19bf1 + 1614459 commit 080ac0b

File tree

10 files changed

+2640
-501
lines changed

10 files changed

+2640
-501
lines changed

METHODOLOGY_REVIEW.md

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -428,15 +428,51 @@ variables appear to the left of the `|` separator.
428428
|-------|-------|
429429
| Module | `triple_diff.py` |
430430
| Primary Reference | Ortiz-Villavicencio & Sant'Anna (2025) |
431-
| R Reference | (forthcoming) |
432-
| Status | Not Started |
433-
| Last Review | - |
431+
| R Reference | `triplediff::ddd()` (v0.2.1, CRAN) |
432+
| Status | **Complete** |
433+
| Last Review | 2026-02-18 |
434+
435+
**Verified Components:**
436+
- [x] ATT matches R `triplediff::ddd()` for all 3 methods (DR, RA, IPW) — <0.001% relative difference
437+
- [x] SE matches R `triplediff::ddd()` for all 3 methods — <0.001% relative difference
438+
- [x] With-covariates ATT matches R — <0.001% relative difference
439+
- [x] With-covariates SE matches R — <0.001% relative difference
440+
- [x] Verified across all 4 DGP types from `gen_dgp_2periods()` (different model misspecification scenarios)
441+
- [x] Influence function-based SE: `SE = std(w3*IF_3 + w2*IF_2 - w1*IF_1, ddof=1) / sqrt(n)`
442+
- [x] Three-DiD decomposition: `DDD = DiD_3 + DiD_2 - DiD_1` matching R's approach
443+
- [x] safe_inference() used for all inference fields (t_stat, p_value, conf_int)
434444

435445
**Corrections Made:**
436-
- (None yet)
446+
1. **Complete rewrite of estimation methods** (was naive cell-mean approach, now three-DiD
447+
decomposition). The original implementation computed DDD directly from 8 cell means with
448+
a naive cell-variance SE. Replaced with R's decomposition into three pairwise DiD
449+
comparisons (subgroup j vs reference subgroup 4), each using DR/IPW/RA methodology
450+
from Callaway & Sant'Anna. This fixed:
451+
- DR SE: was off by >100% (naive cell variance vs influence function)
452+
- IPW SE: was off by >200% (incorrect cell-probability-ratio weights)
453+
- With-covariates ATT: was off by >1000% for all methods (incorrect cell-by-cell regression)
454+
2. **Influence function SE** replaces naive cell variance for all methods:
455+
`SE = std(w3*IF_3 + w2*IF_2 - w1*IF_1, ddof=1) / sqrt(n)` where
456+
`w_j = n / n_j` and `IF_j` is the per-observation influence function for pairwise DiD j.
457+
3. **Propensity score estimation** now runs per-pairwise-comparison (P(subgroup=4|X) within
458+
{j, 4} subset) instead of global P(G=1|X).
459+
4. **Outcome regression** now fits separate OLS per subgroup-time cell within each pairwise
460+
comparison, matching R's `compute_outcome_regression_rc()`.
437461

438462
**Outstanding Concerns:**
439-
- (None yet)
463+
- Implementation uses `panel=FALSE` (repeated cross-section) mode. Panel mode (`panel=TRUE`)
464+
with differenced outcomes is not yet implemented.
465+
466+
**R Comparison Results (panel=FALSE, n=500 per DGP):**
467+
| DGP | Method | Covariates | ATT Diff | SE Diff |
468+
|-----|--------|-----------|----------|---------|
469+
| 1 | DR | No | <0.001% | <0.001% |
470+
| 1 | DR | Yes | <0.001% | <0.001% |
471+
| 1 | REG | No | <0.001% | <0.001% |
472+
| 1 | REG | Yes | <0.001% | <0.001% |
473+
| 1 | IPW | No | <0.001% | <0.001% |
474+
| 1 | IPW | Yes | <0.001% | <0.001% |
475+
| 2-4 | All | Both | <0.001% | <0.001% |
440476

441477
---
442478

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/usr/bin/env Rscript
2+
# Benchmark: Triple Difference (R `triplediff` package)
3+
#
4+
# This uses triplediff::ddd() with panel=FALSE (repeated cross-section mode),
5+
# matching the Python TripleDifference estimator's approach.
6+
#
7+
# Usage:
8+
# Rscript benchmark_triplediff.R --data path/to/data.csv --output path/to/results.json \
9+
# [--method dr|reg|ipw] [--covariates true|false]
10+
11+
library(triplediff)
12+
library(jsonlite)
13+
library(data.table)
14+
15+
# Parse command line arguments
16+
args <- commandArgs(trailingOnly = TRUE)  # trailingOnly = TRUE: keep only the arguments after --args (the script's own flags)
17+
18+
#' Parse the benchmark script's command-line flags.
#'
#' Recognised flags, each followed by exactly one value:
#'   --data <path>, --output <path>, --method dr|reg|ipw,
#'   --covariates true|false (case-insensitive; anything other than "true"
#'   is treated as FALSE).
#' Tokens that are not one of these flags are skipped.
#'
#' @param args Character vector of command-line tokens.
#' @return A list with elements `data`, `output`, `method` (default "dr")
#'   and `covariates` (default FALSE).
#' Stops with the usage message when `--data` or `--output` is absent, and
#' with a specific message when a flag has no value or `--method` is not one
#' of the documented estimators.
parse_args <- function(args) {
  usage <- paste(
    "Usage: Rscript benchmark_triplediff.R --data <path> --output <path>",
    "[--method dr|reg|ipw] [--covariates true|false]"
  )
  known_flags <- c("--data", "--output", "--method", "--covariates")
  valid_methods <- c("dr", "reg", "ipw")

  result <- list(
    data = NULL,
    output = NULL,
    method = "dr",
    covariates = FALSE
  )

  n_args <- length(args)
  i <- 1
  while (i <= n_args) {
    flag <- args[i]
    if (!(flag %in% known_flags)) {
      # Unknown token: ignore it and keep scanning.
      i <- i + 1
      next
    }

    # A flag in the last position has no value; indexing past the end would
    # silently produce NA and fail much later with an unrelated error.
    if (i == n_args || is.na(args[i + 1])) {
      stop(sprintf("Missing value for %s\n%s", flag, usage), call. = FALSE)
    }
    value <- args[i + 1]

    field <- sub("^--", "", flag)
    if (field == "covariates") {
      result$covariates <- tolower(value) == "true"
    } else {
      result[[field]] <- value
    }
    i <- i + 2
  }

  if (is.null(result$data) || is.null(result$output)) {
    stop(usage, call. = FALSE)
  }

  if (!(result$method %in% valid_methods)) {
    stop(
      sprintf(
        "Invalid --method '%s'; expected one of: %s",
        result$method, paste(valid_methods, collapse = ", ")
      ),
      call. = FALSE
    )
  }

  result
}
51+
52+
config <- parse_args(args)

# Load data
message(sprintf("Loading data from: %s", config$data))
data <- fread(config$data)

# Build covariate formula.
# Covariates are every column whose name starts with "cov"; without them (or
# when --covariates is not "true") the intercept-only formula ~1 is used.
cov_cols <- grep("^cov", names(data), value = TRUE)
if (config$covariates && length(cov_cols) == 0) {
  # Covariates were requested but cannot be honoured: say so explicitly
  # instead of silently estimating the no-covariate model.
  warning(
    "--covariates true was requested but no columns matching '^cov' were ",
    "found; estimating without covariates",
    call. = FALSE, immediate. = TRUE
  )
}
if (config$covariates && length(cov_cols) > 0) {
  xformla <- as.formula(paste("~", paste(cov_cols, collapse = "+")))
  message(sprintf("Using covariates: %s", paste(cov_cols, collapse = ", ")))
} else {
  xformla <- ~1
  message("No covariates")
}
67+
68+
# Run benchmark
69+
# Announce the run, then time a single triplediff::ddd() call in
# repeated-cross-section mode (panel = FALSE) with analytical SEs (boot = FALSE).
message("Running DDD estimation (method=", config$method, ", panel=FALSE)...")
timing <- system.time(
  res <- ddd(
    data = data,
    yname = "y",
    tname = "time",
    idname = "id",
    gname = "state",
    pname = "partition",
    xformla = xformla,
    est_method = config$method,
    control_group = "nevertreated",
    panel = FALSE,
    boot = FALSE
  )
)
85+
86+
# Collect results
87+
# Gather the point estimate, standard error, confidence limits and run
# metadata into one list for serialisation.
output <- list(
  ATT = res$ATT,
  se = res$se,
  lci = res$lci,
  uci = res$uci,
  method = config$method,
  covariates = config$covariates,
  n_obs = nrow(data),
  elapsed_seconds = timing["elapsed"]
)

# Write results
message("Writing results to: ", config$output)
json_text <- toJSON(output, digits = 15, auto_unbox = TRUE, pretty = TRUE)
write(json_text, config$output)

# Short human-readable summary.
message("Done.")
for (summary_line in c(
  sprintf(" ATT = %.6f", res$ATT),
  sprintf(" SE = %.6f", res$se),
  sprintf(" Time: %.3fs", timing["elapsed"])
)) {
  message(summary_line)
}

benchmarks/R/requirements.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ required_packages <- c(
1010
"didimputation", # Borusyak, Jaravel & Spiess (2024) imputation DiD
1111
"HonestDiD", # Rambachan & Roth (2023) sensitivity analysis
1212
"fixest", # Fast TWFE and basic DiD
13+
"triplediff", # Ortiz-Villavicencio & Sant'Anna (2025) triple difference
1314

1415
# Utilities
1516
"jsonlite", # JSON output for Python interop
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
{
2+
"dgp1_dr_nocov": {
3+
"ATT": -4.713891309648176,
4+
"se": 15.32210646783081,
5+
"lci": -34.744668153884774,
6+
"uci": 25.316885534588423
7+
},
8+
"dgp1_dr_cov": {
9+
"ATT": -0.370943148650857,
10+
"se": 0.3629174762792657,
11+
"lci": -1.082248331518387,
12+
"uci": 0.340362034216673
13+
},
14+
"dgp1_reg_nocov": {
15+
"ATT": -4.713891309648488,
16+
"se": 15.322106467830805,
17+
"lci": -34.74466815388507,
18+
"uci": 25.3168855345881
19+
},
20+
"dgp1_reg_cov": {
21+
"ATT": -0.3648022796066925,
22+
"se": 12.25511177194079,
23+
"lci": -24.384379979123477,
24+
"uci": 23.65477541991009
25+
},
26+
"dgp1_ipw_nocov": {
27+
"ATT": -4.713891309648119,
28+
"se": 15.322106467830803,
29+
"lci": -34.7446681538847,
30+
"uci": 25.316885534588465
31+
},
32+
"dgp1_ipw_cov": {
33+
"ATT": 0.1746221292513894,
34+
"se": 14.84686185530122,
35+
"lci": -28.92469239058052,
36+
"uci": 29.2739366490833
37+
},
38+
"dgp2_dr_nocov": {
39+
"ATT": -2.802437682226054,
40+
"se": 15.162115011913007,
41+
"lci": -32.51963703502963,
42+
"uci": 26.914761670577523
43+
},
44+
"dgp2_dr_cov": {
45+
"ATT": -0.131884249677628,
46+
"se": 0.3615976238477585,
47+
"lci": -0.8406025693144961,
48+
"uci": 0.57683406995924
49+
},
50+
"dgp2_reg_nocov": {
51+
"ATT": -2.802437682226468,
52+
"se": 15.16211501191301,
53+
"lci": -32.519637035030044,
54+
"uci": 26.91476167057711
55+
},
56+
"dgp2_reg_cov": {
57+
"ATT": -0.1264941935341142,
58+
"se": 12.278681040167424,
59+
"lci": -24.192266809917065,
60+
"uci": 23.939278422848837
61+
},
62+
"dgp2_ipw_nocov": {
63+
"ATT": -2.802437682225957,
64+
"se": 15.16211501191301,
65+
"lci": -32.51963703502953,
66+
"uci": 26.914761670577622
67+
},
68+
"dgp2_ipw_cov": {
69+
"ATT": 0.4425176545858278,
70+
"se": 14.578330164503246,
71+
"lci": -28.130484422574405,
72+
"uci": 29.01551973174606
73+
},
74+
"dgp3_dr_nocov": {
75+
"ATT": -4.047926092563451,
76+
"se": 13.619021126223045,
77+
"lci": -30.740717004650733,
78+
"uci": 22.644864819523832
79+
},
80+
"dgp3_dr_cov": {
81+
"ATT": -1.206339068198347,
82+
"se": 5.715500553553746,
83+
"lci": -12.408514306782427,
84+
"uci": 9.995836170385735
85+
},
86+
"dgp3_reg_nocov": {
87+
"ATT": -4.047926092563443,
88+
"se": 13.61902112622304,
89+
"lci": -30.740717004650715,
90+
"uci": 22.64486481952383
91+
},
92+
"dgp3_reg_cov": {
93+
"ATT": -1.506286210381859,
94+
"se": 11.48877687437943,
95+
"lci": -24.02387511058219,
96+
"uci": 21.01130268981847
97+
},
98+
"dgp3_ipw_nocov": {
99+
"ATT": -4.047926092563728,
100+
"se": 13.61902112622304,
101+
"lci": -30.740717004651,
102+
"uci": 22.644864819523544
103+
},
104+
"dgp3_ipw_cov": {
105+
"ATT": -0.797266162250736,
106+
"se": 13.500012852552667,
107+
"lci": -27.256805144081792,
108+
"uci": 25.66227281958032
109+
},
110+
"dgp4_dr_nocov": {
111+
"ATT": -5.281043961510922,
112+
"se": 13.550738720691161,
113+
"lci": -31.840003817977955,
114+
"uci": 21.277915894956113
115+
},
116+
"dgp4_dr_cov": {
117+
"ATT": -2.919555392612542,
118+
"se": 5.682194173268706,
119+
"lci": -14.05645132538255,
120+
"uci": 8.217340540157466
121+
},
122+
"dgp4_reg_nocov": {
123+
"ATT": -5.281043961511244,
124+
"se": 13.550738720691157,
125+
"lci": -31.84000381797827,
126+
"uci": 21.277915894955783
127+
},
128+
"dgp4_reg_cov": {
129+
"ATT": -3.131035790104079,
130+
"se": 11.449511458993447,
131+
"lci": -25.571665890309877,
132+
"uci": 19.30959431010172
133+
},
134+
"dgp4_ipw_nocov": {
135+
"ATT": -5.281043961510647,
136+
"se": 13.550738720691161,
137+
"lci": -31.84000381797768,
138+
"uci": 21.277915894956386
139+
},
140+
"dgp4_ipw_cov": {
141+
"ATT": -2.588437808164429,
142+
"se": 13.347963293853894,
143+
"lci": -28.749965131080682,
144+
"uci": 23.573089514751825
145+
}
146+
}

0 commit comments

Comments
 (0)