Skip to content

Commit 5d04748

Browse files
igerber and claude committed
Address PR #169 review round 4: non-finite IF propagation and docstring fix
Non-finite influence function (IF) values (from extreme propensity scores or a near-singular design) are no longer zeroed silently: the estimator now emits a warning and sets the SE to NaN. Updated the triple_difference() docstring to document `robust` as a no-op, matching the class docstring.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ba50dd1 commit 5d04748

File tree

3 files changed

+59
-3
lines changed

3 files changed

+59
-3
lines changed

diff_diff/triple_diff.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,7 @@ def _estimate_ddd_decomposition(
807807
pscore_stats = None
808808
all_pscores = {} # Collect pscores for diagnostics
809809
overlap_issues = [] # Collect overlap diagnostics across comparisons
810+
any_nonfinite_if = False
810811

811812
with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
812813
for j in [3, 2, 1]:
@@ -907,8 +908,10 @@ def _estimate_ddd_decomposition(
907908
hessian, est_method, n_sub,
908909
)
909910

910-
# Replace any NaN in influence function with 0
911-
inf_j = np.where(np.isfinite(inf_j), inf_j, 0.0)
911+
# Track non-finite IF values (flag for NaN SE later)
912+
if not np.all(np.isfinite(inf_j)):
913+
any_nonfinite_if = True
914+
inf_j = np.where(np.isfinite(inf_j), inf_j, 0.0)
912915

913916
# Pad influence function to full length
914917
inf_full = np.zeros(n)
@@ -963,6 +966,17 @@ def _estimate_ddd_decomposition(
963966
else:
964967
se = float(np.std(inf_func, ddof=1) / np.sqrt(n))
965968

969+
# Non-finite IF values make SE undefined
970+
if any_nonfinite_if:
971+
warnings.warn(
972+
"Non-finite values in influence function (likely due to "
973+
"extreme propensity scores or near-singular design). "
974+
"SE set to NaN.",
975+
UserWarning,
976+
stacklevel=3,
977+
)
978+
se = np.nan
979+
966980
# Propensity score stats (for IPW/DR with covariates)
967981
if has_covariates and est_method != "reg" and all_pscores:
968982
all_ps = np.concatenate(list(all_pscores.values()))
@@ -1533,7 +1547,10 @@ def triple_difference(
15331547
Estimation method: "dr" (doubly robust), "reg" (regression),
15341548
or "ipw" (inverse probability weighting).
15351549
robust : bool, default=True
1536-
Whether to use robust standard errors.
1550+
Whether to use heteroskedasticity-robust standard errors.
1551+
Note: influence function-based SEs are inherently robust to
1552+
heteroskedasticity, so this parameter has no effect. Retained
1553+
for API compatibility.
15371554
cluster : str, optional
15381555
Column name for cluster-robust standard errors.
15391556
alpha : float, default=0.05

docs/methodology/REGISTRY.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,9 @@ has no additional effect.
829829
sets hessian=None (skipping PS correction in influence function), emits UserWarning
830830
- Collinear covariates: detected via pivoted QR in `solve_ols()`, action controlled by
831831
`rank_deficient_action` ("warn", "error", "silent")
832+
- Non-finite influence function values (e.g., from extreme propensity scores in IPW/DR
833+
or near-singular design): warns and sets SE to NaN, propagated to t_stat/p_value/CI
834+
via safe_inference()
832835
- NaN inference for undefined statistics:
833836
- t_stat: Uses NaN (not 0.0) when SE is non-finite or zero
834837
- p_value and CI: Also NaN when t_stat is NaN

tests/test_methodology_triple_diff.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,6 +1334,42 @@ def _failing_lr(*args, **kwargs):
13341334
assert np.isfinite(result.att)
13351335
assert np.isfinite(result.se) and result.se > 0
13361336

1337+
@pytest.mark.parametrize("method", ["ipw", "dr"])
1338+
def test_nonfinite_if_propagates_nan_se(self, monkeypatch, method):
1339+
"""Non-finite IF values produce NaN SE and NaN inference fields."""
1340+
data = generate_ddd_data(n_per_cell=50, seed=42, add_covariates=True)
1341+
1342+
import diff_diff.triple_diff as td_module
1343+
1344+
original_did_rc = td_module.TripleDifference._compute_did_rc
1345+
1346+
def _did_rc_with_nan(self_inner, *args, **kwargs):
1347+
att, inf = original_did_rc(self_inner, *args, **kwargs)
1348+
inf[0] = np.inf # Inject non-finite value
1349+
return att, inf
1350+
1351+
monkeypatch.setattr(
1352+
td_module.TripleDifference, "_compute_did_rc", _did_rc_with_nan,
1353+
)
1354+
1355+
ddd = TripleDifference(estimation_method=method)
1356+
with warnings.catch_warnings(record=True) as w:
1357+
warnings.simplefilter("always")
1358+
result = ddd.fit(data, outcome="outcome", group="group",
1359+
partition="partition", time="time",
1360+
covariates=["age"])
1361+
nonfinite_warnings = [
1362+
x for x in w if "non-finite" in str(x.message).lower()
1363+
]
1364+
assert len(nonfinite_warnings) > 0, "Expected non-finite IF warning"
1365+
assert np.isnan(result.se), "SE should be NaN when IF has non-finite values"
1366+
assert_nan_inference({
1367+
"se": result.se,
1368+
"t_stat": result.t_stat,
1369+
"p_value": result.p_value,
1370+
"conf_int": result.conf_int,
1371+
})
1372+
13371373
def test_r_squared_respects_rank_deficient_action(self):
13381374
"""r_squared computation uses estimator's rank_deficient_action, not hardcoded 'silent'."""
13391375
data = generate_ddd_data(n_per_cell=50, seed=42, add_covariates=True)

0 commit comments

Comments
 (0)