Skip to content

Commit 2df5274

Browse files
committed
Use ±100 sigma in numerical stability tests
Increase test bounds from 10/40 to 100 sigma to future-proof against any potential improvements in naive computation methods. At 100 sigma, CDF(100) is truly indistinguishable from 1.0 in float64. Also enhances test docstrings with What/Why/How documentation.
1 parent 93734c2 commit 2df5274

File tree

2 files changed

+61
-23
lines changed

2 files changed

+61
-23
lines changed

tests/logprob/test_abstract.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,22 +123,36 @@ def test_logccdf():
123123

124124

125125
def test_logccdf_numerical_stability():
126-
"""Test that pm.logccdf is numerically stable in the extreme right tail.
126+
"""Test numerical stability of pm.logccdf in the extreme right tail.
127127
128-
For a normal distribution, the log survival function at x=10 is very negative
129-
(around -52). Using log(1 - exp(logcdf)) would fail because CDF(10) is essentially 1.
128+
What: Verifies the public logccdf function is numerically stable when
129+
evaluating far in the distribution's tail.
130+
131+
Why: This is the primary use case that motivated adding logccdf support.
132+
In censored/truncated distributions, we need log(1 - CDF(bound)) at the
133+
censoring/truncation point. When this point is far in the tail:
134+
- Naive: log(1 - exp(logcdf)) = log(1 - 1) = log(0) = -inf
135+
- Stable: Uses erfcx-based computation → correct finite value
136+
137+
How: Evaluates logccdf at x=100 for Normal(0,1) and verifies:
138+
1. Result is finite (not -inf or nan)
139+
2. Result matches scipy.logsf within relative tolerance
140+
141+
The expected value is approximately -5005.5, representing the log
142+
probability of a standard normal exceeding 100 sigma.
143+
Using 100 sigma future-proofs against any improvements in naive methods.
130144
"""
131145
x = pm.Normal.dist(0, 1)
132146

133-
# Test value far in the right tail
134-
far_tail_value = 10.0
147+
far_tail_value = 100.0
135148

136149
result = logccdf(x, far_tail_value).eval()
137-
expected = sp.norm(0, 1).logsf(far_tail_value)
150+
expected = sp.norm(0, 1).logsf(far_tail_value) # ≈ -5005.5
138151

139-
# Should be around -52, not -inf or nan
152+
# Must be finite, not -inf (which naive computation would give)
140153
assert np.isfinite(result)
141-
np.testing.assert_almost_equal(result, expected, decimal=6)
154+
# Use rtol for relative tolerance (float32 has ~7 significant digits)
155+
np.testing.assert_allclose(result, expected, rtol=1e-6)
142156

143157

144158
def test_logccdf_helper_fallback():

tests/logprob/test_censoring.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -268,42 +268,66 @@ def test_rounding(rounding_op):
268268
@pytest.mark.parametrize(
269269
"censoring_side,bound_value",
270270
[
271-
("right", 40.0), # Far right tail: CDF ≈ 1, need stable log(1-CDF)
272-
("left", -40.0), # Far left tail: CDF ≈ 0, need stable log(CDF)
271+
("right", 100.0), # Far right tail: CDF ≈ 1, need stable log(1-CDF)
272+
("left", -100.0), # Far left tail: CDF ≈ 0, need stable log(CDF)
273273
],
274274
)
275275
def test_censored_logprob_numerical_stability(censoring_side, bound_value):
276-
"""Test that censored distributions use numerically stable log-probability computations.
276+
"""Test numerical stability of pm.Censored at extreme tail values.
277277
278-
For right-censoring at the upper bound, log(1 - CDF) is computed. When CDF ≈ 1
279-
(far right tail), this requires a stable logccdf implementation.
278+
What: Verifies that the log-probability of a censored Normal distribution
279+
is computed correctly when the censoring bound is far in the tail
280+
(100 standard deviations from the mean).
280281
281-
For left-censoring at the lower bound, log(CDF) is computed. When CDF ≈ 0
282-
(far left tail), this requires a stable logcdf implementation.
282+
Why: Censored distributions require computing:
283+
- Right-censored at upper bound: log(P(X > upper)) = log(1 - CDF(upper)) = logccdf
284+
- Left-censored at lower bound: log(P(X < lower)) = log(CDF(lower)) = logcdf
283285
284-
This test uses pm.Censored which is the high-level API for censored distributions.
286+
At extreme tail values (100 sigma):
287+
- CDF(100) is indistinguishable from 1.0 in float64
288+
- CDF(-100) is indistinguishable from 0.0 in float64
289+
290+
Naive computation would give:
291+
- Right: log(1 - 1) = log(0) = -inf ✗
292+
- Left: log(0) = -inf ✗
293+
294+
With stable logccdf/logcdf:
295+
- Right: ≈ -5005.5 ✓
296+
- Left: ≈ -5005.5 ✓
297+
298+
How:
299+
1. Creates pm.Censored with Normal(0, 1) base distribution
300+
2. Sets censoring bound at ±100 (100 standard deviations)
301+
3. Evaluates logp at the bound value
302+
4. Compares against scipy.stats.norm.logsf (right) or logcdf (left)
303+
5. Verifies result is finite and matches reference within tolerance
304+
305+
Using 100 sigma future-proofs against any improvements in naive methods.
306+
This is the primary integration test for the logccdf feature.
285307
"""
286308
ref_scipy = st.norm(0, 1)
287309

288310
with pm.Model() as model:
289311
normal_dist = pm.Normal.dist(mu=0.0, sigma=1.0)
290312
if censoring_side == "right":
313+
# Right-censored: values > upper are censored to upper
314+
# logp(y=upper) = log(P(X >= upper)) = logsf(upper)
291315
pm.Censored("y", normal_dist, lower=None, upper=bound_value)
292-
expected_logp = ref_scipy.logsf(bound_value) # log(1 - CDF)
293-
else: # left
316+
expected_logp = ref_scipy.logsf(bound_value)
317+
else:
318+
# Left-censored: values < lower are censored to lower
319+
# logp(y=lower) = log(P(X <= lower)) = logcdf(lower)
294320
pm.Censored("y", normal_dist, lower=bound_value, upper=None)
295-
expected_logp = ref_scipy.logcdf(bound_value) # log(CDF)
321+
expected_logp = ref_scipy.logcdf(bound_value)
296322

297-
# Compile the logp function
298323
logp_fn = model.compile_logp()
299-
300-
# Evaluate at the bound - this is where the log survival/cdf function is used
301324
logp_at_bound = logp_fn({"y": bound_value})
302325

303-
# This should be finite and correct, not -inf
326+
# Must be finite (not -inf from naive computation)
304327
assert np.isfinite(logp_at_bound), (
305328
f"logp at {censoring_side} bound should be finite, got {logp_at_bound}"
306329
)
330+
# Must match scipy reference (≈ -5005.5 for ±100 sigma)
307331
assert np.isclose(logp_at_bound, expected_logp, rtol=1e-6), (
308332
f"logp at {censoring_side} bound: got {logp_at_bound}, expected {expected_logp}"
309333
)

0 commit comments

Comments
 (0)