@@ -332,19 +332,13 @@ cdef float64_t calc_var(
332332 int ddof,
333333 float64_t nobs,
334334 float64_t ssqdm_x,
335- int64_t num_consecutive_same_value
336335) noexcept nogil:
337336 cdef:
338337 float64_t result
339338
340339 # Variance is unchanged if no observation is added or removed
341340 if (nobs >= minp) and (nobs > ddof):
342-
343- # pathological case & repeatedly same values case
344- if nobs == 1 or num_consecutive_same_value >= nobs:
345- result = 0
346- else:
347- result = ssqdm_x / (nobs - <float64_t>ddof)
341+ result = ssqdm_x / (nobs - <float64_t>ddof)
348342 else:
349343 result = NaN
350344
@@ -357,27 +351,19 @@ cdef void add_var(
357351 float64_t *mean_x,
358352 float64_t *ssqdm_x,
359353 float64_t *compensation,
360- int64_t *num_consecutive_same_value,
361- float64_t *prev_value,
354+ bint *numerically_unstable,
362355) noexcept nogil:
363356 """ add a value from the var calc """
364357 cdef:
365358 float64_t delta, prev_mean, y, t
359+ float64_t prev_m2 = ssqdm_x[0]
366360
367361 # GH#21813, if msvc 2017 bug is resolved, we should be OK with != instead of `isnan`
368362 if val != val:
369363 return
370364
371365 nobs[0] = nobs[0] + 1
372366
373- # GH#42064, record num of same values to remove floating point artifacts
374- if val == prev_value[0]:
375- num_consecutive_same_value[0] += 1
376- else:
377- # reset to 1 (include current value itself)
378- num_consecutive_same_value[0] = 1
379- prev_value[0] = val
380-
381367 # Welford's method for the online variance-calculation
382368 # using Kahan summation
383369 # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
@@ -392,17 +378,23 @@ cdef void add_var(
392378 mean_x[0] = 0
393379 ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0])
394380
381+ if prev_m2 * InvCondTol > ssqdm_x[0]:
382+ # possible catastrophic cancellation
383+ numerically_unstable[0] = True
384+
395385
396386cdef void remove_var(
397387 float64_t val,
398388 float64_t *nobs,
399389 float64_t *mean_x,
400390 float64_t *ssqdm_x,
401- float64_t *compensation
391+ float64_t *compensation,
392+ bint *numerically_unstable,
402393) noexcept nogil:
403394 """ remove a value from the var calc """
404395 cdef:
405396 float64_t delta, prev_mean, y, t
397+ float64_t prev_m2 = ssqdm_x[0]
406398 if val == val:
407399 nobs[0] = nobs[0] - 1
408400 if nobs[0]:
@@ -416,9 +408,14 @@ cdef void remove_var(
416408 delta = t
417409 mean_x[0] = mean_x[0] - delta / nobs[0]
418410 ssqdm_x[0] = ssqdm_x[0] - (val - prev_mean) * (val - mean_x[0])
411+
412+ if prev_m2 * InvCondTol > ssqdm_x[0]:
413+ # possible catastrophic cancellation
414+ numerically_unstable[0] = True
419415 else:
420416 mean_x[0] = 0
421417 ssqdm_x[0] = 0
418+ numerically_unstable[0] = False
422419
423420
424421def roll_var(const float64_t[:] values, ndarray[int64_t] start,
@@ -428,11 +425,12 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
428425 """
429426 cdef:
430427 float64_t mean_x, ssqdm_x, nobs, compensation_add,
431- float64_t compensation_remove, prev_value
432- int64_t s, e, num_consecutive_same_value
428+ float64_t compensation_remove
429+ int64_t s, e
433430 Py_ssize_t i, j, N = len(start)
434431 ndarray[float64_t] output
435432 bint is_monotonic_increasing_bounds
433+ bint requires_recompute, numerically_unstable
436434
437435 minp = max(minp, 1)
438436 is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
@@ -449,32 +447,35 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
449447
450448 # Over the first window, observations can only be added
451449 # never removed
452- if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
453-
454- prev_value = values[s]
455- num_consecutive_same_value = 0
456-
457- mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
458- for j in range(s, e):
459- add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
460- &num_consecutive_same_value, &prev_value)
461-
462- else:
450+ requires_recompute = (
451+ i == 0
452+ or not is_monotonic_increasing_bounds
453+ or s >= end[i - 1]
454+ )
463455
456+ if not requires_recompute:
464457 # After the first window, observations can both be added
465458 # and removed
466459
467460 # calculate deletes
468461 for j in range(start[i - 1], s):
469462 remove_var(values[j], &nobs, &mean_x, &ssqdm_x,
470- &compensation_remove)
463+ &compensation_remove, &numerically_unstable )
471464
472465 # calculate adds
473466 for j in range(end[i - 1], e):
474467 add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
475- &num_consecutive_same_value, &prev_value)
468+ &numerically_unstable)
469+
470+ if requires_recompute or numerically_unstable:
471+
472+ mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
473+ for j in range(s, e):
474+ add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
475+ &numerically_unstable)
476+ numerically_unstable = False
476477
477- output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value )
478+ output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
478479
479480 if not is_monotonic_increasing_bounds:
480481 nobs = 0.0
0 commit comments