Skip to content

Commit 55b78ce

Browse files
authored
Eliminate duplicated calculations and unnecessary work for linear regression (GH-25922)
1 parent e852556 commit 55b78ce

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

Lib/statistics.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -952,11 +952,16 @@ def linear_regression(regressor, dependent_variable, /):
952952
raise StatisticsError('linear regression requires that both inputs have same number of data points')
953953
if n < 2:
954954
raise StatisticsError('linear regression requires at least two data points')
955+
x, y = regressor, dependent_variable
956+
xbar = fsum(x) / n
957+
ybar = fsum(y) / n
958+
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
959+
s2x = fsum((xi - xbar) ** 2.0 for xi in x)
955960
try:
956-
slope = covariance(regressor, dependent_variable) / variance(regressor)
961+
slope = sxy / s2x
957962
except ZeroDivisionError:
958963
raise StatisticsError('regressor is constant')
959-
intercept = fmean(dependent_variable) - slope * fmean(regressor)
964+
intercept = ybar - slope * xbar
960965
return LinearRegression(intercept=intercept, slope=slope)
961966

962967

0 commit comments

Comments
 (0)