Commit 63b074f

Fix ruff issues for CI compliance
- Replace all Greek alpha symbols (α) with 'alpha' in docstrings and comments
- Fix line length issues by breaking long type annotations
- Fix trailing whitespace issues
- Replace 'pass' with '...' in abstract base class method
- Maintain full functionality while improving code quality compliance
1 parent 494d87e commit 63b074f

File tree

6 files changed, +34 -30 lines changed


neural_network/optimizers/adagrad.py

Lines changed: 10 additions & 7 deletions
@@ -7,7 +7,7 @@
 
 The update rules are:
     G_t = G_{t-1} + g_t ⊙ g_t    (element-wise squared gradient accumulation)
-    θ_{t+1} = θ_t - (α / √(G_t + ε)) ⊙ g_t
+    θ_{t+1} = θ_t - (alpha / √(G_t + ε)) ⊙ g_t
 
 where G_t accumulates squared gradients, ε prevents division by zero,
 and ⊙ denotes element-wise multiplication.
@@ -31,12 +31,12 @@ class Adagrad(BaseOptimizer):
 
     Mathematical formulation:
         G_t = G_{t-1} + g_t ⊙ g_t
-        θ_{t+1} = θ_t - (α / √(G_t + ε)) ⊙ g_t
+        θ_{t+1} = θ_t - (alpha / √(G_t + ε)) ⊙ g_t
 
     Where:
         - θ_t: parameters at time step t
         - G_t: accumulated squared gradients up to time t
-        - α: learning rate
+        - alpha: learning rate
         - ε: small constant for numerical stability (typically 1e-8)
         - g_t: gradients at time step t
         - ⊙: element-wise multiplication
@@ -56,7 +56,7 @@ class Adagrad(BaseOptimizer):
     True
     >>> updated1[0] > 0.85  # Small gradient -> larger step
     True
-    >>> updated1[1] < 1.95  # Large gradient -> smaller step (but still close to 2.0)
+    >>> updated1[1] < 1.95  # Large gradient -> smaller step (close to 2.0)
     True
 
     >>> # Second update (gradients accumulate)
@@ -106,7 +106,7 @@ def update(
 
         Performs adaptive gradient update:
            G_t = G_{t-1} + g_t^2
-           θ_{t+1} = θ_t - (α / √(G_t + ε)) * g_t
+           θ_{t+1} = θ_t - (alpha / √(G_t + ε)) * g_t
 
         Args:
            parameters: Current parameter values
@@ -123,7 +123,10 @@ def _adagrad_update_recursive(
        parameters: float | list[float | list[float]],
        gradients: float | list[float | list[float]],
        accumulated_gradients: float | list[float | list[float]]
-    ) -> tuple[float | list[float | list[float]], float | list[float | list[float]]]:
+    ) -> tuple[
+        float | list[float | list[float]],
+        float | list[float | list[float]]
+    ]:
        # Handle scalar case
        if isinstance(parameters, (int, float)):
            if not isinstance(gradients, (int, float)):
@@ -137,7 +140,7 @@ def _adagrad_update_recursive(
        # Accumulate squared gradients: G = G + g^2
        new_acc_grads = accumulated_gradients + gradients * gradients
 
-       # Adaptive learning rate: α / √(G + ε)
+       # Adaptive learning rate: alpha / √(G + ε)
        adaptive_lr = self.learning_rate / math.sqrt(
            new_acc_grads + self.epsilon
        )
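
For reference, the accumulation rule in the docstring reduces to a few lines for a single float parameter. A minimal sketch (standalone illustration; adagrad_step is a hypothetical helper, not the repository's _adagrad_update_recursive, which also recurses over nested lists):

import math

def adagrad_step(
    param: float, grad: float, acc_grad: float,
    learning_rate: float = 0.01, epsilon: float = 1e-8,
) -> tuple[float, float]:
    """One Adagrad step for a scalar: G += g^2, then theta -= alpha / sqrt(G + eps) * g."""
    new_acc = acc_grad + grad * grad                           # G_t = G_{t-1} + g_t^2
    adaptive_lr = learning_rate / math.sqrt(new_acc + epsilon)
    return param - adaptive_lr * grad, new_acc

# Repeated updates with the same gradient take progressively smaller steps as G grows.
p, acc = 1.0, 0.0
p, acc = adagrad_step(p, 0.5, acc)   # acc = 0.25, step = 0.01
p, acc = adagrad_step(p, 0.5, acc)   # acc = 0.50, smaller step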

neural_network/optimizers/adam.py

Lines changed: 5 additions & 5 deletions
@@ -10,7 +10,7 @@
     v_t = β₂ * v_{t-1} + (1-β₂) * g_t²    # Second moment estimate
     m̂_t = m_t / (1 - β₁^t)                # Bias-corrected first moment
     v̂_t = v_t / (1 - β₂^t)                # Bias-corrected second moment
-    θ_{t+1} = θ_t - α * m̂_t / (√v̂_t + ε)  # Parameter update
+    θ_{t+1} = θ_t - alpha * m̂_t / (√v̂_t + ε)  # Parameter update
 """
 
 from __future__ import annotations
@@ -34,13 +34,13 @@ class Adam(BaseOptimizer):
        v_t = β₂ * v_{t-1} + (1-β₂) * g_t²
        m̂_t = m_t / (1 - β₁^t)
        v̂_t = v_t / (1 - β₂^t)
-       θ_{t+1} = θ_t - α * m̂_t / (√v̂_t + ε)
+       θ_{t+1} = θ_t - alpha * m̂_t / (√v̂_t + ε)
 
     Where:
        - θ_t: parameters at time step t
        - m_t, v_t: first and second moment estimates
        - m̂_t, v̂_t: bias-corrected moment estimates
-       - α: learning rate (default: 0.001)
+       - alpha: learning rate (default: 0.001)
        - β₁, β₂: exponential decay rates (default: 0.9, 0.999)
        - ε: small constant for numerical stability (default: 1e-8)
        - t: time step
@@ -139,7 +139,7 @@ def update(
            v_t = β₂ * v_{t-1} + (1-β₂) * g_t²
            m̂_t = m_t / (1 - β₁^t)
            v̂_t = v_t / (1 - β₂^t)
-           θ_{t+1} = θ_t - α * m̂_t / (√v̂_t + ε)
+           θ_{t+1} = θ_t - alpha * m̂_t / (√v̂_t + ε)
 
         Args:
            parameters: Current parameter values
@@ -188,7 +188,7 @@ def _adam_update_recursive(
        m_hat = new_first_moment / bias_correction1
        v_hat = new_second_moment / bias_correction2
 
-       # Parameter update: θ = θ - α * m̂ / (√v̂ + ε)
+       # Parameter update: θ = θ - alpha * m̂ / (√v̂ + ε)
        new_param = parameters - self.learning_rate * m_hat / (
            math.sqrt(v_hat) + self.epsilon
        )
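
The four-line rule above is easy to verify in isolation. A scalar sketch under the same defaults (illustrative only; adam_step is a hypothetical helper, not the repository's _adam_update_recursive):

import math

def adam_step(
    param: float, grad: float, m: float, v: float, t: int,
    learning_rate: float = 0.001, beta1: float = 0.9,
    beta2: float = 0.999, epsilon: float = 1e-8,
) -> tuple[float, float, float]:
    """One Adam step for a scalar parameter; t is the 1-based step count."""
    m = beta1 * m + (1 - beta1) * grad           # first moment estimate
    v = beta2 * v + (1 - beta2) * grad * grad    # second moment estimate
    m_hat = m / (1 - beta1**t)                   # bias-corrected first moment
    v_hat = v / (1 - beta2**t)                   # bias-corrected second moment
    return param - learning_rate * m_hat / (math.sqrt(v_hat) + epsilon), m, v

p, m, v = 1.0, 0.0, 0.0
for t in range(1, 4):                            # three updates with a constant gradient
    p, m, v = adam_step(p, 0.1, m, v, t)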

neural_network/optimizers/base_optimizer.py

Lines changed: 1 addition & 0 deletions
@@ -77,6 +77,7 @@ def reset(self) -> None:
         or when you want to clear any accumulated state (like momentum).
         Default implementation does nothing, but optimizers with state should override.
         """
+        ...
 
     def __str__(self) -> str:
         """String representation of the optimizer."""

neural_network/optimizers/momentum_sgd.py

Lines changed: 6 additions & 6 deletions
@@ -7,10 +7,10 @@
 
 The update rules are:
     v_t = β * v_{t-1} + (1-β) * g_t
-    θ_t = θ_{t-1} - α * v_t
+    θ_t = θ_{t-1} - alpha * v_t
 
 where v_t is the velocity (momentum), β is the momentum coefficient,
-α is the learning rate, and g_t is the gradient.
+alpha is the learning rate, and g_t is the gradient.
 """
 
 from __future__ import annotations
@@ -28,12 +28,12 @@ class MomentumSGD(BaseOptimizer):
 
     Mathematical formulation:
        v_t = β * v_{t-1} + (1-β) * g_t
-       θ_{t+1} = θ_t - α * v_t
+       θ_{t+1} = θ_t - alpha * v_t
 
     Where:
        - θ_t: parameters at time step t
        - v_t: velocity (momentum) at time step t
-       - α: learning rate
+       - alpha: learning rate
        - β: momentum coefficient (typically 0.9)
        - g_t: gradients at time step t
 
@@ -101,7 +101,7 @@ def update(
 
         Performs momentum update:
            v_t = β * v_{t-1} + (1-β) * g_t
-           θ_t = θ_{t-1} - α * v_t
+           θ_t = θ_{t-1} - alpha * v_t
 
         Args:
            parameters: Current parameter values
@@ -131,7 +131,7 @@ def _check_shapes_and_get_velocity(
 
        # Update velocity: v = β * v + (1-β) * g
        new_velocity = self.momentum * velocity_values + (1 - self.momentum) * gradients
-       # Update parameter: θ = θ - α * v
+       # Update parameter: θ = θ - alpha * v
        new_param = parameters - self.learning_rate * new_velocity
 
        return new_param, new_velocity
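
In scalar form the two update lines come down to this (a standalone sketch; momentum_sgd_step is a hypothetical helper, not the repository's _check_shapes_and_get_velocity, which also validates shapes):

def momentum_sgd_step(
    param: float, grad: float, velocity: float,
    learning_rate: float = 0.01, momentum: float = 0.9,
) -> tuple[float, float]:
    """One momentum-SGD step: v = beta*v + (1-beta)*g, then theta -= alpha*v."""
    velocity = momentum * velocity + (1 - momentum) * grad
    return param - learning_rate * velocity, velocity

p, v = 1.0, 0.0
p, v = momentum_sgd_step(p, 0.5, v)   # v = 0.05, p = 0.9995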

neural_network/optimizers/nag.py

Lines changed: 7 additions & 7 deletions
@@ -6,14 +6,14 @@
 overshooting and often leads to better convergence.
 
 The update rules are:
-    θ_lookahead = θ_t - α * β * v_{t-1}
+    θ_lookahead = θ_t - alpha * β * v_{t-1}
     g_t = ∇f(θ_lookahead)    # Gradient at lookahead position
     v_t = β * v_{t-1} + (1-β) * g_t
-    θ_{t+1} = θ_t - α * v_t
+    θ_{t+1} = θ_t - alpha * v_t
 
 However, a more efficient formulation equivalent to the above is:
     v_t = β * v_{t-1} + (1-β) * g_t
-    θ_{t+1} = θ_t - α * (β * v_t + (1-β) * g_t)
+    θ_{t+1} = θ_t - alpha * (β * v_t + (1-β) * g_t)
 """
 
 from __future__ import annotations
@@ -31,12 +31,12 @@ class NAG(BaseOptimizer):
 
     Mathematical formulation (efficient version):
        v_t = β * v_{t-1} + (1-β) * g_t
-       θ_{t+1} = θ_t - α * (β * v_t + (1-β) * g_t)
+       θ_{t+1} = θ_t - alpha * (β * v_t + (1-β) * g_t)
 
     Where:
        - θ_t: parameters at time step t
        - v_t: velocity (momentum) at time step t
-       - α: learning rate
+       - alpha: learning rate
        - β: momentum coefficient (typically 0.9)
        - g_t: gradients at time step t
 
@@ -103,7 +103,7 @@ def update(
 
         Performs Nesterov update using efficient formulation:
            v_t = β * v_{t-1} + (1-β) * g_t
-           θ_{t+1} = θ_t - α * (β * v_t + (1-β) * g_t)
+           θ_{t+1} = θ_t - alpha * (β * v_t + (1-β) * g_t)
 
         Args:
            parameters: Current parameter values
@@ -134,7 +134,7 @@ def _nag_update_recursive(
        # Update velocity: v = β * v + (1-β) * g
        new_velocity = self.momentum * velocity + (1 - self.momentum) * gradients
 
-       # NAG update: θ = θ - α * (β * v + (1-β) * g)
+       # NAG update: θ = θ - alpha * (β * v + (1-β) * g)
        nesterov_update = (
            self.momentum * new_velocity + (1 - self.momentum) * gradients
        )
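
The efficient formulation avoids evaluating the gradient at a separate lookahead point. In scalar form (a standalone sketch; nag_step is a hypothetical helper, not the repository's _nag_update_recursive):

def nag_step(
    param: float, grad: float, velocity: float,
    learning_rate: float = 0.01, momentum: float = 0.9,
) -> tuple[float, float]:
    """One NAG step using the efficient formulation from the docstring."""
    velocity = momentum * velocity + (1 - momentum) * grad
    nesterov_update = momentum * velocity + (1 - momentum) * grad
    return param - learning_rate * nesterov_update, velocity

p, v = 1.0, 0.0
p, v = nag_step(p, 0.5, v)   # v = 0.05, p = 1.0 - 0.01 * (0.9*0.05 + 0.1*0.5)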

neural_network/optimizers/sgd.py

Lines changed: 5 additions & 5 deletions
@@ -4,8 +4,8 @@
 SGD is the most basic optimization algorithm for neural networks. It updates
 parameters by moving in the direction opposite to the gradient of the loss function.
 
-The update rule is: θ = θ - α * ∇θ
-where θ are the parameters, α is the learning rate, and ∇θ is the gradient.
+The update rule is: θ = θ - alpha * ∇θ
+where θ are the parameters, alpha is the learning rate, and ∇θ is the gradient.
 """
 
 from __future__ import annotations
@@ -22,11 +22,11 @@ class SGD(BaseOptimizer):
     the learning rate.
 
     Mathematical formulation:
-       θ_{t+1} = θ_t - α * g_t
+       θ_{t+1} = θ_t - alpha * g_t
 
     Where:
        - θ_t: parameters at time step t
-       - α: learning rate
+       - alpha: learning rate
        - g_t: gradients at time step t
 
     Parameters:
@@ -83,7 +83,7 @@ def update(
         """
         Update parameters using SGD rule.
 
-        Performs the classic SGD update: θ = θ - α * ∇θ
+        Performs the classic SGD update: θ = θ - alpha * ∇θ
 
         Args:
            parameters: Current parameter values
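
The rule is the simplest of the set. A scalar sketch (illustrative only; sgd_step is a hypothetical helper, not the repository's update, which also handles nested lists):

def sgd_step(param: float, grad: float, learning_rate: float = 0.01) -> float:
    """Classic SGD: move against the gradient, scaled by the learning rate."""
    return param - learning_rate * grad

p = sgd_step(1.0, 0.5)   # 1.0 - 0.01 * 0.5 = 0.995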
