
Commit f9aca1e

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 86e4848 commit f9aca1e

File tree

1 file changed: +78 −25 lines


neural_network/real_time_encoder_transformer.py

Lines changed: 78 additions & 25 deletions
@@ -64,13 +64,20 @@ def forward(self, time_steps: np.ndarray) -> np.ndarray:
 # -------------------------------
 class PositionwiseFeedForward:
     def __init__(
-        self, d_model: int, hidden: int, drop_prob: float = 0.0,
-        seed: Optional[int] = None
+        self,
+        d_model: int,
+        hidden: int,
+        drop_prob: float = 0.0,
+        seed: Optional[int] = None,
     ) -> None:
         self.rng = np.random.default_rng(seed)
-        self.w1 = self.rng.standard_normal((d_model, hidden)) * math.sqrt(2.0 / (d_model + hidden))
+        self.w1 = self.rng.standard_normal((d_model, hidden)) * math.sqrt(
+            2.0 / (d_model + hidden)
+        )
         self.b1 = np.zeros((hidden,))
-        self.w2 = self.rng.standard_normal((hidden, d_model)) * math.sqrt(2.0 / (hidden + d_model))
+        self.w2 = self.rng.standard_normal((hidden, d_model)) * math.sqrt(
+            2.0 / (hidden + d_model)
+        )
         self.b2 = np.zeros((d_model,))

     def forward(self, input_tensor: np.ndarray) -> np.ndarray:
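Note: the expressions wrapped above implement Glorot/Xavier-style initialisation, scaling standard-normal weights by sqrt(2 / (fan_in + fan_out)). A minimal standalone sketch of that scaling, with illustrative dimensions that are not taken from the file:

import math
import numpy as np

# Illustrative dimensions only; not values from the diff.
d_model, hidden = 64, 128
rng = np.random.default_rng(0)

# Glorot/Xavier scaling: standard-normal draws scaled by sqrt(2 / (fan_in + fan_out)),
# which is what the wrapped expressions in PositionwiseFeedForward compute.
w1 = rng.standard_normal((d_model, hidden)) * math.sqrt(2.0 / (d_model + hidden))
print(w1.std())  # roughly sqrt(2 / (64 + 128)) ≈ 0.102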
@@ -96,8 +103,11 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray:
 # -------------------------------
 class ScaledDotProductAttention:
     def forward(
-        self, query: np.ndarray, key: np.ndarray, value: np.ndarray,
-        mask: np.ndarray | None = None
+        self,
+        query: np.ndarray,
+        key: np.ndarray,
+        value: np.ndarray,
+        mask: np.ndarray | None = None,
     ) -> tuple[np.ndarray, np.ndarray]:
         """
         Compute scaled dot-product attention.
@@ -134,31 +144,46 @@ def __init__(self, d_model: int, n_head: int, seed: Optional[int] = None) -> None:
         self.n_head = n_head
         self.d_k = d_model // n_head

-        self.w_q = self.rng.standard_normal((d_model, d_model)) * math.sqrt(2.0 / (d_model + d_model))
+        self.w_q = self.rng.standard_normal((d_model, d_model)) * math.sqrt(
+            2.0 / (d_model + d_model)
+        )
         self.b_q = np.zeros((d_model,))
-        self.w_k = self.rng.standard_normal((d_model, d_model)) * math.sqrt(2.0 / (d_model + d_model))
+        self.w_k = self.rng.standard_normal((d_model, d_model)) * math.sqrt(
+            2.0 / (d_model + d_model)
+        )
         self.b_k = np.zeros((d_model,))
-        self.w_v = self.rng.standard_normal((d_model, d_model)) * math.sqrt(2.0 / (d_model + d_model))
+        self.w_v = self.rng.standard_normal((d_model, d_model)) * math.sqrt(
+            2.0 / (d_model + d_model)
+        )
         self.b_v = np.zeros((d_model,))
-        self.w_out = self.rng.standard_normal((d_model, d_model)) * math.sqrt(2.0 / (d_model + d_model))
+        self.w_out = self.rng.standard_normal((d_model, d_model)) * math.sqrt(
+            2.0 / (d_model + d_model)
+        )
         self.b_out = np.zeros((d_model,))

         self.attn = ScaledDotProductAttention()

-    def _linear(self, x: np.ndarray, weight: np.ndarray, bias: np.ndarray) -> np.ndarray:
+    def _linear(
+        self, x: np.ndarray, weight: np.ndarray, bias: np.ndarray
+    ) -> np.ndarray:
         return np.tensordot(x, weight, axes=([2], [0])) + bias

     def _split_heads(self, x: np.ndarray) -> np.ndarray:
         batch_size, seq_len, _ = x.shape
-        return x.reshape(batch_size, seq_len, self.n_head, self.d_k).transpose(0, 2, 1, 3)
+        return x.reshape(batch_size, seq_len, self.n_head, self.d_k).transpose(
+            0, 2, 1, 3
+        )

     def _concat_heads(self, x: np.ndarray) -> np.ndarray:
         batch_size, n_head, seq_len, d_k = x.shape
         return x.transpose(0, 2, 1, 3).reshape(batch_size, seq_len, n_head * d_k)

     def forward(
-        self, query: np.ndarray, key: np.ndarray, value: np.ndarray,
-        mask: np.ndarray | None = None
+        self,
+        query: np.ndarray,
+        key: np.ndarray,
+        value: np.ndarray,
+        mask: np.ndarray | None = None,
     ) -> tuple[np.ndarray, np.ndarray]:
         """
         Parameters
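For reference, the `_split_heads` / `_concat_heads` pair reformatted above is a pure reshape-and-transpose round trip between (batch, seq_len, d_model) and (batch, n_head, seq_len, d_k). A self-contained sketch with illustrative shapes, not code from the file:

import numpy as np

batch_size, seq_len, n_head, d_k = 2, 5, 4, 8  # illustrative shapes
d_model = n_head * d_k

x = np.random.default_rng(0).standard_normal((batch_size, seq_len, d_model))

# (batch, seq, d_model) -> (batch, n_head, seq, d_k), as in _split_heads
heads = x.reshape(batch_size, seq_len, n_head, d_k).transpose(0, 2, 1, 3)

# (batch, n_head, seq, d_k) -> (batch, seq, d_model), as in _concat_heads
merged = heads.transpose(0, 2, 1, 3).reshape(batch_size, seq_len, n_head * d_k)

assert np.array_equal(x, merged)  # the round trip is lossless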
@@ -184,6 +209,8 @@ def forward(
         concat = self._concat_heads(context)
         out = np.tensordot(concat, self.w_out, axes=([2], [0])) + self.b_out
         return out, attn_weights
+
+
 # -------------------------------
 # 🔹 LayerNorm
 # -------------------------------
@@ -215,13 +242,17 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray:
 # 🔹 TransformerEncoderLayer
 # -------------------------------
 class TransformerEncoderLayer:
-    def __init__(self, d_model: int, n_head: int, hidden_dim: int, seed: Optional[int] = None) -> None:
+    def __init__(
+        self, d_model: int, n_head: int, hidden_dim: int, seed: Optional[int] = None
+    ) -> None:
         self.self_attn = MultiHeadAttention(d_model, n_head, seed)
         self.ffn = PositionwiseFeedForward(d_model, hidden_dim, seed=seed)
         self.norm1 = LayerNorm(d_model)
         self.norm2 = LayerNorm(d_model)

-    def forward(self, input_tensor: np.ndarray, mask: np.ndarray | None = None) -> np.ndarray:
+    def forward(
+        self, input_tensor: np.ndarray, mask: np.ndarray | None = None
+    ) -> np.ndarray:
         """
         Parameters
         ----------
@@ -235,7 +266,9 @@ def forward(self, input_tensor: np.ndarray, mask: np.ndarray | None = None) -> np.ndarray:
         np.ndarray
             Shape (batch, seq_len, d_model)
         """
-        attn_out, _ = self.self_attn.forward(input_tensor, input_tensor, input_tensor, mask)
+        attn_out, _ = self.self_attn.forward(
+            input_tensor, input_tensor, input_tensor, mask
+        )
         x_norm1 = self.norm1.forward(input_tensor + attn_out)
         ffn_out = self.ffn.forward(x_norm1)
         x_norm2 = self.norm2.forward(x_norm1 + ffn_out)
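The call wrapped above feeds the layer's post-norm residual pattern, norm(x + sublayer(x)), applied once for self-attention and once for the feed-forward block. A minimal sketch of that data flow using stand-in sublayers rather than the file's classes:

import numpy as np

def layer_norm(x: np.ndarray, eps: float = 1e-5) -> np.ndarray:
    # Normalise over the last (feature) axis; no learned scale/shift in this sketch.
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(var + eps)

x = np.random.default_rng(0).standard_normal((2, 5, 16))  # (batch, seq, d_model)
attn_out = x * 0.5                 # stand-in for the self-attention output
x = layer_norm(x + attn_out)       # residual + norm, as in norm1
ffn_out = np.tanh(x)               # stand-in for the feed-forward output
x = layer_norm(x + ffn_out)        # residual + norm, as in norm2
print(x.shape)  # (2, 5, 16)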
@@ -246,10 +279,22 @@ def forward(self, input_tensor: np.ndarray, mask: np.ndarray | None = None) -> np.ndarray:
 # 🔹 TransformerEncoder (stack)
 # -------------------------------
 class TransformerEncoder:
-    def __init__(self, d_model: int, n_head: int, hidden_dim: int, num_layers: int, seed: Optional[int] = None) -> None:
-        self.layers = [TransformerEncoderLayer(d_model, n_head, hidden_dim, seed) for _ in range(num_layers)]
+    def __init__(
+        self,
+        d_model: int,
+        n_head: int,
+        hidden_dim: int,
+        num_layers: int,
+        seed: Optional[int] = None,
+    ) -> None:
+        self.layers = [
+            TransformerEncoderLayer(d_model, n_head, hidden_dim, seed)
+            for _ in range(num_layers)
+        ]

-    def forward(self, input_tensor: np.ndarray, mask: np.ndarray | None = None) -> np.ndarray:
+    def forward(
+        self, input_tensor: np.ndarray, mask: np.ndarray | None = None
+    ) -> np.ndarray:
         """
         Parameters
         ----------
@@ -278,7 +323,9 @@ def __init__(self, d_model: int, seed: Optional[int] = None) -> None:
         self.w = self.rng.standard_normal((d_model,)) * math.sqrt(2.0 / d_model)
         self.b = 0.0

-    def forward(self, input_tensor: np.ndarray, mask: np.ndarray | None = None) -> tuple[np.ndarray, np.ndarray]:
+    def forward(
+        self, input_tensor: np.ndarray, mask: np.ndarray | None = None
+    ) -> tuple[np.ndarray, np.ndarray]:
         """
         Parameters
         ----------
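The forward being re-wrapped here belongs to AttentionPooling, which (per the context above) holds a single score vector w of shape (d_model,) and a scalar bias b. A standalone sketch of the masked attention-pooling computation those shapes suggest; the file's exact implementation is not visible in this diff:

import numpy as np

def attention_pool(
    x: np.ndarray, w: np.ndarray, b: float, mask: np.ndarray | None = None
) -> tuple[np.ndarray, np.ndarray]:
    # x: (batch, seq_len, d_model), w: (d_model,), mask: (batch, seq_len) of 0/1
    scores = x @ w + b  # (batch, seq_len)
    if mask is not None:
        scores = np.where(mask.astype(bool), scores, -1e9)  # hide padded steps
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)  # softmax over time
    pooled = (weights[..., None] * x).sum(axis=1)  # (batch, d_model)
    return pooled, weights

rng = np.random.default_rng(0)
pooled, weights = attention_pool(rng.standard_normal((2, 5, 16)), rng.standard_normal((16,)), 0.0)
print(pooled.shape, weights.shape)  # (2, 16) (2, 5)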
@@ -315,27 +362,33 @@ def __init__(
         num_layers: int = 4,
         output_dim: int = 1,
         task_type: str = "regression",
-        seed: Optional[int] = None
+        seed: Optional[int] = None,
     ) -> None:
         self.rng = np.random.default_rng(seed)
         self.feature_dim = feature_dim
         self.d_model = d_model
         self.task_type = task_type

-        self.w_in = self.rng.standard_normal((feature_dim, d_model)) * math.sqrt(2.0 / (feature_dim + d_model))
+        self.w_in = self.rng.standard_normal((feature_dim, d_model)) * math.sqrt(
+            2.0 / (feature_dim + d_model)
+        )
         self.b_in = np.zeros((d_model,))

         self.time2vec = Time2Vec(d_model, seed)
         self.encoder = TransformerEncoder(d_model, n_head, hidden_dim, num_layers, seed)
         self.pooling = AttentionPooling(d_model, seed)

-        self.w_out = self.rng.standard_normal((d_model, output_dim)) * math.sqrt(2.0 / (d_model + output_dim))
+        self.w_out = self.rng.standard_normal((d_model, output_dim)) * math.sqrt(
+            2.0 / (d_model + output_dim)
+        )
         self.b_out = np.zeros((output_dim,))

     def _input_proj(self, features: np.ndarray) -> np.ndarray:
         return np.tensordot(features, self.w_in, axes=([2], [0])) + self.b_in

-    def forward(self, features: np.ndarray, mask: np.ndarray | None = None) -> tuple[np.ndarray, np.ndarray]:
+    def forward(
+        self, features: np.ndarray, mask: np.ndarray | None = None
+    ) -> tuple[np.ndarray, np.ndarray]:
         """
         Parameters
         ----------
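The `_input_proj` helper shown as context above projects features with np.tensordot over the last axis, which is equivalent to a batched matmul plus bias. A small self-contained check with illustrative shapes:

import numpy as np

rng = np.random.default_rng(0)
features = rng.standard_normal((2, 5, 3))  # (batch, seq_len, feature_dim)
w_in = rng.standard_normal((3, 16))        # (feature_dim, d_model)
b_in = np.zeros((16,))

# tensordot contracts the last axis of features with the first axis of w_in...
proj = np.tensordot(features, w_in, axes=([2], [0])) + b_in
# ...which matches the plain matmul formulation.
assert np.allclose(proj, features @ w_in + b_in)
print(proj.shape)  # (2, 5, 16)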

0 commit comments
