66"""
77
88import doctest
9-
109import numpy as np
1110from numpy import ndarray
1211from sklearn .datasets import load_iris
@@ -42,19 +41,23 @@ def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> nda
4241
4342 >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
4443 >>> probabilities = compute_pairwise_affinities(x)
45- >>> float( round(probabilities[0, 1], 3) )
44+ >>> round(float( probabilities[0, 1]) , 3)
4645 0.25
4746 """
4847 n_samples = data_matrix .shape [0 ]
48+ # Compute pairwise squared Euclidean distances
4949 squared_sum = np .sum (np .square (data_matrix ), axis = 1 )
50- squared_distance = np . add (
51- np .add ( - 2 * np .dot (data_matrix , data_matrix .T ), squared_sum ). T , squared_sum
50+ squared_distance = (
51+ squared_sum [:, np .newaxis ] + squared_sum [ np . newaxis , :] - 2 * np .dot (data_matrix , data_matrix .T )
5252 )
5353
54+ # Gaussian kernel
5455 affinity_matrix = np .exp (- squared_distance / (2 * sigma ** 2 ))
5556 np .fill_diagonal (affinity_matrix , 0 )
5657
58+ # Normalize so the affinities form a probability distribution
5759 affinity_matrix /= np .sum (affinity_matrix )
60+ # Symmetrize
5861 return (affinity_matrix + affinity_matrix .T ) / (2 * n_samples )
5962
6063
@@ -74,13 +77,10 @@ def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndar
7477 (2, 2)
7578 """
7679 squared_sum = np .sum (np .square (embedding_matrix ), axis = 1 )
77- numerator_matrix = 1 / (
78- 1
79- + np .add (
80- np .add (- 2 * np .dot (embedding_matrix , embedding_matrix .T ), squared_sum ).T ,
81- squared_sum ,
82- )
80+ squared_distance = (
81+ squared_sum [:, np .newaxis ] + squared_sum [np .newaxis , :] - 2 * np .dot (embedding_matrix , embedding_matrix .T )
8382 )
83+ numerator_matrix = 1 / (1 + squared_distance )
8484 np .fill_diagonal (numerator_matrix , 0 )
8585
8686 q_matrix = numerator_matrix / np .sum (numerator_matrix )
@@ -129,6 +129,7 @@ def apply_tsne(
129129
130130 affinity_diff = high_dim_affinities - low_dim_affinities
131131
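132+ # Gradient term of the Kullback-Leibler divergence cost function (NOTE(review): the dot(W, Y) term is positive here, so this looks like the negated gradient - confirm the sign used in the update step)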
132133 gradient = 4 * (
133134 np .dot ((affinity_diff * numerator_matrix ), embedding )
134135 - np .multiply (
@@ -137,7 +138,7 @@ def apply_tsne(
137138 )
138139 )
139140
140- embedding_increment = momentum * embedding_increment - learning_rate * gradient
141+ embedding_increment = momentum * embedding_increment + learning_rate * gradient
141142 embedding += embedding_increment
142143
143144 if iteration == int (n_iter / 4 ):
@@ -161,16 +162,7 @@ def main() -> None:
161162 raise TypeError ("t-SNE embedding must be an ndarray" )
162163
163164 print ("t-SNE embedding (first 5 points):" )
164- print (embedding [:5 ])
165-
166- # Optional visualization (Ruff/mypy compliant)
167-
168- # import matplotlib.pyplot as plt
169- # plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis")
170- # plt.title("t-SNE Visualization of the Iris Dataset")
171- # plt.xlabel("Dimension 1")
172- # plt.ylabel("Dimension 2")
173- # plt.show()
165+ print (np .round (embedding [:5 ], 4 ))
174166
175167
176168if __name__ == "__main__" :
0 commit comments