Skip to content

Commit 907ec7f

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent c0543f7 commit 907ec7f

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

machine_learning/k_medoids.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,18 +23,21 @@
2323
from matplotlib import pyplot as plt
2424
from sklearn.metrics import pairwise_distances
2525

26+
2627
def get_initial_medoids(data, k, seed=None):
2728
rng = np.random.default_rng(seed)
2829
n = data.shape[0]
2930
indices = rng.choice(n, k, replace=False)
3031
medoids = data[indices, :]
3132
return medoids
3233

34+
3335
def assign_clusters(data, medoids):
34-
distances = pairwise_distances(data, medoids, metric='euclidean')
36+
distances = pairwise_distances(data, medoids, metric="euclidean")
3537
cluster_assignment = np.argmin(distances, axis=1)
3638
return cluster_assignment
3739

40+
3841
def revise_medoids(data, k, cluster_assignment):
3942
new_medoids = []
4043
for i in range(k):
@@ -47,6 +50,7 @@ def revise_medoids(data, k, cluster_assignment):
4750
new_medoids.append(members[medoid_index])
4851
return np.array(new_medoids)
4952

53+
5054
def compute_heterogeneity(data, k, medoids, cluster_assignment):
5155
heterogeneity = 0.0
5256
for i in range(k):
@@ -57,14 +61,18 @@ def compute_heterogeneity(data, k, medoids, cluster_assignment):
5761
heterogeneity += np.sum(distances**2)
5862
return heterogeneity
5963

64+
6065
def kmedoids(data, k, initial_medoids, maxiter=100, verbose=False):
6166
medoids = initial_medoids.copy()
6267
prev_assignment = None
6368
for itr in range(maxiter):
6469
cluster_assignment = assign_clusters(data, medoids)
6570
medoids = revise_medoids(data, k, cluster_assignment)
6671

67-
if prev_assignment is not None and (prev_assignment == cluster_assignment).all():
72+
if (
73+
prev_assignment is not None
74+
and (prev_assignment == cluster_assignment).all()
75+
):
6876
break
6977

7078
if verbose and prev_assignment is not None:
@@ -75,21 +83,24 @@ def kmedoids(data, k, initial_medoids, maxiter=100, verbose=False):
7583

7684
return medoids, cluster_assignment
7785

86+
7887
# Optional plotting
7988
def plot_clusters(data, medoids, cluster_assignment):
80-
ax = plt.axes(projection='3d')
81-
ax.scatter(data[:,0], data[:,1], data[:,2], c=cluster_assignment, cmap='viridis')
82-
ax.scatter(medoids[:,0], medoids[:,1], medoids[:,2], c='red', s=100, marker='x')
89+
ax = plt.axes(projection="3d")
90+
ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=cluster_assignment, cmap="viridis")
91+
ax.scatter(medoids[:, 0], medoids[:, 1], medoids[:, 2], c="red", s=100, marker="x")
8392
ax.set_xlabel("X")
8493
ax.set_ylabel("Y")
8594
ax.set_zlabel("Z")
8695
ax.set_title("3D K-Medoids Clustering")
8796
plt.show()
8897

98+
8999
# Optional test
90100
if __name__ == "__main__":
91101
from sklearn import datasets
92-
X = datasets.load_iris()['data']
102+
103+
X = datasets.load_iris()["data"]
93104
k = 3
94105
medoids = get_initial_medoids(X, k, seed=0)
95106
medoids, clusters = kmedoids(X, k, medoids, maxiter=50, verbose=True)

machine_learning/k_nearest_neighbours.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,9 @@ def _calculate_distance(self, a: np.ndarray[float], b: np.ndarray[float]) -> flo
5454
elif self.distance_metric == "minkowski":
5555
return float(np.sum(np.abs(a - b) ** self.p) ** (1 / self.p))
5656
else:
57-
raise ValueError("Invalid distance metric. Choose 'euclidean', 'manhattan', or 'minkowski'.")
57+
raise ValueError(
58+
"Invalid distance metric. Choose 'euclidean', 'manhattan', or 'minkowski'."
59+
)
5860

5961
def classify(self, pred_point: np.ndarray[float], k: int = 5) -> str:
6062
"""
@@ -81,6 +83,7 @@ def classify(self, pred_point: np.ndarray[float], k: int = 5) -> str:
8183

8284
if __name__ == "__main__":
8385
import doctest
86+
8487
doctest.testmod()
8588

8689
iris = datasets.load_iris()
@@ -102,4 +105,4 @@ def classify(self, pred_point: np.ndarray[float], k: int = 5) -> str:
102105

103106
print("\nUsing Minkowski Distance (p=3):")
104107
classifier3 = KNN(X_train, y_train, iris_classes, distance_metric="minkowski", p=3)
105-
print(classifier3.classify(iris_point, k=3))
108+
print(classifier3.classify(iris_point, k=3))

0 commit comments

Comments
 (0)