K-Means Clustering Implementation Using Python
K-Means Clustering Implementation Using Python
In [1]: ## Initialisation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Toy data set: 19 two-dimensional observations, written as (x, y) records.
df = pd.DataFrame(
    [
        (12, 39), (20, 36), (28, 30), (18, 52), (29, 54),
        (33, 46), (24, 55), (45, 59), (45, 63), (52, 70),
        (51, 66), (52, 63), (55, 58), (53, 23), (55, 14),
        (61, 8), (64, 19), (69, 7), (72, 24),
    ],
    columns=['x', 'y'],
)

# Fix the global NumPy seed so the random starting centroids are reproducible.
np.random.seed(200)

# Number of clusters.
k = 3

# centroids[label] = [x, y]; labels run from 1 to k.
# Each coordinate is an integer drawn from [0, 80); for every label the
# x value is drawn first and the y value second.
centroids = {
    label: [np.random.randint(0, 80) for _coord in range(2)]
    for label in range(1, k + 1)
}
localhost:8888/nbconvert/html/Untitled9.ipynb?download=false 1/5
1/22/2021 Untitled9
# Run the assignment step against the randomly initialised centroids.
# NOTE(review): `assignment` is defined in a cell that is not visible here;
# later cells read df['closest'] and df['color'], so it presumably adds those
# columns -- confirm against its definition.
df = assignment(df, centroids)
# Preview the first rows of the resulting frame.
print(df.head())
localhost:8888/nbconvert/html/Untitled9.ipynb?download=false 2/5
1/22/2021 Untitled9
import copy
# Snapshot the current centroid positions. A deep copy is required because
# update() overwrites the coordinate lists in place, which would otherwise
# change this snapshot as well.
old_centroids = copy.deepcopy(centroids)
def update(k, data=None):
    """Move every centroid to the mean position of the points assigned to it.

    Parameters
    ----------
    k : dict
        Mapping of cluster label -> ``[x, y]`` centroid coordinates. Despite
        the name (kept for backward compatibility) this is the centroid
        mapping, not the number of clusters. It is modified in place.
    data : pandas.DataFrame, optional
        Frame with ``'x'``, ``'y'`` and ``'closest'`` columns, where
        ``'closest'`` holds the label of the centroid each row is assigned to.
        Defaults to the module-level ``df`` so existing ``update(centroids)``
        calls keep working.

    Returns
    -------
    dict
        The same mapping object that was passed in as ``k``.
    """
    if data is None:
        # Fall back to the notebook-level frame, as the original version did.
        data = df
    # Operate on the mapping that was passed in rather than on a global, so
    # the function updates whatever the caller hands it.
    for i in k.keys():
        members = data.loc[data['closest'] == i, ['x', 'y']]
        if members.empty:
            # A cluster with no members has no mean; keep its previous
            # position instead of overwriting it with NaN.
            continue
        k[i][0] = np.mean(members['x'])
        k[i][1] = np.mean(members['y'])
    return k
# Update step: recompute each centroid from the points currently assigned to
# it. update() returns the mapping it was given, so the name is simply rebound.
centroids = update(centroids)
localhost:8888/nbconvert/html/Untitled9.ipynb?download=false 3/5
1/22/2021 Untitled9
# Re-run the assignment step now that the centroids have moved.
df = assignment(df, centroids)

# Plot results: observations in their cluster colour (semi-transparent, black
# edge) with each centroid drawn on top in the colour mapped to its label.
fig = plt.figure(figsize=(5, 5))
plt.scatter(df['x'], df['y'], color=df['color'], alpha=0.5, edgecolor='k')
for label, (centre_x, centre_y) in centroids.items():
    plt.scatter(centre_x, centre_y, color=colmap[label])
# Fixed axis limits so successive plots are directly comparable.
plt.xlim(0, 80)
plt.ylim(0, 80)
plt.show()
localhost:8888/nbconvert/html/Untitled9.ipynb?download=false 4/5
1/22/2021 Untitled9
In [6]: # Continue until the cluster assignments no longer change
# Alternate the update and assignment steps until a full pass leaves every
# row's 'closest' label unchanged.
converged = False
while not converged:
    # Independent copy of the labels as they stood before this pass.
    closest_centroids = df['closest'].copy(deep=True)
    centroids = update(centroids)
    df = assignment(df, centroids)
    converged = closest_centroids.equals(df['closest'])
In [ ]:
localhost:8888/nbconvert/html/Untitled9.ipynb?download=false 5/5