PMA Experiment 2
PMA Experiment 2
Initialization
# Initialization
# Module Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.cluster import KMeans
# Style Definitions
# Switch matplotlib to the 'Solarize_Light2' style sheet; this changes the
# global rcParams, so it affects the figures created after this point.
plt.style.use('Solarize_Light2')
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
# Dimensions of the dataset as a (rows, columns) tuple.
df.shape
(200, 5)
# Summary statistics of the numeric columns (displayed only when this is the
# last expression of a notebook cell).
df.describe()

# Distribution of each numeric feature, drawn side by side in one figure.
plt.figure(1, figsize=(16, 4))
feature_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']
for panel, column in enumerate(feature_columns, start=1):
    plt.subplot(1, len(feature_columns), panel)
    # sns.distplot is deprecated; histplot with kde=True and stat='density'
    # (plus kde_kws cut=3) is the documented replacement that reproduces the
    # old histogram + density-curve output. Requires seaborn >= 0.11.
    sns.histplot(df[column], bins=32, kde=True, stat='density',
                 kde_kws={'cut': 3})
    plt.title(f'Histogram of {column}')
# The spacing is the same for every panel, so set it once instead of per loop
# iteration.
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
# Assignment Stage
# Partition the samples in X1 into 4 clusters. k-means++ seeding is used and
# random_state is fixed so that repeated runs give the same clustering.
# NOTE(review): X1 is defined outside this section; judging by the plot
# title below it holds the Age and Spending Score columns - confirm.
model = KMeans(
    n_clusters=4,
    init='k-means++',
    max_iter=500,
    random_state=42,
)
# fit_predict fits the model and returns the label of every sample, so one
# call replaces the former fit() followed by a second, redundant
# fit_predict() on the same data with the same seed.
labels = model.fit_predict(X1)
centroids = model.cluster_centers_
# Same labels under the name the plotting code reads.
y_kmeans = labels
# Scatter plot of the samples in X1, one colour per cluster label.
plt.figure(figsize=(20, 10))
cluster_colors = ['red', 'blue', 'green', 'cyan', 'magenta']
# Draw only the labels that actually occur in y_kmeans. The previous
# hard-coded five scatter calls produced an empty "Cluster 5" legend entry
# whenever fewer than five clusters were fitted.
for cluster_id in np.unique(y_kmeans):
    members = y_kmeans == cluster_id
    plt.scatter(
        X1[members, 0],
        X1[members, 1],
        s=100,
        # Wrap around the palette if there are more clusters than colours.
        c=cluster_colors[cluster_id % len(cluster_colors)],
        label=f'Cluster {cluster_id + 1}',
    )
plt.title('Clusters of Customers - Age X Spending Score')
plt.xlabel('Age')
plt.ylabel('Spending Score')
plt.legend()
plt.show()
Second Clustering
By Annual Income and Spending Score
# Scatter plot of the samples in X2, one colour per cluster label.
# NOTE(review): the code that fits a model on X2 and refreshes y_kmeans is
# not visible in this section - confirm y_kmeans holds the labels for X2 and
# not the ones from an earlier clustering.
plt.figure(figsize=(20, 10))
cluster_colors = ['red', 'blue', 'green', 'cyan', 'magenta']
# Draw only the labels that actually occur in y_kmeans, so the legend never
# lists a cluster that has no points.
for cluster_id in np.unique(y_kmeans):
    members = y_kmeans == cluster_id
    plt.scatter(
        X2[members, 0],
        X2[members, 1],
        s=100,
        # Wrap around the palette if there are more clusters than colours.
        c=cluster_colors[cluster_id % len(cluster_colors)],
        label=f'Cluster {cluster_id + 1}',
    )
plt.title('Clusters of Customers - Annual Income (k$) X Spending Score')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score')
plt.legend()
plt.show()
# Assignment Stage
# Store each row's cluster label in a 'cluster' column of df.
# NOTE(review): `labels` is whatever was last assigned upstream - confirm it
# comes from the clustering that the 3D plot below is meant to show.
df['cluster'] = labels
# Bare expression: shows the frame when it is the last line of a notebook cell.
df
CustomerID Gender Age Annual Income (k$) Spending Score (1-100) cluster
0 1 Male 19 15 39 0
1 2 Male 21 15 81 1
2 3 Female 20 16 6 0
3 4 Female 23 16 77 1
4 5 Female 31 17 40 0
# Interactive 3D scatter of the customers, coloured by cluster.
# Plotly Express maps a numeric colour column to a continuous colour scale,
# and category_orders has no effect on it. The labels are therefore cast
# to strings (on a copy, df itself is left untouched) so each cluster gets its
# own discrete colour and legend entry.
# The order is taken from the labels present in the data, not a fixed range.
cluster_order = [str(label) for label in sorted(df["cluster"].unique())]
fig = px.scatter_3d(
    df.assign(cluster=df["cluster"].astype(str)),
    x="Age",
    y="Annual Income (k$)",
    z="Spending Score (1-100)",
    color="cluster",
    hover_data=[
        "Age",
        "Annual Income (k$)",
        "Spending Score (1-100)",
    ],
    category_orders={"cluster": cluster_order},
)