Clustering Mall Data Students
Clustering Mall Data Students
In [4]: df = pd.read_csv('Mall_Customers.csv')
df.head()  # preview only the first rows rather than rendering the entire frame
Out[4]: CustomerID Genre Age Annual Income (k$) Spending Score (1-100)
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
In [4]:
# Standardise the feature matrix before clustering; every model below is
# fitted on the scaled copy, not on X itself.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit one KMeans model per candidate cluster count and record two diagnostics
# for each: the model's inertia and the silhouette score of its labelling.
# NOTE(review): k_range, inertia and silhouette_scores are defined outside the
# visible part of this cell — presumably an iterable of ints >= 2 and two
# empty lists; confirm they are reset here so re-running stays idempotent.
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42)  # fixed seed for repeatable runs
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(X_scaled, kmeans.labels_))
Out[9]: CustomerID Genre Age Annual Income (k$) Spending Score (1-100) Cluster
0 1 Male 19 15 39 4
1 2 Male 21 15 81 2
2 3 Female 20 16 6 4
3 4 Female 23 16 77 2
4 5 Female 31 17 40 4
# Assumes optimal_k, X_scaled, and kmeans are already defined by earlier cells.
# Start a new matplotlib figure sized 10 x 7.
plt.figure(figsize=(10, 7))
In [11]: df = pd.read_csv('Mall_Customers.csv')  # re-read the CSV; rebinds df, replacing whatever it held before
In [12]: # One-hot encode the categorical 'Genre' column of df.
encoded_columns = pd.get_dummies(df['Genre'], prefix='Genre')  # prefix='Genre' yields columns named 'Genre_<value>'
file:///C:/Users/Admin/OneDrive - NATIONAL INSTITUTE OF INDUSTRIAL ENGINEERING/COURSES/AI&ML/PPT/My PPT 2024/Code/Clustering_Mall_data.html 7/11
11/28/24, 12:31 PM Clustering_Mall_data
In [13]:
# Sweep the candidate cluster counts: fit a KMeans model for each k on
# X_scaled and record its inertia and the silhouette score of its labels.
# NOTE(review): X_scaled, k_range, inertia and silhouette_scores come from
# lines outside the visible part of this cell — confirm the two lists are
# re-initialised here; otherwise these appends extend results left over from
# an earlier sweep.
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42)  # fixed seed for repeatable runs
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(X_scaled, kmeans.labels_))
Out[16]: CustomerID Age Annual Income (k$) Spending Score (1-100) Genre_Female Genre_Male Cluster
0 1 19 15 39 False True 3
1 2 21 15 81 False True 3
2 3 20 16 6 True False 2
3 4 23 16 77 True False 1
4 5 31 17 40 True False 2