K-Means Clustering - Jupyter Notebook
K-Means Clustering - Jupyter Notebook
K-Means clustering
In [1]: import pandas as pd
In [2]: df = pd.read_csv(
    # NOTE(review): absolute, machine-specific path - edit before running on another machine.
    "C:\\Users\\vaish\\Downloads\\SIC\\Mall_Customers.csv"
)
In [4]: df  # display the loaded frame; the rendered output shows its first and last rows
0 15 39
1 15 81
2 16 6
3 16 77
4 17 40
195 120 79
196 126 28
197 126 74
198 137 18
199 137 83
In [5]: df.info()  # print index range, per-column non-null counts and dtypes, and memory usage
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Annual Income (k$) 200 non-null int64
1 Spending Score (1-100) 200 non-null int64
dtypes: int64(2)
memory usage: 3.3 KB
In [6]: df.columns  # show the column labels of the frame
In [7]: df.isnull().sum()  # count missing values in each column
In [ ]: vc=[]
for i in range (1,11):
km=KMeans(n_clusters=i)
km.fit_predict(df)
vc.append(km.inertia_)
In [23]: vc  # show the inertia values collected by the loop; NOTE(review): execution count 23 is out of sequence with the neighbouring cells
In [13]: X = df.to_numpy()  # every row and column of df as a NumPy array
# Fit the 5-cluster model. random_state is fixed so the cluster label numbering is
# stable across re-runs; n_init is pinned because its default differs between
# scikit-learn releases.
km = KMeans(n_clusters=5, n_init=10, random_state=42)
y_means = km.fit_predict(X)  # cluster index assigned to each row of X
In [14]: y_means  # show the cluster label predicted for each row of X
array([[15, 39],
Out[16]:
[16, 6],
[17, 40],
[18, 6],
[19, 3],
[19, 14],
[20, 15],
[20, 13],
[21, 35],
[23, 29],
[24, 35],
[25, 5],
[28, 14],
[28, 32],
[29, 31],
[30, 4],
[33, 4],
[33, 14],
[34, 17],
[37, 26],
[38, 35],
[39, 36],
[39, 28]], dtype=int64)
array([61, 55, 47, 42, 42, 52, 60, 54, 60, 45, 41, 50, 46, 51, 46, 56, 55,
Out[20]:
52, 59, 51, 59, 50, 48, 59, 47, 55, 42, 49, 56, 47, 54, 53, 48, 52,
42, 51, 55, 41, 44, 57, 46, 58, 55, 60, 46, 55, 41, 49, 40, 42, 52,
47, 50, 42, 49, 41, 48, 59, 55, 56, 42, 50, 46, 43, 48, 52, 54, 42,
46, 48, 50, 43, 59, 43, 57, 56, 40, 58, 35, 34, 40], dtype=int64)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Clusters of data points')
plt.legend()
plt.show()