# K-means estimator used for every clustering step in this notebook.
from sklearn.cluster import KMeans
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt
%matplotlib inline
# Load the dataset from income.csv (resolved relative to the working
# directory) and preview the first rows.
df = pd.read_csv("income.csv")
df.head()
Name Age Income
0 Rutuja 27 70000
1 Samruddhi 29 90000
2 Shubhangi 29 61000
3 Pratiksha 28 60000
4 Mohan 42 150000
# Scatter of the raw data: Age on the x-axis, Income on the y-axis.
plt.scatter(df['Age'], df['Income'])
plt.xlabel('Age')
plt.ylabel('Income')
Text(0, 0.5, 'Income')
# Cluster the Age/Income pairs into three groups.
# n_init is pinned to 10 (the current default) so results do not shift when
# scikit-learn changes the default to 'auto', and so the FutureWarning about
# that change is not emitted.
km = KMeans(n_clusters=3, n_init=10)
y_predicted = km.fit_predict(df[['Age', 'Income']])
y_predicted
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
array([2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2])
# Store each row's predicted cluster label next to the original columns,
# then preview the result.
df['cluster']=y_predicted
df.head()
Name Age Income cluster
0 Rutuja 27 70000 2
1 Samruddhi 29 90000 2
2 Shubhangi 29 61000 1
3 Pratiksha 28 60000 1
4 Mohan 42 150000 0
# Centroids of the fitted model; columns follow the fit order (Age, Income).
km.cluster_centers_
array([[3.82857143e+01, 1.50000000e+05],
[3.26000000e+01, 5.59500000e+04],
[3.16666667e+01, 8.00000000e+04]])
# Split the rows by cluster label so each group can be drawn in its own colour.
df1 = df[df.cluster == 0]
df2 = df[df.cluster == 1]
df3 = df[df.cluster == 2]
plt.scatter(df1.Age, df1['Income'], color='green')
plt.scatter(df2.Age, df2['Income'], color='red')
plt.scatter(df3.Age, df3['Income'], color='black')
# Overlay the centroids: column 0 is Age and column 1 is Income, matching the
# column order the model was fitted on.
plt.scatter(
    km.cluster_centers_[:, 0],
    km.cluster_centers_[:, 1],
    color='purple',
    marker='*',
    label='centroid',
)
plt.xlabel('Age')
plt.ylabel('Income')
plt.legend()
<matplotlib.legend.Legend at 0x658dfd0>
# Min-max scale Income and then Age in place, one column at a time.
# The same scaler object is refitted for each column, so after this cell it
# holds the parameters learned from Age.
scaler = MinMaxScaler()
df['Income'] = scaler.fit_transform(df[['Income']])
df['Age'] = scaler.fit_transform(df[['Age']])
df.head()
Name Age Income cluster
0 Rutuja 0.058824 0.213675 2
1 Samruddhi 0.176471 0.384615 2
2 Shubhangi 0.176471 0.136752 1
3 Pratiksha 0.117647 0.128205 1
4 Mohan 0.941176 0.897436 0
# Scatter of Age against Income using the current (rescaled) column values.
plt.scatter(df['Age'], df['Income'])
<matplotlib.collections.PathCollection at 0x658e4f0>
# Fit a fresh three-cluster model on the current Age/Income values.
# n_init is pinned to 10 (the current default) so results do not shift when
# scikit-learn changes the default to 'auto', and so the FutureWarning about
# that change is not emitted.
km = KMeans(n_clusters=3, n_init=10)
y_predicted = km.fit_predict(df[['Age', 'Income']])
y_predicted
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2])
# Refresh the stored labels from the most recent fit. Without this the masks
# below would select rows by the labels of the earlier fit, which do not
# correspond to the centroids of the current model.
df['cluster'] = y_predicted
df1 = df[df.cluster == 0]
df2 = df[df.cluster == 1]
df3 = df[df.cluster == 2]
plt.scatter(df1.Age, df1['Income'], color='green')
plt.scatter(df2.Age, df2['Income'], color='red')
plt.scatter(df3.Age, df3['Income'], color='black')
# Overlay the centroids: column 0 is Age and column 1 is Income, matching the
# column order the model was fitted on.
plt.scatter(
    km.cluster_centers_[:, 0],
    km.cluster_centers_[:, 1],
    color='purple',
    marker='*',
    label='centroid',
)
plt.xlabel('Age')
plt.ylabel('Income')
plt.legend()
<matplotlib.legend.Legend at 0x67a30c0>
# Centroids of the most recently fitted model; columns follow the fit order
# (Age, Income).
km.cluster_centers_
array([[0.72268908, 0.8974359 ],
[0.11029412, 0.12232906],
[0.8 , 0.17094017]])
# Elbow method: fit K-means for k = 1..9 and record each model's inertia
# (sum of squared distances of samples to their closest centroid).
sse = []
k_rng = range(1, 10)
for k in k_rng:
    # n_init is pinned to 10 (the current default) so results do not shift
    # when scikit-learn changes the default to 'auto', and so the
    # FutureWarning is not emitted on every iteration.
    # NOTE: km is rebound on each pass, so after the loop it refers to the
    # model fitted with the largest k.
    km = KMeans(n_clusters=k, n_init=10)
    km.fit(df[['Age', 'Income']])
    sse.append(km.inertia_)
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in 1.4. Set the value of `n_init` explicitly to suppress the
warning
warnings.warn(
# Elbow curve: sum of squared error as a function of the number of clusters.
plt.ylabel('Sum of squared error')
plt.xlabel('K')
plt.plot(k_rng, sse)
[<matplotlib.lines.Line2D at 0x6c45878>]
# Display the recorded SSE values, one per k in k_rng.
sse
[5.116391204288242,
1.8517482098747688,
0.4938829874100704,
0.36790934699671735,
0.2632376117287616,
0.1931260970868519,
0.13764115392926798,
0.10882448152285196,
0.07871979164270587]