ML Assignment No 5
ML Assignment No 5
#importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
(200, 5)
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
5 6 Female 22 17 76
6 7 Female 35 18 6
7 8 Female 23 18 94
8 9 Male 64 19 3
9 10 Female 30 19 72
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 CustomerID 200 non-null int64
1 Genre 200 non-null object
2 Age 200 non-null int64
3 Annual Income (k$) 200 non-null int64
4 Spending Score (1-100) 200 non-null int64
dtypes: int64(4), object(1)
memory usage: 7.9+ KB
count 200.000000 200.000000 200.000000 200.000000
mean 100.500000 38.850000 60.560000 50.200000
std 57.879185 13.969007 26.264721 25.823522
min 1.000000 18.000000 15.000000 1.000000
25% 50.750000 28.750000 41.500000 34.750000
50% 100.500000 36.000000 61.500000 50.000000
75% 150.250000 49.000000 78.000000 73.000000
max 200.000000 70.000000 137.000000 99.000000
#find the missing values in the dataset
df.isna()
CustomerID Genre Age Annual Income (k$) Spending Score (1-100)
0 False False False False False
1 False False False False False
2 False False False False False
3 False False False False False
4 False False False False False
CustomerID 0
Genre 0
Age 0
Annual Income (k$) 0
Spending Score (1-100) 0
dtype: int64
#to build a boolean mask marking every value in the dataset that is equal to zero
df==0
CustomerID Genre Age Annual Income (k$) Spending Score (1-100)
(df==0).sum().sum()
0 1
1 2
2 3
3 4
4 5
...
195 196
196 197
197 198
198 199
199 200
Name: CustomerID, Length: 200, dtype: int64
100.5
200
#to access a specific row and a specific column using index locations
df.iloc[3, 4]
77
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
CustomerID 2
Genre Male
Age 21
Annual Income (k$) 15
Spending Score (1-100) 81
Name: 1, dtype: object
0 1
1 2
2 3
3 4
4 5
...
195 196
196 197
197 198
198 199
199 200
Name: CustomerID, Length: 200, dtype: int64
Age CustomerID
0 19 1
1 21 2
2 20 3
3 23 4
4 31 5
195 35 196
196 45 197
197 32 198
198 32 199
199 30 200
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
df.dtypes
CustomerID int64
Genre object
Age int64
Annual Income (k$) int64
Spending Score (1-100) int64
dtype: object
<matplotlib.collections.PathCollection at 0x7f078a127a50>
#centroid
print(" Cluster centroids are \n", kmeans. cluster_centers_)
print(" \n\n predicated clusters for data points are :")
y_predict
0 1 Male 19 15 39 2
1 2 Male 21 15 81 3
2 3 Female 20 16 6 2
3 4 Female 23 16 77 3
4 5 Female 31 17 40 2