LP Practical ! Jupyter Notebook
LP Practical ! Jupyter Notebook
os.getcwd()
Out[1]: 'C:\\Users\\kunal'
In [6]: df.head() # df stands for DataFrame; head() shows the first 5 rows of the dataset
Out[6]:
Unnamed: 0 Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak Slope Ca Thal AHD
Out[8]:
Unnamed: 0 Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak Slope Ca Thal AHD
0 False False False False False False False False False False False False False False False
1 False False False False False False False False False False False False False False False
2 False False False False False False False False False False False False False False False
3 False False False False False False False False False False False False False False False
4 False False False False False False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
298 False False False False False False False False False False False False False False False
299 False False False False False False False False False False False False False False False
300 False False False False False False False False False False False False False False False
301 False False False False False False False False False False False False False False False
302 False False False False False False False False False False False False True False False
Out[9]: Unnamed: 0 0
Age 0
Sex 0
ChestPain 0
RestBP 0
Chol 0
Fbs 0
RestECG 0
MaxHR 0
ExAng 0
Oldpeak 0
Slope 0
Ca 4
Thal 2
AHD 0
dtype: int64
In [10]: # alternatively, count() gives the number of non-null values in each column
df.count()
In [11]: # find the data type of each column; dtypes is an attribute, not a method (no parentheses)
df.dtypes
Out[12]:
Unnamed: 0 Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak Slope Ca Thal AHD
0 False False False False False False False False False True False False True False False
1 False False False False False False True False False False False False False False False
2 False False False False False False True False False False False False False False False
3 False False False False False False True True False True False False True False False
4 False False True False False False True False False True False False True False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
298 False False False False False False True True False True False False True False False
299 False False False False False False False True False True False False False False False
300 False False False False False False True True False False False False False False False
301 False False True False False False True False False True True False False False False
302 False False False False False False True True False True True False False False False
Out[13]:
Unnamed: 0 Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak Slope Ca Thal AHD
0 NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 0.0 NaN NaN
1 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN 0.0 0.0 NaN 0.0 NaN NaN 0.0 NaN NaN
4 NaN NaN 0.0 NaN NaN NaN 0.0 NaN NaN 0.0 NaN NaN 0.0 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
298 NaN NaN NaN NaN NaN NaN 0.0 0.0 NaN 0.0 NaN NaN 0.0 NaN NaN
299 NaN NaN NaN NaN NaN NaN NaN 0.0 NaN 0.0 NaN NaN NaN NaN NaN
300 NaN NaN NaN NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN
301 NaN NaN 0.0 NaN NaN NaN 0.0 NaN NaN 0.0 0.0 NaN NaN NaN NaN
302 NaN NaN NaN NaN NaN NaN 0.0 0.0 NaN 0.0 0.0 NaN NaN NaN NaN
Out[14]: Unnamed: 0 0
Age 0
Sex 97
ChestPain 0
RestBP 0
Chol 0
Fbs 258
RestECG 151
MaxHR 0
ExAng 204
Oldpeak 99
Slope 0
Ca 176
Thal 0
AHD 0
dtype: int64
In [15]: # to find the mean age from the Age column, we first list all the column names
df.columns
In [16]: # access the Age column by its label (label-based indexing) and call .mean() to get its mean
df['Age'].mean()
Out[16]: 54.43894389438944
In [21]: # extract only the given columns; to select more than one column, use double brackets (a list of column names)
newdf =df[['Age' , 'Sex' , 'ChestPain' , 'Chol']]
Out[22]:
Age Sex ChestPain Chol
0 63 1 typical 233
1 67 1 asymptomatic 286
2 67 1 asymptomatic 229
3 37 1 nonanginal 250
4 41 0 nontypical 204
In [24]: # for validation we hold out part of the data: 75% is used for training; scikit-learn (sklearn) is the library that provides the train_test_split function
# cross validation (strictly, a single hold-out train/test split)
from sklearn.model_selection import train_test_split
In [26]: train, test= train_test_split(df, random_state=0 ,test_size=0.25) # any fixed random_state can be given; it makes the shuffling of the data reproducible
# by default the split is also 75% train and 25% test
In [27]: train.shape
In [28]: test.shape
In [29]: import numpy as np # numpy is needed to create arrays; we use some made-up data for testing
In [30]: actual=list(np.ones(45)) + list(np.zeros(55)) # create the "actual" labels: np.ones(45) means an array of 45 ones (1, 1, 1, ...)
# followed by zeros for the remaining 55 values
In [31]: np.array(actual)
Out[31]: array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
Out[32]: array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.])
In [36]: ConfusionMatrixDisplay.from_predictions(actual,predicted)
Out[40]: 0.87
In [ ]: