Assignment 1
Assignment 1
In [3]: dataFrame=pd.read_csv('heart.csv')  # read heart.csv (path relative to the working directory) into a DataFrame; `pd` is imported in a cell not shown here
In [5]: dataFrame.shape  # (rows, columns) of the freshly loaded frame
In [7]: dataFrame.head()  # first rows. NOTE(review): this output has an 'Unnamed: 0' column that the later info() listing (14 columns) lacks, so it was dropped in a cell missing from this notebook; restore that cell (or load with an explicit index column) so Restart & Run All reproduces the outputs
Out[7]: Unnamed:
Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng O
0
In [9]: dataFrame.tail()  # last rows of the frame; the 'Unnamed: 0' column is still present at this point
Out[9]: Unnamed:
Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng
0
In [13]: dataFrame.dtypes  # dtype of every column
In [15]: dataFrame.describe()  # summary statistics (count, mean, std, quartiles, ...) for the numeric columns
In [17]: dataFrame.info()  # index range, per-column non-null counts and dtypes, memory usage; Ca and Thal are the only columns with fewer than 303 non-null values
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 303 non-null int64
1 Sex 303 non-null int64
2 ChestPain 303 non-null object
3 RestBP 303 non-null int64
4 Chol 303 non-null int64
5 Fbs 303 non-null int64
6 RestECG 303 non-null int64
7 MaxHR 303 non-null int64
8 ExAng 303 non-null int64
9 Oldpeak 303 non-null float64
10 Slope 303 non-null int64
11 Ca 299 non-null float64
12 Thal 301 non-null object
13 AHD 303 non-null object
dtypes: float64(2), int64(9), object(3)
memory usage: 33.3+ KB
In [19]: dataFrame.Ca.value_counts()  # frequency of each Ca value; NaN rows are left out, so the counts add up to 299 rather than 303
Out[19]: Ca
0.0 176
1.0 65
2.0 38
3.0 20
Name: count, dtype: int64
In [21]: dataFrame.Sex.value_counts()  # frequency of each Sex code, most frequent first (1 -> 206 rows, 0 -> 97 rows)
Out[21]: Sex
1 206
0 97
Name: count, dtype: int64
In [23]: dataFrame.isnull()  # element-wise missing-value mask (True where a cell is NaN). NOTE(review): this prints the mask for all 303 rows; the per-column totals in the following cell carry the same information far more compactly
Out[23]: Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak S
0 False False False False False False False False False False
1 False False False False False False False False False False
2 False False False False False False False False False False
3 False False False False False False False False False False
4 False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ...
298 False False False False False False False False False False
299 False False False False False False False False False False
300 False False False False False False False False False False
301 False False False False False False False False False False
302 False False False False False False False False False False
In [25]: dataFrame.isnull().sum()  # number of missing values per column; only Ca (4) and Thal (2) have any
Out[25]: Age 0
Sex 0
ChestPain 0
RestBP 0
Chol 0
Fbs 0
RestECG 0
MaxHR 0
ExAng 0
Oldpeak 0
Slope 0
Ca 4
Thal 2
AHD 0
dtype: int64
In [27]: dataFrame.Age.mean()  # arithmetic mean of the Age column
Out[27]: 54.43894389438944
In [29]: dataFrame.describe()  # NOTE(review): identical call to In [15]; keep only one of the two unless the frame changed in between
In [31]: dataFrame["Age"].mean(axis=0)  # same result as dataFrame.Age.mean() in In [27]; axis=0 is the only axis a Series has, so the argument is redundant
Out[31]: 54.43894389438944
In [33]: var=dataFrame.loc[:,['Age','Sex','ChestPain','RestBP','Chol']]  # all rows, restricted to five columns; this subset feeds the pairplot cell further down
In [35]: var  # display the subset
In [39]: tp=90
fp=11
fn=19
tn=40
acc=(tp+tn)/(tp+fp+fn+tn)
pre=tp/(tp+fp)
rec=tp/(tp+fn)
print("Accuracy is : {}".format(acc))
print("Precision is : {}".format(pre))
print("Recall is : {}".format(rec))
print("F1-Score is : {}".format((2*pre*rec)/(pre+rec)))
Accuracy is : 0.8125
Precision is : 0.8910891089108911
Recall is : 0.8256880733944955
F1-Score is : 0.8571428571428572
In [43]: dataFrame.plot();  # line plot of the numeric columns against the row index; the trailing ';' hides the Axes repr
In [47]: sns.pairplot(var);  # grid of pairwise plots for the numeric columns of `var`; `sns` is imported in a cell not shown here
In [49]: sex_counts = dataFrame.Sex.value_counts()
# Build each wedge label from the Sex code it belongs to instead of relying on
# the order value_counts() happens to return. The mapping reproduces what the
# positional labels ['Male', 'Female'] gave for the recorded counts
# (code 1 listed first, code 0 second), but stays correct if the class balance
# ever flips. Unknown codes fall back to their string form.
sex_names = {1: 'Male', 0: 'Female'}
labels = [sex_names.get(code, str(code)) for code in sex_counts.index]
# NOTE(review): the exported source line was clipped after `shado`;
# `shadow=True` is the presumed original argument -- confirm against the notebook.
sex_counts.plot(kind="pie", labels=labels, startangle=90, shadow=True,
                explode=(0, 0), autopct='%1.1f%%');
plt.show()
In [ ]: