0% found this document useful (0 votes)
19 views23 pages

Heart Disease Prediction Using Streamlit - Ipynb

Python file

Uploaded by

Shubham Goel
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
19 views23 pages

Heart Disease Prediction Using Streamlit - Ipynb

Python file

Uploaded by

Shubham Goel
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 23

{

"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "4ebd3fcd",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Read the dataset from heart.csv (path is relative to the working directory).\n",
"data = pd.read_csv('heart.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1c927d67",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>cp</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>fbs</th>\n",
" <th>restecg</th>\n",
" <th>thalach</th>\n",
" <th>exang</th>\n",
" <th>oldpeak</th>\n",
" <th>slope</th>\n",
" <th>ca</th>\n",
" <th>thal</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>52</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>125</td>\n",
" <td>212</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>168</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>203</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>155</td>\n",
" <td>1</td>\n",
" <td>3.1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>70</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>145</td>\n",
" <td>174</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>125</td>\n",
" <td>1</td>\n",
" <td>2.6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>61</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>148</td>\n",
" <td>203</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>161</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>62</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>138</td>\n",
" <td>294</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>106</td>\n",
" <td>0</td>\n",
" <td>1.9</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1020</th>\n",
" <td>59</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>140</td>\n",
" <td>221</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>164</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1021</th>\n",
" <td>60</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>125</td>\n",
" <td>258</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>141</td>\n",
" <td>1</td>\n",
" <td>2.8</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>47</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>110</td>\n",
" <td>275</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>118</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1023</th>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>110</td>\n",
" <td>254</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>159</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1024</th>\n",
" <td>54</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>188</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>113</td>\n",
" <td>0</td>\n",
" <td>1.4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1025 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" age sex cp trestbps chol fbs restecg thalach exang
oldpeak \\\n",
"0 52 1 0 125 212 0 1 168 0 1.0
\n",
"1 53 1 0 140 203 1 0 155 1 3.1
\n",
"2 70 1 0 145 174 0 1 125 1 2.6
\n",
"3 61 1 0 148 203 0 1 161 0 0.0
\n",
"4 62 0 0 138 294 1 1 106 0 1.9
\n",
"... ... ... .. ... ... ... ... ... ... ...
\n",
"1020 59 1 1 140 221 0 1 164 1 0.0
\n",
"1021 60 1 0 125 258 0 0 141 1 2.8
\n",
"1022 47 1 0 110 275 0 0 118 1 1.0
\n",
"1023 50 0 0 110 254 0 0 159 0 0.0
\n",
"1024 54 1 0 120 188 0 1 113 0 1.4
\n",
"\n",
" slope ca thal target \n",
"0 2 2 3 0 \n",
"1 0 0 3 0 \n",
"2 0 0 3 0 \n",
"3 2 1 3 0 \n",
"4 1 3 2 0 \n",
"... ... .. ... ... \n",
"1020 2 0 2 1 \n",
"1021 1 1 3 0 \n",
"1022 1 1 2 0 \n",
"1023 2 0 2 1 \n",
"1024 1 1 3 0 \n",
"\n",
"[1025 rows x 14 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4931ddec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1025, 14)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1e28bc9c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>cp</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>fbs</th>\n",
" <th>restecg</th>\n",
" <th>thalach</th>\n",
" <th>exang</th>\n",
" <th>oldpeak</th>\n",
" <th>slope</th>\n",
" <th>ca</th>\n",
" <th>thal</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.00000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>54.434146</td>\n",
" <td>0.695610</td>\n",
" <td>0.942439</td>\n",
" <td>131.611707</td>\n",
" <td>246.00000</td>\n",
" <td>0.149268</td>\n",
" <td>0.529756</td>\n",
" <td>149.114146</td>\n",
" <td>0.336585</td>\n",
" <td>1.071512</td>\n",
" <td>1.385366</td>\n",
" <td>0.754146</td>\n",
" <td>2.323902</td>\n",
" <td>0.513171</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>9.072290</td>\n",
" <td>0.460373</td>\n",
" <td>1.029641</td>\n",
" <td>17.516718</td>\n",
" <td>51.59251</td>\n",
" <td>0.356527</td>\n",
" <td>0.527878</td>\n",
" <td>23.005724</td>\n",
" <td>0.472772</td>\n",
" <td>1.175053</td>\n",
" <td>0.617755</td>\n",
" <td>1.030798</td>\n",
" <td>0.620660</td>\n",
" <td>0.500070</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>29.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>94.000000</td>\n",
" <td>126.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>71.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>48.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>120.000000</td>\n",
" <td>211.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>132.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>56.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>130.000000</td>\n",
" <td>240.00000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>152.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.800000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>61.000000</td>\n",
" <td>1.000000</td>\n",
" <td>2.000000</td>\n",
" <td>140.000000</td>\n",
" <td>275.00000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>166.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.800000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>77.000000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>200.000000</td>\n",
" <td>564.00000</td>\n",
" <td>1.000000</td>\n",
" <td>2.000000</td>\n",
" <td>202.000000</td>\n",
" <td>1.000000</td>\n",
" <td>6.200000</td>\n",
" <td>2.000000</td>\n",
" <td>4.000000</td>\n",
" <td>3.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age sex cp trestbps chol \\\
n",
"count 1025.000000 1025.000000 1025.000000 1025.000000 1025.00000 \
n",
"mean 54.434146 0.695610 0.942439 131.611707 246.00000 \
n",
"std 9.072290 0.460373 1.029641 17.516718 51.59251 \
n",
"min 29.000000 0.000000 0.000000 94.000000 126.00000 \
n",
"25% 48.000000 0.000000 0.000000 120.000000 211.00000 \
n",
"50% 56.000000 1.000000 1.000000 130.000000 240.00000 \
n",
"75% 61.000000 1.000000 2.000000 140.000000 275.00000 \
n",
"max 77.000000 1.000000 3.000000 200.000000 564.00000 \
n",
"\n",
" fbs restecg thalach exang oldpeak \\\
n",
"count 1025.000000 1025.000000 1025.000000 1025.000000 1025.000000 \
n",
"mean 0.149268 0.529756 149.114146 0.336585 1.071512 \
n",
"std 0.356527 0.527878 23.005724 0.472772 1.175053 \
n",
"min 0.000000 0.000000 71.000000 0.000000 0.000000 \
n",
"25% 0.000000 0.000000 132.000000 0.000000 0.000000 \
n",
"50% 0.000000 1.000000 152.000000 0.000000 0.800000 \
n",
"75% 0.000000 1.000000 166.000000 1.000000 1.800000 \
n",
"max 1.000000 2.000000 202.000000 1.000000 6.200000 \
n",
"\n",
" slope ca thal target \n",
"count 1025.000000 1025.000000 1025.000000 1025.000000 \n",
"mean 1.385366 0.754146 2.323902 0.513171 \n",
"std 0.617755 1.030798 0.620660 0.500070 \n",
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 1.000000 0.000000 2.000000 0.000000 \n",
"50% 1.000000 0.000000 2.000000 1.000000 \n",
"75% 2.000000 1.000000 3.000000 1.000000 \n",
"max 2.000000 4.000000 3.000000 1.000000 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "24def13a",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"age 0\n",
"sex 0\n",
"cp 0\n",
"trestbps 0\n",
"chol 0\n",
"fbs 0\n",
"restecg 0\n",
"thalach 0\n",
"exang 0\n",
"oldpeak 0\n",
"slope 0\n",
"ca 0\n",
"thal 0\n",
"target 0\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0b2ae3c7",
"metadata": {},
"outputs": [],
"source": [
"# The dataset has no missing values: every column reports 0 nulls above."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9b11f7b6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 526\n",
"0 499\n",
"Name: target, dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['target'].value_counts()"
]
},
{
"cell_type": "markdown",
"id": "23b55089",
"metadata": {},
"source": [
"### The dataset is roughly balanced (526 vs. 499 samples per target class)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f6d964d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1025 entries, 0 to 1024\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 age 1025 non-null int64 \n",
" 1 sex 1025 non-null int64 \n",
" 2 cp 1025 non-null int64 \n",
" 3 trestbps 1025 non-null int64 \n",
" 4 chol 1025 non-null int64 \n",
" 5 fbs 1025 non-null int64 \n",
" 6 restecg 1025 non-null int64 \n",
" 7 thalach 1025 non-null int64 \n",
" 8 exang 1025 non-null int64 \n",
" 9 oldpeak 1025 non-null float64\n",
" 10 slope 1025 non-null int64 \n",
" 11 ca 1025 non-null int64 \n",
" 12 thal 1025 non-null int64 \n",
" 13 target 1025 non-null int64 \n",
"dtypes: float64(1), int64(13)\n",
"memory usage: 112.2 KB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "cbe20dae",
"metadata": {},
"outputs": [],
"source": [
"#All data are numerical"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "72d75bcb",
"metadata": {},
"outputs": [],
"source": [
"# One-hot encode the integer-coded categorical features: pd.get_dummies\n",
"# replaces each listed column with dummy/indicator columns.\n",
"categorical_cols = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']\n",
"\n",
"# Guard so the cell can be re-run safely: after the first run the original\n",
"# columns no longer exist and get_dummies would raise a KeyError.\n",
"if set(categorical_cols).issubset(data.columns):\n",
"    data = pd.get_dummies(data, columns=categorical_cols)\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "fc488f7f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>thalach</th>\n",
" <th>oldpeak</th>\n",
" <th>target</th>\n",
" <th>sex_0</th>\n",
" <th>sex_1</th>\n",
" <th>cp_0</th>\n",
" <th>cp_1</th>\n",
" <th>...</th>\n",
" <th>slope_2</th>\n",
" <th>ca_0</th>\n",
" <th>ca_1</th>\n",
" <th>ca_2</th>\n",
" <th>ca_3</th>\n",
" <th>ca_4</th>\n",
" <th>thal_0</th>\n",
" <th>thal_1</th>\n",
" <th>thal_2</th>\n",
" <th>thal_3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>52</td>\n",
" <td>125</td>\n",
" <td>212</td>\n",
" <td>168</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>53</td>\n",
" <td>140</td>\n",
" <td>203</td>\n",
" <td>155</td>\n",
" <td>3.1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>70</td>\n",
" <td>145</td>\n",
" <td>174</td>\n",
" <td>125</td>\n",
" <td>2.6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>61</td>\n",
" <td>148</td>\n",
" <td>203</td>\n",
" <td>161</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>62</td>\n",
" <td>138</td>\n",
" <td>294</td>\n",
" <td>106</td>\n",
" <td>1.9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1020</th>\n",
" <td>59</td>\n",
" <td>140</td>\n",
" <td>221</td>\n",
" <td>164</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1021</th>\n",
" <td>60</td>\n",
" <td>125</td>\n",
" <td>258</td>\n",
" <td>141</td>\n",
" <td>2.8</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>47</td>\n",
" <td>110</td>\n",
" <td>275</td>\n",
" <td>118</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1023</th>\n",
" <td>50</td>\n",
" <td>110</td>\n",
" <td>254</td>\n",
" <td>159</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1024</th>\n",
" <td>54</td>\n",
" <td>120</td>\n",
" <td>188</td>\n",
" <td>113</td>\n",
" <td>1.4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1025 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" age trestbps chol thalach oldpeak target sex_0 sex_1 cp_0
cp_1 \\\n",
"0 52 125 212 168 1.0 0 0 1 1
0 \n",
"1 53 140 203 155 3.1 0 0 1 1
0 \n",
"2 70 145 174 125 2.6 0 0 1 1
0 \n",
"3 61 148 203 161 0.0 0 0 1 1
0 \n",
"4 62 138 294 106 1.9 0 1 0 1
0 \n",

"... ... ... ... ... ... ... ... ... ... ... \
n",
"1020 59 140 221 164 0.0 1 0 1 0
1 \n",
"1021 60 125 258 141 2.8 0 0 1 1
0 \n",
"1022 47 110 275 118 1.0 0 0 1 1
0 \n",
"1023 50 110 254 159 0.0 1 1 0 1
0 \n",
"1024 54 120 188 113 1.4 0 0 1 1
0 \n",
"\n",
" ... slope_2 ca_0 ca_1 ca_2 ca_3 ca_4 thal_0 thal_1
thal_2 \\\n",
"0 ... 1 0 0 1 0 0 0 0
0 \n",
"1 ... 0 1 0 0 0 0 0 0
0 \n",
"2 ... 0 1 0 0 0 0 0 0
0 \n",
"3 ... 1 0 1 0 0 0 0 0
0 \n",
"4 ... 0 0 0 0 1 0 0 0
1 \n",

"... ... ... ... ... ... ... ... ... ... ... \n",
"1020 ... 1 1 0 0 0 0 0 0
1 \n",
"1021 ... 0 0 1 0 0 0 0 0
0 \n",
"1022 ... 0 0 1 0 0 0 0 0
1 \n",
"1023 ... 1 1 0 0 0 0 0 0
1 \n",
"1024 ... 0 0 1 0 0 0 0 0
0 \n",
"\n",
" thal_3 \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 0 \n",
"... ... \n",
"1020 0 \n",
"1021 1 \n",
"1022 0 \n",
"1023 0 \n",
"1024 1 \n",
"\n",
"[1025 rows x 31 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "31d72878",
"metadata": {},
"outputs": [],
"source": [
"# We will test several classification algorithms: logistic regression, SVM, stochastic gradient descent, decision tree and random forest."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "eaa51455",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestClassifier(max_depth=6, n_estimators=20, random_state=12)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn import svm\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn import tree\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Split the encoded frame into the label column and the feature matrix.\n",
"y = data['target']\n",
"X = data.drop('target', axis=1)\n",
"\n",
"# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\n",
"\n",
"# Logistic regression, SVM and SGD are sensitive to feature scale: on the raw\n",
"# features lbfgs stopped at its iteration limit (ConvergenceWarning). Each of\n",
"# these models is therefore wrapped in a pipeline that standardises the\n",
"# features first; the scaler is fitted on the training split only.\n",
"# NOTE: re-run the evaluation cells after this change - accuracies recorded\n",
"# earlier for these three models were measured without scaling.\n",
"LR_classifier = make_pipeline(StandardScaler(), LogisticRegression(random_state=0))\n",
"clf = make_pipeline(StandardScaler(), svm.SVC())\n",
"# SGD shuffles the training data, so it needs a seed to give repeatable results.\n",
"sgd = make_pipeline(StandardScaler(), SGDClassifier(random_state=0))\n",
"\n",
"# Tree-based models do not depend on feature scale and are left unchanged.\n",
"forest = RandomForestClassifier(n_estimators=20, random_state=12, max_depth=6)\n",
"treee = tree.DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=6)\n",
"\n",
"LR_classifier.fit(X_train, y_train)\n",
"clf.fit(X_train, y_train)\n",
"sgd.fit(X_train, y_train)\n",
"treee.fit(X_train, y_train)\n",
"# Last expression: its repr is the cell's displayed result.\n",
"forest.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "6233d08b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RandomForestClassifier(max_depth=6, n_estimators=20, random_state=12)\n"
]
}
],
"source": [
"print(forest)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2c551508",
"metadata": {},
"outputs": [],
"source": [
"# Predictions on the training split (used to compute training accuracy).\n",
"y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest = [\n",
"    estimator.predict(X_train)\n",
"    for estimator in (LR_classifier, clf, sgd, treee, forest)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "62c20953",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8661087866108786\n",
"0.691771269177127\n",
"0.6680613668061367\n",
"0.9497907949790795\n",
"0.9525801952580195\n"
]
}
],
"source": [
"# Training accuracy, printed in this order:\n",
"# logistic regression, SVM, SGD, decision tree, random forest.\n",
"for train_predictions in (y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest):\n",
"    print(accuracy_score(y_train, train_predictions))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "bf427c13",
"metadata": {},
"outputs": [],
"source": [
"# Predictions on the held-out test split (used to compute test accuracy).\n",
"# These reuse the names of the training-split predictions and overwrite them.\n",
"y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest = [\n",
"    estimator.predict(X_test)\n",
"    for estimator in (LR_classifier, clf, sgd, treee, forest)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7bdb31ae",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8928571428571429\n",
"0.75\n",
"0.7305194805194806\n",
"0.9188311688311688\n",
"0.961038961038961\n"
]
}
],
"source": [
"# Test accuracy, printed in this order:\n",
"# logistic regression, SVM, SGD, decision tree, random forest.\n",
"for test_predictions in (y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest):\n",
"    print(accuracy_score(y_test, test_predictions))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "0f29a459",
"metadata": {},
"outputs": [],
"source": [
"# Best model on the test set: random forest (highest test accuracy above, 0.961)."
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "656d373c",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"# Persist the fitted random forest. The context manager closes the file even if\n",
"# pickling fails, so the handle is not leaked and the data is flushed to disk.\n",
"with open('Random_forest_model.pkl', 'wb') as model_file:\n",
"    pickle.dump(forest, model_file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.11"
},
"vscode": {
"interpreter": {
"hash": "fd6b15a8b5d653da77bba7c1c0df8852efe10c90ff8ccb5efed3ff34eac0bbe6"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

You might also like