Heart Disease Prediction Using Streamlit - Ipynb
Heart Disease Prediction Using Streamlit - Ipynb
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "4ebd3fcd",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Read heart.csv from the working directory into a DataFrame\n",
"data = pd.read_csv('heart.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1c927d67",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>cp</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>fbs</th>\n",
" <th>restecg</th>\n",
" <th>thalach</th>\n",
" <th>exang</th>\n",
" <th>oldpeak</th>\n",
" <th>slope</th>\n",
" <th>ca</th>\n",
" <th>thal</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>52</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>125</td>\n",
" <td>212</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>168</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>203</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>155</td>\n",
" <td>1</td>\n",
" <td>3.1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>70</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>145</td>\n",
" <td>174</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>125</td>\n",
" <td>1</td>\n",
" <td>2.6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>61</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>148</td>\n",
" <td>203</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>161</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>62</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>138</td>\n",
" <td>294</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>106</td>\n",
" <td>0</td>\n",
" <td>1.9</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1020</th>\n",
" <td>59</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>140</td>\n",
" <td>221</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>164</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1021</th>\n",
" <td>60</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>125</td>\n",
" <td>258</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>141</td>\n",
" <td>1</td>\n",
" <td>2.8</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>47</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>110</td>\n",
" <td>275</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>118</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1023</th>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>110</td>\n",
" <td>254</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>159</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1024</th>\n",
" <td>54</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>188</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>113</td>\n",
" <td>0</td>\n",
" <td>1.4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1025 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" age sex cp trestbps chol fbs restecg thalach exang
oldpeak \\\n",
"0 52 1 0 125 212 0 1 168 0 1.0
\n",
"1 53 1 0 140 203 1 0 155 1 3.1
\n",
"2 70 1 0 145 174 0 1 125 1 2.6
\n",
"3 61 1 0 148 203 0 1 161 0 0.0
\n",
"4 62 0 0 138 294 1 1 106 0 1.9
\n",
"... ... ... .. ... ... ... ... ... ... ...
\n",
"1020 59 1 1 140 221 0 1 164 1 0.0
\n",
"1021 60 1 0 125 258 0 0 141 1 2.8
\n",
"1022 47 1 0 110 275 0 0 118 1 1.0
\n",
"1023 50 0 0 110 254 0 0 159 0 0.0
\n",
"1024 54 1 0 120 188 0 1 113 0 1.4
\n",
"\n",
" slope ca thal target \n",
"0 2 2 3 0 \n",
"1 0 0 3 0 \n",
"2 0 0 3 0 \n",
"3 2 1 3 0 \n",
"4 1 3 2 0 \n",
"... ... .. ... ... \n",
"1020 2 0 2 1 \n",
"1021 1 1 3 0 \n",
"1022 1 1 2 0 \n",
"1023 2 0 2 1 \n",
"1024 1 1 3 0 \n",
"\n",
"[1025 rows x 14 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4931ddec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1025, 14)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1e28bc9c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>cp</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>fbs</th>\n",
" <th>restecg</th>\n",
" <th>thalach</th>\n",
" <th>exang</th>\n",
" <th>oldpeak</th>\n",
" <th>slope</th>\n",
" <th>ca</th>\n",
" <th>thal</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.00000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" <td>1025.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>54.434146</td>\n",
" <td>0.695610</td>\n",
" <td>0.942439</td>\n",
" <td>131.611707</td>\n",
" <td>246.00000</td>\n",
" <td>0.149268</td>\n",
" <td>0.529756</td>\n",
" <td>149.114146</td>\n",
" <td>0.336585</td>\n",
" <td>1.071512</td>\n",
" <td>1.385366</td>\n",
" <td>0.754146</td>\n",
" <td>2.323902</td>\n",
" <td>0.513171</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>9.072290</td>\n",
" <td>0.460373</td>\n",
" <td>1.029641</td>\n",
" <td>17.516718</td>\n",
" <td>51.59251</td>\n",
" <td>0.356527</td>\n",
" <td>0.527878</td>\n",
" <td>23.005724</td>\n",
" <td>0.472772</td>\n",
" <td>1.175053</td>\n",
" <td>0.617755</td>\n",
" <td>1.030798</td>\n",
" <td>0.620660</td>\n",
" <td>0.500070</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>29.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>94.000000</td>\n",
" <td>126.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>71.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>48.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>120.000000</td>\n",
" <td>211.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>132.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>56.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>130.000000</td>\n",
" <td>240.00000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>152.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.800000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>61.000000</td>\n",
" <td>1.000000</td>\n",
" <td>2.000000</td>\n",
" <td>140.000000</td>\n",
" <td>275.00000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>166.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.800000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>77.000000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>200.000000</td>\n",
" <td>564.00000</td>\n",
" <td>1.000000</td>\n",
" <td>2.000000</td>\n",
" <td>202.000000</td>\n",
" <td>1.000000</td>\n",
" <td>6.200000</td>\n",
" <td>2.000000</td>\n",
" <td>4.000000</td>\n",
" <td>3.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age sex cp trestbps chol \\\
n",
"count 1025.000000 1025.000000 1025.000000 1025.000000 1025.00000 \
n",
"mean 54.434146 0.695610 0.942439 131.611707 246.00000 \
n",
"std 9.072290 0.460373 1.029641 17.516718 51.59251 \
n",
"min 29.000000 0.000000 0.000000 94.000000 126.00000 \
n",
"25% 48.000000 0.000000 0.000000 120.000000 211.00000 \
n",
"50% 56.000000 1.000000 1.000000 130.000000 240.00000 \
n",
"75% 61.000000 1.000000 2.000000 140.000000 275.00000 \
n",
"max 77.000000 1.000000 3.000000 200.000000 564.00000 \
n",
"\n",
" fbs restecg thalach exang oldpeak \\\
n",
"count 1025.000000 1025.000000 1025.000000 1025.000000 1025.000000 \
n",
"mean 0.149268 0.529756 149.114146 0.336585 1.071512 \
n",
"std 0.356527 0.527878 23.005724 0.472772 1.175053 \
n",
"min 0.000000 0.000000 71.000000 0.000000 0.000000 \
n",
"25% 0.000000 0.000000 132.000000 0.000000 0.000000 \
n",
"50% 0.000000 1.000000 152.000000 0.000000 0.800000 \
n",
"75% 0.000000 1.000000 166.000000 1.000000 1.800000 \
n",
"max 1.000000 2.000000 202.000000 1.000000 6.200000 \
n",
"\n",
" slope ca thal target \n",
"count 1025.000000 1025.000000 1025.000000 1025.000000 \n",
"mean 1.385366 0.754146 2.323902 0.513171 \n",
"std 0.617755 1.030798 0.620660 0.500070 \n",
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 1.000000 0.000000 2.000000 0.000000 \n",
"50% 1.000000 0.000000 2.000000 1.000000 \n",
"75% 2.000000 1.000000 3.000000 1.000000 \n",
"max 2.000000 4.000000 3.000000 1.000000 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "24def13a",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"age 0\n",
"sex 0\n",
"cp 0\n",
"trestbps 0\n",
"chol 0\n",
"fbs 0\n",
"restecg 0\n",
"thalach 0\n",
"exang 0\n",
"oldpeak 0\n",
"slope 0\n",
"ca 0\n",
"thal 0\n",
"target 0\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0b2ae3c7",
"metadata": {},
"outputs": [],
"source": [
"# The dataset is clean: no column contains missing values."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9b11f7b6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 526\n",
"0 499\n",
"Name: target, dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['target'].value_counts()"
]
},
{
"cell_type": "markdown",
"id": "23b55089",
"metadata": {},
"source": [
"### The dataset is roughly balanced (526 vs. 499 samples per class)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f6d964d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1025 entries, 0 to 1024\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 age 1025 non-null int64 \n",
" 1 sex 1025 non-null int64 \n",
" 2 cp 1025 non-null int64 \n",
" 3 trestbps 1025 non-null int64 \n",
" 4 chol 1025 non-null int64 \n",
" 5 fbs 1025 non-null int64 \n",
" 6 restecg 1025 non-null int64 \n",
" 7 thalach 1025 non-null int64 \n",
" 8 exang 1025 non-null int64 \n",
" 9 oldpeak 1025 non-null float64\n",
" 10 slope 1025 non-null int64 \n",
" 11 ca 1025 non-null int64 \n",
" 12 thal 1025 non-null int64 \n",
" 13 target 1025 non-null int64 \n",
"dtypes: float64(1), int64(13)\n",
"memory usage: 112.2 KB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "cbe20dae",
"metadata": {},
"outputs": [],
"source": [
"# All 14 columns are numeric (13 int64, 1 float64)."
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "72d75bcb",
"metadata": {},
"outputs": [],
"source": [
"# One-hot encode the categorical columns: each listed column is replaced by\n",
"# one dummy/indicator column per distinct value.\n",
"data = pd.get_dummies(\n",
"    data,\n",
"    columns=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'],\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "fc488f7f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>thalach</th>\n",
" <th>oldpeak</th>\n",
" <th>target</th>\n",
" <th>sex_0</th>\n",
" <th>sex_1</th>\n",
" <th>cp_0</th>\n",
" <th>cp_1</th>\n",
" <th>...</th>\n",
" <th>slope_2</th>\n",
" <th>ca_0</th>\n",
" <th>ca_1</th>\n",
" <th>ca_2</th>\n",
" <th>ca_3</th>\n",
" <th>ca_4</th>\n",
" <th>thal_0</th>\n",
" <th>thal_1</th>\n",
" <th>thal_2</th>\n",
" <th>thal_3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>52</td>\n",
" <td>125</td>\n",
" <td>212</td>\n",
" <td>168</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>53</td>\n",
" <td>140</td>\n",
" <td>203</td>\n",
" <td>155</td>\n",
" <td>3.1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>70</td>\n",
" <td>145</td>\n",
" <td>174</td>\n",
" <td>125</td>\n",
" <td>2.6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>61</td>\n",
" <td>148</td>\n",
" <td>203</td>\n",
" <td>161</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>62</td>\n",
" <td>138</td>\n",
" <td>294</td>\n",
" <td>106</td>\n",
" <td>1.9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1020</th>\n",
" <td>59</td>\n",
" <td>140</td>\n",
" <td>221</td>\n",
" <td>164</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1021</th>\n",
" <td>60</td>\n",
" <td>125</td>\n",
" <td>258</td>\n",
" <td>141</td>\n",
" <td>2.8</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>47</td>\n",
" <td>110</td>\n",
" <td>275</td>\n",
" <td>118</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1023</th>\n",
" <td>50</td>\n",
" <td>110</td>\n",
" <td>254</td>\n",
" <td>159</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1024</th>\n",
" <td>54</td>\n",
" <td>120</td>\n",
" <td>188</td>\n",
" <td>113</td>\n",
" <td>1.4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1025 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" age trestbps chol thalach oldpeak target sex_0 sex_1 cp_0
cp_1 \\\n",
"0 52 125 212 168 1.0 0 0 1 1
0 \n",
"1 53 140 203 155 3.1 0 0 1 1
0 \n",
"2 70 145 174 125 2.6 0 0 1 1
0 \n",
"3 61 148 203 161 0.0 0 0 1 1
0 \n",
"4 62 138 294 106 1.9 0 1 0 1
0 \n",
"... ... ... ... ... ... ... ... ... ... ... \
n",
"1020 59 140 221 164 0.0 1 0 1 0
1 \n",
"1021 60 125 258 141 2.8 0 0 1 1
0 \n",
"1022 47 110 275 118 1.0 0 0 1 1
0 \n",
"1023 50 110 254 159 0.0 1 1 0 1
0 \n",
"1024 54 120 188 113 1.4 0 0 1 1
0 \n",
"\n",
" ... slope_2 ca_0 ca_1 ca_2 ca_3 ca_4 thal_0 thal_1
thal_2 \\\n",
"0 ... 1 0 0 1 0 0 0 0
0 \n",
"1 ... 0 1 0 0 0 0 0 0
0 \n",
"2 ... 0 1 0 0 0 0 0 0
0 \n",
"3 ... 1 0 1 0 0 0 0 0
0 \n",
"4 ... 0 0 0 0 1 0 0 0
1 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"1020 ... 1 1 0 0 0 0 0 0
1 \n",
"1021 ... 0 0 1 0 0 0 0 0
0 \n",
"1022 ... 0 0 1 0 0 0 0 0
1 \n",
"1023 ... 1 1 0 0 0 0 0 0
1 \n",
"1024 ... 0 0 1 0 0 0 0 0
0 \n",
"\n",
" thal_3 \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 0 \n",
"... ... \n",
"1020 0 \n",
"1021 1 \n",
"1022 0 \n",
"1023 0 \n",
"1024 1 \n",
"\n",
"[1025 rows x 31 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "31d72878",
"metadata": {},
"outputs": [],
"source": [
"# We will compare several classification algorithms: logistic regression, SVM, stochastic gradient descent, decision tree, and random forest."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "eaa51455",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\LAMYA\\Anaconda3\\envs\\tensoflow200\\lib\\site-packages\\
sklearn\\linear_model\\_logistic.py:818: ConvergenceWarning: lbfgs failed to
converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\
n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression\n",
" extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,\n"
]
},
{
"data": {
"text/plain": [
"RandomForestClassifier(max_depth=6, n_estimators=20, random_state=12)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn import svm\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn import tree\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Separate the label column from the feature matrix.\n",
"y = data['target']\n",
"X = data.drop('target', axis=1)\n",
"\n",
"# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\n",
"\n",
"# max_iter is raised from the default of 100 because lbfgs stopped at the\n",
"# iteration limit (ConvergenceWarning) on these unscaled features.\n",
"LR_classifier = LogisticRegression(random_state=0, max_iter=5000)\n",
"clf = svm.SVC()\n",
"# SGD shuffles the training data every epoch, so it is seeded to make its scores repeatable.\n",
"sgd = SGDClassifier(random_state=0)\n",
"forest = RandomForestClassifier(n_estimators=20, random_state=12, max_depth=6)\n",
"treee = tree.DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=6)\n",
"\n",
"# Fit every candidate on the same training split so their scores are comparable.\n",
"LR_classifier.fit(X_train, y_train)\n",
"clf.fit(X_train, y_train)\n",
"sgd.fit(X_train, y_train)\n",
"treee.fit(X_train, y_train)\n",
"forest.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "6233d08b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RandomForestClassifier(max_depth=6, n_estimators=20, random_state=12)\n"
]
}
],
"source": [
"print(forest)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2c551508",
"metadata": {},
"outputs": [],
"source": [
"# Training-set predictions from each fitted classifier\n",
"y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest = (\n",
"    model.predict(X_train)\n",
"    for model in (LR_classifier, clf, sgd, treee, forest)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "62c20953",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8661087866108786\n",
"0.691771269177127\n",
"0.6680613668061367\n",
"0.9497907949790795\n",
"0.9525801952580195\n"
]
}
],
"source": [
"# One accuracy per line, in order: logistic regression, SVM, SGD, decision tree, random forest\n",
"for train_predictions in (y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest):\n",
"    print(accuracy_score(y_train, train_predictions))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "bf427c13",
"metadata": {},
"outputs": [],
"source": [
"# Held-out test-set predictions from each fitted classifier\n",
"y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest = (\n",
"    model.predict(X_test)\n",
"    for model in (LR_classifier, clf, sgd, treee, forest)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7bdb31ae",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8928571428571429\n",
"0.75\n",
"0.7305194805194806\n",
"0.9188311688311688\n",
"0.961038961038961\n"
]
}
],
"source": [
"# One accuracy per line, in order: logistic regression, SVM, SGD, decision tree, random forest\n",
"for test_predictions in (y_pred, y_predsvm, y_predsgd, y_predtree, y_predforest):\n",
"    print(accuracy_score(y_test, test_predictions))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "0f29a459",
"metadata": {},
"outputs": [],
"source": [
"# The random forest achieved the highest test accuracy, so it is the model we keep."
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "656d373c",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"# Serialize the fitted random forest to disk. The context manager closes the\n",
"# file handle once the dump finishes, instead of leaving it open.\n",
"with open('Random_forest_model.pkl', 'wb') as model_file:\n",
"    pickle.dump(forest, model_file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.11"
},
"vscode": {
"interpreter": {
"hash": "fd6b15a8b5d653da77bba7c1c0df8852efe10c90ff8ccb5efed3ff34eac0bbe6"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}