0% found this document useful (0 votes)
81 views38 pages

Student Dropout

Uploaded by

mohan343mk2005
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
81 views38 pages

Student Dropout

Uploaded by

mohan343mk2005
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 38

# Mount Google Drive so files under /content/drive are readable from Colab.
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly


remount, call drive.mount("/content/drive", force_remount=True).

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression,Perceptron
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
# Parenthesised so the import list can span several lines.
from sklearn.metrics import (
    precision_score,
    recall_score,
    accuracy_score,
    f1_score,
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay,
    PrecisionRecallDisplay,
    RocCurveDisplay,
)

df = pd.read_csv("/content/drive/MyDrive/data.csv",sep=";")
df

{"type":"dataframe","variable_name":"df"}

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4424 entries, 0 to 4423
Data columns (total 37 columns):
# Column Non-Null Count
Dtype
--- ------ --------------
-----
0 Marital status 4424 non-null
int64
1 Application mode 4424 non-null
int64
2 Application order 4424 non-null
int64
3 Course 4424 non-null
int64
4 Daytime/evening attendance 4424 non-null
int64
5 Previous qualification 4424 non-null
int64
6 Previous qualification (grade) 4424 non-null
float64
7 Nacionality 4424 non-null
int64
8 Mother's qualification 4424 non-null
int64
9 Father's qualification 4424 non-null
int64
10 Mother's occupation 4424 non-null
int64
11 Father's occupation 4424 non-null
int64
12 Admission grade 4424 non-null
float64
13 Displaced 4424 non-null
int64
14 Educational special needs 4424 non-null
int64
15 Debtor 4424 non-null
int64
16 Tuition fees up to date 4424 non-null
int64
17 Gender 4424 non-null
int64
18 Scholarship holder 4424 non-null
int64
19 Age at enrollment 4424 non-null
int64
20 International 4424 non-null
int64
21 Curricular units 1st sem (credited) 4424 non-null
int64
22 Curricular units 1st sem (enrolled) 4424 non-null
int64
23 Curricular units 1st sem (evaluations) 4424 non-null
int64
24 Curricular units 1st sem (approved) 4424 non-null
int64
25 Curricular units 1st sem (grade) 4424 non-null
float64
26 Curricular units 1st sem (without evaluations) 4424 non-null
int64
27 Curricular units 2nd sem (credited) 4424 non-null
int64
28 Curricular units 2nd sem (enrolled) 4424 non-null
int64
29 Curricular units 2nd sem (evaluations) 4424 non-null
int64
30 Curricular units 2nd sem (approved) 4424 non-null
int64
31 Curricular units 2nd sem (grade) 4424 non-null
float64
32 Curricular units 2nd sem (without evaluations) 4424 non-null
int64
33 Unemployment rate 4424 non-null
float64
34 Inflation rate 4424 non-null
float64
35 GDP 4424 non-null
float64
36 Target 4424 non-null
object
dtypes: float64(7), int64(29), object(1)
memory usage: 1.2+ MB

df.isnull().sum()

Marital status 0
Application mode 0
Application order 0
Course 0
Daytime/evening attendance\t 0
Previous qualification 0
Previous qualification (grade) 0
Nacionality 0
Mother's qualification 0
Father's qualification 0
Mother's occupation 0
Father's occupation 0
Admission grade 0
Displaced 0
Educational special needs 0
Debtor 0
Tuition fees up to date 0
Gender 0
Scholarship holder 0
Age at enrollment 0
International 0
Curricular units 1st sem (credited) 0
Curricular units 1st sem (enrolled) 0
Curricular units 1st sem (evaluations) 0
Curricular units 1st sem (approved) 0
Curricular units 1st sem (grade) 0
Curricular units 1st sem (without evaluations) 0
Curricular units 2nd sem (credited) 0
Curricular units 2nd sem (enrolled) 0
Curricular units 2nd sem (evaluations) 0
Curricular units 2nd sem (approved) 0
Curricular units 2nd sem (grade) 0
Curricular units 2nd sem (without evaluations) 0
Unemployment rate 0
Inflation rate 0
GDP 0
Target 0
dtype: int64

df.shape

(4424, 37)

df.size

163688

df.describe().T

{"summary":"{\n \"name\": \"df\",\n \"rows\": 36,\n \"fields\": [\n


{\n \"column\": \"count\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 0.0,\n \"min\":
4424.0,\n \"max\": 4424.0,\n \"num_unique_values\": 1,\n
\"samples\": [\n 4424.0\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"mean\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 1474.2555358018299,\n
\"min\": 0.001968806509945778,\n \"max\": 8856.642631103074,\n
\"num_unique_values\": 36,\n \"samples\": [\n
0.001968806509945778\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"std\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 343.05686248568236,\n \"min\": 0.10676005722115345,\n
\"max\": 2063.566416197521,\n \"num_unique_values\": 36,\n
\"samples\": [\n 2.2699354412537676\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"min\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 22.54533742667294,\n
\"min\": -4.06,\n \"max\": 95.0,\n
\"num_unique_values\": 8,\n \"samples\": [\n 0.0\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"25%\",\n \"properties\": {\
n \"dtype\": \"number\",\n \"std\": 1512.8102116426887,\
n \"min\": -1.7,\n \"max\": 9085.0,\n
\"num_unique_values\": 16,\n \"samples\": [\n 1.0\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"50%\",\n \"properties\": {\
n \"dtype\": \"number\",\n \"std\": 1537.9098552023688,\
n \"min\": 0.0,\n \"max\": 9238.0,\n
\"num_unique_values\": 17,\n \"samples\": [\n 1.0\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"75%\",\n \"properties\": {\
n \"dtype\": \"number\",\n \"std\": 1590.4597016670252,\
n \"min\": 0.0,\n \"max\": 9556.0,\n
\"num_unique_values\": 18,\n \"samples\": [\n 1.0\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"max\",\n \"properties\": {\
n \"dtype\": \"number\",\n \"std\": 1659.272774344112,\n
\"min\": 1.0,\n \"max\": 9991.0,\n
\"num_unique_values\": 24,\n \"samples\": [\n 44.0\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n }\n ]\n}","type":"dataframe"}

df['Target'].value_counts()

Graduate 2209
Dropout 1421
Enrolled 794
Name: Target, dtype: int64

df['Target'] = LabelEncoder().fit_transform(df['Target'])

df['Target'].value_counts()

2 2209
0 1421
1 794
Name: Target, dtype: int64

# displot is figure-level and creates its own figure, so a preceding
# plt.figure() only leaves an empty figure behind. The 5x10 inch size is
# requested through height/aspect instead (width = height * aspect).
sns.displot(df['Target'], color="Red", height=10, aspect=0.5)

<seaborn.axisgrid.FacetGrid at 0x7a50ded55cc0>

<Figure size 500x1000 with 0 Axes>


# Bar chart of how many rows fall into each encoded Target class.
plt.figure(figsize=(5, 10))
target_axes = sns.countplot(x="Target", data=df)
target_axes.set_title('Target')

Text(0.5, 1.0, 'Target')


plt.figure(figsize=(8, 8))
plt.title("Education Status")
# value_counts() orders slices by frequency, so slice labels are looked up from
# the encoded class of each slice rather than hard-coded in an assumed order.
# LabelEncoder numbers the classes alphabetically: Dropout=0, Enrolled=1,
# Graduate=2.
status_names = {0: 'Dropout', 1: 'Enrolled', 2: 'Graduate'}
status_counts = df['Target'].value_counts()
status_labels = [status_names.get(code, str(code)) for code in status_counts.index]
plt.pie(status_counts, labels=status_labels, explode=(0.1, 0.1, 0.0),
        autopct='%1.2f%%', shadow=True)
plt.legend(loc='lower right')

<matplotlib.legend.Legend at 0x7a50dd113f70>

plt.figure(figsize=(8, 8))
plt.title("Gender")
# value_counts() orders slices by frequency, so a fixed ['Male', 'Female'] list
# is only right if males happen to be the larger group. Labels are looked up
# from each slice's code instead.
# NOTE(review): assumes the dataset codebook encoding 1 = male, 0 = female —
# confirm against the data source.
gender_names = {0: 'Female', 1: 'Male'}
gender_counts = df['Gender'].value_counts()
gender_labels = [gender_names.get(code, str(code)) for code in gender_counts.index]
plt.pie(gender_counts, labels=gender_labels, explode=(0.1, 0.0),
        autopct='%1.2f%%', shadow=True)
plt.legend(loc='lower right')

<matplotlib.legend.Legend at 0x7a50dd0467d0>

# Distribution (histogram + KDE) of every feature column on a 12x3 grid.
plt.figure(figsize=(20, 45))

# The first 36 columns are the features ('Target' sits at index 36); the
# previous range(0, 35) left the last feature out although the grid has 36 slots.
feature_columns = df.columns[:36]

for slot, column in enumerate(feature_columns, start=1):
    ax = plt.subplot(12, 3, slot)
    # histplot replaces the deprecated distplot; kde=True with a density scale
    # keeps the histogram-plus-curve appearance.
    sns.histplot(df[column], kde=True, stat="density", color='blue', ax=ax)
    # Some column names carry stray whitespace (e.g. a trailing tab) that the
    # plot font cannot render, so the axis label is stripped.
    ax.set_xlabel(column.strip())
    ax.grid()

<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:
`distplot` is a deprecated function and will be removed in seaborn
v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).
For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.
Please adapt your code to use either `displot` (a figure-level
function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).
For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.
Please adapt your code to use either `displot` (a figure-level
function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
sns.distplot(df.iloc[:, i], color='blue')
<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


<ipython-input-95-e4734a4c6b05>:5: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(df.iloc[:, i], color='blue')


/usr/local/lib/python3.10/dist-packages/IPython/core/events.py:89:
UserWarning: Glyph 9 ( ) missing from current font.
func(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/IPython/core/pylabtools.py:151
: UserWarning: Glyph 9 ( ) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Feature selection: pairwise Pearson correlation of all columns, shown as a heatmap.
corr_matrix = df.corr(method="pearson")

heatmap_options = dict(
    vmin=-1.,
    vmax=1.,
    annot=False,
    fmt='.2f',
    cmap="YlGnBu",
    cbar=True,
    linewidths=0.5,
)
plt.figure(figsize=(10, 10))
sns.heatmap(corr_matrix, **heatmap_options)
plt.title("Pearson correlation")
plt.show()

/usr/local/lib/python3.10/dist-packages/seaborn/utils.py:61:
UserWarning: Glyph 9 ( ) missing from current font.
fig.canvas.draw()
# Candidate feature subset (bare expression, not assigned to a name).
["Tuition fees up to date",
 "Curricular units 1st sem (approved)",
 "Curricular units 1st sem (grade)",
 "Curricular units 2nd sem (approved)",
 "Curricular units 2nd sem (grade)"]
corr_matrix["Target"]

Marital status -0.089804


Application mode -0.221747
Application order 0.089791
Course 0.034219
Daytime/evening attendance\t 0.075107
Previous qualification -0.056039
Previous qualification (grade) 0.103764
Nacionality -0.014801
Mother's qualification -0.043178
Father's qualification -0.001393
Mother's occupation -0.005629
Father's occupation -0.001899
Admission grade 0.120889
Displaced 0.113986
Educational special needs -0.007353
Debtor -0.240999
Tuition fees up to date 0.409827
Gender -0.229270
Scholarship holder 0.297595
Age at enrollment -0.243438
International 0.003934
Curricular units 1st sem (credited) 0.048150
Curricular units 1st sem (enrolled) 0.155974
Curricular units 1st sem (evaluations) 0.044362
Curricular units 1st sem (approved) 0.529123
Curricular units 1st sem (grade) 0.485207
Curricular units 1st sem (without evaluations) -0.068702
Curricular units 2nd sem (credited) 0.054004
Curricular units 2nd sem (enrolled) 0.175847
Curricular units 2nd sem (evaluations) 0.092721
Curricular units 2nd sem (approved) 0.624157
Curricular units 2nd sem (grade) 0.566827
Curricular units 2nd sem (without evaluations) -0.094028
Unemployment rate 0.008627
Inflation rate -0.026874
GDP 0.044135
Target 1.000000
Name: Target, dtype: float64

df.drop(df[df['Target'] == 1].index, inplace = True)


df

{"type":"dataframe","variable_name":"df"}
# Binary label: 1 where the encoded Target is 0 (Dropout), otherwise 0.
df['Dropout'] = (df['Target'] == 0).astype('int64')
df

{"type":"dataframe","variable_name":"df"}

from matplotlib import pyplot as plt


# NOTE(review): `_df_28` is not defined anywhere in the visible source; this
# looks like a Colab auto-generated chart snippet — confirm it is needed, since
# the name would be unresolved when the notebook is run top to bottom.
_df_28['index'].plot(kind='line', figsize=(8, 4), title='index')
# Hide the top and right borders of the current axes.
plt.gca().spines[['top', 'right']].set_visible(False)

df['Dropout'] = df['Target'].apply(lambda x: 1 if x==0 else 0)


df

{"type":"dataframe","variable_name":"df"}

plt.figure(figsize=(5, 10))
# histplot replaces the deprecated distplot; kde=True with a density scale keeps
# the histogram-plus-curve appearance on the current axes.
sns.histplot(df['Dropout'], kde=True, stat="density", color="red")

<ipython-input-101-f800bf5fae0e>:2: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn


v0.14.0.

Please adapt your code to use either `displot` (a figure-level


function with
similar flexibility) or `histplot` (an axes-level function for
histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
sns.distplot(df['Dropout'], color = "red")

<Axes: xlabel='Dropout', ylabel='Density'>


plt.figure(figsize=(8, 8))
plt.title("Dropout Status")
# value_counts() orders slices by frequency; labels are looked up from each
# slice's value (1 = dropout, 0 = non-dropout) so they stay correct whichever
# class is larger.
dropout_names = {0: 'Non-Dropout', 1: 'Dropout'}
dropout_counts = df['Dropout'].value_counts()
dropout_labels = [dropout_names.get(flag, str(flag)) for flag in dropout_counts.index]
plt.pie(dropout_counts, labels=dropout_labels, explode=(0.2, 0.0),
        autopct='%1.2f%%', shadow=True)
plt.legend(loc='lower right')

<matplotlib.legend.Legend at 0x7a50e22241c0>

plt.figure(figsize=(8, 8))
plt.title("Dropout Status")
# Slice labels follow the value of each slice (1 = dropout, 0 = non-dropout)
# instead of assuming which class value_counts() lists first.
dropout_names = {0: 'Non-Dropout', 1: 'Dropout'}
dropout_counts = df['Dropout'].value_counts()
dropout_labels = [dropout_names.get(flag, str(flag)) for flag in dropout_counts.index]
plt.pie(dropout_counts, labels=dropout_labels, explode=(0.2, 0.0),
        autopct='%1.2f%%', shadow=True)
plt.legend(loc='lower right')

<matplotlib.legend.Legend at 0x7a50e2103b50>

# Feature matrix: every column except the two label columns. This selects the
# same 36 columns as the positional slice [:, :36] but does not depend on
# column order.
x = df.drop(columns=['Target', 'Dropout']).values
# Alternative: restrict the model to a hand-picked feature subset.
# x = df[["Tuition fees up to date", "Curricular units 1st sem (approved)",
#         "Curricular units 1st sem (grade)", "Curricular units 2nd sem (approved)",
#         "Curricular units 2nd sem (grade)"]].values
print(x)
# NOTE(review): the scaler is fit on all rows before the train/test split, so
# test-row statistics influence the scaled training features. Fit it on the
# training portion only if a leakage-free evaluation is required.
x = StandardScaler().fit_transform(x)
x

[[ 1. 17. 5. ... 10.8 1.4 1.74]


[ 1. 15. 1. ... 13.9 -0.3 0.79]
[ 1. 1. 5. ... 10.8 1.4 1.74]
...
[ 1. 1. 1. ... 13.9 -0.3 0.79]
[ 1. 1. 1. ... 9.4 -0.8 -3.12]
[ 1. 10. 1. ... 12.7 3.7 -1.7 ]]

array([[-0.30068558, -0.08191671, 2.4368225 , ..., -0.31131218,


0.12161459, 0.77411864],
[-0.30068558, -0.19714928, -0.56247077, ..., 0.85091858,
-1.10607007, 0.35370412],
[-0.30068558, -1.00377727, 2.4368225 , ..., -0.31131218,
0.12161459, 0.77411864],
...,
[-0.30068558, -1.00377727, -0.56247077, ..., 0.85091858,
-1.10607007, 0.35370412],
[-0.30068558, -1.00377727, -0.56247077, ..., -0.83619059,
-1.46715379, -1.37663355],
[-0.30068558, -0.4852307 , -0.56247077, ..., 0.4010228 ,
1.78259971, -0.74822447]])

y = df['Dropout'].values
y

array([1, 0, 1, ..., 1, 0, 0])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(x, y, test_size=0.2, random_state=1)
x_train, x_test, y_train, y_test = split_parts

def perform(y_pred, y_true=None):
    """Print evaluation metrics for predictions and plot the confusion matrix.

    Args:
        y_pred: Predicted labels for the evaluated rows
            (1 = dropout, 0 = non-dropout).
        y_true: Ground-truth labels to score against. Defaults to the
            module-level ``y_test`` so existing ``perform(y_pred)`` calls
            keep working.

    Returns:
        None. Results are printed and the confusion matrix is drawn.
    """
    if y_true is None:
        y_true = y_test

    # With average='micro' on a single-label problem, precision, recall and F1
    # all collapse to the accuracy value. Scoring the positive (dropout) class
    # keeps the four numbers distinct and meaningful.
    print("Precision : ", precision_score(y_true, y_pred, average='binary'))
    print("Recall : ", recall_score(y_true, y_pred, average='binary'))
    print("Accuracy : ", accuracy_score(y_true, y_pred))
    print("F1 Score : ", f1_score(y_true, y_pred, average='binary'))

    cm = confusion_matrix(y_true, y_pred)
    print("\n", cm)
    print("\n")
    print("**"*27 + "\n" + " "*16 + "Classification Report\n" + "**"*27)
    print(classification_report(y_true, y_pred))
    print("**"*27+"\n")

    # Separate name for the display object so the matrix array is not shadowed.
    cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                        display_labels=['Non-Dropout', 'Dropout'])
    cm_display.plot()
Gaussian Naive Bayes

model_nb = GaussianNB()
model_nb.fit(x_train, y_train)

GaussianNB()

y_pred_nb = model_nb.predict(x_test)

perform(y_pred_nb)

Precision : 0.8457300275482094
Recall : 0.8457300275482094
Accuracy : 0.8457300275482094
F1 Score : 0.8457300275482094

[[405 43]
[ 69 209]]

******************************************************
Classification Report
******************************************************
precision recall f1-score support

0 0.85 0.90 0.88 448


1 0.83 0.75 0.79 278

accuracy 0.85 726


macro avg 0.84 0.83 0.83 726
weighted avg 0.84 0.85 0.84 726

******************************************************
Logistic Regression
model_lr = LogisticRegression()
model_lr.fit(x_train, y_train)

LogisticRegression()

model_svc = SVC(C=0.1,kernel='linear')
model_svc.fit(x_train, y_train)

SVC(C=0.1, kernel='linear')

y_pred_lr = model_lr.predict(x_test)

perform(y_pred_lr)

Precision : 0.9146005509641874
Recall : 0.9146005509641874
Accuracy : 0.9146005509641874
F1 Score : 0.9146005509641874

[[429 19]
[ 43 235]]
******************************************************
Classification Report
******************************************************
precision recall f1-score support

0 0.91 0.96 0.93 448


1 0.93 0.85 0.88 278

accuracy 0.91 726


macro avg 0.92 0.90 0.91 726
weighted avg 0.92 0.91 0.91 726

******************************************************

random forest
# 500 entropy-criterion trees. The seed is fixed so repeated runs build the
# same forest and report the same scores.
model_rf = RandomForestClassifier(n_estimators=500, criterion='entropy',
                                  random_state=1)
model_rf.fit(x_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=500)

y_pred_rf = model_rf.predict(x_test)
perform(y_pred_rf)

Precision : 0.9214876033057852
Recall : 0.9214876033057852
Accuracy : 0.9214876033057852
F1 Score : 0.9214876033057853

[[434 14]
[ 43 235]]

******************************************************
Classification Report
******************************************************
precision recall f1-score support

0 0.91 0.97 0.94 448


1 0.94 0.85 0.89 278

accuracy 0.92 726


macro avg 0.93 0.91 0.92 726
weighted avg 0.92 0.92 0.92 726

******************************************************
support vector classifier
y_pred_svc = model_svc.predict(x_test)

perform(y_pred_svc)

Precision : 0.9214876033057852
Recall : 0.9214876033057852
Accuracy : 0.9214876033057852
F1 Score : 0.9214876033057853

[[436 12]
[ 45 233]]

******************************************************
Classification Report
******************************************************
precision recall f1-score support

0 0.91 0.97 0.94 448


1 0.95 0.84 0.89 278

accuracy 0.92 726


macro avg 0.93 0.91 0.91 726
weighted avg 0.92 0.92 0.92 726

******************************************************
perceptron
# Perceptron only uses alpha / l1_ratio when a penalty is configured. They were
# passed alongside the default penalty=None and therefore had no effect, so
# they are omitted; the fitted model is unchanged.
model_mlp = Perceptron(max_iter=100)
model_mlp.fit(x_train, y_train)

Perceptron(alpha=0.001, l1_ratio=0.5, max_iter=100)

y_pred_mlp = model_mlp.predict(x_test)

perform(y_pred_mlp)

Precision : 0.8939393939393939
Recall : 0.8939393939393939
Accuracy : 0.8939393939393939
F1 Score : 0.8939393939393939

[[416 32]
[ 45 233]]

******************************************************
Classification Report
******************************************************
precision recall f1-score support
0 0.90 0.93 0.92 448
1 0.88 0.84 0.86 278

accuracy 0.89 726


macro avg 0.89 0.88 0.89 726
weighted avg 0.89 0.89 0.89 726

******************************************************

# Test-set accuracy of a KNN classifier for every K between 1 and 39.
k_values = range(1, 40)
knn_accuracy = []

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    knn_accuracy.append(accuracy_score(y_test, knn.predict(x_test)))

# Accuracy as a function of K.
plt.figure(figsize=(12, 6))
plt.plot(k_values, knn_accuracy, color='red',
         linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)

plt.title('K Value accuracy')
plt.xlabel('K Value')
plt.ylabel('Accuracy')

Text(0, 0.5, 'Accuracy')

model_knn = KNeighborsClassifier(n_neighbors=3)
model_knn.fit(x_train, y_train)

KNeighborsClassifier(n_neighbors=3)

y_pred_knn = model_knn.predict(x_test)

perform(y_pred_knn)

Precision : 0.859504132231405
Recall : 0.859504132231405
Accuracy : 0.859504132231405
F1 Score : 0.859504132231405

[[422 26]
[ 76 202]]

******************************************************
Classification Report
******************************************************
precision recall f1-score support
0 0.85 0.94 0.89 448
1 0.89 0.73 0.80 278

accuracy 0.86 726


macro avg 0.87 0.83 0.85 726
weighted avg 0.86 0.86 0.86 726

******************************************************

# Compare the test-set accuracy of every fitted model in one horizontal bar chart.
pred = [y_pred_nb, y_pred_lr, y_pred_rf, y_pred_svc, y_pred_mlp, y_pred_knn]
classifiers = ["NaiveBayes", "Logistic Regression", "RandomForest",
               "Support Vector Classifier", "Perceptron", "KNN"]
# One accuracy per prediction vector, in the same order as `classifiers`.
acc = [accuracy_score(y_test, predictions) for predictions in pred]

plt.barh(classifiers, acc)

# Add labels and title
plt.ylabel('classifiers')
plt.xlabel('Accuracy')
plt.title('Comparison')
plt.show()

You might also like