Print Version
Print Version
ipynb - Colab
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from scipy import stats
# Load data
df = pd.read_csv('/content/dataset.csv')
output
( pp )
23 Curricular units 1st sem (grade) 4424 non-null float64
24 Curricular units 1st sem (without evaluations) 4424 non-null int64
25 Curricular units 2nd sem (credited) 4424 non-null int64
26 Curricular units 2nd sem (enrolled) 4424 non-null int64
27 Curricular units 2nd sem (evaluations) 4424 non-null int64
28 Curricular units 2nd sem (approved) 4424 non-null int64
29 Curricular units 2nd sem (grade) 4424 non-null float64
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 1/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
Curricular units 2nd sem (credited) Curricular units 2nd sem (enrolled) \
0 0 0
1 0 6
2 0 6
3 0 6
4 0 6
Curricular units 2nd sem (approved) Curricular units 2nd sem (grade) \
0 0 0.000000
1 6 13.666667
2 0 0.000000
3 5 12.400000
4 6 13.000000
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 2/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
# GRADIENT BOOSTING MACHINES (GBM)
# LIGHTGBM CLASSIFIER
import lightgbm as lgb
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 3/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
# ADABOOST CLASSIFIER
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 4/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
# K-Nearest Neighbors (KNN)
dt_params = {
'max_depth': [3, 5, 10, 20, None],
'min_samples_leaf': [1, 2, 4, 6],
'min_samples_split': [2, 5, 10]
}
grid_search_dt = GridSearchCV(estimator=DecisionTreeClassifier(random_state=10), param_grid=dt_params, cv=5, scoring='accura
grid_search_dt.fit(X_train, y_train)
print("Best Decision Tree Parameters:", grid_search_dt.best_params_)
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 5/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
# gridsearch for random forest
rf_params = {
'n_estimators': [50, 100, 200],
'max_depth': [5, 10, 20, None],
'min_samples_leaf': [1, 2, 4]
}
grid_search_rf = GridSearchCV(estimator=RandomForestClassifier(random_state=10), param_grid=rf_params, cv=5, scoring='accura
grid_search_rf.fit(X_train, y_train)
print("Best Random Forest Parameters:", grid_search_rf.best_params_)
gbm_params = {
'n_estimators': [50, 100, 200],
'learning_rate': [0.01, 0.1, 0.2],
'max_depth': [3, 5, 10]
}
grid_search_gbm = GridSearchCV(estimator=GradientBoostingClassifier(random_state=10), param_grid=gbm_params, cv=5, scoring='
grid_search_gbm.fit(X_train, y_train)
print("Best GBM Parameters:", grid_search_gbm.best_params_)
xgb_params = {
'n_estimators': [50, 100, 200],
'learning_rate': [0.01, 0.1, 0.2],
'max_depth': [3, 5, 7]
}
grid_search_xgb = GridSearchCV(estimator=xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'), param_grid=xgb_
grid_search_xgb.fit(X_train, y_train)
print("Best XGBoost Parameters:", grid_search_xgb.best_params_)
lgbm_params = {
'n_estimators': [50, 100, 200],
'learning_rate': [0.01, 0.1, 0.2],
'num_leaves': [31, 41, 61]
}
grid_search_lgbm = GridSearchCV(estimator=lgb.LGBMClassifier(), param_grid=lgbm_params, cv=5, scoring='accuracy', verbose=1)
grid_search_lgbm.fit(X_train, y_train)
print("Best LightGBM Parameters:", grid_search_lgbm.best_params_)
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 6/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
[LightGBM] [Info] Start training from score -0.484531
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Number of positive: 709, number of negative: 1151
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000619 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 815
[LightGBM] [Info] Number of data points in the train set: 1860, number of used features: 31
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381183 -> initscore=-0.484531
[LightGBM] [Info] Start training from score -0.484531
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Number of positive: 710, number of negative: 1150
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001070 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 813
[LightGBM] [Info] Number of data points in the train set: 1860, number of used features: 31
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381720 -> initscore=-0.482252
[LightGBM] [Info] Start training from score -0.482252
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Number of positive: 710, number of negative: 1150
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000532 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 813
[LightGBM] [Info] Number of data points in the train set: 1860, number of used features: 31
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381720 -> initscore=-0.482252
[LightGBM] [Info] Start training from score -0.482252
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Number of positive: 710, number of negative: 1150
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000553 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
ada_params = {
'n_estimators': [50, 100, 200],
'learning_rate': [0.5, 1.0, 1.5]
}
grid_search_ada = GridSearchCV(estimator=AdaBoostClassifier(random_state=10), param_grid=ada_params, cv=5, scoring='accuracy
grid_search_ada.fit(X_train, y_train)
print("Best AdaBoost Parameters:", grid_search_ada.best_params_)
svm_params = {
'C': [0.1, 1, 10],
'gamma': ['scale', 'auto'],
'kernel': ['rbf', 'poly']
}
grid_search_svm = GridSearchCV(estimator=SVC(random_state=10), param_grid=svm_params, cv=5, scoring='accuracy', verbose=1)
grid_search_svm.fit(X_train, y_train)
print("Best SVM Parameters:", grid_search_svm.best_params_)
nn_params = {
'hidden_layer_sizes': [(50,), (100,), (100, 50)],
'activation': ['tanh', 'relu'],
'solver': ['sgd', 'adam'],
'max_iter': [200, 300]
}
grid_search_nn = GridSearchCV(estimator=MLPClassifier(random_state=10), param_grid=nn_params, cv=5, scoring='accuracy', verb
grid_search_nn.fit(X_train, y_train)
print("Best Neural Network Parameters:", grid_search_nn.best_params_)
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 7/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
/usr/local/lib/python3.10/dist packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stocha
warnings.warn(
knn_params = {
'n_neighbors': [3, 5, 7, 9],
'weights': ['uniform', 'distance'],
'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}
grid_search_knn = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=knn_params, cv=5, scoring='accuracy', verbose=1)
grid_search_knn.fit(X_train, y_train)
print("Best KNN Parameters:", grid_search_knn.best_params_)
logreg_params = {
'C': [0.01, 0.1, 1, 10],
'penalty': ['l1', 'l2'],
'solver': ['liblinear', 'saga']
}
grid_search_logreg = GridSearchCV(estimator=LogisticRegression(), param_grid=logreg_params, cv=5, scoring='accuracy', verbos
grid_search_logreg.fit(X_train, y_train)
print("Best Logistic Regression Parameters:", grid_search_logreg.best_params_)
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 8/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
Best Logistic Regression Parameters: {'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached w
warnings.warn(
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 9/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.model_selection import cross_val_score
import xgboost as xgb
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
model_metrics.append({
'Model': name,
'Mean CV Accuracy': np.mean(cv_scores),
'Precision': precision,
'Recall': recall,
'F1-Score': f1,
'ROC AUC': roc_auc,
'Confusion Matrix': cm
})
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 10/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 11/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_pe
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_pe
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_pe
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_pe
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_pe
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_pe
warnings.warn(
Model Mean CV Accuracy Precision Recall F1-Score \
0 Decision Tree 0.898865 0.904874 0.895189 0.892153
1 Random Forest 0.914008 0.928788 0.924399 0.923104
2 GBM 0.914697 0.923739 0.919244 0.917810
3 XGBoost 0.913320 0.923095 0.919244 0.917912
4 LightGBM 0.910223 0.928151 0.924399 0.923198
5 AdaBoost 0.910221 0.916883 0.914089 0.912829
6 SVM 0.917101 0.922281 0.917526 0.916009
7 Neural Network 0.915040 0.928552 0.926117 0.925163
8 KNN 0.868246 0.883877 0.869416 0.864237
9 Logistic Regression 0.917100 0.921618 0.917526 0.916114
ROC AUC
0 0.935605
1 0.966896
2 0.967651
3 0.964818
4 0.963024
5 0.963476
6 0.958831
7 0.965560
8 0.913326
9 0.965956
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 13/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame(data)
df_melted = df.melt('Model', var_name='Metrics', value_name='Values')
plt.figure(figsize=(10, 8))
sns.barplot(x='Values', y='Model', hue='Metrics', data=df_melted)
plt.title('Performance Comparison of ML Models')
plt.xlabel('Score')
plt.ylabel('Model')
plt.legend(loc='lower right')
plt.show()
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 14/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Data preparation
models = ['Decision Tree', 'Random Forest', 'GBM', 'XGBoost', 'LightGBM', 'AdaBoost', 'SVM', 'Neural Network', 'KNN', 'Logis
mean_cv_accuracy = [0.898865, 0.913319, 0.914697, 0.913320, 0.910223, 0.910221, 0.917101, 0.919167, 0.868246, 0.917100]
precision = [0.904874, 0.925203, 0.923739, 0.923095, 0.928151, 0.916883, 0.922281, 0.931126, 0.883877, 0.921618]
recall = [0.895189, 0.920962, 0.919244, 0.919244, 0.924399, 0.914089, 0.917526, 0.927835, 0.869416, 0.917526]
f1_score = [0.892153, 0.919609, 0.917810, 0.917912, 0.923198, 0.912829, 0.916009, 0.926777, 0.864237, 0.916114]
roc_auc = [0.935605, 0.967057, 0.967478, 0.964818, 0.963024, 0.963476, 0.958831, 0.969816, 0.913326, 0.965931]
df = pd.DataFrame({
'Model': models,
'Mean CV Accuracy': mean_cv_accuracy,
'Precision': precision,
'Recall': recall,
'F1-Score': f1_score,
'ROC AUC': roc_auc
})
# Plotting
metrics = ['Mean CV Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC AUC']
for metric in metrics:
plt.figure(figsize=(10, 6))
sns.barplot(x=metric, y='Model', data=df, palette='viridis')
plt.title(f'Comparison of Models Based on {metric}')
plt.xlabel(metric)
plt.ylabel('Model')
plt.xlim(left=min(df[metric]) * 0.95, right=max(df[metric]) * 1.05) # Adjust x-axis limits for better visualization
plt.show()
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 15/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
<ipython-input-17-13aba1823bd1>:26: FutureWarning:
<ipython-input-17-13aba1823bd1>:26: FutureWarning:
<ipython-input-17-13aba1823bd1>:26: FutureWarning:
<ipython-input-17-13aba1823bd1>:26: FutureWarning:
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 16/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
<ipython-input-17-13aba1823bd1>:26: FutureWarning:
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 17/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
import pandas as pd
# Initialize data
data = {
'Model': ['Decision Tree', 'Random Forest', 'GBM', 'XGBoost', 'LightGBM', 'AdaBoost', 'SVM', 'Neural Network', 'KNN', 'L
'Mean CV Accuracy': [0.898865, 0.913319, 0.914697, 0.913320, 0.910223, 0.910221, 0.917101, 0.919167, 0.868246, 0.917100]
'Precision': [0.904874, 0.925203, 0.923739, 0.923095, 0.928151, 0.916883, 0.922281, 0.931126, 0.883877, 0.921618],
'Recall': [0.895189, 0.920962, 0.919244, 0.919244, 0.924399, 0.914089, 0.917526, 0.927835, 0.869416, 0.917526],
'F1-Score': [0.892153, 0.919609, 0.917810, 0.917912, 0.923198, 0.912829, 0.916009, 0.926777, 0.864237, 0.916114],
'ROC AUC': [0.935605, 0.967057, 0.967478, 0.964818, 0.963024, 0.963476, 0.958831, 0.969816, 0.913326, 0.965931]
}
df = pd.DataFrame(data)
top_3_each_metric = {}
metrics = ['Mean CV Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC AUC']
for metric in metrics:
top_3_each_metric[metric] = df.nsmallest(3, f'{metric} Rank')[['Model', metric]]
print(f"Top 3 for {metric}:\n{top_3_each_metric[metric]}\n")
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 18/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
Model Recall
7 Neural Network 0.927835
4 LightGBM 0.924399
1 Random Forest 0.920962
# Overall Top 3
# Overall Winner
overall_winner = df.loc[df['Sum of Ranks'].idxmin()]
print(f"Overall Winner:\n{overall_winner}\n")
Overall Winner:
Model Neural Network
Mean CV Accuracy 0.919167
Precision 0.931126
Recall 0.927835
F1-Score 0.926777
ROC AUC 0.969816
Mean CV Accuracy Rank 1.0
Precision Rank 1.0
Recall Rank 1.0
F1-Score Rank 1.0
ROC AUC Rank 1.0
Sum of Ranks 5.0
Name: 7, dtype: object
keyboard_arrow_down VISUALIZATIONS
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 19/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 20/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 21/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 22/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 23/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
# FEATURE IMPORTANCE PLOTS
for model, name in zip([dt_model, rf_model, gbm_model, xgb_model], ['Decision Tree', 'Random Forest', 'GBM', 'XGBoost']):
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]
plt.figure()
plt.title(f'Feature Importances in {name}')
plt.bar(range(X_train.shape[1]), importances[indices], align='center')
plt.xticks(range(X_train.shape[1]), X_train.columns[indices], rotation=90)
plt.ylabel('Importance')
plt.show()
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 24/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 25/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 26/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
# Precision Recall Curves
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve Comparison')
plt.legend(loc='upper right')
plt.show()
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 27/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
# INTERACTIVE MODEL
import plotly.express as px
Decision Tree
Random Forest
GBM
XGBoost
LightGBM
Model
AdaBoost
SVM
Neural Network
KNN
Logistic Regression
Mea
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 28/29
4/13/24, 1:54 AM COMPLETE VERSION 1 in Sha Allah.ipynb - Colab
https://colab.research.google.com/drive/1CX1tB6z4WAqIpKOYGjkfNMTYMmlkZMeS#scrollTo=mBbvmg5s0qBl&printMode=true 29/29