Experiment01 Baseline Models Accuracy
Experiment01 Baseline Models Accuracy
python --version
Python 3.10.13
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# training/testing utils
from sklearn.utils import resample
from sklearn.utils.class_weight import compute_class_weight,
compute_sample_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report,
roc_auc_score
from sklearn.utils.class_weight import compute_sample_weight,
compute_class_weight
# Symbolic regression
from gplearn.genetic import SymbolicClassifier
/var/folders/qp/4w02sqhj6_d43815lhzjb7900000gn/T/
ipykernel_73514/129412594.py:1: DtypeWarning: Columns (756) have mixed
types. Specify dtype option on import or set low_memory=False.
data = pd.read_csv('../data/data_diet_filtered.csv', index_col=0)
Error exception: Only one class present in y_true. ROC AUC score is
not defined in that case.
# Class label that is paired with label 0 to form a two-class subset.
k = 2
# Boolean mask over `y`: True where the label is 0 or k.
# NOTE(review): the original note calls label 0 "healthy"; `y` is defined outside this excerpt.
idxs = (y == 0) | (y == k) # get healthy and that class' data
def convert_to_arrays(prec=4):
    """Replace every stored per-model metric sequence with a rounded array.

    For each name in ``MODEL_NAMES`` the entries of ``ACCURACIES``,
    ``F1SCORES`` and ``AUCROCS`` are converted with ``np.asarray`` and
    rounded with ``np.around``; the dictionaries are updated in place.

    Parameters
    ----------
    prec : int, default 4
        Number of decimals handed to ``np.around``.
    """
    metric_stores = (ACCURACIES, F1SCORES, AUCROCS)
    for name in MODEL_NAMES:
        for store in metric_stores:
            as_array = np.asarray(store[name])
            store[name] = np.around(as_array, prec)
# Fitted symbolic classifiers collected by the two runs at the end of this block.
SRmodels = []
SRfmodels = []


def _fit_symbolic_classifier(key, models, X_train, y_train, X_test, y_test):
    """Fit one SymbolicClassifier, record its metrics, and keep the model.

    Parameters
    ----------
    key : str
        Name under which ``store_result`` files the metrics ('SR' or 'SRf').
    models : list
        List the fitted estimator is appended to.
    X_train, y_train, X_test, y_test
        Train/test split forwarded to ``fit`` and ``calculate_metrics``.

    Returns
    -------
    SymbolicClassifier
        The fitted estimator.
    """
    est = SymbolicClassifier(population_size=6000,
                             generations=20,
                             tournament_size=25,
                             init_depth=(2, 6),
                             const_range=(0., 100.),
                             # init_method="full",
                             parsimony_coefficient=0.001,
                             function_set=function_set,
                             stopping_criteria=1.0,
                             metric=customacc,  # use custom acc as fitness
                             # NOTE(review): column names are always taken from X1,
                             # also when the split comes from X1b - confirm both
                             # frames share the same columns.
                             feature_names=X1.columns.to_list(),
                             # verbose=True,
                             random_state=42)
    t0 = time.time()
    est.fit(X_train, y_train)
    print('Time to fit symbolic classifier:', time.time() - t0, 'seconds')
    store_result(key, *calculate_metrics(est, X_train, y_train, X_test, y_test))
    models.append(est)
    return est


if not USE_BALANCED_SUBSAMPLE:
    X_train, X_test, y_train, y_test = train_test_split(
        X1, y1,
        test_size=0.25,
        # train_size=0.5,  # if slow use this
        random_state=random_state,
        stratify=y1,
    )
    class_weight = compute_class_weight(class_weight='balanced',
                                        classes=np.unique(y1), y=y1)
else:
    # print('using a balanced subsample of the data ...')
    # NOTE(review): this branch splits with a fixed seed of 42 while the other
    # branch uses `random_state` - confirm that this is intended.
    X_train, X_test, y_train, y_test = train_test_split(
        X1b, y1b,
        test_size=0.25,
        # train_size=0.5,  # if slow use this
        random_state=42,
        stratify=y1b,
    )
    class_weight = compute_class_weight(class_weight='balanced',
                                        classes=np.unique(y1b), y=y1b)

# NOTE(review): `class_weight` and `sample_weights` are not used by the code
# visible in this block; presumably they are consumed elsewhere - verify.
sample_weights = compute_sample_weight(class_weight='balanced', y=y_train)

# NOTE(review): as written, the 'SR' and 'SRf' runs use an identical
# configuration and seed; any intended difference is not visible here.
est = _fit_symbolic_classifier('SR', SRmodels, X_train, y_train, X_test, y_test)
est = _fit_symbolic_classifier('SRf', SRfmodels, X_train, y_train, X_test, y_test)
seed=42
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7265 Test AUROC: 0.7981 Test F1 score: 0.7009
DecisionTreeClassifier
Test accuracy: 0.7514 Test AUROC: 0.8186 Test F1 score: 0.7078
RandomForestClassifier
Test accuracy: 0.8011 Test AUROC: 0.9015 Test F1 score: 0.7647
XGBClassifier
Test accuracy: 0.8260 Test AUROC: 0.9131 Test F1 score: 0.8013
Time to fit symbolic classifier: 42.2771680355072 seconds
SymbolicClassifier
Test accuracy: 0.7431 Test AUROC: 0.7488 Test F1 score: 0.6847
Time to fit symbolic classifier: 41.15023899078369 seconds
SymbolicClassifier
Test accuracy: 0.7514 Test AUROC: 0.7579 Test F1 score: 0.6939
seed=2024
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
LogisticRegression
Test accuracy: 0.7210 Test AUROC: 0.7726 Test F1 score: 0.6967
DecisionTreeClassifier
Test accuracy: 0.7182 Test AUROC: 0.7402 Test F1 score: 0.6982
RandomForestClassifier
Test accuracy: 0.8122 Test AUROC: 0.8961 Test F1 score: 0.7792
XGBClassifier
Test accuracy: 0.8177 Test AUROC: 0.9159 Test F1 score: 0.7911
Time to fit symbolic classifier: 41.51435089111328 seconds
SymbolicClassifier
Test accuracy: 0.7569 Test AUROC: 0.7640 Test F1 score: 0.6966
Time to fit symbolic classifier: 41.539920806884766 seconds
SymbolicClassifier
Test accuracy: 0.7541 Test AUROC: 0.7641 Test F1 score: 0.6962
seed=1234
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7155 Test AUROC: 0.7885 Test F1 score: 0.6997
DecisionTreeClassifier
Test accuracy: 0.7845 Test AUROC: 0.8185 Test F1 score: 0.7365
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.9015 Test F1 score: 0.7947
XGBClassifier
Test accuracy: 0.8398 Test AUROC: 0.9181 Test F1 score: 0.8165
Time to fit symbolic classifier: 43.203129053115845 seconds
SymbolicClassifier
Test accuracy: 0.7403 Test AUROC: 0.7617 Test F1 score: 0.7006
Time to fit symbolic classifier: 44.14738321304321 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7692 Test F1 score: 0.7079
seed=2405
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7818 Test AUROC: 0.8474 Test F1 score: 0.7508
DecisionTreeClassifier
Test accuracy: 0.7348 Test AUROC: 0.7518 Test F1 score: 0.6667
RandomForestClassifier
Test accuracy: 0.8122 Test AUROC: 0.8982 Test F1 score: 0.7655
XGBClassifier
Test accuracy: 0.8177 Test AUROC: 0.9079 Test F1 score: 0.7925
Time to fit symbolic classifier: 43.750773906707764 seconds
SymbolicClassifier
Test accuracy: 0.7459 Test AUROC: 0.7500 Test F1 score: 0.6913
Time to fit symbolic classifier: 43.03351879119873 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7465 Test F1 score: 0.6851
seed=11
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7238 Test AUROC: 0.7970 Test F1 score: 0.7059
DecisionTreeClassifier
Test accuracy: 0.7597 Test AUROC: 0.7982 Test F1 score: 0.7307
RandomForestClassifier
Test accuracy: 0.7707 Test AUROC: 0.8785 Test F1 score: 0.7314
XGBClassifier
Test accuracy: 0.8232 Test AUROC: 0.9068 Test F1 score: 0.8012
Time to fit symbolic classifier: 43.38354301452637 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7643 Test F1 score: 0.7010
Time to fit symbolic classifier: 43.27070212364197 seconds
SymbolicClassifier
Test accuracy: 0.7155 Test AUROC: 0.7301 Test F1 score: 0.6532
seed=9345
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.6989 Test AUROC: 0.7772 Test F1 score: 0.6625
DecisionTreeClassifier
Test accuracy: 0.7155 Test AUROC: 0.7750 Test F1 score: 0.6485
RandomForestClassifier
Test accuracy: 0.8039 Test AUROC: 0.8891 Test F1 score: 0.7641
XGBClassifier
Test accuracy: 0.8370 Test AUROC: 0.9182 Test F1 score: 0.8103
Time to fit symbolic classifier: 43.272748947143555 seconds
SymbolicClassifier
Test accuracy: 0.7459 Test AUROC: 0.7491 Test F1 score: 0.6892
Time to fit symbolic classifier: 42.66357135772705 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7533 Test F1 score: 0.6915
seed=858
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
LogisticRegression
Test accuracy: 0.7624 Test AUROC: 0.8271 Test F1 score: 0.7346
DecisionTreeClassifier
Test accuracy: 0.7735 Test AUROC: 0.7956 Test F1 score: 0.7500
RandomForestClassifier
Test accuracy: 0.8315 Test AUROC: 0.9196 Test F1 score: 0.7973
XGBClassifier
Test accuracy: 0.8204 Test AUROC: 0.9280 Test F1 score: 0.7855
Time to fit symbolic classifier: 43.11080884933472 seconds
SymbolicClassifier
Test accuracy: 0.7569 Test AUROC: 0.7890 Test F1 score: 0.6986
Time to fit symbolic classifier: 43.20073890686035 seconds
SymbolicClassifier
Test accuracy: 0.7348 Test AUROC: 0.7411 Test F1 score: 0.6643
seed=8590
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7403 Test AUROC: 0.8040 Test F1 score: 0.7134
DecisionTreeClassifier
Test accuracy: 0.7514 Test AUROC: 0.7889 Test F1 score: 0.6897
RandomForestClassifier
Test accuracy: 0.8122 Test AUROC: 0.9011 Test F1 score: 0.7792
XGBClassifier
Test accuracy: 0.8232 Test AUROC: 0.9098 Test F1 score: 0.7987
Time to fit symbolic classifier: 42.94043493270874 seconds
SymbolicClassifier
Test accuracy: 0.7541 Test AUROC: 0.7544 Test F1 score: 0.6899
Time to fit symbolic classifier: 43.01768684387207 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7573 Test F1 score: 0.6915
seed=4754
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7762 Test AUROC: 0.8377 Test F1 score: 0.7461
DecisionTreeClassifier
Test accuracy: 0.7348 Test AUROC: 0.7938 Test F1 score: 0.6620
RandomForestClassifier
Test accuracy: 0.8260 Test AUROC: 0.9111 Test F1 score: 0.7921
XGBClassifier
Test accuracy: 0.8315 Test AUROC: 0.9308 Test F1 score: 0.8039
Time to fit symbolic classifier: 43.672722816467285 seconds
SymbolicClassifier
Test accuracy: 0.7762 Test AUROC: 0.7759 Test F1 score: 0.7138
Time to fit symbolic classifier: 43.33226490020752 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7694 Test F1 score: 0.7031
seed=1959
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7624 Test AUROC: 0.8320 Test F1 score: 0.7312
DecisionTreeClassifier
Test accuracy: 0.6878 Test AUROC: 0.7568 Test F1 score: 0.6744
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.8998 Test F1 score: 0.8050
XGBClassifier
Test accuracy: 0.7928 Test AUROC: 0.8996 Test F1 score: 0.7734
Time to fit symbolic classifier: 42.98330283164978 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7580 Test F1 score: 0.6915
Time to fit symbolic classifier: 44.140511989593506 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7697 Test F1 score: 0.7157
seed=707
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7624 Test AUROC: 0.8203 Test F1 score: 0.7394
DecisionTreeClassifier
Test accuracy: 0.7569 Test AUROC: 0.8137 Test F1 score: 0.6667
RandomForestClassifier
Test accuracy: 0.8260 Test AUROC: 0.9097 Test F1 score: 0.7921
XGBClassifier
Test accuracy: 0.8232 Test AUROC: 0.9166 Test F1 score: 0.7949
Time to fit symbolic classifier: 43.830246925354004 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7530 Test F1 score: 0.7217
Time to fit symbolic classifier: 43.57986807823181 seconds
SymbolicClassifier
Test accuracy: 0.7790 Test AUROC: 0.8150 Test F1 score: 0.7143
seed=10524
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
LogisticRegression
Test accuracy: 0.7514 Test AUROC: 0.8078 Test F1 score: 0.7289
DecisionTreeClassifier
Test accuracy: 0.7265 Test AUROC: 0.7624 Test F1 score: 0.6991
RandomForestClassifier
Test accuracy: 0.8204 Test AUROC: 0.8905 Test F1 score: 0.7883
XGBClassifier
Test accuracy: 0.8260 Test AUROC: 0.8991 Test F1 score: 0.8037
Time to fit symbolic classifier: 43.50123906135559 seconds
SymbolicClassifier
Test accuracy: 0.7514 Test AUROC: 0.7622 Test F1 score: 0.7039
Time to fit symbolic classifier: 43.24294877052307 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7613 Test F1 score: 0.6997
seed=83946
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7403 Test AUROC: 0.7988 Test F1 score: 0.7081
DecisionTreeClassifier
Test accuracy: 0.7238 Test AUROC: 0.7863 Test F1 score: 0.6552
RandomForestClassifier
Test accuracy: 0.8204 Test AUROC: 0.8863 Test F1 score: 0.7855
XGBClassifier
Test accuracy: 0.8287 Test AUROC: 0.9158 Test F1 score: 0.8062
Time to fit symbolic classifier: 43.89677596092224 seconds
SymbolicClassifier
Test accuracy: 0.7459 Test AUROC: 0.7638 Test F1 score: 0.6954
Time to fit symbolic classifier: 42.90026021003723 seconds
SymbolicClassifier
Test accuracy: 0.7348 Test AUROC: 0.7510 Test F1 score: 0.6800
seed=63297
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7762 Test AUROC: 0.8216 Test F1 score: 0.7508
DecisionTreeClassifier
Test accuracy: 0.7845 Test AUROC: 0.8127 Test F1 score: 0.7665
RandomForestClassifier
Test accuracy: 0.8260 Test AUROC: 0.9118 Test F1 score: 0.7948
XGBClassifier
Test accuracy: 0.8564 Test AUROC: 0.9337 Test F1 score: 0.8354
Time to fit symbolic classifier: 43.374119997024536 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7754 Test F1 score: 0.7195
Time to fit symbolic classifier: 43.408135175704956 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7666 Test F1 score: 0.7099
seed=78035
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7735 Test AUROC: 0.8348 Test F1 score: 0.7405
DecisionTreeClassifier
Test accuracy: 0.7403 Test AUROC: 0.7901 Test F1 score: 0.6846
RandomForestClassifier
Test accuracy: 0.8370 Test AUROC: 0.9148 Test F1 score: 0.8115
XGBClassifier
Test accuracy: 0.8536 Test AUROC: 0.9395 Test F1 score: 0.8296
Time to fit symbolic classifier: 43.04957699775696 seconds
SymbolicClassifier
Test accuracy: 0.7541 Test AUROC: 0.7593 Test F1 score: 0.7023
Time to fit symbolic classifier: 42.84919810295105 seconds
SymbolicClassifier
Test accuracy: 0.7762 Test AUROC: 0.8103 Test F1 score: 0.7178
seed=22664
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7790 Test AUROC: 0.8231 Test F1 score: 0.7531
DecisionTreeClassifier
Test accuracy: 0.7155 Test AUROC: 0.8195 Test F1 score: 0.7099
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.9080 Test F1 score: 0.7987
XGBClassifier
Test accuracy: 0.8453 Test AUROC: 0.9225 Test F1 score: 0.8228
Time to fit symbolic classifier: 43.30705690383911 seconds
SymbolicClassifier
Test accuracy: 0.7514 Test AUROC: 0.7704 Test F1 score: 0.7020
Time to fit symbolic classifier: 44.09287762641907 seconds
SymbolicClassifier
Test accuracy: 0.7431 Test AUROC: 0.7477 Test F1 score: 0.6714
seed=49283
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
LogisticRegression
Test accuracy: 0.7541 Test AUROC: 0.8084 Test F1 score: 0.7405
DecisionTreeClassifier
Test accuracy: 0.7293 Test AUROC: 0.7896 Test F1 score: 0.7168
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.9166 Test F1 score: 0.7947
XGBClassifier
Test accuracy: 0.8398 Test AUROC: 0.9318 Test F1 score: 0.8129
Time to fit symbolic classifier: 42.77116394042969 seconds
SymbolicClassifier
Test accuracy: 0.7569 Test AUROC: 0.7585 Test F1 score: 0.7007
Time to fit symbolic classifier: 45.94197463989258 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7513 Test F1 score: 0.7010
seed=35253
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7569 Test AUROC: 0.8184 Test F1 score: 0.7317
DecisionTreeClassifier
Test accuracy: 0.7514 Test AUROC: 0.8107 Test F1 score: 0.7059
RandomForestClassifier
Test accuracy: 0.8039 Test AUROC: 0.8988 Test F1 score: 0.7717
XGBClassifier
Test accuracy: 0.8094 Test AUROC: 0.9091 Test F1 score: 0.7903
Time to fit symbolic classifier: 44.62361788749695 seconds
SymbolicClassifier
Test accuracy: 0.7210 Test AUROC: 0.7384 Test F1 score: 0.6731
Time to fit symbolic classifier: 44.03053903579712 seconds
SymbolicClassifier
Test accuracy: 0.7403 Test AUROC: 0.7603 Test F1 score: 0.6928
seed=82273
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7293 Test AUROC: 0.7904 Test F1 score: 0.7048
DecisionTreeClassifier
Test accuracy: 0.7403 Test AUROC: 0.7878 Test F1 score: 0.6908
RandomForestClassifier
Test accuracy: 0.8039 Test AUROC: 0.8927 Test F1 score: 0.7609
XGBClassifier
Test accuracy: 0.8425 Test AUROC: 0.9112 Test F1 score: 0.8190
Time to fit symbolic classifier: 45.13826107978821 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7551 Test F1 score: 0.6936
Time to fit symbolic classifier: 44.299291133880615 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7536 Test F1 score: 0.6915
seed=90378
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.7541 Test AUROC: 0.8200 Test F1 score: 0.7343
DecisionTreeClassifier
Test accuracy: 0.7541 Test AUROC: 0.8313 Test F1 score: 0.6877
RandomForestClassifier
Test accuracy: 0.8011 Test AUROC: 0.9094 Test F1 score: 0.7647
XGBClassifier
Test accuracy: 0.8287 Test AUROC: 0.9243 Test F1 score: 0.8025
Time to fit symbolic classifier: 44.41036295890808 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7663 Test F1 score: 0.7031
Time to fit symbolic classifier: 46.336766958236694 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7671 Test F1 score: 0.7031
Process results
# Length of the final program of every fitted symbolic classifier, grouped by
# run name and stored as NumPy arrays.
LENGTHS = {
    'SR': np.asarray([model._program.length_ for model in SRmodels]),
    'SRf': np.asarray([model._program.length_ for model in SRfmodels]),
}
# Turn the collected metric lists into rounded arrays before summarising them.
convert_to_arrays()

# Plus-minus sign printed between the mean and the standard deviation.
pm = "\u00B1"
plusminus = pm

# Accuracy per model: mean (2 decimals) and standard deviation (4 decimals).
for model_name in MODEL_NAMES:
    acc_values = ACCURACIES[model_name]
    print(f"{model_name} acc: {acc_values.mean():.2f} {pm} {acc_values.std():.4f}")
# NOTE(review): separator assumed to follow the loop; the source lost its indentation.
print('--------')

# F1 score per model, same layout as above.
for model_name in MODEL_NAMES:
    f1_values = F1SCORES[model_name]
    print(f"{model_name} F1: {f1_values.mean():.2f} {pm} {f1_values.std():.4f}")
print('--------')
# Hand each list of fitted symbolic classifiers to save_sr_models together
# with its key and target directory.
for sr_key, sr_models, sr_dir in (
    ('SR', SRmodels, '../results_srmb/sr_vanilla_models/'),
    ('SRf', SRfmodels, '../results_srmb/sr_special_models/'),
):
    save_sr_models(sr_models, key=sr_key, save_dir=sr_dir)
# SRMODELS
# Fitted symbolic classifiers collected by the two runs at the end of this block.
SRmodels = []
SRfmodels = []


def _fit_symbolic_classifier(key, models, X_train, y_train, X_test, y_test):
    """Fit one SymbolicClassifier, record its metrics, and keep the model.

    Parameters
    ----------
    key : str
        Name under which ``store_result`` files the metrics ('SR' or 'SRf').
    models : list
        List the fitted estimator is appended to.
    X_train, y_train, X_test, y_test
        Train/test split forwarded to ``fit`` and ``calculate_metrics``.

    Returns
    -------
    SymbolicClassifier
        The fitted estimator.
    """
    est = SymbolicClassifier(population_size=6000,
                             generations=20,
                             tournament_size=25,
                             init_depth=(2, 6),
                             const_range=(0., 100.),
                             # init_method="full",
                             parsimony_coefficient=0.001,
                             function_set=function_set,
                             stopping_criteria=1.0,
                             metric=customacc,  # use custom acc as fitness
                             # NOTE(review): column names are always taken from X1,
                             # also when the split comes from X1b - confirm both
                             # frames share the same columns.
                             feature_names=X1.columns.to_list(),
                             # verbose=True,
                             random_state=42)
    t0 = time.time()
    est.fit(X_train, y_train)
    print('Time to fit symbolic classifier:', time.time() - t0, 'seconds')
    store_result(key, *calculate_metrics(est, X_train, y_train, X_test, y_test))
    models.append(est)
    return est


if not USE_BALANCED_SUBSAMPLE:
    X_train, X_test, y_train, y_test = train_test_split(
        X1, y1,
        test_size=0.25,
        # train_size=0.5,  # if slow use this
        random_state=random_state,
        stratify=y1,
    )
    class_weight = compute_class_weight(class_weight='balanced',
                                        classes=np.unique(y1), y=y1)
else:
    # print('using a balanced subsample of the data ...')
    # NOTE(review): this branch splits with a fixed seed of 42 while the other
    # branch uses `random_state` - confirm that this is intended.
    X_train, X_test, y_train, y_test = train_test_split(
        X1b, y1b,
        test_size=0.25,
        # train_size=0.5,  # if slow use this
        random_state=42,
        stratify=y1b,
    )
    class_weight = compute_class_weight(class_weight='balanced',
                                        classes=np.unique(y1b), y=y1b)

# NOTE(review): `class_weight` and `sample_weights` are not used by the code
# visible in this block; presumably they are consumed elsewhere - verify.
sample_weights = compute_sample_weight(class_weight='balanced', y=y_train)

# NOTE(review): as written, the 'SR' and 'SRf' runs use an identical
# configuration and seed; any intended difference is not visible here.
est = _fit_symbolic_classifier('SR', SRmodels, X_train, y_train, X_test, y_test)
est = _fit_symbolic_classifier('SRf', SRfmodels, X_train, y_train, X_test, y_test)
seed=42
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8467 Test AUROC: 0.8193 Test F1 score: 0.3380
DecisionTreeClassifier
Test accuracy: 0.8700 Test AUROC: 0.8443 Test F1 score: 0.3926
RandomForestClassifier
Test accuracy: 0.9490 Test AUROC: 0.9315 Test F1 score: 0.2526
XGBClassifier
Test accuracy: 0.9346 Test AUROC: 0.9426 Test F1 score: 0.5806
Time to fit symbolic classifier: 67.33232116699219 seconds
SymbolicClassifier
Test accuracy: 0.9411 Test AUROC: 0.6109 Test F1 score: 0.3223
Time to fit symbolic classifier: 65.69597911834717 seconds
SymbolicClassifier
Test accuracy: 0.9447 Test AUROC: 0.5897 Test F1 score: 0.2870
seed=2024
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8492 Test AUROC: 0.8519 Test F1 score: 0.3558
DecisionTreeClassifier
Test accuracy: 0.7745 Test AUROC: 0.8317 Test F1 score: 0.2991
RandomForestClassifier
Test accuracy: 0.9504 Test AUROC: 0.9401 Test F1 score: 0.3030
XGBClassifier
Test accuracy: 0.9318 Test AUROC: 0.9497 Test F1 score: 0.5662
Time to fit symbolic classifier: 66.04439783096313 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6489 Test F1 score: 0.4130
Time to fit symbolic classifier: 67.61290097236633 seconds
SymbolicClassifier
Test accuracy: 0.9483 Test AUROC: 0.6368 Test F1 score: 0.3950
seed=1234
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8492 Test AUROC: 0.8427 Test F1 score: 0.3458
DecisionTreeClassifier
Test accuracy: 0.8205 Test AUROC: 0.8374 Test F1 score: 0.3225
RandomForestClassifier
Test accuracy: 0.9508 Test AUROC: 0.9278 Test F1 score: 0.3184
XGBClassifier
Test accuracy: 0.9307 Test AUROC: 0.9266 Test F1 score: 0.5522
Time to fit symbolic classifier: 64.78528308868408 seconds
SymbolicClassifier
Test accuracy: 0.9436 Test AUROC: 0.6266 Test F1 score: 0.3592
Time to fit symbolic classifier: 63.20682096481323 seconds
SymbolicClassifier
Test accuracy: 0.9436 Test AUROC: 0.6266 Test F1 score: 0.3592
seed=2405
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8478 Test AUROC: 0.8632 Test F1 score: 0.3653
DecisionTreeClassifier
Test accuracy: 0.8014 Test AUROC: 0.8131 Test F1 score: 0.3061
RandomForestClassifier
Test accuracy: 0.9501 Test AUROC: 0.9250 Test F1 score: 0.2798
XGBClassifier
Test accuracy: 0.9278 Test AUROC: 0.9367 Test F1 score: 0.5483
Time to fit symbolic classifier: 67.03891324996948 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6488 Test F1 score: 0.4130
Time to fit symbolic classifier: 64.79723119735718 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6488 Test F1 score: 0.4130
seed=11
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8442 Test AUROC: 0.8298 Test F1 score: 0.3344
DecisionTreeClassifier
Test accuracy: 0.7874 Test AUROC: 0.8179 Test F1 score: 0.2936
RandomForestClassifier
Test accuracy: 0.9519 Test AUROC: 0.9456 Test F1 score: 0.3431
XGBClassifier
Test accuracy: 0.9314 Test AUROC: 0.9412 Test F1 score: 0.5708
Time to fit symbolic classifier: 68.0005111694336 seconds
SymbolicClassifier
Test accuracy: 0.9458 Test AUROC: 0.5620 Test F1 score: 0.2176
Time to fit symbolic classifier: 65.93679213523865 seconds
SymbolicClassifier
Test accuracy: 0.9504 Test AUROC: 0.6351 Test F1 score: 0.4000
seed=9345
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
LogisticRegression
Test accuracy: 0.8560 Test AUROC: 0.8329 Test F1 score: 0.3543
DecisionTreeClassifier
Test accuracy: 0.8039 Test AUROC: 0.8499 Test F1 score: 0.3175
RandomForestClassifier
Test accuracy: 0.9476 Test AUROC: 0.9272 Test F1 score: 0.2316
XGBClassifier
Test accuracy: 0.9332 Test AUROC: 0.9536 Test F1 score: 0.5830
Time to fit symbolic classifier: 63.743890047073364 seconds
SymbolicClassifier
Test accuracy: 0.9469 Test AUROC: 0.5684 Test F1 score: 0.2371
Time to fit symbolic classifier: 67.9835159778595 seconds
SymbolicClassifier
Test accuracy: 0.9472 Test AUROC: 0.6024 Test F1 score: 0.3226
seed=858
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8474 Test AUROC: 0.8279 Test F1 score: 0.3472
DecisionTreeClassifier
Test accuracy: 0.8126 Test AUROC: 0.7509 Test F1 score: 0.2965
RandomForestClassifier
Test accuracy: 0.9508 Test AUROC: 0.9173 Test F1 score: 0.3046
XGBClassifier
Test accuracy: 0.9364 Test AUROC: 0.9368 Test F1 score: 0.5755
Time to fit symbolic classifier: 65.58412313461304 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6175 Test F1 score: 0.3374
Time to fit symbolic classifier: 67.61177802085876 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6175 Test F1 score: 0.3374
seed=8590
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8445 Test AUROC: 0.8175 Test F1 score: 0.3224
DecisionTreeClassifier
Test accuracy: 0.7318 Test AUROC: 0.8034 Test F1 score: 0.2537
RandomForestClassifier
Test accuracy: 0.9479 Test AUROC: 0.9159 Test F1 score: 0.2408
XGBClassifier
Test accuracy: 0.9264 Test AUROC: 0.9206 Test F1 score: 0.5060
Time to fit symbolic classifier: 69.19223213195801 seconds
SymbolicClassifier
Test accuracy: 0.9429 Test AUROC: 0.5949 Test F1 score: 0.2933
Time to fit symbolic classifier: 73.34092903137207 seconds
SymbolicClassifier
Test accuracy: 0.9451 Test AUROC: 0.5956 Test F1 score: 0.3014
seed=4754
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8571 Test AUROC: 0.8440 Test F1 score: 0.3539
DecisionTreeClassifier
Test accuracy: 0.8226 Test AUROC: 0.8046 Test F1 score: 0.3270
RandomForestClassifier
Test accuracy: 0.9483 Test AUROC: 0.9139 Test F1 score: 0.2500
XGBClassifier
Test accuracy: 0.9329 Test AUROC: 0.9465 Test F1 score: 0.5600
Time to fit symbolic classifier: 68.17389011383057 seconds
SymbolicClassifier
Test accuracy: 0.9454 Test AUROC: 0.6299 Test F1 score: 0.3719
Time to fit symbolic classifier: 68.23464798927307 seconds
SymbolicClassifier
Test accuracy: 0.9476 Test AUROC: 0.5997 Test F1 score: 0.3178
seed=1959
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8524 Test AUROC: 0.8342 Test F1 score: 0.3568
DecisionTreeClassifier
Test accuracy: 0.8779 Test AUROC: 0.8453 Test F1 score: 0.4178
RandomForestClassifier
Test accuracy: 0.9494 Test AUROC: 0.9401 Test F1 score: 0.2694
XGBClassifier
Test accuracy: 0.9411 Test AUROC: 0.9409 Test F1 score: 0.6186
Time to fit symbolic classifier: 73.17940306663513 seconds
SymbolicClassifier
Test accuracy: 0.9483 Test AUROC: 0.6459 Test F1 score: 0.4098
Time to fit symbolic classifier: 68.82048106193542 seconds
SymbolicClassifier
Test accuracy: 0.9490 Test AUROC: 0.6202 Test F1 score: 0.3661
seed=707
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
LogisticRegression
Test accuracy: 0.8607 Test AUROC: 0.8348 Test F1 score: 0.3762
DecisionTreeClassifier
Test accuracy: 0.7415 Test AUROC: 0.8041 Test F1 score: 0.2608
RandomForestClassifier
Test accuracy: 0.9504 Test AUROC: 0.9217 Test F1 score: 0.3100
XGBClassifier
Test accuracy: 0.9278 Test AUROC: 0.9294 Test F1 score: 0.5421
Time to fit symbolic classifier: 69.46331691741943 seconds
SymbolicClassifier
Test accuracy: 0.9465 Test AUROC: 0.6451 Test F1 score: 0.4016
Time to fit symbolic classifier: 69.6081268787384 seconds
SymbolicClassifier
Test accuracy: 0.9497 Test AUROC: 0.6460 Test F1 score: 0.4167
seed=10524
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8467 Test AUROC: 0.8328 Test F1 score: 0.3421
DecisionTreeClassifier
Test accuracy: 0.7910 Test AUROC: 0.8272 Test F1 score: 0.3038
RandomForestClassifier
Test accuracy: 0.9487 Test AUROC: 0.9240 Test F1 score: 0.2741
XGBClassifier
Test accuracy: 0.9278 Test AUROC: 0.9417 Test F1 score: 0.5543
Time to fit symbolic classifier: 69.89840602874756 seconds
SymbolicClassifier
Test accuracy: 0.9476 Test AUROC: 0.6452 Test F1 score: 0.4065
Time to fit symbolic classifier: 74.91070580482483 seconds
SymbolicClassifier
Test accuracy: 0.9483 Test AUROC: 0.6312 Test F1 score: 0.3846
seed=83946
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8560 Test AUROC: 0.8386 Test F1 score: 0.3665
DecisionTreeClassifier
Test accuracy: 0.8086 Test AUROC: 0.8499 Test F1 score: 0.3227
RandomForestClassifier
Test accuracy: 0.9540 Test AUROC: 0.9249 Test F1 score: 0.3786
XGBClassifier
Test accuracy: 0.9203 Test AUROC: 0.9442 Test F1 score: 0.5216
Time to fit symbolic classifier: 69.23418998718262 seconds
SymbolicClassifier
Test accuracy: 0.9447 Test AUROC: 0.6325 Test F1 score: 0.3740
Time to fit symbolic classifier: 68.76166296005249 seconds
SymbolicClassifier
Test accuracy: 0.9447 Test AUROC: 0.6325 Test F1 score: 0.3740
seed=63297
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8481 Test AUROC: 0.8317 Test F1 score: 0.3542
DecisionTreeClassifier
Test accuracy: 0.8061 Test AUROC: 0.8330 Test F1 score: 0.3095
RandomForestClassifier
Test accuracy: 0.9490 Test AUROC: 0.9349 Test F1 score: 0.2680
XGBClassifier
Test accuracy: 0.9336 Test AUROC: 0.9520 Test F1 score: 0.5708
Time to fit symbolic classifier: 68.85216999053955 seconds
SymbolicClassifier
Test accuracy: 0.9454 Test AUROC: 0.6216 Test F1 score: 0.3559
Time to fit symbolic classifier: 68.83684873580933 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6112 Test F1 score: 0.3439
seed=78035
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8370 Test AUROC: 0.8402 Test F1 score: 0.3477
DecisionTreeClassifier
Test accuracy: 0.8456 Test AUROC: 0.8096 Test F1 score: 0.3302
RandomForestClassifier
Test accuracy: 0.9522 Test AUROC: 0.9357 Test F1 score: 0.3448
XGBClassifier
Test accuracy: 0.9268 Test AUROC: 0.9450 Test F1 score: 0.5565
Time to fit symbolic classifier: 75.26121234893799 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6688 Test F1 score: 0.4444
Time to fit symbolic classifier: 69.09115386009216 seconds
SymbolicClassifier
Test accuracy: 0.9501 Test AUROC: 0.6265 Test F1 score: 0.3822
seed=22664
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
LogisticRegression
Test accuracy: 0.8445 Test AUROC: 0.8247 Test F1 score: 0.3389
DecisionTreeClassifier
Test accuracy: 0.7372 Test AUROC: 0.8404 Test F1 score: 0.2591
RandomForestClassifier
Test accuracy: 0.9497 Test AUROC: 0.9347 Test F1 score: 0.2857
XGBClassifier
Test accuracy: 0.9314 Test AUROC: 0.9377 Test F1 score: 0.5649
Time to fit symbolic classifier: 69.37272310256958 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6203 Test F1 score: 0.3429
Time to fit symbolic classifier: 69.08472180366516 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6203 Test F1 score: 0.3429
seed=49283
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8718 Test AUROC: 0.8807 Test F1 score: 0.4138
DecisionTreeClassifier
Test accuracy: 0.7540 Test AUROC: 0.8027 Test F1 score: 0.2736
RandomForestClassifier
Test accuracy: 0.9522 Test AUROC: 0.9192 Test F1 score: 0.3383
XGBClassifier
Test accuracy: 0.9239 Test AUROC: 0.9314 Test F1 score: 0.5330
Time to fit symbolic classifier: 69.41814804077148 seconds
SymbolicClassifier
Test accuracy: 0.9472 Test AUROC: 0.6570 Test F1 score: 0.4235
Time to fit symbolic classifier: 68.55552625656128 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6225 Test F1 score: 0.3668
seed=35253
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8348 Test AUROC: 0.8110 Test F1 score: 0.3195
DecisionTreeClassifier
Test accuracy: 0.7907 Test AUROC: 0.8045 Test F1 score: 0.2899
RandomForestClassifier
Test accuracy: 0.9483 Test AUROC: 0.9151 Test F1 score: 0.2500
XGBClassifier
Test accuracy: 0.9293 Test AUROC: 0.9249 Test F1 score: 0.5343
Time to fit symbolic classifier: 75.61543703079224 seconds
SymbolicClassifier
Test accuracy: 0.9451 Test AUROC: 0.5476 Test F1 score: 0.1730
Time to fit symbolic classifier: 68.48590612411499 seconds
SymbolicClassifier
Test accuracy: 0.9461 Test AUROC: 0.5877 Test F1 score: 0.2857
seed=82273
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8531 Test AUROC: 0.8229 Test F1 score: 0.3599
DecisionTreeClassifier
Test accuracy: 0.8456 Test AUROC: 0.8416 Test F1 score: 0.3364
RandomForestClassifier
Test accuracy: 0.9476 Test AUROC: 0.9164 Test F1 score: 0.2551
XGBClassifier
Test accuracy: 0.9268 Test AUROC: 0.9287 Test F1 score: 0.5256
Time to fit symbolic classifier: 68.64090394973755 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6260 Test F1 score: 0.3534
Time to fit symbolic classifier: 68.7185320854187 seconds
SymbolicClassifier
Test accuracy: 0.9436 Test AUROC: 0.6005 Test F1 score: 0.3084
seed=90378
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression
Test accuracy: 0.8657 Test AUROC: 0.8305 Test F1 score: 0.3746
DecisionTreeClassifier
Test accuracy: 0.7899 Test AUROC: 0.7964 Test F1 score: 0.2875
RandomForestClassifier
Test accuracy: 0.9490 Test AUROC: 0.9247 Test F1 score: 0.2680
XGBClassifier
Test accuracy: 0.9311 Test AUROC: 0.9397 Test F1 score: 0.5450
Time to fit symbolic classifier: 68.76493072509766 seconds
SymbolicClassifier
Test accuracy: 0.9465 Test AUROC: 0.6394 Test F1 score: 0.3918
Time to fit symbolic classifier: 67.91902613639832 seconds
SymbolicClassifier
Test accuracy: 0.9487 Test AUROC: 0.6257 Test F1 score: 0.3755
# Summarise the per-seed test metrics gathered by the runs above as
# "mean ± std" for every model in MODEL_NAMES.

# Turn the LENGTHS lists collected for the two symbolic-classifier variants
# into arrays so that .mean()/.std() can be called on them.
# NOTE(review): presumably these hold the evolved program lengths - confirm.
LENGTHS['SR'] = np.asarray(LENGTHS['SR'])
LENGTHS['SRf'] = np.asarray(LENGTHS['SRf'])

# Rounds ACCURACIES / F1SCORES / AUCROCS (per model) to 4 decimals and stores
# them back as numpy arrays.
convert_to_arrays()

# Both names are kept because the original cell bound both.
plusminus = pm = "\u00B1"

# One group of lines per metric, each group closed by a separator line.
# Mean and std use the same 4-decimal precision that convert_to_arrays()
# rounds to, so the reported spread is never finer than the reported mean.
for metric_label, metric_table in (
    ("acc", ACCURACIES),
    ("F1", F1SCORES),
    ("AUROC", AUCROCS),
):
    for model_name in MODEL_NAMES:
        scores = metric_table[model_name]
        print(f"{model_name} {metric_label}: "
              f"{scores.mean():.4f} {pm} {scores.std():.4f}")
    print('--------')
# LENGTHS['SR'].mean(), LENGTHS['SRf'].mean()
Conclusion: From the results above we see that, on the imbalanced data, the symbolic regression
models have F1 scores below 0.5, i.e., they have very poor classification performance despite their
high test accuracy. Hence, for a fairer comparison among the classifiers, we do not use imbalanced
data in our experiments.