0% found this document useful (0 votes)
13 views35 pages

Experiment01 Baseline Models Accuracy

Uploaded by

sumitpatelreso
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views35 pages

Experiment01 Baseline Models Accuracy

Uploaded by

sumitpatelreso
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 35

!

python --version

Python 3.10.13

%load_ext autoreload
%autoreload 2

import os, time, datetime


import json, pickle
from pprint import pprint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# import tree based models


from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

# training/testing utils
from sklearn.utils import resample
from sklearn.utils.class_weight import compute_class_weight,
compute_sample_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report,
roc_auc_score
from sklearn.utils.class_weight import compute_sample_weight,
compute_class_weight

# Symbolic regression
from gplearn.genetic import SymbolicClassifier

Load and prepare data


# Load the filtered diet dataset; the first CSV column becomes the row index.
# low_memory=False makes pandas infer every column's dtype from the whole file
# instead of chunk by chunk, which is what triggered the mixed-type
# DtypeWarning in the recorded run of this cell.
data = pd.read_csv('../data/data_diet_filtered.csv', index_col=0,
                   low_memory=False)

# Integer-encode the disease labels.
# To get back the original labels, do "enc_values[data.disease]".
data['disease'], enc_values = pd.factorize(data['disease'])

# Label / metadata columns that are removed to obtain the feature matrix.
drop_col = ['index', 'disease', 'subject_id', 'gender',
            'country', 'age_category', 'diet']
X = data.drop(labels=drop_col, axis=1)
y = data.disease.values

# coded label -> class name
class_map = dict(enumerate(enc_values.tolist()))
# class name -> number of samples carrying that label
class_counts = {name: int((y == code).sum()) for code, name in class_map.items()}
# Sanity check: the per-class counts must add up to the number of rows.
assert sum(class_counts.values()) == len(X), \
    'total #samples not matching when summing for each class'

print(data.shape, X.shape, y.shape, class_map, class_counts)

(14560, 756) (14560, 749) (14560,) {0: 'healthy', 1: 'IBD', 2: 'CRC',


3: 'adenoma', 4: 'T2D'} {'healthy': 10761, 'IBD': 1736, 'CRC': 701,
'adenoma': 209, 'T2D': 1153}

/var/folders/qp/4w02sqhj6_d43815lhzjb7900000gn/T/
ipykernel_73514/129412594.py:1: DtypeWarning: Columns (756) have mixed
types. Specify dtype option on import or set low_memory=False.
data = pd.read_csv('../data/data_diet_filtered.csv', index_col=0)

# only take normalized rows!
# A row is kept when its feature values sum to more than 99
# (presumably relative abundances in percent -- TODO confirm).
normalized_idx = (X.sum(1) > 99)

# Apply the same boolean mask to the labels *positionally*.  Indexing the
# NumPy array `y` with `X.index` would use the DataFrame's index labels as
# positions, which is only correct when the index happens to be 0..n-1.
y = y[normalized_idx.to_numpy()]
X = X[normalized_idx]

print(f"X.shape: {X.shape}, y.shape: {y.shape}")

X.shape: (14156, 749), y.shape: (14156,)

from srmb.utils import calculate_metrics

Healthy vs. CRC classification


For the subsequent analysis, we will only choose the healthy and CRC patients from the dataset.

from srmb.fitness_functions import customacc


from srmb.special_functions import (
presence, absence, add3, add10, ifelse, ifelseless,
presence2, absence2,
presence3, absence3
)

Error exception: Only one class present in y_true. ROC AUC score is
not defined in that case.

# Encoded label of the disease class contrasted with healthy (label 0).
# In the class_map printed earlier, 2 corresponds to 'CRC'.
k = 2
idxs = (y == 0) | (y == k)  # get healthy and that class' data

# Boolean-mask indexing of the NumPy array returns a copy, so the relabelling
# below does not modify the original `y`.
X1, y1 = X.iloc[idxs], y[idxs]
y1[y1 == k] = 1  # relabel 1 --> CRC, 0 --> healthy

# After relabelling, y1.sum() is the number of samples of the disease class.
print(f'doing for class = {class_map[k]}, {X1.shape=}, {y1.shape=}, '
      f'#{class_map[k]} samples = {y1.sum()}')

doing for class = CRC, X1.shape=(11137, 749), y1.shape=(11137,), #CRC


samples = 664
1. Experiments with undersampling the healthy
class
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

def store_result(key, acc, f1, auroc):
    """Record one trial's metrics for the model identified by ``key``.

    Appends to the module-level stores ``ACCURACIES``, ``F1SCORES`` and
    ``AUCROCS``, each of which must already contain a list under ``key``.

    Args:
        key: Model name used as dictionary key (one of ``MODEL_NAMES``).
        acc: Value appended to ``ACCURACIES[key]``.
        f1: Value appended to ``F1SCORES[key]``.
        auroc: Value appended to ``AUCROCS[key]``.

    Raises:
        KeyError: If ``key`` is missing from any of the stores.
    """
    ACCURACIES[key].append(acc)
    F1SCORES[key].append(f1)
    AUCROCS[key].append(auroc)

def convert_to_arrays(prec=4):
    """Turn the per-model metric lists into rounded NumPy arrays, in place.

    For every name in the module-level ``MODEL_NAMES``, the entries of
    ``ACCURACIES``, ``F1SCORES`` and ``AUCROCS`` are replaced by
    ``np.ndarray`` objects rounded to ``prec`` decimals.

    Args:
        prec: Number of decimals passed to ``np.around``.
    """
    for key in MODEL_NAMES:
        ACCURACIES[key] = np.around(np.asarray(ACCURACIES[key]), prec)
        F1SCORES[key] = np.around(np.asarray(F1SCORES[key]), prec)
        AUCROCS[key] = np.around(np.asarray(AUCROCS[key]), prec)

# One seed per trial: 20 trials in total.
RANDOM_SEEDS_FOR_UNDERSAMPLING = [
    42, 2024, 1234, 2405, 11,
    9345, 858, 8590, 4754, 1959,
    707, 10524, 83946, 63297, 78035,
    22664, 49283, 35253, 82273, 90378,
]

# Keys under which each model's results are stored.
MODEL_NAMES = ['LR', 'DT', 'RF', 'XG', 'SR', 'SRf']

# Three independent result stores; each maps model name -> fresh empty list.
ACCURACIES, F1SCORES, AUCROCS = (
    {name: [] for name in MODEL_NAMES} for _ in range(3)
)

# Fitted symbolic classifiers collected across trials.
SRmodels, SRfmodels = [], []

USE_BALANCED_SUBSAMPLE = True  # perform undersampling of healthy classes

def _fit_symbolic_classifier(function_set, feature_names, X_train, y_train):
    """Fit a gplearn ``SymbolicClassifier`` with this experiment's fixed settings.

    Both symbolic-regression variants share every hyperparameter except the
    function set, so the setup lives here once.

    Args:
        function_set: Function names and/or custom functions given to gplearn.
        feature_names: Column names used when the evolved program is displayed.
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``SymbolicClassifier``.
    """
    est = SymbolicClassifier(population_size=6000,
                             generations=20,
                             tournament_size=25,
                             init_depth=(2, 6),
                             const_range=(0., 100.),
                             # init_method="full",
                             parsimony_coefficient=0.001,
                             function_set=function_set,
                             stopping_criteria=1.0,
                             metric=customacc,  # use custom acc as fitness
                             feature_names=feature_names,
                             # verbose=True,
                             random_state=42)
    t0 = time.time()
    est.fit(X_train, y_train)
    print('Time to fit symbolic classifier:', time.time() - t0, 'seconds')
    return est


# Loop-invariant: the feature names do not change between trials.
sr_feature_names = X1.columns.to_list()

for random_state in RANDOM_SEEDS_FOR_UNDERSAMPLING:
    print(f'seed={random_state}')

    if USE_BALANCED_SUBSAMPLE:
        # Undersample only when the result is actually used.
        # sampling_strategy is another hyperparameter; for a float,
        # imbalanced-learn reads it as the minority/majority ratio
        # after resampling.
        rus = RandomUnderSampler(sampling_strategy=0.85,
                                 random_state=random_state)
        X1b, y1b = rus.fit_resample(X1, y1)
        # print(sorted(Counter(y1b).items()))

        # The split seed is fixed at 42 here; trial-to-trial variation comes
        # from the undersampling seed above.
        X_train, X_test, y_train, y_test = train_test_split(
            X1b, y1b,
            test_size=0.25,
            # train_size=0.5,  # if slow use this
            random_state=42, stratify=y1b)
        class_weight = compute_class_weight(class_weight='balanced',
                                            classes=np.unique(y1b), y=y1b)
    else:
        # Full imbalanced data: the trial seed drives the split itself.
        X_train, X_test, y_train, y_test = train_test_split(
            X1, y1,
            test_size=0.25,
            # train_size=0.5,  # if slow use this
            random_state=random_state, stratify=y1)
        class_weight = compute_class_weight(class_weight='balanced',
                                            classes=np.unique(y1), y=y1)

    # Weights follow the sorted unique labels, so position i belongs to label i.
    class_weight_by_label = dict(enumerate(class_weight))
    sample_weights = compute_sample_weight(class_weight='balanced',
                                           y=y_train)

    # logistic regression classifier
    # NOTE(review): the recorded run emitted an lbfgs ConvergenceWarning with
    # max_iter=500.  Scaling the features or raising max_iter would address
    # it, but would also change the reported numbers, so it is left as is.
    model_lr = LogisticRegression(max_iter=500,
                                  class_weight=class_weight_by_label)
    model_lr.fit(X_train, y_train)
    store_result('LR', *calculate_metrics(model_lr, X_train, y_train,
                                          X_test, y_test))

    # Decision tree classifier
    model_dt = DecisionTreeClassifier(max_depth=5,
                                      class_weight=class_weight_by_label)
    model_dt.fit(X_train, y_train)
    store_result('DT', *calculate_metrics(model_dt, X_train, y_train,
                                          X_test, y_test))

    # Random forest classifier
    model_rf = RandomForestClassifier(n_estimators=50,
                                      class_weight=class_weight_by_label)
    model_rf.fit(X_train, y_train)
    store_result('RF', *calculate_metrics(model_rf, X_train, y_train,
                                          X_test, y_test))

    # XGBoost classifier for binary classification; class imbalance is
    # handled through per-sample weights rather than class_weight.
    model_xg = XGBClassifier(n_estimators=50, max_depth=5,
                             learning_rate=0.1, objective='binary:logistic')
    model_xg.fit(X_train, y_train, sample_weight=sample_weights)
    store_result('XG', *calculate_metrics(model_xg, X_train, y_train,
                                          X_test, y_test))

    # Do it for vanilla symbolic regression
    function_set = ['add', 'sub', 'mul', 'div', 'neg', 'max', 'min',
                    'sqrt', 'log']
    est = _fit_symbolic_classifier(function_set, sr_feature_names,
                                   X_train, y_train)
    store_result('SR', *calculate_metrics(est, X_train, y_train,
                                          X_test, y_test))
    SRmodels.append(est)

    # SR with special functions
    special_functions = [presence, absence, presence2, absence2,
                         ifelse]  # , add3, add10]
    function_set = (['add', 'sub', 'mul', 'div', 'neg', 'max', 'min']
                    + special_functions)
    est = _fit_symbolic_classifier(function_set, sr_feature_names,
                                   X_train, y_train)
    store_result('SRf', *calculate_metrics(est, X_train, y_train,
                                           X_test, y_test))
    SRfmodels.append(est)

seed=42

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7265 Test AUROC: 0.7981 Test F1 score: 0.7009
DecisionTreeClassifier
Test accuracy: 0.7514 Test AUROC: 0.8186 Test F1 score: 0.7078
RandomForestClassifier
Test accuracy: 0.8011 Test AUROC: 0.9015 Test F1 score: 0.7647
XGBClassifier
Test accuracy: 0.8260 Test AUROC: 0.9131 Test F1 score: 0.8013
Time to fit symbolic classifier: 42.2771680355072 seconds
SymbolicClassifier
Test accuracy: 0.7431 Test AUROC: 0.7488 Test F1 score: 0.6847
Time to fit symbolic classifier: 41.15023899078369 seconds
SymbolicClassifier
Test accuracy: 0.7514 Test AUROC: 0.7579 Test F1 score: 0.6939
seed=2024

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7210 Test AUROC: 0.7726 Test F1 score: 0.6967
DecisionTreeClassifier
Test accuracy: 0.7182 Test AUROC: 0.7402 Test F1 score: 0.6982
RandomForestClassifier
Test accuracy: 0.8122 Test AUROC: 0.8961 Test F1 score: 0.7792
XGBClassifier
Test accuracy: 0.8177 Test AUROC: 0.9159 Test F1 score: 0.7911
Time to fit symbolic classifier: 41.51435089111328 seconds
SymbolicClassifier
Test accuracy: 0.7569 Test AUROC: 0.7640 Test F1 score: 0.6966
Time to fit symbolic classifier: 41.539920806884766 seconds
SymbolicClassifier
Test accuracy: 0.7541 Test AUROC: 0.7641 Test F1 score: 0.6962
seed=1234

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7155 Test AUROC: 0.7885 Test F1 score: 0.6997
DecisionTreeClassifier
Test accuracy: 0.7845 Test AUROC: 0.8185 Test F1 score: 0.7365
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.9015 Test F1 score: 0.7947
XGBClassifier
Test accuracy: 0.8398 Test AUROC: 0.9181 Test F1 score: 0.8165
Time to fit symbolic classifier: 43.203129053115845 seconds
SymbolicClassifier
Test accuracy: 0.7403 Test AUROC: 0.7617 Test F1 score: 0.7006
Time to fit symbolic classifier: 44.14738321304321 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7692 Test F1 score: 0.7079
seed=2405
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7818 Test AUROC: 0.8474 Test F1 score: 0.7508
DecisionTreeClassifier
Test accuracy: 0.7348 Test AUROC: 0.7518 Test F1 score: 0.6667
RandomForestClassifier
Test accuracy: 0.8122 Test AUROC: 0.8982 Test F1 score: 0.7655
XGBClassifier
Test accuracy: 0.8177 Test AUROC: 0.9079 Test F1 score: 0.7925
Time to fit symbolic classifier: 43.750773906707764 seconds
SymbolicClassifier
Test accuracy: 0.7459 Test AUROC: 0.7500 Test F1 score: 0.6913
Time to fit symbolic classifier: 43.03351879119873 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7465 Test F1 score: 0.6851
seed=11

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7238 Test AUROC: 0.7970 Test F1 score: 0.7059
DecisionTreeClassifier
Test accuracy: 0.7597 Test AUROC: 0.7982 Test F1 score: 0.7307
RandomForestClassifier
Test accuracy: 0.7707 Test AUROC: 0.8785 Test F1 score: 0.7314
XGBClassifier
Test accuracy: 0.8232 Test AUROC: 0.9068 Test F1 score: 0.8012
Time to fit symbolic classifier: 43.38354301452637 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7643 Test F1 score: 0.7010
Time to fit symbolic classifier: 43.27070212364197 seconds
SymbolicClassifier
Test accuracy: 0.7155 Test AUROC: 0.7301 Test F1 score: 0.6532
seed=9345

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.6989 Test AUROC: 0.7772 Test F1 score: 0.6625
DecisionTreeClassifier
Test accuracy: 0.7155 Test AUROC: 0.7750 Test F1 score: 0.6485
RandomForestClassifier
Test accuracy: 0.8039 Test AUROC: 0.8891 Test F1 score: 0.7641
XGBClassifier
Test accuracy: 0.8370 Test AUROC: 0.9182 Test F1 score: 0.8103
Time to fit symbolic classifier: 43.272748947143555 seconds
SymbolicClassifier
Test accuracy: 0.7459 Test AUROC: 0.7491 Test F1 score: 0.6892
Time to fit symbolic classifier: 42.66357135772705 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7533 Test F1 score: 0.6915
seed=858

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7624 Test AUROC: 0.8271 Test F1 score: 0.7346
DecisionTreeClassifier
Test accuracy: 0.7735 Test AUROC: 0.7956 Test F1 score: 0.7500
RandomForestClassifier
Test accuracy: 0.8315 Test AUROC: 0.9196 Test F1 score: 0.7973
XGBClassifier
Test accuracy: 0.8204 Test AUROC: 0.9280 Test F1 score: 0.7855
Time to fit symbolic classifier: 43.11080884933472 seconds
SymbolicClassifier
Test accuracy: 0.7569 Test AUROC: 0.7890 Test F1 score: 0.6986
Time to fit symbolic classifier: 43.20073890686035 seconds
SymbolicClassifier
Test accuracy: 0.7348 Test AUROC: 0.7411 Test F1 score: 0.6643
seed=8590

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7403 Test AUROC: 0.8040 Test F1 score: 0.7134
DecisionTreeClassifier
Test accuracy: 0.7514 Test AUROC: 0.7889 Test F1 score: 0.6897
RandomForestClassifier
Test accuracy: 0.8122 Test AUROC: 0.9011 Test F1 score: 0.7792
XGBClassifier
Test accuracy: 0.8232 Test AUROC: 0.9098 Test F1 score: 0.7987
Time to fit symbolic classifier: 42.94043493270874 seconds
SymbolicClassifier
Test accuracy: 0.7541 Test AUROC: 0.7544 Test F1 score: 0.6899
Time to fit symbolic classifier: 43.01768684387207 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7573 Test F1 score: 0.6915
seed=4754
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7762 Test AUROC: 0.8377 Test F1 score: 0.7461
DecisionTreeClassifier
Test accuracy: 0.7348 Test AUROC: 0.7938 Test F1 score: 0.6620
RandomForestClassifier
Test accuracy: 0.8260 Test AUROC: 0.9111 Test F1 score: 0.7921
XGBClassifier
Test accuracy: 0.8315 Test AUROC: 0.9308 Test F1 score: 0.8039
Time to fit symbolic classifier: 43.672722816467285 seconds
SymbolicClassifier
Test accuracy: 0.7762 Test AUROC: 0.7759 Test F1 score: 0.7138
Time to fit symbolic classifier: 43.33226490020752 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7694 Test F1 score: 0.7031
seed=1959

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7624 Test AUROC: 0.8320 Test F1 score: 0.7312
DecisionTreeClassifier
Test accuracy: 0.6878 Test AUROC: 0.7568 Test F1 score: 0.6744
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.8998 Test F1 score: 0.8050
XGBClassifier
Test accuracy: 0.7928 Test AUROC: 0.8996 Test F1 score: 0.7734
Time to fit symbolic classifier: 42.98330283164978 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7580 Test F1 score: 0.6915
Time to fit symbolic classifier: 44.140511989593506 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7697 Test F1 score: 0.7157
seed=707

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7624 Test AUROC: 0.8203 Test F1 score: 0.7394
DecisionTreeClassifier
Test accuracy: 0.7569 Test AUROC: 0.8137 Test F1 score: 0.6667
RandomForestClassifier
Test accuracy: 0.8260 Test AUROC: 0.9097 Test F1 score: 0.7921
XGBClassifier
Test accuracy: 0.8232 Test AUROC: 0.9166 Test F1 score: 0.7949
Time to fit symbolic classifier: 43.830246925354004 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7530 Test F1 score: 0.7217
Time to fit symbolic classifier: 43.57986807823181 seconds
SymbolicClassifier
Test accuracy: 0.7790 Test AUROC: 0.8150 Test F1 score: 0.7143
seed=10524

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7514 Test AUROC: 0.8078 Test F1 score: 0.7289
DecisionTreeClassifier
Test accuracy: 0.7265 Test AUROC: 0.7624 Test F1 score: 0.6991
RandomForestClassifier
Test accuracy: 0.8204 Test AUROC: 0.8905 Test F1 score: 0.7883
XGBClassifier
Test accuracy: 0.8260 Test AUROC: 0.8991 Test F1 score: 0.8037
Time to fit symbolic classifier: 43.50123906135559 seconds
SymbolicClassifier
Test accuracy: 0.7514 Test AUROC: 0.7622 Test F1 score: 0.7039
Time to fit symbolic classifier: 43.24294877052307 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7613 Test F1 score: 0.6997
seed=83946

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7403 Test AUROC: 0.7988 Test F1 score: 0.7081
DecisionTreeClassifier
Test accuracy: 0.7238 Test AUROC: 0.7863 Test F1 score: 0.6552
RandomForestClassifier
Test accuracy: 0.8204 Test AUROC: 0.8863 Test F1 score: 0.7855
XGBClassifier
Test accuracy: 0.8287 Test AUROC: 0.9158 Test F1 score: 0.8062
Time to fit symbolic classifier: 43.89677596092224 seconds
SymbolicClassifier
Test accuracy: 0.7459 Test AUROC: 0.7638 Test F1 score: 0.6954
Time to fit symbolic classifier: 42.90026021003723 seconds
SymbolicClassifier
Test accuracy: 0.7348 Test AUROC: 0.7510 Test F1 score: 0.6800
seed=63297
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7762 Test AUROC: 0.8216 Test F1 score: 0.7508
DecisionTreeClassifier
Test accuracy: 0.7845 Test AUROC: 0.8127 Test F1 score: 0.7665
RandomForestClassifier
Test accuracy: 0.8260 Test AUROC: 0.9118 Test F1 score: 0.7948
XGBClassifier
Test accuracy: 0.8564 Test AUROC: 0.9337 Test F1 score: 0.8354
Time to fit symbolic classifier: 43.374119997024536 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7754 Test F1 score: 0.7195
Time to fit symbolic classifier: 43.408135175704956 seconds
SymbolicClassifier
Test accuracy: 0.7652 Test AUROC: 0.7666 Test F1 score: 0.7099
seed=78035

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7735 Test AUROC: 0.8348 Test F1 score: 0.7405
DecisionTreeClassifier
Test accuracy: 0.7403 Test AUROC: 0.7901 Test F1 score: 0.6846
RandomForestClassifier
Test accuracy: 0.8370 Test AUROC: 0.9148 Test F1 score: 0.8115
XGBClassifier
Test accuracy: 0.8536 Test AUROC: 0.9395 Test F1 score: 0.8296
Time to fit symbolic classifier: 43.04957699775696 seconds
SymbolicClassifier
Test accuracy: 0.7541 Test AUROC: 0.7593 Test F1 score: 0.7023
Time to fit symbolic classifier: 42.84919810295105 seconds
SymbolicClassifier
Test accuracy: 0.7762 Test AUROC: 0.8103 Test F1 score: 0.7178
seed=22664

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7790 Test AUROC: 0.8231 Test F1 score: 0.7531
DecisionTreeClassifier
Test accuracy: 0.7155 Test AUROC: 0.8195 Test F1 score: 0.7099
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.9080 Test F1 score: 0.7987
XGBClassifier
Test accuracy: 0.8453 Test AUROC: 0.9225 Test F1 score: 0.8228
Time to fit symbolic classifier: 43.30705690383911 seconds
SymbolicClassifier
Test accuracy: 0.7514 Test AUROC: 0.7704 Test F1 score: 0.7020
Time to fit symbolic classifier: 44.09287762641907 seconds
SymbolicClassifier
Test accuracy: 0.7431 Test AUROC: 0.7477 Test F1 score: 0.6714
seed=49283

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7541 Test AUROC: 0.8084 Test F1 score: 0.7405
DecisionTreeClassifier
Test accuracy: 0.7293 Test AUROC: 0.7896 Test F1 score: 0.7168
RandomForestClassifier
Test accuracy: 0.8287 Test AUROC: 0.9166 Test F1 score: 0.7947
XGBClassifier
Test accuracy: 0.8398 Test AUROC: 0.9318 Test F1 score: 0.8129
Time to fit symbolic classifier: 42.77116394042969 seconds
SymbolicClassifier
Test accuracy: 0.7569 Test AUROC: 0.7585 Test F1 score: 0.7007
Time to fit symbolic classifier: 45.94197463989258 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7513 Test F1 score: 0.7010
seed=35253

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7569 Test AUROC: 0.8184 Test F1 score: 0.7317
DecisionTreeClassifier
Test accuracy: 0.7514 Test AUROC: 0.8107 Test F1 score: 0.7059
RandomForestClassifier
Test accuracy: 0.8039 Test AUROC: 0.8988 Test F1 score: 0.7717
XGBClassifier
Test accuracy: 0.8094 Test AUROC: 0.9091 Test F1 score: 0.7903
Time to fit symbolic classifier: 44.62361788749695 seconds
SymbolicClassifier
Test accuracy: 0.7210 Test AUROC: 0.7384 Test F1 score: 0.6731
Time to fit symbolic classifier: 44.03053903579712 seconds
SymbolicClassifier
Test accuracy: 0.7403 Test AUROC: 0.7603 Test F1 score: 0.6928
seed=82273
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7293 Test AUROC: 0.7904 Test F1 score: 0.7048
DecisionTreeClassifier
Test accuracy: 0.7403 Test AUROC: 0.7878 Test F1 score: 0.6908
RandomForestClassifier
Test accuracy: 0.8039 Test AUROC: 0.8927 Test F1 score: 0.7609
XGBClassifier
Test accuracy: 0.8425 Test AUROC: 0.9112 Test F1 score: 0.8190
Time to fit symbolic classifier: 45.13826107978821 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7551 Test F1 score: 0.6936
Time to fit symbolic classifier: 44.299291133880615 seconds
SymbolicClassifier
Test accuracy: 0.7486 Test AUROC: 0.7536 Test F1 score: 0.6915
seed=90378

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.7541 Test AUROC: 0.8200 Test F1 score: 0.7343
DecisionTreeClassifier
Test accuracy: 0.7541 Test AUROC: 0.8313 Test F1 score: 0.6877
RandomForestClassifier
Test accuracy: 0.8011 Test AUROC: 0.9094 Test F1 score: 0.7647
XGBClassifier
Test accuracy: 0.8287 Test AUROC: 0.9243 Test F1 score: 0.8025
Time to fit symbolic classifier: 44.41036295890808 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7663 Test F1 score: 0.7031
Time to fit symbolic classifier: 46.336766958236694 seconds
SymbolicClassifier
Test accuracy: 0.7597 Test AUROC: 0.7671 Test F1 score: 0.7031

Process results
# Length of the final evolved program of every fitted symbolic classifier,
# one entry per trial.
LENGTHS = {
    'SR': np.asarray([est._program.length_ for est in SRmodels]),
    'SRf': np.asarray([est._program.length_ for est in SRfmodels]),
}

# Replace the per-model metric lists by rounded NumPy arrays so that
# .mean() / .std() are available below.
convert_to_arrays()

plusminus = pm = u"\u00B1"
for model_name in MODEL_NAMES:
    print(f"{model_name} acc: {ACCURACIES[model_name].mean():.2f} {pm} "
          f"{ACCURACIES[model_name].std():.4f}")
print('--------')
for model_name in MODEL_NAMES:
    print(f"{model_name} F1: {F1SCORES[model_name].mean():.2f} {pm} "
          f"{F1SCORES[model_name].std():.4f}")

print('--------')

LR acc: 0.75 ± 0.0232


DT acc: 0.74 ± 0.0238
RF acc: 0.82 ± 0.0152
XG acc: 0.83 ± 0.0146
SR acc: 0.75 ± 0.0107
SRf acc: 0.75 ± 0.0145
--------
LR F1: 0.72 ± 0.0230
DT F1: 0.70 ± 0.0309
RF F1: 0.78 ± 0.0187
XG F1: 0.80 ± 0.0148
SR F1: 0.70 ± 0.0110
SRf F1: 0.69 ± 0.0166
--------
Compare average length of SR models with and without
special functions
# Report the average program length for each symbolic-regression variant.
for variant in ('SR', 'SRf'):
    print(f"{variant} mean length: {LENGTHS[variant].mean()}")

SR mean length: 19.55


SRf mean length: 14.55

Save symbolic regression models for later use


from srmb.utils import save_sr_models, load_sr_models

# Persist both lists of fitted symbolic classifiers with the project helper,
# each under its own key and in its own directory.
save_sr_models(SRmodels, key='SR',
               save_dir='../results_srmb/sr_vanilla_models/')
save_sr_models(SRfmodels, key='SRf',
               save_dir='../results_srmb/sr_special_models/')

Load the models


# Reload the saved models into separate upper-case names, leaving the
# in-memory SRmodels / SRfmodels lists untouched.
SRMODELS = load_sr_models('SR',
                          save_dir='../results_srmb/sr_vanilla_models/')
SRFMODELS = load_sr_models('SRf',
                           save_dir='../results_srmb/sr_special_models/')

# SRMODELS

Ablation: what if we use the entire imbalanced dataset?
# Seeds for the 20 trials: each seed yields one data split and one fit of
# every model.
RANDOM_SEEDS_FOR_UNDERSAMPLING = [42, 2024, 1234, 2405, 11, 9345, 858, 8590, 4754, 1959,
                                  707, 10524, 83946, 63297, 78035, 22664, 49283, 35253, 82273, 90378]
MODEL_NAMES = ['LR', 'DT', 'RF', 'XG', 'SR', 'SRf']

# Per-model metric accumulators, reset for this experiment
# (presumably filled by store_result -- confirm in its definition).
ACCURACIES = {k: [] for k in MODEL_NAMES}
F1SCORES = {k: [] for k in MODEL_NAMES}
AUCROCS = {k: [] for k in MODEL_NAMES}

# Fitted symbolic classifiers, one per seed.
SRmodels = []
SRfmodels = []

# False: train on the full, imbalanced (X1, y1) -- this ablation.
# True:  undersample (X1, y1) with RandomUnderSampler before splitting.
USE_BALANCED_SUBSAMPLE = False

# Hyperparameters shared by both symbolic classifiers; only the function set
# differs between them. The GP random_state is fixed, so trials differ only
# through the data they are given.
SR_COMMON_PARAMS = dict(
    population_size=6000,
    generations=20,
    tournament_size=25,
    init_depth=(2, 6),
    const_range=(0., 100.),
    # init_method="full",
    parsimony_coefficient=0.001,
    stopping_criteria=1.0,
    metric=customacc,  # use custom acc as fitness
    feature_names=X1.columns.to_list(),
    # verbose=True,
    random_state=42,
)

for random_state in RANDOM_SEEDS_FOR_UNDERSAMPLING:
    print(f'seed={random_state}')

    if not USE_BALANCED_SUBSAMPLE:
        # Full dataset: the seed controls the stratified train/test split.
        X_train, X_test, y_train, y_test = train_test_split(
            X1, y1,
            test_size=0.25,
            # train_size=0.5,  # if slow use this
            random_state=random_state, stratify=y1)
        # NOTE(review): weights are derived from all labels (train + test),
        # not from y_train only.
        class_weight = compute_class_weight(class_weight='balanced',
                                            classes=np.unique(y1), y=y1)
    else:
        # Undersampling is only performed when its result is actually used;
        # here the seed controls the subsample while the split seed is fixed.
        rus = RandomUnderSampler(sampling_strategy=0.85,  # this is another hyperparameter
                                 random_state=random_state)
        X1b, y1b = rus.fit_resample(X1, y1)
        # print(sorted(Counter(y1b).items()))
        # print('using a balanced subsample of the data ...')
        X_train, X_test, y_train, y_test = train_test_split(
            X1b, y1b,
            test_size=0.25,
            # train_size=0.5,  # if slow use this
            random_state=42, stratify=y1b)
        class_weight = compute_class_weight(class_weight='balanced',
                                            classes=np.unique(y1b), y=y1b)

    # Index -> weight mapping; assumes the labels are exactly 0..k-1 so that
    # the position in np.unique order equals the label -- TODO confirm.
    class_weight_by_label = dict(enumerate(class_weight))
    sample_weights = compute_sample_weight(class_weight='balanced',
                                           y=y_train)

    # Logistic regression classifier
    # NOTE(review): the recorded runs emit a ConvergenceWarning at
    # max_iter=500; scaling the features or raising max_iter would change
    # the reported numbers, so it is left as is.
    model_lr = LogisticRegression(max_iter=500,
                                  class_weight=class_weight_by_label)
    model_lr.fit(X_train, y_train)
    store_result('LR', *calculate_metrics(model_lr, X_train, y_train,
                                          X_test, y_test))

    # Decision tree classifier
    model_dt = DecisionTreeClassifier(max_depth=5,
                                      class_weight=class_weight_by_label)
    model_dt.fit(X_train, y_train)
    store_result('DT', *calculate_metrics(model_dt, X_train, y_train,
                                          X_test, y_test))

    # Random forest classifier
    model_rf = RandomForestClassifier(n_estimators=50,
                                      class_weight=class_weight_by_label)
    model_rf.fit(X_train, y_train)
    store_result('RF', *calculate_metrics(model_rf, X_train, y_train,
                                          X_test, y_test))

    # XGBoost classifier with a binary logistic objective; class imbalance is
    # handled through balanced per-sample weights at fit time.
    model_xg = XGBClassifier(n_estimators=50, max_depth=5,
                             learning_rate=0.1, objective='binary:logistic')
    model_xg.fit(X_train, y_train, sample_weight=sample_weights)
    store_result('XG', *calculate_metrics(model_xg, X_train, y_train,
                                          X_test, y_test))

    # Vanilla symbolic regression
    # NOTE(review): unlike the four models above, neither symbolic classifier
    # is given class or sample weights here; whether `customacc` compensates
    # for the imbalance cannot be seen from this cell -- confirm before
    # comparing F1 scores across models.
    function_set = ['add', 'sub', 'mul', 'div', 'neg', 'max', 'min',
                    'sqrt', 'log']
    est = SymbolicClassifier(function_set=function_set, **SR_COMMON_PARAMS)

    t0 = time.time()
    est.fit(X_train, y_train)
    print('Time to fit symbolic classifier:', time.time() - t0,
          'seconds')
    store_result('SR', *calculate_metrics(est, X_train, y_train,
                                          X_test, y_test))
    SRmodels.append(est)

    # SR with special functions
    special_functions = [presence, absence, presence2, absence2,
                         ifelse]  # , add3, add10]
    function_set = ['add', 'sub', 'mul', 'div', 'neg', 'max', 'min'] + \
        special_functions
    est = SymbolicClassifier(function_set=function_set, **SR_COMMON_PARAMS)

    t0 = time.time()
    est.fit(X_train, y_train)
    print('Time to fit symbolic classifier:', time.time() - t0,
          'seconds')
    store_result('SRf', *calculate_metrics(est, X_train, y_train,
                                           X_test, y_test))
    SRfmodels.append(est)

seed=42

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8467 Test AUROC: 0.8193 Test F1 score: 0.3380
DecisionTreeClassifier
Test accuracy: 0.8700 Test AUROC: 0.8443 Test F1 score: 0.3926
RandomForestClassifier
Test accuracy: 0.9490 Test AUROC: 0.9315 Test F1 score: 0.2526
XGBClassifier
Test accuracy: 0.9346 Test AUROC: 0.9426 Test F1 score: 0.5806
Time to fit symbolic classifier: 67.33232116699219 seconds
SymbolicClassifier
Test accuracy: 0.9411 Test AUROC: 0.6109 Test F1 score: 0.3223
Time to fit symbolic classifier: 65.69597911834717 seconds
SymbolicClassifier
Test accuracy: 0.9447 Test AUROC: 0.5897 Test F1 score: 0.2870
seed=2024

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8492 Test AUROC: 0.8519 Test F1 score: 0.3558
DecisionTreeClassifier
Test accuracy: 0.7745 Test AUROC: 0.8317 Test F1 score: 0.2991
RandomForestClassifier
Test accuracy: 0.9504 Test AUROC: 0.9401 Test F1 score: 0.3030
XGBClassifier
Test accuracy: 0.9318 Test AUROC: 0.9497 Test F1 score: 0.5662
Time to fit symbolic classifier: 66.04439783096313 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6489 Test F1 score: 0.4130
Time to fit symbolic classifier: 67.61290097236633 seconds
SymbolicClassifier
Test accuracy: 0.9483 Test AUROC: 0.6368 Test F1 score: 0.3950
seed=1234
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8492 Test AUROC: 0.8427 Test F1 score: 0.3458
DecisionTreeClassifier
Test accuracy: 0.8205 Test AUROC: 0.8374 Test F1 score: 0.3225
RandomForestClassifier
Test accuracy: 0.9508 Test AUROC: 0.9278 Test F1 score: 0.3184
XGBClassifier
Test accuracy: 0.9307 Test AUROC: 0.9266 Test F1 score: 0.5522
Time to fit symbolic classifier: 64.78528308868408 seconds
SymbolicClassifier
Test accuracy: 0.9436 Test AUROC: 0.6266 Test F1 score: 0.3592
Time to fit symbolic classifier: 63.20682096481323 seconds
SymbolicClassifier
Test accuracy: 0.9436 Test AUROC: 0.6266 Test F1 score: 0.3592
seed=2405

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8478 Test AUROC: 0.8632 Test F1 score: 0.3653
DecisionTreeClassifier
Test accuracy: 0.8014 Test AUROC: 0.8131 Test F1 score: 0.3061
RandomForestClassifier
Test accuracy: 0.9501 Test AUROC: 0.9250 Test F1 score: 0.2798
XGBClassifier
Test accuracy: 0.9278 Test AUROC: 0.9367 Test F1 score: 0.5483
Time to fit symbolic classifier: 67.03891324996948 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6488 Test F1 score: 0.4130
Time to fit symbolic classifier: 64.79723119735718 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6488 Test F1 score: 0.4130
seed=11

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8442 Test AUROC: 0.8298 Test F1 score: 0.3344
DecisionTreeClassifier
Test accuracy: 0.7874 Test AUROC: 0.8179 Test F1 score: 0.2936
RandomForestClassifier
Test accuracy: 0.9519 Test AUROC: 0.9456 Test F1 score: 0.3431
XGBClassifier
Test accuracy: 0.9314 Test AUROC: 0.9412 Test F1 score: 0.5708
Time to fit symbolic classifier: 68.0005111694336 seconds
SymbolicClassifier
Test accuracy: 0.9458 Test AUROC: 0.5620 Test F1 score: 0.2176
Time to fit symbolic classifier: 65.93679213523865 seconds
SymbolicClassifier
Test accuracy: 0.9504 Test AUROC: 0.6351 Test F1 score: 0.4000
seed=9345

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8560 Test AUROC: 0.8329 Test F1 score: 0.3543
DecisionTreeClassifier
Test accuracy: 0.8039 Test AUROC: 0.8499 Test F1 score: 0.3175
RandomForestClassifier
Test accuracy: 0.9476 Test AUROC: 0.9272 Test F1 score: 0.2316
XGBClassifier
Test accuracy: 0.9332 Test AUROC: 0.9536 Test F1 score: 0.5830
Time to fit symbolic classifier: 63.743890047073364 seconds
SymbolicClassifier
Test accuracy: 0.9469 Test AUROC: 0.5684 Test F1 score: 0.2371
Time to fit symbolic classifier: 67.9835159778595 seconds
SymbolicClassifier
Test accuracy: 0.9472 Test AUROC: 0.6024 Test F1 score: 0.3226
seed=858

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8474 Test AUROC: 0.8279 Test F1 score: 0.3472
DecisionTreeClassifier
Test accuracy: 0.8126 Test AUROC: 0.7509 Test F1 score: 0.2965
RandomForestClassifier
Test accuracy: 0.9508 Test AUROC: 0.9173 Test F1 score: 0.3046
XGBClassifier
Test accuracy: 0.9364 Test AUROC: 0.9368 Test F1 score: 0.5755
Time to fit symbolic classifier: 65.58412313461304 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6175 Test F1 score: 0.3374
Time to fit symbolic classifier: 67.61177802085876 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6175 Test F1 score: 0.3374
seed=8590
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8445 Test AUROC: 0.8175 Test F1 score: 0.3224
DecisionTreeClassifier
Test accuracy: 0.7318 Test AUROC: 0.8034 Test F1 score: 0.2537
RandomForestClassifier
Test accuracy: 0.9479 Test AUROC: 0.9159 Test F1 score: 0.2408
XGBClassifier
Test accuracy: 0.9264 Test AUROC: 0.9206 Test F1 score: 0.5060
Time to fit symbolic classifier: 69.19223213195801 seconds
SymbolicClassifier
Test accuracy: 0.9429 Test AUROC: 0.5949 Test F1 score: 0.2933
Time to fit symbolic classifier: 73.34092903137207 seconds
SymbolicClassifier
Test accuracy: 0.9451 Test AUROC: 0.5956 Test F1 score: 0.3014
seed=4754

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8571 Test AUROC: 0.8440 Test F1 score: 0.3539
DecisionTreeClassifier
Test accuracy: 0.8226 Test AUROC: 0.8046 Test F1 score: 0.3270
RandomForestClassifier
Test accuracy: 0.9483 Test AUROC: 0.9139 Test F1 score: 0.2500
XGBClassifier
Test accuracy: 0.9329 Test AUROC: 0.9465 Test F1 score: 0.5600
Time to fit symbolic classifier: 68.17389011383057 seconds
SymbolicClassifier
Test accuracy: 0.9454 Test AUROC: 0.6299 Test F1 score: 0.3719
Time to fit symbolic classifier: 68.23464798927307 seconds
SymbolicClassifier
Test accuracy: 0.9476 Test AUROC: 0.5997 Test F1 score: 0.3178
seed=1959

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8524 Test AUROC: 0.8342 Test F1 score: 0.3568
DecisionTreeClassifier
Test accuracy: 0.8779 Test AUROC: 0.8453 Test F1 score: 0.4178
RandomForestClassifier
Test accuracy: 0.9494 Test AUROC: 0.9401 Test F1 score: 0.2694
XGBClassifier
Test accuracy: 0.9411 Test AUROC: 0.9409 Test F1 score: 0.6186
Time to fit symbolic classifier: 73.17940306663513 seconds
SymbolicClassifier
Test accuracy: 0.9483 Test AUROC: 0.6459 Test F1 score: 0.4098
Time to fit symbolic classifier: 68.82048106193542 seconds
SymbolicClassifier
Test accuracy: 0.9490 Test AUROC: 0.6202 Test F1 score: 0.3661
seed=707

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8607 Test AUROC: 0.8348 Test F1 score: 0.3762
DecisionTreeClassifier
Test accuracy: 0.7415 Test AUROC: 0.8041 Test F1 score: 0.2608
RandomForestClassifier
Test accuracy: 0.9504 Test AUROC: 0.9217 Test F1 score: 0.3100
XGBClassifier
Test accuracy: 0.9278 Test AUROC: 0.9294 Test F1 score: 0.5421
Time to fit symbolic classifier: 69.46331691741943 seconds
SymbolicClassifier
Test accuracy: 0.9465 Test AUROC: 0.6451 Test F1 score: 0.4016
Time to fit symbolic classifier: 69.6081268787384 seconds
SymbolicClassifier
Test accuracy: 0.9497 Test AUROC: 0.6460 Test F1 score: 0.4167
seed=10524

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8467 Test AUROC: 0.8328 Test F1 score: 0.3421
DecisionTreeClassifier
Test accuracy: 0.7910 Test AUROC: 0.8272 Test F1 score: 0.3038
RandomForestClassifier
Test accuracy: 0.9487 Test AUROC: 0.9240 Test F1 score: 0.2741
XGBClassifier
Test accuracy: 0.9278 Test AUROC: 0.9417 Test F1 score: 0.5543
Time to fit symbolic classifier: 69.89840602874756 seconds
SymbolicClassifier
Test accuracy: 0.9476 Test AUROC: 0.6452 Test F1 score: 0.4065
Time to fit symbolic classifier: 74.91070580482483 seconds
SymbolicClassifier
Test accuracy: 0.9483 Test AUROC: 0.6312 Test F1 score: 0.3846
seed=83946
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8560 Test AUROC: 0.8386 Test F1 score: 0.3665
DecisionTreeClassifier
Test accuracy: 0.8086 Test AUROC: 0.8499 Test F1 score: 0.3227
RandomForestClassifier
Test accuracy: 0.9540 Test AUROC: 0.9249 Test F1 score: 0.3786
XGBClassifier
Test accuracy: 0.9203 Test AUROC: 0.9442 Test F1 score: 0.5216
Time to fit symbolic classifier: 69.23418998718262 seconds
SymbolicClassifier
Test accuracy: 0.9447 Test AUROC: 0.6325 Test F1 score: 0.3740
Time to fit symbolic classifier: 68.76166296005249 seconds
SymbolicClassifier
Test accuracy: 0.9447 Test AUROC: 0.6325 Test F1 score: 0.3740
seed=63297

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8481 Test AUROC: 0.8317 Test F1 score: 0.3542
DecisionTreeClassifier
Test accuracy: 0.8061 Test AUROC: 0.8330 Test F1 score: 0.3095
RandomForestClassifier
Test accuracy: 0.9490 Test AUROC: 0.9349 Test F1 score: 0.2680
XGBClassifier
Test accuracy: 0.9336 Test AUROC: 0.9520 Test F1 score: 0.5708
Time to fit symbolic classifier: 68.85216999053955 seconds
SymbolicClassifier
Test accuracy: 0.9454 Test AUROC: 0.6216 Test F1 score: 0.3559
Time to fit symbolic classifier: 68.83684873580933 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6112 Test F1 score: 0.3439
seed=78035

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8370 Test AUROC: 0.8402 Test F1 score: 0.3477
DecisionTreeClassifier
Test accuracy: 0.8456 Test AUROC: 0.8096 Test F1 score: 0.3302
RandomForestClassifier
Test accuracy: 0.9522 Test AUROC: 0.9357 Test F1 score: 0.3448
XGBClassifier
Test accuracy: 0.9268 Test AUROC: 0.9450 Test F1 score: 0.5565
Time to fit symbolic classifier: 75.26121234893799 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6688 Test F1 score: 0.4444
Time to fit symbolic classifier: 69.09115386009216 seconds
SymbolicClassifier
Test accuracy: 0.9501 Test AUROC: 0.6265 Test F1 score: 0.3822
seed=22664

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8445 Test AUROC: 0.8247 Test F1 score: 0.3389
DecisionTreeClassifier
Test accuracy: 0.7372 Test AUROC: 0.8404 Test F1 score: 0.2591
RandomForestClassifier
Test accuracy: 0.9497 Test AUROC: 0.9347 Test F1 score: 0.2857
XGBClassifier
Test accuracy: 0.9314 Test AUROC: 0.9377 Test F1 score: 0.5649
Time to fit symbolic classifier: 69.37272310256958 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6203 Test F1 score: 0.3429
Time to fit symbolic classifier: 69.08472180366516 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6203 Test F1 score: 0.3429
seed=49283

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8718 Test AUROC: 0.8807 Test F1 score: 0.4138
DecisionTreeClassifier
Test accuracy: 0.7540 Test AUROC: 0.8027 Test F1 score: 0.2736
RandomForestClassifier
Test accuracy: 0.9522 Test AUROC: 0.9192 Test F1 score: 0.3383
XGBClassifier
Test accuracy: 0.9239 Test AUROC: 0.9314 Test F1 score: 0.5330
Time to fit symbolic classifier: 69.41814804077148 seconds
SymbolicClassifier
Test accuracy: 0.9472 Test AUROC: 0.6570 Test F1 score: 0.4235
Time to fit symbolic classifier: 68.55552625656128 seconds
SymbolicClassifier
Test accuracy: 0.9479 Test AUROC: 0.6225 Test F1 score: 0.3668
seed=35253
/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8348 Test AUROC: 0.8110 Test F1 score: 0.3195
DecisionTreeClassifier
Test accuracy: 0.7907 Test AUROC: 0.8045 Test F1 score: 0.2899
RandomForestClassifier
Test accuracy: 0.9483 Test AUROC: 0.9151 Test F1 score: 0.2500
XGBClassifier
Test accuracy: 0.9293 Test AUROC: 0.9249 Test F1 score: 0.5343
Time to fit symbolic classifier: 75.61543703079224 seconds
SymbolicClassifier
Test accuracy: 0.9451 Test AUROC: 0.5476 Test F1 score: 0.1730
Time to fit symbolic classifier: 68.48590612411499 seconds
SymbolicClassifier
Test accuracy: 0.9461 Test AUROC: 0.5877 Test F1 score: 0.2857
seed=82273

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8531 Test AUROC: 0.8229 Test F1 score: 0.3599
DecisionTreeClassifier
Test accuracy: 0.8456 Test AUROC: 0.8416 Test F1 score: 0.3364
RandomForestClassifier
Test accuracy: 0.9476 Test AUROC: 0.9164 Test F1 score: 0.2551
XGBClassifier
Test accuracy: 0.9268 Test AUROC: 0.9287 Test F1 score: 0.5256
Time to fit symbolic classifier: 68.64090394973755 seconds
SymbolicClassifier
Test accuracy: 0.9422 Test AUROC: 0.6260 Test F1 score: 0.3534
Time to fit symbolic classifier: 68.7185320854187 seconds
SymbolicClassifier
Test accuracy: 0.9436 Test AUROC: 0.6005 Test F1 score: 0.3084
seed=90378

/Users/swagatam/miniconda3/envs/hiwi/lib/python3.10/site-packages/
sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs
failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression
Test accuracy: 0.8657 Test AUROC: 0.8305 Test F1 score: 0.3746
DecisionTreeClassifier
Test accuracy: 0.7899 Test AUROC: 0.7964 Test F1 score: 0.2875
RandomForestClassifier
Test accuracy: 0.9490 Test AUROC: 0.9247 Test F1 score: 0.2680
XGBClassifier
Test accuracy: 0.9311 Test AUROC: 0.9397 Test F1 score: 0.5450
Time to fit symbolic classifier: 68.76493072509766 seconds
SymbolicClassifier
Test accuracy: 0.9465 Test AUROC: 0.6394 Test F1 score: 0.3918
Time to fit symbolic classifier: 67.91902613639832 seconds
SymbolicClassifier
Test accuracy: 0.9487 Test AUROC: 0.6257 Test F1 score: 0.3755

# Program length of every symbolic classifier fitted on the imbalanced data,
# grouped by variant ('SR' = vanilla, 'SRf' = with the special functions).
LENGTHS = {
    'SR': np.asarray([model._program.length_ for model in SRmodels]),
    'SRf': np.asarray([model._program.length_ for model in SRfmodels]),
}

# Presumably turns the per-trial metric lists into NumPy arrays; the
# .mean()/.std() calls below rely on that -- confirm in its definition.
convert_to_arrays()

pm = u"\u00B1"
plusminus = pm

# Mean and standard deviation over all trials, one line per model.
for model_name in MODEL_NAMES:
    print(f"{model_name} acc: {ACCURACIES[model_name].mean():.2f} {pm} {ACCURACIES[model_name].std():.4f}")
print('--------')

for model_name in MODEL_NAMES:
    print(f"{model_name} F1: {F1SCORES[model_name].mean():.2f} {pm} {F1SCORES[model_name].std():.4f}")
print('--------')

LR acc: 0.85 ± 0.0087


DT acc: 0.80 ± 0.0397
RF acc: 0.95 ± 0.0017
XG acc: 0.93 ± 0.0045
SR acc: 0.95 ± 0.0022
SRf acc: 0.95 ± 0.0025
--------
LR F1: 0.35 ± 0.0202
DT F1: 0.31 ± 0.0395
RF F1: 0.29 ± 0.0393
XG F1: 0.56 ± 0.0248
SR F1: 0.35 ± 0.0708
SRf F1: 0.35 ± 0.0393
--------

# Average program length (gplearn's `_program.length_`) of the symbolic
# classifiers fitted on the imbalanced data, per variant.
print(f"SR mean length: {LENGTHS['SR'].mean()}")
print(f"SRf mean length: {LENGTHS['SRf'].mean()}")

SR mean length: 1.6


SRf mean length: 4.0

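Conclusion: From the results above we see that, on the full imbalanced dataset, the symbolic regression models reach F1 scores below 0.5 (about 0.35) despite an accuracy of about 0.95; their high accuracy mostly reflects the majority class, i.e., they have very poor classification performance. Hence, for a fairer comparison among the classifiers, we do not use the imbalanced data in our experiments.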

You might also like