
push_table-Copy2

In [30]: # importing the estimators, the dataset loader, the metrics and the supporting libraries


from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.tree import DecisionTreeClassifier


from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from sklearn.datasets import load_breast_cancer

from sklearn.metrics import accuracy_score


from sklearn.model_selection import train_test_split
from sklearn.metrics import auc

import matplotlib.pyplot as plt


import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [31]: # loading the breast_cancer dataset and splitting it into features and target
data = load_breast_cancer()
data_to_process = pd.DataFrame(data.data, columns=data.feature_names)
target_value = pd.Series(data.target)

In [32]: # splitting the dataset into a train set and a test set
x_train, x_test, y_train, y_test = train_test_split(data_to_process, target_value)

In [33]: x_train.head()

Out[33]:
      mean radius  mean texture  mean perimeter  mean area  mean smoothness  mean compactness  mean concavity  mean concave points
276         11.33         14.16           71.79      396.6          0.09379           0.03872        0.001487             0.003333
56          19.21         18.57          125.50     1152.0          0.10530           0.12670        0.132300             0.089940
16          14.68         20.13           94.74      684.5          0.09867           0.07200        0.073950             0.052590
352         25.73         17.46          174.20     2010.0          0.11490           0.23630        0.336800             0.191300
250         20.94         23.56          138.90     1364.0          0.10070           0.16060        0.271200             0.131000

5 rows × 30 columns (only the first eight of the 30 feature columns are shown)

In [34]: # classification with BaggingClassifier, base estimator is KNeighborsClassifier


bclf = BaggingClassifier(base_estimator=KNeighborsClassifier(), n_estimators=10,
                         max_samples=0.5, max_features=1.0, random_state=12)
bclf.fit(x_train, y_train)


y_pred = bclf.predict(x_test)
#bscore = accuracy_score(y_pred, y_test)

In [35]: # RandomForestClassifier


forest = RandomForestClassifier(n_estimators=10, max_depth=4, random_state=5)
forest.fit(x_train, y_train)
y_pred = forest.predict(x_test)
#fscore = accuracy_score(y_pred, y_test)

In [36]: # AdaBoostClassifier, base estimator is LogisticRegression


adaB = AdaBoostClassifier(base_estimator=LogisticRegression(solver='newton-cg'))
adaB.fit(x_train, y_train)
y_pred = adaB.predict(x_test)
#abscore = accuracy_score(y_pred, y_test)

In [37]: #GradientBoostingClassifier
gbclf = GradientBoostingClassifier(n_estimators=100, random_state=12)
gbclf.fit(x_train, y_train)
y_pred = gbclf.predict(x_test)
#gbscore = accuracy_score(y_pred, y_test)

In [38]: # DecisionTreeClassifier
dtclf = DecisionTreeClassifier()
dtclf.fit(x_train, y_train)
y_pred = dtclf.predict(x_test)
#dtclfscore = accuracy_score(y_pred, y_test)

In [39]: #LogisticRegression
lr = LogisticRegression(solver='newton-cg')
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)
#lrscore = accuracy_score(y_test, y_pred)

In [40]: # KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)
#knn_score = accuracy_score(y_pred, y_test)

In [41]: ############################################################################
# calculating the coordinates of the ROC curve

In [42]: # function that returns the name of a given classifier


def return_class_name(given_class):
    given_class_to_string = str(given_class.__class__).split('.')[-1].rstrip("'>")
    return given_class_to_string

In [51]: def TruePosRate(y_true, y_pred):
    # align the predictions with the index of y_true so that label-based lookups work
    y_pred_test = pd.Series(y_pred, index=y_true.index)
    TP = FP = FN = TN = 0
    for i in y_true.index:
        if y_true[i] == y_pred_test[i] == 1:
            TP += 1
        if y_pred_test[i] == 1 and y_true[i] != y_pred_test[i]:
            FP += 1
        if y_pred_test[i] == 0 and y_pred_test[i] != y_true[i]:
            FN += 1
        if y_true[i] == y_pred_test[i] == 0:
            TN += 1

    try:
        FPR = FP / (FP + TN)
    except ZeroDivisionError:
        FPR = 1

    try:
        recall = TP / (TP + FN)
    except ZeroDivisionError:
        recall = 1

    return FPR, recall


TruePosRate(y_test, y_pred)
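As a quick sanity check (an addition, not part of the original notebook), the FPR and recall returned by TruePosRate can be compared with the counts from sklearn's confusion_matrix; this sketch assumes y_pred still holds the KNN predictions from cell In [40].

In [ ]: # added cross-check: confusion_matrix returns TN, FP, FN, TP for binary labels
from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
print('FPR =', fp / (fp + tn), ' recall =', tp / (tp + fn))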

In [44]: # varying the probability threshold to trace out the ROC curve


def FPR_recall_scores_list(estimator):
    y_test_probs = estimator.predict_proba(x_test)[:, 1]
    FPR_scores = list()
    recall1_scores = list()
    probability_thresholds = np.linspace(0, 1, num=100)
    for p in probability_thresholds:
        y_test_pred = list()
        for prob in y_test_probs:
            if prob > p:
                y_test_pred.append(1)
            else:
                y_test_pred.append(0)
        FPR, recall = TruePosRate(y_test, y_test_pred)

        FPR_scores.append(FPR)
        recall1_scores.append(recall)
    return FPR_scores, recall1_scores

FPR_score_list, recall_score_list = FPR_recall_scores_list(knn)
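The same coordinates can also be obtained directly from sklearn's roc_curve, which sweeps the thresholds implied by the predicted probabilities; a minimal cross-check sketch (an addition, assuming the fitted knn from cell In [40]).

In [ ]: # added cross-check: roc_curve returns FPR and TPR for every useful threshold
from sklearn.metrics import roc_curve

fpr_sk, tpr_sk, thresholds = roc_curve(y_test, knn.predict_proba(x_test)[:, 1])
print(len(thresholds), 'thresholds evaluated by roc_curve')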

In [ ]:

In [46]: fig, ax = plt.subplots(figsize=(7,7))


ax.plot(FPR_score_list, recall_score_list, label='knn')
ax.plot([0, 1], [0, 1], ls='--', label='Baseline')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate (Recall)')
ax.legend(loc='center left')

Out[46]: <matplotlib.legend.Legend at 0x7ff5b1748850>


In [ ]:

In [47]: listClassifier = [bclf, forest, adaB, gbclf, dtclf, lr, knn]

def classifier_and_score(listClassifier):
    data = list()
    for classifier in listClassifier:
        classifier.fit(x_train, y_train)
        y_pred = classifier.predict(x_test)
        score = round(accuracy_score(y_pred, y_test) * 100, 2)
        name = return_class_name(classifier)
        FPR_scores, recall_scores = FPR_recall_scores_list(classifier)
        auc_score = auc(FPR_scores, recall_scores)
        data.append([name, score, auc_score])
    return data

data = classifier_and_score(listClassifier)
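For comparison (an added sketch, not the author's cell), sklearn's roc_auc_score computes the same area directly from the predicted probabilities, which gives a quick way to validate the trapezoidal AUC above.

In [ ]: # added cross-check: ROC AUC straight from the predicted probabilities
from sklearn.metrics import roc_auc_score

for clf in listClassifier:
    print(return_class_name(clf),
          round(roc_auc_score(y_test, clf.predict_proba(x_test)[:, 1]), 4))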

In [ ]:

In [48]: from tabulate import tabulate

datat = data
col_name = ['algorithm_name', 'accuracy(%)', 'AUC']
print(tabulate(datat, headers=col_name))

In [ ]: # computing the ROC curve for all the algorithms

In [ ]:
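The comment above marks a cell that was never written; below is a minimal sketch of what it could contain, reusing FPR_recall_scores_list and return_class_name from earlier cells (an added illustration of the stated intent, not the author's code).

In [ ]: # added sketch: overlay the ROC curve of every fitted classifier
fig, ax = plt.subplots(figsize=(7, 7))
for clf in listClassifier:
    fpr_list, tpr_list = FPR_recall_scores_list(clf)
    ax.plot(fpr_list, tpr_list, label=return_class_name(clf))
ax.plot([0, 1], [0, 1], ls='--', label='Baseline')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate (Recall)')
ax.legend(loc='lower right')
plt.show()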

