Merging Scaled 1D Data & Trying Different Classification ML Models.ipynb - Colaboratory
IMPORTING LIBRARIES
import pandas as pd
import numpy as np
import os
from natsort import natsorted
import joblib
# Google Drive folders holding the per-lead scaled 1-D ECG CSVs, one folder per class.
normal = '/content/drive/MyDrive/CMPE255_PROJECT/NORMAL'
abnormal = '/content/drive/MyDrive/CMPE255_PROJECT/AHB'
MI = '/content/drive/MyDrive/CMPE255_PROJECT/MI'
MI_history = '/content/drive/MyDrive/CMPE255_PROJECT/PM'
# Map class label -> source directory.
# NOTE: this line was truncated in the export ("...MI_histo"); restored the
# obvious completion, closing the dict with MI_history.
Types_ECG = {
    'normal': normal,
    'Abnormal_hear_beat': abnormal,
    'MI': MI,
    'History_MI': MI_history,
}
# Sort the NORMAL-class filenames naturally so lead 2 precedes lead 10
# (plain sorted() would order '..._10.csv' before '..._2.csv').
# NOTE(review): NORMAL_ is not defined in this excerpt — presumably
# NORMAL_ = os.listdir(normal) in an earlier (hidden) cell; confirm before rerunning.
NORMAL_ = natsorted(NORMAL_)
NORMAL_
['scaled_data_1D_1.csv',
'scaled_data_1D_2.csv',
'scaled_data_1D_3.csv',
'scaled_data_1D_4.csv',
'scaled_data_1D_5.csv',
'scaled_data_1D_6.csv',
'scaled_data_1D_7.csv',
'scaled_data_1D_8.csv',
'scaled_data_1D_9.csv',
'scaled_data_1D_10.csv',
'scaled_data_1D_11.csv',
'scaled_data_1D_12.csv',
'scaled_data_1D_13.csv']
Double-click (or enter) to edit
# Natural sort of the MI-class filenames ('..._2.csv' before '..._10.csv').
# NOTE(review): MI_ is not defined in this excerpt — presumably
# MI_ = os.listdir(MI) in an earlier (hidden) cell; confirm before rerunning.
MI_ = natsorted(MI_)
MI_
['scaled_data_1D_1.csv',
'scaled_data_1D_2.csv',
'scaled_data_1D_3.csv',
'scaled_data_1D_4.csv',
'scaled_data_1D_5.csv',
'scaled_data_1D_6.csv',
'scaled_data_1D_7.csv',
'scaled_data_1D_8.csv',
'scaled_data_1D_9.csv',
'scaled_data_1D_10.csv',
'scaled_data_1D_11.csv',
'scaled_data_1D_12.csv',
'scaled_data_1D_13.csv']
# Natural sort of the prior-MI (PM) filenames ('..._2.csv' before '..._10.csv').
# NOTE(review): PMI_ is not defined in this excerpt — presumably
# PMI_ = os.listdir(MI_history) in an earlier (hidden) cell; confirm before rerunning.
PMI_ = natsorted(PMI_)
PMI_
['scaled_data_1D_1.csv',
'scaled_data_1D_2.csv',
'scaled_data_1D_3.csv',
'scaled_data_1D_4.csv',
'scaled_data_1D_5.csv',
'scaled_data_1D_6.csv',
'scaled_data_1D_7.csv',
'scaled_data_1D_8.csv',
'scaled_data_1D_9.csv',
'scaled_data_1D_10.csv',
'scaled_data_1D_11.csv',
'scaled_data_1D_12.csv',
'scaled_data_1D_13.csv']
# Natural sort of the abnormal-heartbeat (AHB) filenames.
# NOTE(review): HB_ is not defined in this excerpt — presumably
# HB_ = os.listdir(abnormal) in an earlier (hidden) cell; confirm before rerunning.
HB_ = natsorted(HB_)
HB_
['scaled_data_1D_1.csv',
'scaled_data_1D_2.csv',
'scaled_data_1D_3.csv',
'scaled_data_1D_4.csv',
'scaled_data_1D_5.csv',
'scaled_data_1D_6.csv',
'scaled_data_1D_7.csv',
'scaled_data_1D_8.csv',
'scaled_data_1D_9.csv',
'scaled_data_1D_10.csv',
'scaled_data_1D_11.csv',
'scaled_data_1D_12.csv',
'scaled_data_1D_13.csv']
COMBINED CSV OF EACH LEAD(1-12) FROM ALL IMAGES
# Loop over the 13 per-class CSVs and, for each lead index x, stack the NORMAL,
# AHB, MI and PM rows into one combined frame, then write it back to Drive.
# (The export lost the loop indentation and truncated the to_csv line; both
# restored here. All four lists were natural-sorted above so index x refers to
# the same lead in every class.)
for x in range(len(MI_)):
    df1 = pd.read_csv('/content/drive/MyDrive/CMPE255_PROJECT/NORMAL/{}'.format(NORMAL_[x]))
    df2 = pd.read_csv('/content/drive/MyDrive/CMPE255_PROJECT/AHB/{}'.format(HB_[x]))
    df3 = pd.read_csv('/content/drive/MyDrive/CMPE255_PROJECT/MI/{}'.format(MI_[x]))
    df4 = pd.read_csv('/content/drive/MyDrive/CMPE255_PROJECT/PM/{}'.format(PMI_[x]))
    final_df = pd.concat([df1, df2, df3, df4], ignore_index=True)
    # The export had `df.drop(columns=['Unnamed: 0'], ...)` on an undefined `df`;
    # the stray index column belongs to final_df — drop it there, tolerating
    # files that never had one.
    final_df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')
    # Restored lead number in the output filename (leads are 1-based).
    final_df.to_csv('/content/drive/MyDrive/CMPE255_PROJECT/Combined_IDLead_{}.csv'.format(x + 1), index=False)
0 1 2 3 4 5 6 7
923 0.874246 0.877014 0.864280 0.860505 0.871349 0.912404 0.958148 0.977826 0.9
924 0.829815 0.832084 0.852396 0.909665 0.988242 1.000000 0.923323 0.821865 0.7
925 0.469048 0.417983 0.362322 0.351995 0.391493 0.418305 0.440135 0.444598 0.4
926 0.682510 0.682286 0.641051 0.620212 0.608210 0.576331 0.603596 0.645714 0.6
927 0.792175 0.815695 0.819518 0.820559 0.847985 0.880933 0.902061 0.878266 0.8
0 1 2 3 4 5 6 7
KNN
# Have passed a smaller range of hyperparameter values since I'm using the
# free-tier version of Google Colab (comment truncated in the export; completed).
k_range = list(range(1, 9))
parameters = dict(knn__n_neighbors=k_range)
# input: every column except the last (per-lead scaled samples)
X = result_df.iloc[:,0:-1]
# target: last column holds the class label
y=result_df.iloc[:,-1]
# NOTE(review): `cv`, `X_train` and `y_train` are not defined in this cell —
# presumably a Pipeline/GridSearchCV plus train_test_split cell (like the SVM
# cell below) ran earlier in the notebook; confirm before rerunning.
cv.fit(X_train,y_train)
Accuracy: 0.782258064516129
precision recall f1-score support
LOGISTIC REGRESSION
# input: every column except the last (per-lead scaled samples)
X = result_df.iloc[:,0:-1]
# target: last column holds the class label
y=result_df.iloc[:,-1]
# NOTE(review): `cv`, `X_train` and `y_train` come from cells not shown here —
# presumably a LogisticRegression pipeline + grid search + split were built
# earlier in the notebook; confirm before rerunning.
cv.fit(X_train,y_train)
Accuracy: 0.543010752688172
precision recall f1-score support
SVM
# Fit an SVM inside the preprocessing pipeline assembled in `steps` (earlier cell).
pipeline = Pipeline(steps)

# Feature matrix is everything but the last column; the last column is the label.
X = result_df.iloc[:, :-1]
y = result_df.iloc[:, -1]

# Hyperparameter space kept to a single point: raising the penalty (C) and gamma
# improves accuracy, but a fuller grid is too slow on free-tier Google Colab.
parameters = {
    'SVM__C': [10],
    'SVM__gamma': [1],
}

# Hold out 40% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=21)

# 3-fold cross-validated grid search, then score on the held-out split.
cv = GridSearchCV(pipeline, parameters, cv=3)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)

SVM_Accuracy = cv.score(X_test, y_test)
print("Accuracy: {}".format(SVM_Accuracy))
print(classification_report(y_test, y_pred))
Accuracy: 0.8225806451612904
precision recall f1-score support
NOW COMBINING ALL 12 LEADS INTO A SINGLE CSV FILE AND THEN PERFORM
MODEL ANALYSIS
# Have passed a smaller range of hyperparameter values since I'm using the
# free-tier version of Google Colab (comment truncated in the export; completed).
k_range = list(range(1, 30))
parameters = dict(knn__n_neighbors=k_range)
# input: all feature columns of the combined 12-lead frame
X = final_result_df.iloc[:,:-1]
# target: last column holds the class label
y=final_result_df.iloc[:,-1]
# NOTE(review): `cv`, `X_train` and `y_train` are not defined in this cell —
# presumably a KNN pipeline, grid search and train/test split ran in hidden
# cells; confirm before rerunning.
cv.fit(X_train,y_train)
Accuracy: 0.793010752688172
precision recall f1-score support
LOGISTIC REGRESSION
# input: all feature columns of the combined 12-lead frame
X = final_result_df.iloc[:,:-1]
# target: last column holds the class label
y=final_result_df.iloc[:,-1]
# NOTE(review): `cv`, `X_train` and `y_train` come from cells not shown here —
# presumably a LogisticRegression pipeline + grid search + split were built
# earlier in the notebook; confirm before rerunning.
cv.fit(X_train,y_train)
Accuracy: 0.7768817204301075
precision recall f1-score support
SVM
# SVM on the combined 12-lead frame, wrapped in the preprocessing pipeline
# assembled in `steps` (earlier cell).
pipeline = Pipeline(steps)

# Feature matrix is everything but the last column; the last column is the label.
X = final_result_df.iloc[:, :-1]
y = final_result_df.iloc[:, -1]

# Small C/gamma grid: larger values help accuracy but take too long on
# free-tier Google Colab, so only a handful of points are searched.
parameters = {
    'SVM__C': [1, 10, 100],
    'SVM__gamma': [0.1, 0.01],
}

# NOTE(review): no split in this cell — X_train/X_test are reused from an
# earlier cell's train_test_split on the combined frame.
cv = GridSearchCV(pipeline, parameters, cv=3)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)

SVM_Accuracy = cv.score(X_test, y_test)
print("Accuracy: {}".format(SVM_Accuracy))
print(classification_report(y_test, y_pred))
print("Tuned Model Parameters: {}".format(cv.best_params_))
Accuracy: 0.9051724137931034
precision recall f1-score support
XGBOOST
# Gradient-boosted trees baseline on the combined 12-lead train/test split.
model = XGBClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# XGBClassifier.predict already returns hard class labels, so the old
# `predictions = [round(value) for value in y_pred]` pass was redundant — and
# inconsistent, since the report below was computed on y_pred anyway. Score
# y_pred directly so accuracy and the report use the same predictions.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {}".format(accuracy))
print(classification_report(y_test, y_pred))
Accuracy: 0.853448275862069
precision recall f1-score support
# Fit a 1-nearest-neighbour model on the combined training split and persist
# it to disk with joblib for later reuse.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)

joblib_file = 'knn_model_test.pkl'
joblib.dump(knn, joblib_file)
['knn_model_test.pkl']
# input: reload the combined 12-lead feature matrix from disk (no header row)
X = pd.read_csv('final_1D.csv',header=None)
# target: last column of the in-memory combined frame
y=final_result_df.iloc[:,-1]
# NOTE(review): X and y are recomputed above but the fit below uses
# X_train/y_train from an earlier cell, so X here appears unused — confirm
# which data the persisted model was meant to be trained on.
# C=10, gamma=0.01 mirrors the best parameters found by the SVM grid search.
svm=SVC(C=10,gamma=0.01)
svm.fit(X_train,y_train)
# Persist the trained SVM to disk with joblib for later reuse.
joblib_file='svm_model_test.pkl'
joblib.dump(svm,joblib_file)
['svm_model_test.pkl']
ENSEMBLE
# Soft-voting ensemble of five classifiers on the combined 12-lead frame.
# input: all feature columns
X = final_result_df.iloc[:,0:-1]
# target: last column holds the class label
y = final_result_df.iloc[:,-1]
# Same 60/40 split convention as the individual models above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=21)
# Stacking of ML Models: soft voting averages predicted class probabilities,
# hence probability=True on the SVC (otherwise it exposes no predict_proba).
eclf = VotingClassifier(estimators=[
    ('SVM', SVC(probability=True)),
    ('knn', KNeighborsClassifier()),
    ('rf', ensemble.RandomForestClassifier()),
    ('bayes', GaussianNB()),
    ('logistic', LogisticRegression()),
], voting='soft')
# The export referenced undefined `grid` / `voting_clf` objects and a
# best_params_ attribute that a plain (un-searched) VotingClassifier does not
# have — fit and evaluate eclf itself instead.
eclf.fit(X_train, y_train)
y_pred = eclf.predict(X_test)
Voting_Accuracy = eclf.score(X_test, y_test)
print("Accuracy: {}".format(Voting_Accuracy))
print(classification_report(y_test, y_pred))