#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
# Load the dataset from 'cancer.csv' in the current working directory.
df = pd.read_csv('cancer.csv')

# Missing values appear as '?' in the file; swap them for the numeric
# sentinel -99999 so no cell is left holding the placeholder string.
# NOTE(review): this sentinel is later standardized and projected by PCA
# together with the real measurements; such an extreme value may dominate
# both steps -- consider proper imputation instead. Behaviour kept as-is.
df.replace('?', -99999, inplace=True)

# Drop the 'id' column (axis=1 => drop by column, not by row).
# Presumably a record identifier with no predictive value -- confirm.
df.drop(['id'], axis=1, inplace=True)

# X gets all the features, i.e. every remaining column except the
# target column 'classes'.
X = np.array(df.drop(['classes'], axis=1))

# y is the output to predict.
y = np.array(df['classes'])
# Splitting the dataset into the Training set and Test set.
# 35% of the rows are held out for testing; random_state pins the shuffle
# so the split is reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, random_state=42)

# Feature scaling: the scaler's statistics are learned from the training
# set only and then applied unchanged to the test set.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# PCA: reduce the scaled features to 2 components.
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_train = pca.fit_transform(X_train)
# Project the test set with the components learned from the training set.
# Calling fit_transform here would refit PCA on the test data, placing the
# two sets in different coordinate systems and leaking test information.
X_test = pca.transform(X_test)
# Fraction of variance captured by each component of the training-set fit.
explained_variance = pca.explained_variance_ratio_
# KNN: fit one classifier per neighbour count k = 1..20, report its
# confusion matrix plus train/test accuracy, and collect the test accuracy
# of each k so the sweep can be plotted afterwards.
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

k_values = range(1, 21)
knn = []  # test accuracy (in percent) for each k, in sweep order
for i in k_values:
    classifier = KNeighborsClassifier(n_neighbors=i)
    # fit() returns the estimator itself, so a single call is enough;
    # trained_model and classifier refer to the same fitted object.
    trained_model = classifier.fit(X_train, y_train)

    # Predicting the Test set results
    y_pred = classifier.predict(X_test)

    # Making the Confusion Matrix
    cm_KNN = confusion_matrix(y_test, y_pred)
    print(cm_KNN)

    print("Accuracy score of train KNN")
    print(accuracy_score(y_train, trained_model.predict(X_train))*100)

    print("Accuracy score of test KNN")
    test_accuracy = accuracy_score(y_test, y_pred)*100
    print(test_accuracy)
    knn.append(test_accuracy)

# Plot test accuracy against k once the whole sweep has finished.
plt.figure(figsize=(12, 6))
plt.plot(k_values, knn, color='red', linestyle='dashed',
         marker='o', markerfacecolor='blue', markersize=10)
plt.title('Accuracy for different K Value')
plt.xlabel('K Value')
plt.ylabel('Accuracy')
[[154 10]
[ 9 72]]
Accuracy score of train KNN
100.0
Accuracy score of test KNN
92.24489795918367
[[160 4]
[ 15 66]]
Accuracy score of train KNN
97.79735682819384
Accuracy score of test KNN
92.24489795918367
[[160 4]
[ 7 74]]
Accuracy score of train KNN
96.91629955947137
Accuracy score of test KNN
95.51020408163265
[[160 4]
[ 7 74]]
Accuracy score of train KNN
96.47577092511013
Accuracy score of test KNN
95.51020408163265
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.69603524229075
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.69603524229075
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.69603524229075
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.91629955947137
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.69603524229075
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.69603524229075
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.69603524229075
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 3 78]]
Accuracy score of train KNN
96.69603524229075
Accuracy score of test KNN
97.14285714285714
[[160 4]
[ 1 80]]
Accuracy score of train KNN
96.25550660792952
Accuracy score of test KNN
97.95918367346938
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.25550660792952
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.25550660792952
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.25550660792952
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.25550660792952
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.25550660792952
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 2 79]]
Accuracy score of train KNN
96.25550660792952
Accuracy score of test KNN
97.55102040816327
[[160 4]
[ 3 78]]
Accuracy score of train KNN
96.0352422907489
Accuracy score of test KNN
97.14285714285714
Text(0, 0.5, 'Accuracy')
# Fitting SVM to the Training set
from sklearn.svm import SVC
classifier = SVC(kernel='linear', random_state=0)
# fit() returns the estimator itself, so trained_model is the same object
# as classifier; fitting a second time on identical data is unnecessary.
trained_model = classifier.fit(X_train, y_train)
SVC(kernel='linear', random_state=0)
# Evaluate the fitted SVM: predict the held-out test set, print the
# confusion matrix, then print train and test accuracy as percentages.
from sklearn.metrics import confusion_matrix

y_pred = classifier.predict(X_test)

cm_SVM = confusion_matrix(y_test, y_pred)
print(cm_SVM)

# Compute both scores up front so the reporting below is plain printing.
svm_train_accuracy = accuracy_score(y_train, trained_model.predict(X_train))*100
svm_test_accuracy = accuracy_score(y_test, y_pred)*100

print("Accuracy score of train SVM")
print(svm_train_accuracy)
print("Accuracy score of test SVM")
print(svm_test_accuracy)
[[160 4]
[ 4 77]]
Accuracy score of train SVM
96.47577092511013
Accuracy score of test SVM
96.73469387755102