
KNN/SVM

Harsh Kumar

Roll No. 12112011

Batch CS-A-01

import math
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset and encode the target: malignant (M) = 1, benign (B) = 0
df = pd.read_csv('data.csv')

X = df.drop(columns=['id', 'diagnosis'])
y = df['diagnosis'].map({'M': 1, 'B': 0})

# 80/20 train/test split on the rows as they appear in the file (no shuffling)
split_index = int(0.8 * len(df))

X_train = X[:split_index].values
y_train = y[:split_index].values
X_test = X[split_index:].values
y_test = y[split_index:].values

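Both KNN and the linear SVM below compare raw feature values, so features measured on large scales can dominate the distance and the margin. A minimal optional sketch of standardizing with training-set statistics is shown here; the names mu, sigma, X_train_scaled and X_test_scaled are assumptions for illustration and are not part of the original pipeline.

# Optional (not in the original code): standardize features using
# statistics computed on the training split only, to avoid leakage.
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0) + 1e-8  # small constant avoids division by zero
X_train_scaled = (X_train - mu) / sigma
X_test_scaled = (X_test - mu) / sigma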
def euclidean_distance(point1, point2):
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(point1, point2)))

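A quick sanity check on hypothetical toy points (not from the dataset): the 3-4-5 right triangle gives a distance of 5.

# Hypothetical check: distance between (0, 0) and (3, 4) is 5.0
print(euclidean_distance([0, 0], [3, 4]))  # 5.0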
def knn_predict(X_train, y_train, X_test, k=5):
    predictions = []
    for test_point in X_test:
        # Distance from the test point to every training point, paired with its label
        distances = [(euclidean_distance(test_point, train_point), label)
                     for train_point, label in zip(X_train, y_train)]
        # Keep the k closest neighbours and take a majority vote over their labels
        k_nearest_neighbors = sorted(distances, key=lambda x: x[0])[:k]
        k_nearest_labels = [label for _, label in k_nearest_neighbors]
        most_common = Counter(k_nearest_labels).most_common(1)[0][0]
        predictions.append(most_common)
    return predictions

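For illustration only (hypothetical toy arrays, not the breast-cancer data), the function returns the majority label among the k closest training points:

# Hypothetical toy example: two clusters around (0, 0) and (5, 5)
toy_X = [[0, 0], [0, 1], [1, 0], [5, 5], [5, 6], [6, 5]]
toy_y = [0, 0, 0, 1, 1, 1]
print(knn_predict(toy_X, toy_y, [[0.5, 0.5], [5.5, 5.5]], k=3))
# Expected: [0, 1]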
def accuracy(y_true, y_pred):
    correct = sum(1 for true, pred in zip(y_true, y_pred) if true == pred)
    return correct / len(y_true)

def precision_recall_f1(y_true, y_pred):
    tp = sum(1 for true, pred in zip(y_true, y_pred) if true == pred == 1)
    fp = sum(1 for true, pred in zip(y_true, y_pred) if true == 0 and pred == 1)
    fn = sum(1 for true, pred in zip(y_true, y_pred) if true == 1 and pred == 0)
    tn = sum(1 for true, pred in zip(y_true, y_pred) if true == 0 and pred == 0)

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1, tp, tn, fp, fn

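A small hand-checkable example with hypothetical labels: two true positives, one false positive and one false negative give precision, recall and F1 all equal to 2/3.

# Hypothetical check: tp=2, fp=1, fn=1, tn=0
p, r, f1, tp, tn, fp, fn = precision_recall_f1([1, 0, 1, 1], [1, 1, 0, 1])
print(p, r, f1)  # 0.6667, 0.6667, 0.6667 (approximately)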
def plot_confusion_matrix(tp, tn, fp, fn, title="Confusion Matrix"):
    cm = np.array([[tn, fp], [fn, tp]])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=["Pred: Benign", "Pred: Malignant"],
                yticklabels=["Actual: Benign", "Actual: Malignant"])
    plt.title(title)
    plt.show()

print("Running K-Nearest Neighbors (KNN)...")


y_pred_knn = knn_predict(X_train, y_train, X_test, k=5)
knn_acc = accuracy(y_test, y_pred_knn)
knn_prec, knn_recall, knn_f1, knn_tp, knn_tn, knn_fp, knn_fn =
precision_recall_f1(y_test, y_pred_knn)

print(f"KNN Accuracy: {knn_acc:.4f}")


print(f"KNN Precision: {knn_prec:.4f}")
print(f"KNN Recall: {knn_recall:.4f}")
print(f"KNN F1-Score: {knn_f1:.4f}")

plot_confusion_matrix(knn_tp, knn_tn, knn_fp, knn_fn, title="KNN


Confusion Matrix")

class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        # Relabel targets from (0, 1) to (-1, 1) as required by the hinge loss
        y_ = np.where(y <= 0, -1, 1)
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        # Per-sample (stochastic) subgradient descent
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    # Margin satisfied: only the regularization term contributes
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    # Margin violated: include the hinge-loss gradient as well
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]

    def predict(self, X):
        linear_output = np.dot(X, self.w) - self.b
        return np.sign(linear_output)

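The fit loop is a per-sample subgradient descent on the soft-margin objective lambda * ||w||^2 + (1/n) * sum_i max(0, 1 - y_i * (w . x_i - b)). When a sample satisfies the margin, y_i * (w . x_i - b) >= 1, the hinge term is zero and only the regularization term shrinks w; otherwise the update also pushes w toward y_i * x_i and nudges b, moving the decision boundary to reduce the violation.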
print("Running Support Vector Machine (SVM)...")


svm = SVM()
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
y_pred_svm_binary = np.where(y_pred_svm <= 0, 0, 1) # Convert to (0,
1)

svm_acc = accuracy(y_test, y_pred_svm_binary)


svm_prec, svm_recall, svm_f1, svm_tp, svm_tn, svm_fp, svm_fn =
precision_recall_f1(y_test, y_pred_svm_binary)

print(f"SVM Accuracy: {svm_acc:.4f}")


print(f"SVM Precision: {svm_prec:.4f}")
print(f"SVM Recall: {svm_recall:.4f}")
print(f"SVM F1-Score: {svm_f1:.4f}")

plot_confusion_matrix(svm_tp, svm_tn, svm_fp, svm_fn, title="SVM


Confusion Matrix")

print("\n--- Final Model Comparison ---")


print(f"KNN Accuracy: {knn_acc:.4f}, Precision: {knn_prec:.4f},
Recall: {knn_recall:.4f}, F1-Score: {knn_f1:.4f}")
print(f"SVM Accuracy: {svm_acc:.4f}, Precision: {svm_prec:.4f},
Recall: {svm_recall:.4f}, F1-Score: {svm_f1:.4f}")

Running K-Nearest Neighbors (KNN)...
KNN Accuracy: 0.2281
KNN Precision: 0.2281
KNN Recall: 1.0000
KNN F1-Score: 0.3714
Running Support Vector Machine (SVM)...
SVM Accuracy: 0.2281
SVM Precision: 0.2281
SVM Recall: 1.0000
SVM F1-Score: 0.3714

--- Final Model Comparison ---
KNN Accuracy: 0.2281, Precision: 0.2281, Recall: 1.0000, F1-Score: 0.3714
SVM Accuracy: 0.2281, Precision: 0.2281, Recall: 1.0000, F1-Score: 0.3714

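A recall of 1.0000 together with precision and accuracy both equal to 0.2281 is only possible when every test sample is predicted as malignant, so 0.2281 is simply the malignant fraction of the unshuffled last 20% of rows. One plausible remedy, sketched here as an assumption rather than part of the original submission, is to shuffle the rows before the 80/20 split (and optionally standardize the features as noted earlier); df_shuffled and random_state=42 are illustrative choices.

# Hypothetical fix (not in the original run): shuffle before splitting
df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
X = df_shuffled.drop(columns=['id', 'diagnosis'])
y = df_shuffled['diagnosis'].map({'M': 1, 'B': 0})
split_index = int(0.8 * len(df_shuffled))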
import numpy as np

class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        # Convert y from (0, 1) to (-1, 1)
        y_ = np.where(y <= 0, -1, 1)

        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        # Gradient Descent
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]

    def predict(self, X):
        linear_output = np.dot(X, self.w) - self.b
        return np.sign(linear_output)

X_train = X[:split_index].values
y_train = y[:split_index].values
X_test = X[split_index:].values
y_test = y[split_index:].values

svm = SVM()
svm.fit(X_train, y_train)

y_pred_svm = svm.predict(X_test)

y_pred_svm_binary = np.where(y_pred_svm <= 0, 0, 1)  # Convert back from (-1, 1) to (0, 1)
print(f"SVM Accuracy: {accuracy(y_test, y_pred_svm_binary)}")

SVM Accuracy: 0.22807017543859648
