PCA Codebase


KNN/SVM

Harsh Kumar

Roll No: 12112011

Batch: CS-A-01

import math
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv('data.csv')

X = df.drop(columns=['id', 'diagnosis'])
y = df['diagnosis'].map({'M': 1, 'B': 0})

# Sequential 80/20 train-test split (no shuffling)
split_index = int(0.8 * len(df))

X_train = X[:split_index].values
y_train = y[:split_index].values
X_test = X[split_index:].values
y_test = y[split_index:].values
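
Both classifiers below are scale-sensitive: KNN compares raw Euclidean distances, and the SVM's subgradient steps are dominated by large-valued features. If data.csv is the Wisconsin breast-cancer table (an assumption, suggested by the M/B diagnosis labels), its features span very different numeric ranges, so standardizing with training-set statistics is worth trying. A minimal sketch, not part of the original submission:

# Hypothetical preprocessing step: z-score each feature using train statistics only
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0)
sigma[sigma == 0] = 1.0  # guard against constant columns
X_train_scaled = (X_train - mu) / sigma
X_test_scaled = (X_test - mu) / sigma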

def euclidean_distance(point1, point2):
    # Euclidean (L2) distance between two feature vectors
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(point1, point2)))

def knn_predict(X_train, y_train, X_test, k=5):
    # Classify each test point by majority vote among its k nearest training points
    predictions = []
    for test_point in X_test:
        distances = [(euclidean_distance(test_point, train_point), label)
                     for train_point, label in zip(X_train, y_train)]
        k_nearest_neighbors = sorted(distances, key=lambda x: x[0])[:k]
        k_nearest_labels = [label for _, label in k_nearest_neighbors]
        most_common = Counter(k_nearest_labels).most_common(1)[0][0]
        predictions.append(most_common)
    return predictions
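
The Python loop above scales poorly for larger datasets; an equivalent vectorized variant (a sketch using NumPy broadcasting, not in the original code) computes all pairwise distances at once:

def knn_predict_vectorized(X_train, y_train, X_test, k=5):
    # Pairwise Euclidean distances via broadcasting: shape (n_test, n_train)
    diffs = X_test[:, np.newaxis, :] - X_train[np.newaxis, :, :]
    dists = np.sqrt((diffs ** 2).sum(axis=2))
    # Indices of the k smallest distances in each row
    nearest = np.argsort(dists, axis=1)[:, :k]
    # Majority vote over the neighbors' labels
    return [Counter(y_train[row]).most_common(1)[0][0] for row in nearest]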

def accuracy(y_true, y_pred):
    # Fraction of predictions that match the true labels
    correct = sum(1 for true, pred in zip(y_true, y_pred) if true == pred)
    return correct / len(y_true)

def precision_recall_f1(y_true, y_pred):
    # Confusion-matrix counts, treating 1 (malignant) as the positive class
    tp = sum(1 for true, pred in zip(y_true, y_pred) if true == pred == 1)
    fp = sum(1 for true, pred in zip(y_true, y_pred) if true == 0 and pred == 1)
    fn = sum(1 for true, pred in zip(y_true, y_pred) if true == 1 and pred == 0)
    tn = sum(1 for true, pred in zip(y_true, y_pred) if true == 0 and pred == 0)

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1, tp, tn, fp, fn
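
A quick sanity check on hypothetical toy labels makes the counts concrete:

# 2 TP, 1 FN, 1 FP, 2 TN -> precision = recall = F1 = 2/3
y_true_demo = [1, 1, 1, 0, 0, 0]
y_pred_demo = [1, 1, 0, 1, 0, 0]
print(precision_recall_f1(y_true_demo, y_pred_demo))
# (0.666..., 0.666..., 0.666..., 2, 2, 1, 1)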

def plot_confusion_matrix(tp, tn, fp, fn, title="Confusion Matrix"):
    # 2x2 confusion matrix with benign (0) first, malignant (1) second
    cm = np.array([[tn, fp], [fn, tp]])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=["Pred: Benign", "Pred: Malignant"],
                yticklabels=["Actual: Benign", "Actual: Malignant"])
    plt.title(title)
    plt.show()

print("Running K-Nearest Neighbors (KNN)...")


y_pred_knn = knn_predict(X_train, y_train, X_test, k=5)
knn_acc = accuracy(y_test, y_pred_knn)
knn_prec, knn_recall, knn_f1, knn_tp, knn_tn, knn_fp, knn_fn =
precision_recall_f1(y_test, y_pred_knn)

print(f"KNN Accuracy: {knn_acc:.4f}")


print(f"KNN Precision: {knn_prec:.4f}")
print(f"KNN Recall: {knn_recall:.4f}")
print(f"KNN F1-Score: {knn_f1:.4f}")

plot_confusion_matrix(knn_tp, knn_tn, knn_fp, knn_fn, title="KNN


Confusion Matrix")

class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        # Convert labels from (0, 1) to (-1, 1) for the hinge-loss formulation
        y_ = np.where(y <= 0, -1, 1)
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        # Per-sample subgradient descent on the regularized hinge loss
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]

    def predict(self, X):
        # Sign of the decision function: -1 or 1 (0 only exactly on the boundary)
        linear_output = np.dot(X, self.w) - self.b
        return np.sign(linear_output)
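
In equation form, fit takes per-sample subgradient steps for the soft-margin objective below, with learning rate \eta (the code applies the regularizer gradient once per sample rather than averaging over the batch):

J(w, b) = \lambda \lVert w \rVert^2 + \frac{1}{n} \sum_{i=1}^{n} \max\left(0,\; 1 - y_i \left( w \cdot x_i - b \right)\right)

w \leftarrow
\begin{cases}
  w - \eta \, 2\lambda w, & \text{if } y_i (w \cdot x_i - b) \ge 1 \\
  w - \eta \, (2\lambda w - y_i x_i), \quad b \leftarrow b - \eta \, y_i, & \text{otherwise}
\end{cases}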

print("Running Support Vector Machine (SVM)...")


svm = SVM()
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
y_pred_svm_binary = np.where(y_pred_svm <= 0, 0, 1) # Convert to (0,
1)

svm_acc = accuracy(y_test, y_pred_svm_binary)


svm_prec, svm_recall, svm_f1, svm_tp, svm_tn, svm_fp, svm_fn =
precision_recall_f1(y_test, y_pred_svm_binary)

print(f"SVM Accuracy: {svm_acc:.4f}")


print(f"SVM Precision: {svm_prec:.4f}")
print(f"SVM Recall: {svm_recall:.4f}")
print(f"SVM F1-Score: {svm_f1:.4f}")

plot_confusion_matrix(svm_tp, svm_tn, svm_fp, svm_fn, title="SVM


Confusion Matrix")

print("\n--- Final Model Comparison ---")


print(f"KNN Accuracy: {knn_acc:.4f}, Precision: {knn_prec:.4f},
Recall: {knn_recall:.4f}, F1-Score: {knn_f1:.4f}")
print(f"SVM Accuracy: {svm_acc:.4f}, Precision: {svm_prec:.4f},
Recall: {svm_recall:.4f}, F1-Score: {svm_f1:.4f}")

Running K-Nearest Neighbors (KNN)...
KNN Accuracy: 0.2281
KNN Precision: 0.2281
KNN Recall: 1.0000
KNN F1-Score: 0.3714
Running Support Vector Machine (SVM)...
SVM Accuracy: 0.2281
SVM Precision: 0.2281
SVM Recall: 1.0000
SVM F1-Score: 0.3714

--- Final Model Comparison ---
KNN Accuracy: 0.2281, Precision: 0.2281, Recall: 1.0000, F1-Score: 0.3714
SVM Accuracy: 0.2281, Precision: 0.2281, Recall: 1.0000, F1-Score: 0.3714
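
Recall of 1.0000 with precision equal to accuracy is consistent with both models predicting every test sample as malignant; 0.2281 is then simply the malignant fraction of the last 20% of rows. Two likely culprits are the unshuffled sequential split and the unscaled features. A small diagnostic sketch, hypothetical and not part of the original run:

# Check whether the predictions collapse to a single class
print(Counter(y_pred_knn))   # a single key would confirm the all-malignant hypothesis
print(Counter(y_test))       # class balance of the sequential test split

# A shuffled split usually gives a more representative test set
rng = np.random.default_rng(42)
perm = rng.permutation(len(X))
X_train, X_test = X.values[perm][:split_index], X.values[perm][split_index:]
y_train, y_test = y.values[perm][:split_index], y.values[perm][split_index:]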

# Standalone re-run of the SVM implementation
import numpy as np

class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        # Convert y from (0, 1) to (-1, 1)
        y_ = np.where(y <= 0, -1, 1)

        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        # Gradient Descent
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]

    def predict(self, X):
        linear_output = np.dot(X, self.w) - self.b
        return np.sign(linear_output)

X_train = X[:split_index].values
y_train = y[:split_index].values
X_test = X[split_index:].values
y_test = y[split_index:].values

svm = SVM()
svm.fit(X_train, y_train)

y_pred_svm = svm.predict(X_test)

y_pred_svm_binary = np.where(y_pred_svm <= 0, 0, 1)  # Convert back from (-1, 1) to (0, 1)
print(f"SVM Accuracy: {accuracy(y_test, y_pred_svm_binary)}")

SVM Accuracy: 0.22807017543859648
