Program 4
Develop a program to load the Iris dataset. Implement the k-Nearest Neighbors (k-NN)
algorithm for classifying flowers based on their features. Split the dataset into training and
testing sets and evaluate the model using metrics like accuracy and F1-score. Test it for
different values of $k$ (e.g., $k = 1, 3, 5$) and evaluate the accuracy. Extend the k-NN algorithm to
assign weights based on the distance of neighbors (e.g., $\text{weight} = 1/d^2$). Compare the
performance of weighted k-NN and regular k-NN on a synthetic or real-world dataset.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from collections import Counter
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: Array-like of numeric coordinates.
        x2: Array-like of numeric coordinates, broadcast-compatible with ``x1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # Coerce to float first: subtracting unsigned-integer arrays wraps around
    # instead of going negative, and plain lists do not support ``-`` at all.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
class KNN:
    """k-Nearest Neighbors classifier using Euclidean distance.

    With ``weighted=False`` a prediction is the most common label among the
    ``k`` nearest training samples.  With ``weighted=True`` each of those
    neighbors votes with weight ``1 / (d**2 + 1e-5)``, where ``d`` is its
    distance to the query point, and the label with the largest total wins.

    Args:
        k: Number of nearest neighbors consulted per prediction (>= 1).
        weighted: If true, use inverse-square-distance voting instead of
            plain majority voting.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """

    def __init__(self, k=3, weighted=False):
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k
        self.weighted = weighted
        # Populated by fit(); None means the model has not been trained yet.
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples labels, aligned with the rows of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` have different lengths.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: Array-like of shape (n_queries, n_features).

        Returns:
            A NumPy array holding one predicted label per query row.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.X_train is None:
            raise RuntimeError("KNN.predict() called before fit()")
        queries = np.asarray(X, dtype=float)
        return np.array([self._predict(x) for x in queries])

    def _predict(self, x):
        """Predict the label of a single query point ``x``."""
        # Distance from x to every training sample in one vectorized pass.
        distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))
        # A stable sort keeps the neighbor order deterministic when distances tie.
        k_indices = np.argsort(distances, kind="stable")[: self.k]
        k_nearest_labels = self.y_train[k_indices].tolist()

        if self.weighted:
            # The 1e-5 term prevents division by zero when the query point
            # coincides with a training sample.
            weights = [1 / (distances[i] ** 2 + 1e-5) for i in k_indices]
            class_votes = {}
            for label, weight in zip(k_nearest_labels, weights):
                class_votes[label] = class_votes.get(label, 0) + weight
            return max(class_votes, key=class_votes.get)

        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]
# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split dataset into train and test sets before any preprocessing, so the
# held-out samples do not influence anything learned from the data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features. The scaler is fitted on the training split only and
# then applied to both splits; fitting it on the full dataset would leak
# test-set mean/variance into preprocessing and bias the evaluation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
def _score_knn(k, weighted):
    """Fit a KNN on the training split and return (accuracy, weighted F1) on the test split."""
    model = KNN(k=k, weighted=weighted)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        f1_score(y_test, predictions, average='weighted'),
    )


# Evaluate for different k values
k_values = [1, 3, 5]
for k in k_values:
    acc, f1 = _score_knn(k, weighted=False)
    print(f'k={k}, Regular k-NN -> Accuracy: {acc:.4f}, F1-score: {f1:.4f}')

# Evaluate Weighted k-NN
for k in k_values:
    acc_weighted, f1_weighted = _score_knn(k, weighted=True)
    print(f'k={k}, Weighted k-NN -> Accuracy: {acc_weighted:.4f}, F1-score: {f1_weighted:.4f}')
Sample output:
k=1, Regular k-NN -> Accuracy: 0.9667, F1-score: 0.9664
k=3, Regular k-NN -> Accuracy: 1.0000, F1-score: 1.0000
k=5, Regular k-NN -> Accuracy: 1.0000, F1-score: 1.0000
k=1, Weighted k-NN -> Accuracy: 0.9667, F1-score: 0.9664
k=3, Weighted k-NN -> Accuracy: 1.0000, F1-score: 1.0000
k=5, Weighted k-NN -> Accuracy: 1.0000, F1-score: 1.0000