# Python Lab - Week #7
# The Iris data set is a multivariate data set introduced by the British
# statistician and biologist Ronald Fisher in his 1936 paper. It consists of
# 50 samples from each of 3 species of Iris (Iris setosa, Iris virginica,
# Iris versicolor).
# Available at https://archive.ics.uci.edu/dataset/53/iris
# Import libraries, packages, classes, and functions
import pandas as pd
from sklearn.model_selection import train_test_split
from pandas.core.common import random_state
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.metrics import classification_report, accuracy_score
# Load the Iris measurements from the local CSV file
data = pd.read_csv("iris.csv")

# Separate the predictors from the target
X = data.iloc[:, :4]  # first four columns: independent variables
y = data.iloc[:, 4:]  # columns from index 4 onward: dependent variable (kept 2-D)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=11,
)

# Standardize the features: the scaler is fitted on the training rows only and
# the same fitted transform is then applied to both partitions
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build a 3-nearest-neighbours classifier; the target frame is flattened to a
# 1-D array before fitting (fit() returns the fitted estimator itself)
model = KNeighborsClassifier(n_neighbors=3).fit(
    X_train,
    y_train.values.ravel(),
)

# Predict the species for the held-out rows
y_pred = model.predict(X_test)

# Report overall accuracy followed by scikit-learn's text classification report
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))
# Optimizing k: refit the classifier for every candidate k and record the
# accuracy obtained on the test set
accuracyList = []
ks = range(1, 30)  # candidate neighbour counts 1..29

# fit() expects a 1-D label array; y_train is a single-column DataFrame, so
# flatten it once here instead of passing the 2-D frame on every iteration
# (which makes scikit-learn emit a DataConversionWarning each time).
y_train_flat = y_train.values.ravel()

for k in ks:
    knnModel = KNeighborsClassifier(n_neighbors=k)
    knnModel.fit(X_train, y_train_flat)
    y_pred = knnModel.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracyList.append(accuracy)

# "Accuracy vs k" plot: one point per candidate k
fig, ax = plt.subplots()
ax.plot(ks, accuracyList)
ax.set(
    xlabel="k",
    ylabel="Accuracy",
    title="Accuracy vs k",
)
plt.show()