0% found this document useful (0 votes)
18 views

KNN For Classification

Uploaded by

snehalkotar1153
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
18 views

KNN For Classification

Uploaded by

snehalkotar1153
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

Name : Snehal Kotkar Div : A Roll No. : 46

Practical No. : 2 Problem Statement : Build a machine learning model using k-Nearest
Neighbors algorithm to predict whether the patients in the "Pima Indians Diabetes Dataset"
have diabetes or not.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Apply matplotlib's 'ggplot' style sheet to the figures drawn below.
plt.style.use('ggplot')

# Colab helper module for attaching Google Drive to the notebook runtime.
from google.colab import drive


# Mount Drive at /content/drive so the dataset can be read by file path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

# Load the diabetes CSV from the mounted Drive into a DataFrame.
# NOTE(review): the folder name 'ML ' contains a space before the slash —
# confirm it matches the actual Drive folder and is not a copy artifact.
df = pd.read_csv('/content/drive/MyDrive/ML /diabetes.csv')
# Preview the first rows of the frame.
df.head()

{"summary":"{\n \"name\": \"df\",\n \"rows\": 768,\n \"fields\": [\


n {\n \"column\": \"Pregnancies\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n
\"max\": 17,\n \"num_unique_values\": 17,\n \"samples\":
[\n 6,\n 1,\n 3\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Glucose\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 31,\n
\"min\": 0,\n \"max\": 199,\n \"num_unique_values\":
136,\n \"samples\": [\n 151,\n 101,\n
112\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BloodPressure\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 19,\n \"min\": 0,\n
\"max\": 122,\n \"num_unique_values\": 47,\n
\"samples\": [\n 86,\n 46,\n 85\
n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"SkinThickness\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 15,\n \"min\": 0,\n
\"max\": 99,\n \"num_unique_values\": 51,\n \"samples\":
[\n 7,\n 12,\n 48\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Insulin\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 115,\n
\"min\": 0,\n \"max\": 846,\n \"num_unique_values\":
186,\n \"samples\": [\n 52,\n 41,\n
183\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BMI\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 7.884160320375446,\n \"min\": 0.0,\n \"max\":
67.1,\n \"num_unique_values\": 248,\n \"samples\": [\n
19.9,\n 31.0,\n 38.1\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"DiabetesPedigreeFunction\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0.3313285950127749,\n \"min\": 0.078,\n \"max\": 2.42,\n
\"num_unique_values\": 517,\n \"samples\": [\n 1.731,\
n 0.426,\n 0.138\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Age\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 11,\n \"min\": 21,\n
\"max\": 81,\n \"num_unique_values\": 52,\n \"samples\":
[\n 60,\n 47,\n 72\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Outcome\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 0,\n
\"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n
\"samples\": [\n 0,\n 1\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"df"}

# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(768, 9)

# Count null entries per column.
# NOTE(review): the df.head() summary reports a minimum of 0 for Glucose,
# BloodPressure, SkinThickness, Insulin and BMI; if those zeros stand in for
# missing measurements they are not counted here — confirm.
df.isnull().sum()

Pregnancies 0
Glucose 0
BloodPressure 0
SkinThickness 0
Insulin 0
BMI 0
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64

# Split the frame into a feature matrix (every column except the label)
# and a label vector, both as NumPy arrays.
target_column = 'Outcome'
y = df[target_column].to_numpy()
X = df.drop(columns=[target_column]).to_numpy()

from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as a test set. stratify=y asks for the class
# proportions of y to be preserved in both splits, and random_state=42
# fixes the shuffle so the split is reproducible.
# The call is wrapped in parentheses so it can span several lines; a bare
# line break after `=` is a syntax error.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

#import KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

# Set up arrays to store training and test accuracies, one slot per k.
neighbors = np.arange(1, 15)  # candidate k values 1..14
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Fit one classifier per candidate k and record its accuracy on both splits.
# NOTE(review): the features are passed to k-NN as loaded, with no scaling
# step visible here — confirm that is intended for a distance-based model.
for i, k in enumerate(neighbors):
    # Set up a k-NN classifier with k neighbors
    knn = KNeighborsClassifier(n_neighbors=k)

    # Fit the model
    knn.fit(X_train, y_train)

    # Compute accuracy on the training set
    train_accuracy[i] = knn.score(X_train, y_train)

    # Compute accuracy on the test set
    test_accuracy[i] = knn.score(X_test, y_test)

# Draw test and training accuracy against k on a single set of axes.
accuracy_curves = (
    (test_accuracy, 'Testing Accuracy'),
    (train_accuracy, 'Training accuracy'),
)
for curve, caption in accuracy_curves:
    plt.plot(neighbors, curve, label=caption)

# Title and axis labels, then the legend built from the curve labels.
plt.title('k-NN Varying number of neighbors')
plt.xlabel('Number of neighbors')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# Set up a k-NN classifier with a fixed k of 4.
# NOTE(review): presumably k=4 was read off the accuracy plot — confirm.
knn = KNeighborsClassifier(n_neighbors=4)

# Fit the model on the training split.


knn.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=4)

# Get accuracy. Note: in case of classification algorithms, the score
# method represents accuracy (here measured on the held-out test split).
knn.score(X_test, y_test)

0.7291666666666666

# Let us get the predictions for the test split using the classifier we fit above.
y_pred = knn.predict(X_test)

# Display the predicted labels.
y_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
0,
0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0,
1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
1,
0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0])

You might also like