0% found this document useful (0 votes)
22 views

Assignment 4 Instructions

assignment 4

Uploaded by

Abhishek Kumar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
22 views

Assignment 4 Instructions

assignment 4

Uploaded by

Abhishek Kumar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

Assignment_4_Instructions

July 14, 2021

[1]: from sklearn.datasets import make_classification


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy
from tqdm import tqdm
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

# Synthetic two-feature classification data set; the fixed seed keeps the
# notebook reproducible from run to run.
x, y = make_classification(
    n_samples=10000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=60,
)

# Stratify on the labels so the train and test splits keep the same class
# balance as the full data set.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, stratify=y, random_state=42
)

[7]: X_train.shape

[7]: (7500, 2)

[2]: %matplotlib inline


# Scatter-plot the two feature columns of the test split, coloured by label.
import matplotlib.pyplot as plt
colors = {0:'red', 1:'blue'}  # NOTE(review): defined but not passed to scatter() below
plt.scatter(X_test[:,0], X_test[:,1],c=y_test)
plt.show()

1
1 Implementing Custom RandomSearchCV
[21]: from sklearn.metrics import accuracy_score
from random import randint
import pdb
def select_test_indices(x_train, j, folds):
    """Return the row indices that make up validation fold ``j``.

    Rows ``0 .. len(x_train) - 1`` are cut into ``folds`` contiguous slices
    of (nearly) equal size; fold ``j`` is the ``j``-th slice, counting from
    zero. Taken together, folds ``0 .. folds - 1`` cover every row exactly
    once.

    Args:
        x_train: Any sized container; only ``len(x_train)`` is used.
        j: Zero-based fold number.
        folds: Total number of folds.

    Returns:
        A list of consecutive integer indices (empty if the slice is empty).

    Raises:
        ZeroDivisionError: If ``folds`` is 0.
    """
    n_rows = len(x_train)
    start = int(j * n_rows / folds)
    stop = int((j + 1) * n_rows / folds)
    return list(range(start, stop))

def RandomSearch(x_train, y_train, classifier, params, folds):
    """Score every candidate ``n_neighbors`` value with k-fold cross-validation.

    For each value in ``params`` the classifier is refitted once per fold on
    the rows outside that fold and scored (accuracy) on both the rows it was
    fitted on and the held-out rows. The per-fold accuracies are averaged.

    Args:
        x_train: Feature rows; must support indexing with an integer index
            array (e.g. a NumPy array).
        y_train: Labels aligned with ``x_train``; indexed the same way.
        classifier: Object exposing ``fit``, ``predict`` and an
            ``n_neighbors`` attribute. It is modified in place and is left
            fitted on the last fold of the last candidate.
        params: Iterable of ``n_neighbors`` candidates to evaluate.
        folds: Number of contiguous folds to split the rows into.

    Returns:
        A ``(trainscores, testscores)`` tuple of lists holding the mean
        train and validation accuracy for each candidate, in the order the
        candidates appear in ``params``.
    """
    # The fold boundaries do not depend on the candidate, so build the
    # index arrays once instead of once per (candidate, fold) pair.
    all_indices = np.arange(len(x_train))
    fold_indices = []
    for j in range(folds):
        test_indices = np.asarray(
            select_test_indices(x_train, j, folds), dtype=int
        )
        # setdiff1d returns the remaining rows in sorted order, so the
        # training subset is deterministic.
        train_indices = np.setdiff1d(all_indices, test_indices)
        fold_indices.append((train_indices, test_indices))

    trainscores = []
    testscores = []
    for k in tqdm(params):
        trainscores_folds = []
        testscores_folds = []
        for train_indices, test_indices in fold_indices:
            fold_x_train = x_train[train_indices]
            fold_y_train = y_train[train_indices]
            fold_x_test = x_train[test_indices]
            fold_y_test = y_train[test_indices]

            classifier.n_neighbors = k
            classifier.fit(fold_x_train, fold_y_train)

            testscores_folds.append(
                accuracy_score(fold_y_test, classifier.predict(fold_x_test))
            )
            trainscores_folds.append(
                accuracy_score(fold_y_train, classifier.predict(fold_x_train))
            )

        trainscores.append(np.mean(trainscores_folds))
        testscores.append(np.mean(testscores_folds))
    return trainscores, testscores

[32]: from sklearn.metrics import accuracy_score


from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
from random import randint
import warnings
warnings.filterwarnings("ignore")

from random import sample

neigh = KNeighborsClassifier()

# Draw 10 *distinct* k values from 1..30. sample() never repeats a value,
# so no candidate is evaluated twice and the curves below have one point
# per x position.
params = sorted(sample(range(1, 31), 10))
folds = 3

trainscores, testscores = RandomSearch(X_train, y_train, neigh, params, folds)

# Train vs. validation accuracy as a function of k.
plt.plot(params, trainscores, label='train curve')
plt.plot(params, testscores, label='test curve')
plt.title('Hyper-parameter VS accuracy plot')
plt.legend()
plt.show()

100%|██████████| 10/10 [00:17<00:00, 1.75s/it]

3
# In[33]:
def plot_decision_boundary(X1, X2, y, clf, step=0.02):
    """Plot the decision regions of a fitted two-feature classifier.

    A regular grid spanning the data range (padded by 1 on every side) is
    classified with ``clf.predict`` and drawn as a coloured background; the
    points ``(X1, X2)`` are scattered on top, coloured by ``y``.

    Args:
        X1: Values of the first feature (x axis); needs ``min()``/``max()``.
        X2: Values of the second feature (y axis); needs ``min()``/``max()``.
        y: Labels used to colour the scattered points.
        clf: Fitted classifier exposing ``predict`` and ``n_neighbors``
            (the latter is only used in the plot title).
        step: Grid spacing along both axes. Smaller values give a smoother
            boundary at the cost of more predictions.
    """
    # Imported locally so the function works no matter which names the
    # calling code has already imported.
    from matplotlib.colors import ListedColormap

    # Light shades for the background regions, saturated ones for the points.
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

    x_min, x_max = X1.min() - 1, X1.max() + 1
    y_min, y_max = X2.min() - 1, X2.max() + 1

    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, step),
        np.arange(y_min, y_max, step),
    )
    # Predict one label per grid node, then fold the flat result back into
    # the grid's shape for pcolormesh.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    # Plot also the training points
    plt.scatter(X1, X2, c=y, cmap=cmap_bold)

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("2-Class classification (k = %i)" % (clf.n_neighbors))
    plt.show()

4
[34]: from matplotlib.colors import ListedColormap
# Refit k-NN on the whole training split with k = 16 and draw its decision
# regions over the training points.
# NOTE(review): 16 was presumably read off the accuracy plot above - confirm.
neigh = KNeighborsClassifier(n_neighbors = 16)
neigh.fit(X_train, y_train)
plot_decision_boundary(X_train[:, 0], X_train[:, 1], y_train, neigh)

[ ]:

You might also like