Assignment No 8
Assignment No 8
TECHNOLOGY TAXILA
Assignment No 8
MACHINE LEARNING
NAME:
AREESHA NOOR
REG NO:
21-CP-78
DATE:9-6-2024
1. Anomaly Detection
import scipy.io
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the anomaly-detection data file into a dict-like object.
mat = scipy.io.loadmat('data/ex8data1.mat')
# In a notebook cell this displays the variable names stored in the file.
mat.keys()
def estimate_gaussian(X):
    """Estimate per-feature Gaussian parameters.

    Args:
        X: array with one example per row and one feature per column.

    Returns:
        Tuple ``(mean, var)``: the mean and the population variance
        (NumPy's default, ``ddof=0``) of every column of ``X``.
    """
    # axis=0 reduces over the rows, giving one statistic per column.
    mean = np.mean(X, axis=0)
    var = np.var(X, axis=0)
    return mean, var
# NOTE(review): fragment only -- the enclosing function header and the loop
# that defines `tp`, `fp`, `fn`, `epsilon`, `best_f1` and `best_epsilon` are
# not present in this extract, and the indentation has been lost.
# NOTE(review): if tp+fp or tp+fn is 0 these divisions are undefined --
# confirm the missing code guards against that.
precision = tp / (tp+fp)
recall = tp / (tp+fn)
# F1 score: harmonic mean of precision and recall.
f1 = (2*precision*recall)/(precision+recall)
# Remember the epsilon that produced the highest F1 so far.
if f1 > best_f1:
best_f1 = f1
best_epsilon = epsilon
return best_epsilon, best_f1
2. Recommender Systems
2.1 Movie Ratings Dataset
# Load the ratings file and pull out the Y and R matrices.
mat = scipy.io.loadmat('data/ex8_movies.mat')
Y = mat['Y']
R = mat['R']
# Load X, Theta and the problem sizes from the parameter file.
mat2 = scipy.io.loadmat('data/ex8_movieParams.mat')
X = mat2['X']
Theta = mat2['Theta']
# NOTE(review): loadmat presumably returns these scalars as small arrays
# rather than Python ints -- confirm before using them as sizes.
num_users = mat2['num_users']
num_movies = mat2['num_movies']
num_features = mat2['num_features']
$$J(x^{(1)},\dots,x^{(n_m)},\theta^{(1)},\dots,\theta^{(n_u)}) = \frac{1}{2}\sum_{(i,j):r(i,j)=1}\left((\theta^{(j)})^{T}x^{(i)} - y^{(i,j)}\right)^{2}$$
def cofi_cost_func(params, Y, R, num_users, num_movies, num_features, lambda_r):
    """Return the unregularized collaborative-filtering cost.

    NOTE(review): the ``def`` line and the unpacking of ``params`` are missing
    from this extract; they were reconstructed from the call
    ``cofi_cost_func(np.hstack((X.ravel(order='F'), Theta.ravel(order='F'))),
    Y, R, num_users, num_movies, num_features, 0)`` -- confirm against the
    original notebook.

    Args:
        params: 1-D vector holding X followed by Theta, each flattened in
            column-major ('F') order.
        Y: (num_movies, num_users) ratings matrix.
        R: same shape as Y; weights each squared error (1 = rated, 0 = not).
        num_users: number of rows of Theta.
        num_movies: number of rows of X.
        num_features: number of columns of X and Theta.
        lambda_r: accepted for call compatibility; unused by this version.

    Returns:
        The scalar cost J.
    """
    split = num_movies * num_features
    # Undo the column-major flattening done by the caller.
    X = np.reshape(params[:split], (num_movies, num_features), order='F')
    Theta = np.reshape(params[split:], (num_users, num_features), order='F')

    squared_error = (X.dot(Theta.T) - Y) ** 2
    # Multiplying by R keeps only the entries that were actually rated.
    J = np.sum(R * squared_error) / 2
    return J
# Reduce the data set to a small slice for testing.
num_users = 4
num_movies = 5
num_features = 3
X = X[:num_movies, :num_features]
Theta = Theta[:num_users, :num_features]
Y = Y[:num_movies, :num_users]
R = R[:num_movies, :num_users]
# X and Theta are flattened column-major into one vector; the trailing 0 is
# the regularization strength.
J = cofi_cost_func(np.hstack((X.ravel(order='F'),
Theta.ravel(order='F'))),
Y, R, num_users, num_movies, num_features, 0)
print('Cost at loaded parameters: ', J)
print('\t(this value should be about 22.22)')
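$$\frac{\partial J}{\partial x_k^{(i)}} = \sum_{j:r(i,j)=1}\left((\theta^{(j)})^{T}x^{(i)} - y^{(i,j)}\right)\theta_k^{(j)}$$
$$\frac{\partial J}{\partial \theta_k^{(j)}} = \sum_{i:r(i,j)=1}\left((\theta^{(j)})^{T}x^{(i)} - y^{(i,j)}\right)x_k^{(i)}$$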
def cofi_cost_func(params, Y, R, num_users, num_movies, num_features, lambda_r):
    """Return the unregularized collaborative-filtering cost and gradient.

    NOTE(review): the ``def`` line and the unpacking of ``params`` are missing
    from this extract; they were reconstructed from the call sites, which pass
    ``np.hstack((X.ravel(order='F'), Theta.ravel(order='F')))`` followed by
    ``Y, R, num_users, num_movies, num_features`` and the regularization
    strength -- confirm against the original notebook.

    Args:
        params: 1-D vector holding X followed by Theta, each flattened in
            column-major ('F') order.
        Y: (num_movies, num_users) ratings matrix.
        R: same shape as Y; entries equal to 1 mark rated (movie, user) pairs.
        num_users: number of rows of Theta.
        num_movies: number of rows of X.
        num_features: number of columns of X and Theta.
        lambda_r: accepted for call compatibility; unused by this version.

    Returns:
        Tuple ``(J, grad)`` where ``grad`` is the gradient with respect to X
        followed by the gradient with respect to Theta, each flattened in
        column-major order (same layout as ``params``).
    """
    split = num_movies * num_features
    # Undo the column-major flattening done by the caller.
    X = np.reshape(params[:split], (num_movies, num_features), order='F')
    Theta = np.reshape(params[split:], (num_users, num_features), order='F')

    residual = X.dot(Theta.T) - Y
    J = np.sum(R * residual ** 2) / 2

    # Zero the residual wherever no rating exists, then one matrix product
    # per parameter block replaces the per-movie and per-user loops.
    rated_residual = residual * (R == 1)
    X_grad = rated_residual.dot(Theta)
    Theta_grad = rated_residual.T.dot(X)

    grad = np.hstack((X_grad.ravel(order='F'),
                      Theta_grad.ravel(order='F')))
    return J, grad
def check_cost_function(lambda_r=0):
    """Compare the analytical gradient of ``cofi_cost_func`` with a numerical one.

    A small random problem (4 movies, 5 users, 3 features) is generated, the
    gradient returned by ``cofi_cost_func`` is compared with a
    central-difference estimate, both are printed side by side, and the
    relative difference is printed last.

    NOTE(review): the lines that produced ``num_grad`` and ``grad`` are
    missing from this extract (the function raised NameError as written).
    They were rebuilt here from the printed output that follows the call;
    the column order (numerical first, analytical second) is an assumption
    -- confirm against the original notebook.

    Args:
        lambda_r: regularization strength forwarded to ``cofi_cost_func``.
    """
    # Synthetic low-rank ratings with roughly half of the entries removed.
    X_t = np.random.uniform(0, 1, (4, 3))
    Theta_t = np.random.uniform(0, 1, (5, 3))
    Y = X_t.dot(Theta_t.T)
    Y[np.random.uniform(0, 1, Y.shape) > 0.5] = 0
    R = np.zeros(Y.shape)
    R[Y != 0] = 1

    # Random point at which the gradient is checked.
    X = np.random.normal(size=X_t.shape)
    Theta = np.random.normal(size=Theta_t.shape)
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]
    params = np.hstack((X.ravel(order='F'), Theta.ravel(order='F')))

    def cost(vector):
        # Only the scalar cost is needed for the finite differences.
        return cofi_cost_func(vector, Y, R, num_users, num_movies,
                              num_features, lambda_r)[0]

    _, grad = cofi_cost_func(params, Y, R, num_users, num_movies,
                             num_features, lambda_r)

    # Central differences: perturb one parameter at a time.
    step = 1e-4
    num_grad = np.zeros(params.shape)
    for k in range(params.size):
        offset = np.zeros(params.shape)
        offset[k] = step
        num_grad[k] = (cost(params + offset) - cost(params - offset)) / (2 * step)

    print('The columns should be very similar...')
    for numerical, analytical in zip(num_grad, grad):
        print(numerical, analytical)

    diff = np.linalg.norm(num_grad-grad)/np.linalg.norm(num_grad+grad)
    print('''If your cost function implementation is correct, then the
relative difference will
be small (less than 1e-9). Relative Difference:''', diff)
check_cost_function()
OUTPUT:
The columns should be very similar...
4.534710280985621 4.534710280987838
-5.15389514749387 -5.153895147477099
2.0676030457877914 2.0676030457883914
1.710344108012407 1.7103441080090966
-5.463542401216159 -5.463542401214781
6.668686071904517 6.668686071916491
-4.84571787458421 -4.845717874595353
-2.2667239972840036 -2.266723997296472
-1.6398196269129528 -1.6398196269149445
2.214904425379416 2.2149044253903263
-0.7788080677428866 -0.778808067722867
-0.4352805963669937 -0.4352805963653944
-0.736120681548158 -0.7361206815563007
6.822943656654701 6.822943656666209
-9.66455117627163 -9.664551176268647
1.8165669567338227 1.8165669567259517
1.9700208824424692 1.9700208824342398
-1.5015643731253192 -1.5015643731360209
-1.996882072887729 -1.996882072899521
16.464228418620053 16.464228418620817
-1.612611128756214 -1.6126111287545721
3.1563994247107985 3.156399424700015
-0.25795459185573577 -0.2579545918661169
-2.518921166281274 -2.5189211662717823
3.1874141775567466 3.1874141775485088
-0.8614888126778908 -0.8614888126751783
-1.0384236637683841 -1.038423663767987
If your cost function implementation is correct, then the relative
difference will
be small (less than 1e-9). Relative Difference:
9.47543254962173e-13
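$$J(x^{(1)},\dots,x^{(n_m)},\theta^{(1)},\dots,\theta^{(n_u)}) = \frac{1}{2}\sum_{(i,j):r(i,j)=1}\left((\theta^{(j)})^{T}x^{(i)} - y^{(i,j)}\right)^{2} + \left(\frac{\lambda}{2}\sum_{j=1}^{n_u}\sum_{k=1}^{n}\left(\theta_k^{(j)}\right)^{2}\right) + \left(\frac{\lambda}{2}\sum_{i=1}^{n_m}\sum_{k=1}^{n}\left(x_k^{(i)}\right)^{2}\right)$$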
def cofi_cost_func(params, Y, R, num_users, num_movies, num_features, lambda_r):
    """Return the regularized cost and the (still unregularized) gradient.

    NOTE(review): the ``def`` line and the unpacking of ``params`` are missing
    from this extract; they were reconstructed from the call
    ``cofi_cost_func(np.hstack((X.ravel(order='F'), Theta.ravel(order='F'))),
    Y, R, num_users, num_movies, num_features, 1.5)`` -- confirm against the
    original notebook.

    NOTE: as in the code this replaces, only the cost carries the
    regularization terms; the gradient has no ``lambda_r`` contribution.

    Args:
        params: 1-D vector holding X followed by Theta, each flattened in
            column-major ('F') order.
        Y: (num_movies, num_users) ratings matrix.
        R: same shape as Y; entries equal to 1 mark rated (movie, user) pairs.
        num_users: number of rows of Theta.
        num_movies: number of rows of X.
        num_features: number of columns of X and Theta.
        lambda_r: regularization strength.

    Returns:
        Tuple ``(J, grad)`` where ``grad`` is the gradient with respect to X
        followed by the gradient with respect to Theta, each flattened in
        column-major order (same layout as ``params``).
    """
    split = num_movies * num_features
    # Undo the column-major flattening done by the caller.
    X = np.reshape(params[:split], (num_movies, num_features), order='F')
    Theta = np.reshape(params[split:], (num_users, num_features), order='F')

    residual = X.dot(Theta.T) - Y
    # The whole sum sits inside one pair of parentheses: previously the X
    # penalty started a new line with "+", which Python treats as a separate
    # statement, so that term never reached J.
    J = (np.sum(R * residual ** 2) / 2
         + (lambda_r / 2) * np.sum(Theta ** 2)
         + (lambda_r / 2) * np.sum(X ** 2))

    # Zero the residual wherever no rating exists, then one matrix product
    # per parameter block replaces the per-movie and per-user loops.
    rated_residual = residual * (R == 1)
    X_grad = rated_residual.dot(Theta)
    Theta_grad = rated_residual.T.dot(X)

    grad = np.hstack((X_grad.ravel(order='F'),
                      Theta_grad.ravel(order='F')))
    return J, grad
# Evaluate the cost with regularization strength 1.5; the returned gradient
# is discarded here.
J, _ = cofi_cost_func(np.hstack((X.ravel(order='F'),
Theta.ravel(order='F'))),
Y, R, num_users, num_movies, num_features, 1.5)
print('Cost at loaded parameters (lambda = 1.5):', J)
print('\t(this value should be about 31.34)')
OUTPUT:
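$$\frac{\partial J}{\partial x_k^{(i)}} = \sum_{j:r(i,j)=1}\left((\theta^{(j)})^{T}x^{(i)} - y^{(i,j)}\right)\theta_k^{(j)} + \lambda x_k^{(i)}$$
$$\frac{\partial J}{\partial \theta_k^{(j)}} = \sum_{i:r(i,j)=1}\left((\theta^{(j)})^{T}x^{(i)} - y^{(i,j)}\right)x_k^{(i)} + \lambda \theta_k^{(j)}$$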
def cofi_cost_func(params, Y, R, num_users, num_movies, num_features, lambda_r):
    """Return the regularized collaborative-filtering cost and its gradient.

    NOTE(review): the ``def`` line and the unpacking of ``params`` are missing
    from this extract; they were reconstructed from the call sites, which pass
    ``np.hstack((X.ravel(order='F'), Theta.ravel(order='F')))`` followed by
    ``Y, R, num_users, num_movies, num_features`` and the regularization
    strength -- confirm against the original notebook.

    Args:
        params: 1-D vector holding X followed by Theta, each flattened in
            column-major ('F') order.
        Y: (num_movies, num_users) ratings matrix.
        R: same shape as Y; entries equal to 1 mark rated (movie, user) pairs.
        num_users: number of rows of Theta.
        num_movies: number of rows of X.
        num_features: number of columns of X and Theta.
        lambda_r: regularization strength.

    Returns:
        Tuple ``(J, grad)`` where ``grad`` is the gradient with respect to X
        followed by the gradient with respect to Theta, each flattened in
        column-major order (same layout as ``params``).
    """
    split = num_movies * num_features
    # Undo the column-major flattening done by the caller.
    X = np.reshape(params[:split], (num_movies, num_features), order='F')
    Theta = np.reshape(params[split:], (num_users, num_features), order='F')

    residual = X.dot(Theta.T) - Y
    # The whole sum sits inside one pair of parentheses: previously the X
    # penalty started a new line with "+", which Python treats as a separate
    # statement, so that term never reached J.
    J = (np.sum(R * residual ** 2) / 2
         + (lambda_r / 2) * np.sum(Theta ** 2)
         + (lambda_r / 2) * np.sum(X ** 2))

    # Zero the residual wherever no rating exists, then one matrix product
    # per parameter block replaces the per-movie and per-user loops.
    rated_residual = residual * (R == 1)
    X_grad = rated_residual.dot(Theta) + lambda_r * X
    Theta_grad = rated_residual.T.dot(X) + lambda_r * Theta

    grad = np.hstack((X_grad.ravel(order='F'),
                      Theta_grad.ravel(order='F')))
    return J, grad
check_cost_function(1.5)
OUTPUT:
len(movie_list)
OUTPUT:
1682
movie_list[:5]
OUTPUT:
CODE:
OUTPUT:
# Reload the ratings and prepend the new user's ratings as column 0.
mat = scipy.io.loadmat('data/ex8_movies.mat')
Y = np.hstack((my_ratings,mat['Y']))
# Any non-zero entry of my_ratings counts as a rated movie.
R = np.hstack((my_ratings!=0,mat['R']))
# In a notebook cell this displays the shape of the combined matrix.
Y.shape
OUTPUT:
(1682, 944)
# Problem sizes are taken from the combined ratings matrix.
num_users = Y.shape[1]
num_movies = Y.shape[0]
num_features = 10
# Both parameter matrices start from standard-normal random values.
X = np.random.normal(size=(num_movies, num_features))
Theta = np.random.normal(size=(num_users, num_features))
# Flatten column-major and concatenate into one parameter vector.
initial_params = np.hstack((X.ravel(order='F'), Theta.ravel(order='F')))
# Split the flat vector `theta` back into X and Theta, reversing the
# column-major flattening. The shape is passed positionally because the
# `newshape=` keyword of np.reshape is deprecated in recent NumPy releases.
# NOTE(review): `theta` is defined outside this extract -- presumably the
# optimizer's result; confirm.
X = np.reshape(theta[:num_movies*num_features],
               (num_movies, num_features), order='F')
Theta = np.reshape(theta[num_movies*num_features:],
                   (num_users, num_features), order='F')
Recommendation
# Predicted rating for every (movie, user) pair.
p = X.dot(Theta.T)
# Column 0 belongs to the user whose ratings were prepended to Y.
# NOTE(review): assumes Y_mean is a (num_movies, 1) column of per-movie means
# computed outside this extract -- confirm.
my_predictions = p[:,0] + Y_mean.T[0]
# Movie indices ordered from highest to lowest predicted rating.
sort_idxs = np.argsort(my_predictions)[::-1]
print('Top recommendations for you:')
# Slicing (rather than indexing 0..9) also copes with fewer than 10 movies.
for j in sort_idxs[:10]:
    print('Predicting rating {0} for movie {1}'.format(my_predictions[j],
                                                       movie_list[j]))
OUTPUT:
print('Original ratings provided:')
# Each r is one row of my_ratings; only rows with a positive rating are shown.
for i, r in enumerate(my_ratings):
    if r > 0:
        print('Rated {0} for {1}'.format(int(r[0]), movie_list[i]))