# Mlee Lab1
# Mlee Lab1
# Path of the CSV file that holds the data set.
filepath='/content/drive/MyDrive/data.csv'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the whole CSV file into a DataFrame.
data=pd.read_csv(filepath)
# NOTE(review): `re.X` is the regex VERBOSE flag, and the name `X` is rebound
# on the very next line, so this import has no lasting effect. It looks like an
# accidental auto-import -- confirm and remove.
from re import X
# Features: every column except the last one, as a NumPy array.
X=data.iloc[:,:-1].values
# Target: the last column, as a NumPy array.
y=data.iloc[:,-1].values
def cost_function(X, y, w):
    """Return the halved mean squared error of the linear model ``X . w``.

    The value is ``sum((X . w - y) ** 2) / (2 * len(y))``.

    Args:
        X: Design matrix, one row per sample.
            (assumes shape (m, n) -- TODO confirm against callers)
        y: Target values, one per sample.
        w: Weight vector. ``w.T`` is used in the product, which is a no-op
            for a 1-D vector. (assumes shape (n,) -- TODO confirm)

    Returns:
        The scalar cost ``J``.
    """
    residuals = np.dot(X, w.T) - y
    return (1 / (2 * len(y))) * np.sum(residuals ** 2)
def batch_gradient_descent(X, y, w, alpha, iters):
    """Fit linear-model weights with full-batch gradient descent.

    Every iteration uses all rows of ``X`` to compute the gradient, takes one
    step of size ``alpha / len(y)`` along it, and records the cost of the
    updated weights via ``cost_function``.

    Args:
        X: Design matrix, one row per sample.
            (assumes shape (m, n) -- TODO confirm against callers)
        y: Target values, one per sample.
        w: Initial weight vector. It is rebound, not modified in place, so
            the caller's array is left untouched.
        alpha: Step size.
        iters: Number of update steps to run.

    Returns:
        A tuple ``(w, cost_history)`` with the final weights and an array of
        length ``iters`` holding the cost after each update.
    """
    cost_history = np.zeros(iters)
    n_samples = len(y)  # loop-invariant, so computed once
    for i in range(iters):
        residuals = np.dot(X, w.T) - y
        w = w - (alpha / n_samples) * np.dot(residuals, X)
        cost_history[i] = cost_function(X, y, w)
    return w, cost_history
# NOTE(review): these statements look like the loop body and tail of a
# stochastic gradient descent routine, but the `def` header, the enclosing
# loop, the creation of `cost_history` and the assignment of `rand_index` are
# not present in this file. The script later calls
# `stochastic_gradient_descent(X_train, y_train, w, alpha, iters)`, so
# presumably these lines belong to that function -- restore the missing
# header before running.
# Pick a single sample: one-row slices of X and y at `rand_index`.
ind_x = X[rand_index:rand_index+1]
ind_y = y[rand_index:rand_index+1]
# Update the weights from that one sample's error (step size `alpha`).
w = w- alpha * np.dot((np.dot(ind_x,w.T) - ind_y), ind_x)
# The recorded cost is evaluated on the selected sample only.
cost_history[i] = cost_function(ind_x,ind_y,w)
return w, cost_history
def mean_abs_error(Ypred, Yact):
    """Return the mean absolute error between predictions and actual values.

    Args:
        Ypred: NumPy array of predicted values; its first dimension is the
            sample count used as the divisor.
        Yact: Actual values, broadcast-compatible with ``Ypred``.

    Returns:
        ``sum(|Yact - Ypred|) / Ypred.shape[0]``, summed along the first
        axis: a scalar for 1-D inputs, one value per column for 2-D inputs.
    """
    abs_errors = np.abs(Yact - Ypred)
    # Reduce along axis 0 only, matching what the builtin ``sum`` does when
    # it iterates over the rows of an array.
    return np.sum(abs_errors, axis=0) / Ypred.shape[0]
def mean_square_error(Ypred, Yact):
    """Return the mean squared error between predictions and actual values.

    The squared differences are computed once as a vectorised expression;
    no per-sample loop is needed.

    Args:
        Ypred: NumPy array of predicted values; its first dimension is the
            sample count used as the divisor.
        Yact: Actual values, broadcast-compatible with ``Ypred``.

    Returns:
        ``sum((Yact - Ypred) ** 2) / Ypred.shape[0]``, summed along the first
        axis: a scalar for 1-D inputs, one value per column for 2-D inputs.
    """
    squared_errors = (Yact - Ypred) ** 2
    # Reduce along axis 0 only, matching what the builtin ``sum`` does when
    # it iterates over the rows of an array.
    return np.sum(squared_errors, axis=0) / Ypred.shape[0]
def correcoff(Ypred, Yact):
    """Return the Pearson correlation coefficient of predictions vs actuals.

    Computed as the sum of products of the mean-centred series divided by
    the product of the square roots of their summed squared deviations.

    Args:
        Ypred: NumPy array of predicted values.
        Yact: NumPy array of actual values, same shape as ``Ypred``.

    Returns:
        The correlation coefficient. If either series is constant the
        denominator is zero and the result is not a finite number.
    """
    pred_dev = Ypred - np.mean(Ypred)  # deviations from the prediction mean
    act_dev = Yact - np.mean(Yact)  # deviations from the actual mean

    numerator = np.sum(act_dev * pred_dev, axis=0)
    act_norm = np.sqrt(np.sum(act_dev ** 2, axis=0))
    pred_norm = np.sqrt(np.sum(pred_dev ** 2, axis=0))
    return numerator / (act_norm * pred_norm)
# --- Scale the data and split it into training and test sets ---
X = np.asarray(X)
y = np.asarray(y)

# Min-max scale every feature column into [0, 1].
# NOTE(review): a constant column makes (xmax - xmin) zero and yields
# non-finite values -- confirm the data set has no such column.
xmin = np.min(X, axis=0)
xmax = np.max(X, axis=0)
X_normalized = (X - xmin) / (xmax - xmin)

# Min-max scale the target the same way. `ymin` and `ymax` are kept because
# the evaluation code further down uses them to map predictions back with
# `value * (ymax - ymin) + ymin`.
ymin = np.min(y)
ymax = np.max(y)
y_normalized = (y - ymin) / (ymax - ymin)

# Prepend a column of ones so the first weight acts as the intercept.
m = X_normalized.shape[0]
pp = np.ones([m, 1])
X_normalized = np.append(pp, X_normalized, axis=1)

train_percentage = 0.70
train_size = int(len(X) * train_percentage)

# Split the data into training and testing. The split is positional (the
# first `train_size` rows train, the remainder test); no shuffling is done.
X_train = X_normalized[:train_size]
y_train = y_normalized[:train_size]
X_test = X_normalized[train_size:]
y_test = y_normalized[train_size:]

print("Training Features Shape:", X_train.shape)
print("Testing Features Shape:", X_test.shape)
print("Training Target Shape:", y_train.shape)
print("Testing Target Shape:", y_test.shape)
# Output: (9,)
# Evaluate the weight vector `bgd` on the test split.
# NOTE(review): `bgd` and `r_squared` are not defined in the visible part of
# this file -- confirm they exist before this point. `ymin` / `ymax` must hold
# the target minimum and maximum that were used to scale `y`.
y_pred_bgd=X_test.dot(bgd.T)
# Map predictions and targets back to the original units by multiplying with
# (ymax - ymin) and adding ymin.
yp_unnorm=(y_pred_bgd*(ymax-ymin))+ymin
y_test_unnorm=(y_test*(ymax-ymin))+ymin
# Report the error metrics; each helper takes (predicted, actual).
a=mean_abs_error(yp_unnorm,y_test_unnorm)
print("MEAN ABSOLUTE ERROR",a)
b=mean_square_error(yp_unnorm,y_test_unnorm)
print("MEAN SQUARE ERROR:",b)
c=correcoff(yp_unnorm,y_test_unnorm)
print("CORRELATION COEFF:",c)
d=r_squared(yp_unnorm,y_test_unnorm)
print("R2:",d)
# Output: R2: -226.79600202581813
# Line plot of actual vs predicted outputs, one point per test sample.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(y_test_unnorm, label='Actual Output', color='blue', marker='o')
ax.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Output Value')
ax.set_title('Predicted vs Actual Output (Line Plot)')
ax.legend()
plt.show()
# Settings for the stochastic gradient descent run: step size and number of
# iterations.
alpha=0.4
iters=5000 ###iterations
# NOTE(review): `w` (the initial weights) and the `def` of
# `stochastic_gradient_descent` are not present in the visible part of this
# file -- confirm they are defined before this call.
# The call returns the fitted weights and the per-iteration cost values.
w_n_l2,J_sgd_l2 = stochastic_gradient_descent(X_train,y_train,w,alpha,
iters)
# Plot the recorded cost against the iteration number.
plt.plot(range(iters),J_sgd_l2)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('SGD Cost vs Iterations')
plt.show()
# --- Stochastic gradient descent: report metrics on the test split ---
sgd = w_n_l2
# Print the weights learned by SGD (this section previously printed `bgd`,
# the weights of a different run).
print("WEIGHT VECTOR", sgd)
y_pred_sgd = X_test.dot(sgd.T)
# Map predictions and targets back to the original units by multiplying with
# (ymax - ymin) and adding ymin.
yp_unnorm = (y_pred_sgd * (ymax - ymin)) + ymin
y_test_unnorm = (y_test * (ymax - ymin)) + ymin
# Each metric helper takes (predicted, actual).
a = mean_abs_error(yp_unnorm, y_test_unnorm)
print("MEAN ABSOLUTE ERROR", a)
b = mean_square_error(yp_unnorm, y_test_unnorm)
print("MEAN SQUARE ERROR:", b)
# Argument order now matches the other sections; the coefficient is
# symmetric in its two inputs, so the printed value is unaffected.
c = correcoff(yp_unnorm, y_test_unnorm)
print("CORRELATION COEFF:", c)
# NOTE(review): `r_squared` is not defined in the visible part of this file.
d = r_squared(yp_unnorm, y_test_unnorm)
print("R2:", d)
# Line plot of actual vs predicted outputs, one point per test sample.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(y_test_unnorm, label='Actual Output', color='blue', marker='o')
ax.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Output Value')
ax.set_title('Predicted vs Actual Output (Line Plot)')
ax.legend()
plt.show()
# --- Evaluate the weight vector `mbgd` on the test split ---
# NOTE(review): `mbgd` and `r_squared` are not defined in the visible part of
# this file -- confirm they exist before this point.
# Print the weights being evaluated here (this section previously printed
# `bgd`, the weights of a different run).
print("WEIGHT VECTOR", mbgd)
y_pred_mbgd = X_test.dot(mbgd.T)
# Map predictions and targets back to the original units by multiplying with
# (ymax - ymin) and adding ymin.
yp_unnorm = (y_pred_mbgd * (ymax - ymin)) + ymin
y_test_unnorm = (y_test * (ymax - ymin)) + ymin
# Each metric helper takes (predicted, actual).
a = mean_abs_error(yp_unnorm, y_test_unnorm)
print("MEAN ABSOLUTE ERROR", a)
b = mean_square_error(yp_unnorm, y_test_unnorm)
print("MEAN SQUARE ERROR:", b)
c = correcoff(yp_unnorm, y_test_unnorm)
print("CORRELATION COEFF:", c)
d = r_squared(yp_unnorm, y_test_unnorm)
print("R2:", d)
# Line plot of actual vs predicted outputs, one point per test sample.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(y_test_unnorm, label='Actual Output', color='blue', marker='o')
ax.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Output Value')
ax.set_title('Predicted vs Actual Output')
ax.legend()
plt.show()