
Mlee Lab1

The document contains Python code for implementing various gradient descent algorithms (batch, stochastic, and mini-batch) for linear regression using a dataset loaded from a CSV file. It includes functions for calculating cost, mean absolute error, mean square error, correlation coefficient, and R-squared value, along with data normalization and splitting into training and testing sets. The code visualizes the cost over iterations and compares predicted outputs against actual outputs using various metrics.
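For reference, the objective the code minimizes is the ordinary least-squares cost, and batch gradient descent updates the weights with the averaged gradient. This is only a summary of what cost_function and batch_gradient_descent below compute, with m, alpha, X, y, and w used as in the code:

J(w) = \frac{1}{2m} \sum_{i=1}^{m} \left( x^{(i)} w - y^{(i)} \right)^{2}

w \leftarrow w - \frac{\alpha}{m} X^{\top} (X w - y)

Stochastic gradient descent applies the same update using a single randomly chosen sample (m = 1), and mini-batch gradient descent uses a randomly chosen contiguous slice of batch_size samples.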


from google.colab import drive
drive.mount('/content/drive')   # mount Google Drive so the CSV below is reachable

filepath = '/content/drive/MyDrive/data.csv'

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data=pd.read_csv(filepath)

# Separate features (all columns but the last) from the target (last column)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

def cost_function(X, y, w):
    hypothesis = np.dot(X, w.T)
    J = (1 / (2 * len(y))) * np.sum((hypothesis - y) ** 2)
    return J

def batch_gradient_descent(X, y, w, alpha, iters):
    cost_history = np.zeros(iters)
    for i in range(iters):
        hypothesis = np.dot(X, w.T)
        w = w - (alpha / len(y)) * np.dot(hypothesis - y, X)
        cost_history[i] = cost_function(X, y, w)
    return w, cost_history

def stochastic_gradient_descent(X, y, w, alpha, iters):
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y))   # pick one random training sample
        ind_x = X[rand_index:rand_index + 1]
        ind_y = y[rand_index:rand_index + 1]
        w = w - alpha * np.dot(np.dot(ind_x, w.T) - ind_y, ind_x)
        cost_history[i] = cost_function(ind_x, ind_y, w)
    return w, cost_history

def MB_gradient_descent(X, y, w, alpha, iters, batch_size):
    cost_history = np.zeros(iters)
    for i in range(iters):
        # pick the start of a contiguous mini-batch (+1 keeps the last full batch reachable)
        rand_index = np.random.randint(len(y) - batch_size + 1)
        ind_x = X[rand_index:rand_index + batch_size]
        ind_y = y[rand_index:rand_index + batch_size]
        w = w - (alpha / batch_size) * (ind_x.T.dot(ind_x.dot(w) - ind_y))
        cost_history[i] = cost_function(ind_x, ind_y, w)
    return w, cost_history

def mean_abs_error(Ypred, Yact):
    abs_error = np.abs(Yact - Ypred)
    ma_error = np.sum(abs_error) / Ypred.shape[0]
    return ma_error

def mean_square_error(Ypred, Yact):
    # vectorized; the original per-sample loop recomputed the same array on every pass
    sq_error = (Yact - Ypred) ** 2
    ms_error = np.sum(sq_error) / Ypred.shape[0]
    return ms_error

def r_squared(y_true, y_pred):
    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)
    return 1 - (ss_residual / ss_total)

def correcoff(Ypred, Yact):
    ypm = np.mean(Ypred)   # mean of predicted values
    yam = np.mean(Yact)    # mean of actual values
    sum_numerator = np.sum((Yact - yam) * (Ypred - ypm))
    sum_d1 = np.sqrt(np.sum((Yact - yam) ** 2))
    sum_d2 = np.sqrt(np.sum((Ypred - ypm) ** 2))
    th_error = sum_numerator / (sum_d1 * sum_d2)
    return th_error
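As a quick sanity check, these hand-written metrics can be compared against library implementations. The snippet below is a minimal sketch with made-up placeholder arrays; scikit-learn is not used anywhere else in this notebook and is only an assumed optional dependency here:

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# placeholder arrays standing in for actual and predicted outputs
y_true = np.array([3.0, 2.5, 4.1, 3.7])
y_pred = np.array([2.8, 2.7, 3.9, 3.5])

print(mean_absolute_error(y_true, y_pred))   # should match mean_abs_error(y_pred, y_true)
print(mean_squared_error(y_true, y_pred))    # should match mean_square_error(y_pred, y_true)
print(r2_score(y_true, y_pred))              # should match r_squared(y_true, y_pred)
print(np.corrcoef(y_true, y_pred)[0, 1])     # should match correcoff(y_pred, y_true)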

X = data.iloc[:, :-1]   # all rows, all columns except the last
y = data.iloc[:, -1]    # the last column is the target

X = np.asarray(X)
y = np.asarray(y)

m = X.shape[0]
xmin = np.min(X, axis=0)
xmax = np.max(X, axis=0)
X_normalized = (X - xmin) / (xmax - xmin)   # min-max scale each feature to [0, 1]

ymin = np.min(y, axis=0)
ymax = np.max(y, axis=0)
y_normalized = (y - ymin) / (ymax - ymin)   # min-max scale the target to [0, 1]

m = X_normalized.shape[0]
pp = np.ones([m, 1])
X_normalized = np.append(pp, X_normalized, axis=1)   # prepend a bias column of ones

train_percentage = 0.70
train_size = int(len(X) * train_percentage)
# split the data into training and testing
X_train = X_normalized[:train_size]
y_train = y_normalized[:train_size]
X_test = X_normalized[train_size:]
y_test = y_normalized[train_size:]
print("Training Features Shape:", X_train.shape)
print("Testing Features Shape:", X_test.shape)
print("Training Target Shape:", y_train.shape)
print("Testing Target Shape:", y_test.shape)

Training Features Shape: (56, 9)
Testing Features Shape: (25, 9)
Training Target Shape: (56,)
Testing Target Shape: (25,)

w = np.zeros(X_normalized.shape[1])   # weight initialization
w.shape

(9,)

alpha = 0.4    # learning rate
iters = 1500   # iterations
batch_w,J_his = batch_gradient_descent(X_train,y_train,w,alpha,iters)
plt.plot(range(iters),J_his)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('BGD Cost vs Iterations')
plt.show()
bgd=batch_w
print("WEIGHT VECTOR",bgd)

y_pred_bgd=X_test.dot(bgd.T)

WEIGHT VECTOR [0.21794334 0.10606424 0.0478828  0.03935448 0.03229864 0.03136558
 0.11796494 0.06546174 0.03341064]

yp_unnorm=(y_pred_bgd*(ymax-ymin))+ymin
y_test_unnorm=(y_test*(ymax-ymin))+ymin

a=mean_abs_error(yp_unnorm,y_test_unnorm)
print("MEAN ABSOLUTE ERROR",a)

MEAN ABSOLUTE ERROR 1.1326579129012353

b=mean_square_error(yp_unnorm,y_test_unnorm)
print("MEAN SQUARE ERROR:",b)

MEAN SQUARE ERROR: 1.819265635873902

c=correcoff(yp_unnorm,y_test_unnorm)
print("CORRELATION COEFF:",c)

CORRELATION COEFF: 0.2602600569650748

d=r_squared(y_test_unnorm, yp_unnorm)   # r_squared expects (y_true, y_pred)
print("R2:",d)

R2: -226.79600202581813

plt.figure(figsize=(8, 6))
plt.plot(y_test_unnorm, label='Actual Output', color='blue',
marker='o')
plt.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Output Value')
plt.title('Predicted vs Actual Output (Line Plot)')
plt.legend()
plt.show()
alpha=0.4
iters=5000 ###iterations
w_n_l2,J_sgd_l2 = stochastic_gradient_descent(X_train,y_train,w,alpha,
iters)
plt.plot(range(iters),J_sgd_l2)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('SGD Cost vs Iterations')
plt.show()
sgd=w_n_l2
print("WEIGHT VECTOR",bgd)
y_pred_sgd=X_test.dot(sgd.T)
yp_unnorm=(y_pred_sgd*(ymax-ymin))+ymin
y_test_unnorm=(y_test*(ymax-ymin))+ymin
a=mean_abs_error(yp_unnorm,y_test_unnorm)
print("MEAN ABSOLUTE ERROR",a)

b=mean_square_error(yp_unnorm,y_test_unnorm)
print("MEAN SQUARE ERROR:",b)
c=correcoff(y_test_unnorm, yp_unnorm)
print("CORRELATION COEFF:",c)
d=r_squared(y_test_unnorm, yp_unnorm)   # r_squared expects (y_true, y_pred)
print("R2:",d)
plt.figure(figsize=(8, 6))
plt.plot(y_test_unnorm, label='Actual Output', color='blue',
marker='o')
plt.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Output Value')
plt.title('Predicted vs Actual Output (Line Plot)')
plt.legend()
plt.show()

WEIGHT VECTOR [0.21794334 0.10606424 0.0478828  0.03935448 0.03229864 0.03136558
 0.11796494 0.06546174 0.03341064]
MEAN ABSOLUTE ERROR 0.9617008469068318
MEAN SQUARE ERROR: 1.6948874101095208
CORRELATION COEFF: 0.7968621371120339
R2: -6.284048634553031
alpha=0.4
iters=2000 ###iterations
batch_size=15
mb_w_l1,J_mb_l1 = MB_gradient_descent(X_train,y_train,w,alpha, iters,
batch_size)
plt.plot(range(iters),J_mb_l1)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('MBGD Cost vs Iterations')
plt.show()
mbgd=mb_w_l1

print("WEIGHT VECTOR",bgd)
y_pred_mbgd=X_test.dot(mbgd.T)
yp_unnorm=(y_pred_mbgd*(ymax-ymin))+ymin
y_test_unnorm=(y_test*(ymax-ymin))+ymin
a=mean_abs_error(yp_unnorm,y_test_unnorm)
print("MEAN ABSOLUTE ERROR",a)
b=mean_square_error(yp_unnorm,y_test_unnorm)
print("MEAN SQUARE ERROR:",b)
c=correcoff(yp_unnorm,y_test_unnorm)
print("CORRELATION COEFF:",c)
d=r_squared(y_test_unnorm, yp_unnorm)   # r_squared expects (y_true, y_pred)
print("R2:",d)
plt.figure(figsize=(8, 6))
plt.plot(y_test_unnorm, label='Actual Output', color='blue',
marker='o')
plt.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Output Value')
plt.title('Predicted vs Actual Output')
plt.legend()
plt.show()

WEIGHT VECTOR [0.21794334 0.10606424 0.0478828  0.03935448 0.03229864 0.03136558
 0.11796494 0.06546174 0.03341064]
MEAN ABSOLUTE ERROR 1.0860545963179458
MEAN SQUARE ERROR: 1.7247709330182672
CORRELATION COEFF: 0.39688976236696366
R2: -293.6095175999243
