Mlee Lab4

The document outlines the implementation of logistic regression using various gradient descent techniques: Batch Gradient Descent (BGD), Stochastic Gradient Descent (SGD), and Mini-Batch Gradient Descent (MBGD). It includes data loading, preprocessing, model training, and evaluation with metrics such as accuracy, sensitivity, specificity, precision, F1-score, and Cohen's Kappa. The results demonstrate the performance of each gradient descent method on a dataset, providing weight vectors and confusion matrices for analysis.


Logistic Regression Implementation
Using Batch Gradient Descent (BGD), Stochastic Gradient Descent (SGD), and Mini-Batch Gradient Descent (MBGD)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset
file_path = '/content/dataLOR.csv'
data = pd.read_csv(file_path)

# Display first few rows of the dataset
print(data.head())

   6  148  72  35    0  33.6  0.627  50  1
0  1   85  66  29    0  26.6  0.351  31  0
1  8  183  64   0    0  23.3  0.672  32  1
2  1   89  66  23   94  28.1  0.167  21  0
3  0  137  40  35  168  43.1  2.288  33  1
4  5  116  74   0    0  25.6  0.201  30  0

# Split features and target variable
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

# Normalize features
X_min, X_max = np.min(X, axis=0), np.max(X, axis=0)
X_normalized = (X - X_min) / (X_max - X_min)

# Add bias term
X_normalized = np.c_[np.ones([X_normalized.shape[0], 1]), X_normalized]

# Train-test split (70%-30%)
train_percentage = 0.7
train_size = int(len(X) * train_percentage)

X_train, y_train = X_normalized[:train_size], y[:train_size]
X_test, y_test = X_normalized[train_size:], y[train_size:]
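Note that this is a sequential split on the rows in their original order, and the min-max scaling above was computed on the full dataset before splitting. A minimal sketch of an alternative, assuming a shuffled split with scaling statistics fit on the training rows only is wanted (the variable names below are illustrative and not reused later in this lab):

# Sketch (illustrative): shuffled 70/30 split with min-max statistics fit on the training rows only
rng = np.random.default_rng(0)             # fixed seed for reproducibility
perm = rng.permutation(len(X))             # random row order
X_shuf, y_shuf = X[perm], y[perm]

split = int(0.7 * len(X_shuf))
X_tr_raw, X_te_raw = X_shuf[:split], X_shuf[split:]
y_tr, y_te = y_shuf[:split], y_shuf[split:]

tr_min, tr_max = X_tr_raw.min(axis=0), X_tr_raw.max(axis=0)
X_tr = np.c_[np.ones((split, 1)), (X_tr_raw - tr_min) / (tr_max - tr_min)]
X_te = np.c_[np.ones((len(X_te_raw), 1)), (X_te_raw - tr_min) / (tr_max - tr_min)]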

# Display dataset shapes
print("Training Features Shape:", X_train.shape)
print("Testing Features Shape:", X_test.shape)
print("Training Target Shape:", y_train.shape)
print("Testing Target Shape:", y_test.shape)

w = np.zeros(X_train.shape[1])  # weight initialization

Training Features Shape: (536, 9)
Testing Features Shape: (231, 9)
Training Target Shape: (536,)
Testing Target Shape: (231,)

# Sigmoid activation function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
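For large negative z, np.exp(-z) can overflow and raise runtime warnings. A minimal sketch of a safer variant, assuming clipping the logits is acceptable (the function name and clip bounds are illustrative):

# Sketch (illustrative): sigmoid with clipped logits to avoid overflow warnings
def sigmoid_stable(z):
    z = np.clip(z, -500, 500)   # keep exp(-z) within the float64 range
    return 1 / (1 + np.exp(-z))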

# Cost function for logistic regression
def compute_cost_LOR(y, y_pred):
    m = y.shape[0]
    return -(1 / m) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

# L2-regularized cost function
def compute_cost_LOR_L2(y, y_pred, weights, lamb):
    m = y.shape[0]
    return compute_cost_LOR(y, y_pred) + (lamb / (2 * m)) * np.sum(weights ** 2)

# L1-regularized cost function
def compute_cost_LOR_L1(y, y_pred, weights, lamb):
    m = y.shape[0]
    return compute_cost_LOR(y, y_pred) + (lamb / m) * np.sum(np.abs(weights))
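As written, both penalties include the bias weight (the first entry of w, matching the leading column of ones). A minimal sketch of the common alternative that leaves the bias unpenalized, assuming the bias is the first weight (the function name is illustrative):

# Sketch (illustrative): L2 cost that excludes the bias weight from the penalty
def compute_cost_LOR_L2_no_bias(y, y_pred, weights, lamb):
    m = y.shape[0]
    # weights[0] corresponds to the bias column of ones and is left unpenalized
    return compute_cost_LOR(y, y_pred) + (lamb / (2 * m)) * np.sum(weights[1:] ** 2)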

# Batch Gradient Descent (BGD)
def batch_gradientLOR(X, y, w, alpha, iters):
    cost_history = np.zeros(iters)
    for i in range(iters):
        hypothesis = sigmoid(np.dot(X, w.T))
        w -= (alpha / len(y)) * np.dot(hypothesis - y, X)
        cost_history[i] = compute_cost_LOR(y, hypothesis)
    return w, cost_history
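For reference, np.dot(hypothesis - y, X) with a 1-D residual is the same as X^T(h - y), so the loop above implements the standard batch gradient step for the cross-entropy cost:

w \leftarrow w - \frac{\alpha}{m}\, X^{\top}\bigl(\sigma(Xw) - y\bigr), \qquad m = \mathrm{len}(y)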

def batch_gradient_descentLOR_L2(X, y, w, alpha, iters, lamb):
    cost_history = np.zeros(iters)
    for i in range(iters):
        hypothesis = sigmoid(np.dot(X, w.T))
        w = w * (1 - (alpha * lamb) / len(y)) - (alpha / len(y)) * np.dot(hypothesis - y, X)
        cost_history[i] = compute_cost_LOR_L2(y, hypothesis, w, lamb)
    return w, cost_history

def batch_gradient_descent_l1(X, y, w, alpha, iters, lamb):
    cost_history = np.zeros(iters)
    for i in range(iters):
        hypothesis = sigmoid(np.dot(X, w.T))
        w = w - (alpha / len(y)) * (X.T.dot(hypothesis - y) - (lamb / len(y)) * np.sign(w))
        cost_history[i] = compute_cost_LOR_L1(y, hypothesis, w, lamb)
    return w, cost_history

# Stochastic Gradient Descent (SGD)
def SGD_gradientLOR(X, y, w, alpha, iters):
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y) - 1)
        ind_x, ind_y = X[rand_index:rand_index+1], y[rand_index:rand_index+1]
        hypothesis = sigmoid(np.dot(ind_x, w.T))
        w -= alpha * np.dot((hypothesis - ind_y), ind_x)
        cost_history[i] = compute_cost_LOR(ind_y, hypothesis)
    return w, cost_history

def stochastic_gradient_descentLOR_L2(X, y, w, alpha, iters, lamb):
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y) - 1)
        ind_x = X[rand_index:rand_index+1]
        ind_y = y[rand_index:rand_index+1]
        hypothesis = sigmoid(np.dot(ind_x, w.T))
        w = w * (1 - (alpha * lamb) / len(ind_y)) - (alpha / len(ind_y)) * np.dot((hypothesis - ind_y), ind_x)
        cost_history[i] = compute_cost_LOR_L2(ind_y, hypothesis, w, lamb)
    return w, cost_history

def stochastic_gradient_descent_l1(X, y, w, alpha, iters, lamb):
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y) - 1)
        ind_x = X[rand_index:rand_index+1]
        ind_y = y[rand_index:rand_index+1]
        hypothesis = sigmoid(np.dot(ind_x, w.T))
        w = w - (alpha / len(ind_y)) * (ind_x.T.dot(hypothesis - ind_y) - (lamb / len(ind_y)) * np.sign(w))
        cost_history[i] = compute_cost_LOR_L1(ind_y, hypothesis, w, lamb)
    return w, cost_history

# Mini-Batch Gradient Descent (MBGD)
def MBGD_gradientLOR(X, y, w, alpha, iters, batch_size):
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y) - batch_size)
        ind_x, ind_y = X[rand_index:rand_index+batch_size], y[rand_index:rand_index+batch_size]
        hypothesis = sigmoid(np.dot(ind_x, w.T))
        w -= (alpha / batch_size) * (ind_x.T.dot(hypothesis - ind_y))
        cost_history[i] = compute_cost_LOR(ind_y, hypothesis)
    return w, cost_history
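The mini-batch above is a contiguous window of rows starting at a random index, so neighbouring rows are always drawn together. A minimal sketch of drawing a random subset instead, assuming sampling without replacement is preferred (the idx variable is illustrative):

# Sketch (illustrative): draw a random mini-batch without replacement instead of a contiguous slice
idx = np.random.choice(len(y), size=batch_size, replace=False)
ind_x, ind_y = X[idx], y[idx]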

def MB_gradient_descentLOR_L2(X, y, w, alpha, iters, batch_size, lamb):
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y) - batch_size)
        ind_x = X[rand_index:rand_index+batch_size]
        ind_y = y[rand_index:rand_index+batch_size]
        hypothesis = sigmoid(np.dot(ind_x, w.T))
        w = w * (1 - (alpha * lamb) / batch_size) - (alpha / batch_size) * (ind_x.T.dot(hypothesis - ind_y))
        cost_history[i] = compute_cost_LOR_L2(ind_y, hypothesis, w, lamb)
    return w, cost_history

def MB_gradient_descent_l1(X, y, w, alpha, iters, batch_size, lamb):
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y) - batch_size)
        ind_x = X[rand_index:rand_index+batch_size]
        ind_y = y[rand_index:rand_index+batch_size]
        hypothesis = sigmoid(np.dot(ind_x, w.T))
        w = w - (alpha / batch_size) * (ind_x.T.dot(hypothesis - ind_y) - (lamb / batch_size) * np.sign(w))
        cost_history[i] = compute_cost_LOR_L1(ind_y, hypothesis, w, lamb)
    return w, cost_history

# Model Evaluation Function
def evaluate_classification_metrics(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == 0) & (y_pred == 0))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    accuracy = (TP + TN) / (TP + TN + FP + FN)
    sensitivity = TP / (TP + FN) if (TP + FN) != 0 else 0
    specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    recall = sensitivity
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    # Cohen's Kappa
    total = TP + TN + FP + FN
    po = accuracy
    pe = ((TP + FP) * (TP + FN) + (TN + FP) * (TN + FN)) / (total * total)
    kappa = (po - pe) / (1 - pe)

    print(f"Confusion Matrix:\n[[{TN} {FP}]\n [{FN} {TP}]]")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"F1-Score: {f1_score:.4f}")
    print(f"Cohen's Kappa: {kappa:.4f}")
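As a cross-check of these hand-computed metrics, assuming scikit-learn is available in the runtime, the same quantities can be obtained from sklearn.metrics (the wrapper function name below is illustrative):

# Sketch (illustrative): verify the hand-computed metrics against scikit-learn
from sklearn.metrics import (accuracy_score, cohen_kappa_score, confusion_matrix,
                             f1_score, precision_score, recall_score)

def evaluate_with_sklearn(y_true, y_pred):
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Recall (Sensitivity):", recall_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred))
    print("F1-Score:", f1_score(y_true, y_pred))
    print("Cohen's Kappa:", cohen_kappa_score(y_true, y_pred))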

# Model Training & Evaluation
alpha = 0.8   # Learning rate
iters = 3000  # Iterations

# Training BGD
batch_w, J_his = batch_gradientLOR(X_train, y_train, np.zeros(X_train.shape[1]), alpha, iters)
plt.plot(range(iters), J_his)
plt.show()

bgd = batch_w
print("WEIGHT VECTOR", bgd)
y_p = sigmoid(np.dot(X_test, bgd))
y_pred = np.where(y_p >= 0.5, 1, 0)
evaluate_classification_metrics(y_test, y_pred)
WEIGHT VECTOR [-7.40788569  2.09931485  6.09559795 -1.29255361  0.09041135 -0.91725038  5.85874336  2.07590874  0.33689408]
Confusion Matrix:
[[140 12]
[35 44]]
Accuracy: 0.7965
Sensitivity (Recall): 0.5570
Specificity: 0.9211
Precision: 0.7857
F1-Score: 0.6519
Cohen's Kappa: 0.5139

alpha = 0.8    # learning rate
iters = 10000  # iterations

# Training SGD
sgd_w, J_his = SGD_gradientLOR(X_train, y_train, w, alpha, iters)
plt.plot(range(iters), J_his)
plt.show()

sgd = sgd_w
print("WEIGHT VECTOR", sgd)
y_p = sigmoid(np.dot(X_test, sgd))
y_pred = np.where(y_p >= 0.5, 1, 0)
evaluate_classification_metrics(y_test, y_pred)
WEIGHT VECTOR [-9.18806781  1.43797825  6.88130076 -0.99367934  0.28946922  0.021364  8.73590391  2.51991963  0.02451394]
Confusion Matrix:
[[129 23]
[32 47]]
Accuracy: 0.7619
Sensitivity (Recall): 0.5949
Specificity: 0.8487
Precision: 0.6714
F1-Score: 0.6309
Cohen's Kappa: 0.4561

alpha = 0.8   # learning rate
iters = 6000  # iterations
batch_size = 100

# Training MBGD
mbgd_w, J_his = MBGD_gradientLOR(X_train, y_train, w, alpha, iters, batch_size)
plt.plot(range(iters), J_his)
plt.show()

mbgd = mbgd_w
print("WEIGHT VECTOR", mbgd)
y_p = sigmoid(np.dot(X_test, mbgd))
y_pred = np.where(y_p >= 0.5, 1, 0)
evaluate_classification_metrics(y_test, y_pred)

WEIGHT VECTOR [-8.36700826  1.98994993  6.59379274 -1.00677037 -0.60810991 -1.53727099  7.09880355  2.68807685  0.29989812]
Confusion Matrix:
[[139 13]
[34 45]]
Accuracy: 0.7965
Sensitivity (Recall): 0.5696
Specificity: 0.9145
Precision: 0.7759
F1-Score: 0.6569
Cohen's Kappa: 0.5171

alpha = 0.3       # learning rate
iters = 6000      # iterations
batch_size = 50   # batch size
lamb = 0.001      # regularization strength

# Training MBGD with L2 regularization
batch_w, J_his = MB_gradient_descentLOR_L2(X_train, y_train, w, alpha, iters, batch_size, lamb)
plt.plot(range(iters), J_his)
plt.show()

bgd = batch_w
print("WEIGHT VECTOR", bgd)
y_pred_bgd = X_test.dot(bgd.T)   # note: the 0.5 threshold is applied to the raw linear score here, not to the sigmoid output as in the earlier runs
y_pred = np.where(y_pred_bgd >= 0.5, 1, 0)
evaluate_classification_metrics(y_test, y_pred)

WEIGHT VECTOR [-7.78414012  2.21251319  6.17019818 -1.29495812 -0.39642982 -1.34043533  6.98233024  2.40259707  0.08713487]
Confusion Matrix:
[[142 10]
[41 38]]
Accuracy: 0.7792
Sensitivity (Recall): 0.4810
Specificity: 0.9342
Precision: 0.7917
F1-Score: 0.5984
Cohen's Kappa: 0.4584

alpha = 0.8       # learning rate
iters = 6000      # iterations
batch_size = 50   # batch size
lamb = 0.01       # regularization strength

# Training MBGD with L1 regularization
batch_w, J_his = MB_gradient_descent_l1(X_train, y_train, w, alpha, iters, batch_size, lamb)
plt.plot(range(iters), J_his)
plt.show()

bgd = batch_w
print("WEIGHT VECTOR", bgd)
y_pred_bgd = X_test.dot(bgd.T)   # threshold on the raw linear score, as in the L2 run above
y_pred = np.where(y_pred_bgd >= 0.5, 1, 0)
evaluate_classification_metrics(y_test, y_pred)

WEIGHT VECTOR [-8.06887238  2.15809791  6.44260528 -1.37347245 -0.61465674 -1.47737089  7.0884864   2.5224105   0.18476219]
Confusion Matrix:
[[142 10]
[43 36]]
Accuracy: 0.7706
Sensitivity (Recall): 0.4557
Specificity: 0.9342
Precision: 0.7826
F1-Score: 0.5760
Cohen's Kappa: 0.4334
