Assignment 1

Name: SAIKAT MOI
Roll Number: 20CS10050
# import all the necessary libraries here
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

data = pd.read_csv('../../dataset/linear-regression.csv')
print(data)

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0               7.4             0.700         0.00             1.9      0.076
1               7.8             0.880         0.00             2.6      0.098
2               7.8             0.760         0.04             2.3      0.092
3              11.2             0.280         0.56             1.9      0.075
4               7.4             0.700         0.00             1.9      0.076
...             ...               ...          ...             ...        ...
1594            6.2             0.600         0.08             2.0      0.090
1595            5.9             0.550         0.10             2.2      0.062
1596            6.3             0.510         0.13             2.3      0.076
1597            5.9             0.645         0.12             2.0      0.075
1598            6.0             0.310         0.47             3.6      0.067

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                    11.0                  34.0  0.99780  3.51       0.56
1                    25.0                  67.0  0.99680  3.20       0.68
2                    15.0                  54.0  0.99700  3.26       0.65
3                    17.0                  60.0  0.99800  3.16       0.58
4                    11.0                  34.0  0.99780  3.51       0.56
...                   ...                   ...      ...   ...        ...
1594                 32.0                  44.0  0.99490  3.45       0.58
1595                 39.0                  51.0  0.99512  3.52       0.76
1596                 29.0                  40.0  0.99574  3.42       0.75
1597                 32.0                  44.0  0.99547  3.57       0.71
1598                 18.0                  42.0  0.99549  3.39       0.66

      alcohol  quality
0         9.4        5
1         9.8        5
2         9.8        5
3         9.8        6
4         9.4        5
...       ...      ...
1594     10.5        5
1595     11.2        6
1596     11.0        6
1597     10.2        5
1598     11.0        6

[1599 rows x 12 columns]

X = data.drop(columns=['quality'])
y = data['quality']

# Feature scaling is a common preprocessing step in machine learning that
# helps improve the performance and convergence of gradient-based optimizers.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y,
                                                    test_size=0.5, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp,
                                                test_size=0.4, random_state=42)

print("Train set shapes - X:", X_train.shape, "y:", y_train.shape)


print("Validation set shapes - X:", X_val.shape, "y:", y_val.shape)
print("Test set shapes - X:", X_test.shape, "y:", y_test.shape)
Train set shapes - X: (799, 11) y: (799,)
Validation set shapes - X: (480, 11) y: (480,)
Test set shapes - X: (320, 11) y: (320,)
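
Note: the scaler above is fit on the full feature matrix before splitting, so statistics from the validation and test rows leak into the training data. A minimal leakage-free variant, assuming the same 50/30/20 proportions and variable names as above, could look like this (the *_raw names are illustrative, not from the original code):

# Split the raw features first, then fit the scaler on the training rows only.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X, y, test_size=0.5, random_state=42)
X_val_raw, X_test_raw, y_val, y_test = train_test_split(
    X_temp_raw, y_temp, test_size=0.4, random_state=42)

scaler = StandardScaler().fit(X_train_raw)   # statistics from training rows only
X_train = scaler.transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)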

# Concatenate a column vector of ones with the X_train matrix, effectively
# adding a new column at the beginning of X_train.
# The bias term allows the model to learn an offset from the origin.

X_train_with_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]

# Normal Equation: θ = (X^T X)^(-1) X^T y
theta = (np.linalg.inv(X_train_with_bias.T @ X_train_with_bias)
         @ X_train_with_bias.T @ y_train)
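
Explicitly inverting X^T X can be numerically ill-conditioned. A sketch of an equivalent, better-conditioned formulation (solving the normal equations as a linear system, or using least squares directly; either should match theta above up to floating-point error):

# Solve (X^T X) θ = X^T y instead of forming the inverse explicitly.
theta_solve = np.linalg.solve(X_train_with_bias.T @ X_train_with_bias,
                              X_train_with_bias.T @ y_train)

# Or let NumPy's least-squares routine handle possible rank deficiency as well.
theta_lstsq, *_ = np.linalg.lstsq(X_train_with_bias, y_train, rcond=None)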

# Predict on the test set
X_test_with_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]
y_test_pred = X_test_with_bias @ theta

# Calculate R-squared on the test set: R^2 = 1 - SSR/SST
ssr = np.sum((y_test - y_test_pred) ** 2)
sst = np.sum((y_test - np.mean(y_test)) ** 2)
r2 = 1 - (ssr / sst)

# Calculate RMSE on the test set: RMSE = sqrt((1 / n) * Σ(y_pred - y_actual)^2)
rmse = np.sqrt(np.mean((y_test - y_test_pred) ** 2))

print("Test Set - R-squared:", r2)


print("Test Set - Root Mean Squared Error (RMSE):", rmse)

Test Set - R-squared: 0.3967805084557149
Test Set - Root Mean Squared Error (RMSE): 0.6657115962476505
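
As a sanity check (not part of the original assignment code), the closed-form result can be compared against scikit-learn's LinearRegression, which should reproduce essentially the same R-squared and RMSE on this split:

# Cross-check against scikit-learn's built-in least-squares fit.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

lr_model = LinearRegression().fit(X_train, y_train)   # intercept handled internally
y_check = lr_model.predict(X_test)
print("sklearn R-squared:", r2_score(y_test, y_check))
print("sklearn RMSE:", np.sqrt(mean_squared_error(y_test, y_check)))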

# Compute the mean squared error loss between the predicted values and the
# actual target values.
def compute_loss(X, y, theta):
    y_pred = X @ theta
    loss = np.mean((y_pred - y) ** 2)
    return loss

# Gradient of the MSE loss: ∇(MSE) = -2 * (X^T @ (y - Xθ)) / N
# Note: the update below moves against the gradient to minimise the loss,
# i.e. this is gradient descent.

def gradient_descent(X, y, theta, learning_rate, num_iterations):
    losses = []
    for _ in range(num_iterations):
        y_pred = X @ theta
        gradient = -2 * (X.T @ (y - y_pred)) / len(y)
        theta -= learning_rate * gradient
        loss = compute_loss(X, y, theta)
        losses.append(loss)
    return theta, losses
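
A quick way to validate the analytic gradient used above is a central finite-difference check. A minimal sketch, where check_gradient is a hypothetical helper and not part of the assignment:

def check_gradient(X, y, theta, eps=1e-6):
    # Compare the analytic MSE gradient with central finite differences.
    analytic = -2 * (X.T @ (y - X @ theta)) / len(y)
    numeric = np.zeros_like(theta)
    for j in range(len(theta)):
        step = np.zeros_like(theta)
        step[j] = eps
        numeric[j] = (compute_loss(X, y, theta + step)
                      - compute_loss(X, y, theta - step)) / (2 * eps)
    return np.max(np.abs(analytic - numeric))

# Example usage: the printed value should be very small (e.g. below 1e-6).
# print(check_gradient(X_train_with_bias, y_train.to_numpy().astype(float),
#                      np.zeros(X_train_with_bias.shape[1])))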

learning_rates = [0.01, 0.001, 0.0001]
num_iterations = 50000

for lr in learning_rates:
    theta_initial = np.zeros(X_train.shape[1] + 1)  # +1 for the bias term
    X_train_with_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
    theta_optimized, losses = gradient_descent(X_train_with_bias, y_train,
                                               theta_initial, lr, num_iterations)

    plt.plot(range(num_iterations), losses, label=f'lr = {lr}')

    X_test_with_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]
    y_test_pred = X_test_with_bias @ theta_optimized

    # Calculate R-squared on the test set
    ssr = np.sum((y_test - y_test_pred) ** 2)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    r2 = 1 - (ssr / sst)

    # Calculate RMSE on the test set
    rmse = np.sqrt(np.mean((y_test - y_test_pred) ** 2))

    print(f"Learning Rate: {lr}")
    print("Test Set - R-squared:", r2)
    print("Test Set - Root Mean Squared Error (RMSE):", rmse)
    print()

plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Loss Function vs. Iteration for Different Learning Rates')
plt.legend()
plt.show()

Learning Rate: 0.01
Test Set - R-squared: 0.3967805084557158
Test Set - Root Mean Squared Error (RMSE): 0.6657115962476501

Learning Rate: 0.001
Test Set - R-squared: 0.3967803979339991
Test Set - Root Mean Squared Error (RMSE): 0.6657116572333991

Learning Rate: 0.0001
Test Set - R-squared: 0.39639733535434873
Test Set - Root Mean Squared Error (RMSE): 0.6659229971434986
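
The validation split created earlier is never used to choose between these learning rates. A minimal sketch of how it could be used for model selection, picking the learning rate by validation RMSE before reporting test-set numbers (X_val_with_bias, thetas, and best_lr are illustrative names, not from the original code):

# Hypothetical model selection on the validation split: re-fit each learning
# rate (or reuse the thetas from the loop above) and keep the best one.
thetas = {}
for lr in learning_rates:
    theta_initial = np.zeros(X_train_with_bias.shape[1])
    thetas[lr], _ = gradient_descent(X_train_with_bias, y_train,
                                     theta_initial, lr, num_iterations)

X_val_with_bias = np.c_[np.ones((X_val.shape[0], 1)), X_val]
val_rmse = {lr: np.sqrt(np.mean((y_val - X_val_with_bias @ th) ** 2))
            for lr, th in thetas.items()}
best_lr = min(val_rmse, key=val_rmse.get)
print("Best learning rate by validation RMSE:", best_lr)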
