0% found this document useful (0 votes)

13 views · 5 pages

Message

The document is a Python script for a machine learning homework assignment focused on regression techniques, specifically Linear and Ridge Regression using the California housing dataset. It includes functions for data preprocessing, model training, evaluation, and visualization of results. The script also compares the performance of different models and regularization techniques using metrics like RMSE and R².

Uploaded by

jjie9622

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

0% found this document useful (0 votes)

13 views · 5 pages

Message

Uploaded by

jjie9622

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

You are on page 1/ 5

# Homework 3 - main file

# COMP.4220 Machine Learning

import itertools, functools

import numpy as np
import matplotlib.pyplot as plt
from regression import LinearRegression, RidgeRegression
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge as skRidge
from sklearn.linear_model import LinearRegression as skLinearRegression
from sklearn.metrics import root_mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler as skStandardScalar
from sklearn.preprocessing import StandardScaler

def train_test_split(X, t, test_size=0.2, random_state=None):
    """Split data into shuffled training and testing sets using only NumPy.

    Args:
        X: Array-like of samples; the first axis indexes samples.
        t: Array-like of targets with the same number of entries as ``X``.
        test_size: Fraction of the samples placed in the test set; must lie
            strictly between 0 and 1.
        random_state: Seed for the shuffle. ``None`` gives a different
            shuffle on every call; any integer (including 0) makes the
            split reproducible.

    Returns:
        The tuple ``(X_train, X_test, t_train, t_test)`` as NumPy arrays.

    Raises:
        ValueError: If ``X`` and ``t`` differ in length or ``test_size`` is
            outside the open interval (0, 1).
    """
    X = np.asarray(X)
    t = np.asarray(t)

    n_samples = X.shape[0]
    if t.shape[0] != n_samples:
        raise ValueError(
            f"X and t must have the same number of samples, "
            f"got {n_samples} and {t.shape[0]}"
        )
    if not 0.0 < test_size < 1.0:
        raise ValueError(f"test_size must be in (0, 1), got {test_size}")

    # ---- Part (d) ---- #

    # 1. Shuffle the data
    # A local generator keeps the split reproducible without reseeding
    # NumPy's global random state as a side effect.
    rng = np.random.default_rng(random_state)
    indices = rng.permutation(n_samples)

    # 2. Split the data
    n_test = int(round(n_samples * test_size))
    split_index = n_samples - n_test
    train_idx = indices[:split_index]
    test_idx = indices[split_index:]

    X_train = X[train_idx]
    X_test = X[test_idx]
    t_train = t[train_idx]
    t_test = t[test_idx]

    return X_train, X_test, t_train, t_test

def standardscalar(x: np.ndarray):
    """Standardize ``x`` to zero mean and unit variance along axis 0.

    Args:
        x: Array whose first axis indexes samples.

    Returns:
        ``(x - mean) / std`` computed per column with the population
        standard deviation (``np.std`` default, ddof=0). Columns with zero
        standard deviation are only centered, so they come out as zeros
        instead of NaN/inf.
    """
    # ---- Part (b) ---- #
    mean = np.mean(x, axis=0)
    std = np.std(x, axis=0)
    # Dividing a constant column by 0 would produce NaN; scale it by 1.
    safe_std = np.where(std == 0, 1.0, std)
    return (x - mean) / safe_std

class PolynomialFeature(object):
    """Expand input features into all monomials up to a given degree."""

    def __init__(self, degree=2):
        """Store the maximum polynomial degree.

        Args:
            degree: Highest total degree of the generated monomials; must
                be an ``int``.
        """
        assert isinstance(degree, int), "degree must be an int"
        self.degree = degree

    def transform(self, x):
        """Return the polynomial design matrix for ``x``.

        Args:
            x: Array-like of shape (n_samples,) or (n_samples, n_features).
                A 1-D input is treated as a single feature column.

        Returns:
            Array of shape (n_samples, n_terms). The first column is all
            ones, followed, degree by degree, by the product of every
            combination (with replacement) of the input columns.
        """
        # Accept lists/tuples as well as ndarrays.
        x = np.asarray(x)
        if x.ndim == 1:
            x = x[:, None]
        # Iterating the transpose yields one 1-D array per feature column.
        columns = x.transpose()
        features = [np.ones(len(x))]
        for degree in range(1, self.degree + 1):
            for items in itertools.combinations_with_replacement(columns, degree):
                features.append(functools.reduce(np.multiply, items))
        return np.asarray(features).transpose()

def main():
    """Run the Homework 3 regression experiments on California housing data.

    Steps, in execution order:
      (a) load the dataset and print its shape, feature names and description;
      (b, c) standardize the features by hand and check against sklearn;
      (d) split the data and define a closed-form ridge regressor;
      (g, h) fit linear and ridge models, report RMSE / R² and plot true vs
          predicted values;
      (i) sweep several ridge lambdas and fit the sklearn reference models;
      (j) plot the metrics of the sweep and a 2x2 model comparison;
      (k) compare linear regression with the lambda=1.0 ridge model.

    Part (k) runs last because it reads ``lambdas`` and ``ridge_models``,
    which are built by the sweep in part (i).
    """
    # ---- Part (a) ---- #
    housing = fetch_california_housing()
    X = housing.data
    t = housing.target
    print(X.shape, t.shape)
    print(housing.feature_names[:6])
    print(housing.DESCR)

    # ---- Part (b) ---- #

    def standardscalar(x):
        """Return ``x`` centered and scaled per column (population std)."""
        mean = np.mean(x, axis=0)
        std = np.std(x, axis=0)
        return (x - mean) / std

    X_scaled = standardscalar(X)

    # Standardize the data using sklearn's StandardScaler

    scaler = StandardScaler()
    X_sklearn_scaled = scaler.fit_transform(X)
    print(np.allclose(X_scaled, X_sklearn_scaled))  # Should be True
    print(np.allclose(np.mean(X_scaled, axis=0),
                      np.mean(X_sklearn_scaled, axis=0)))  # Should be True
    print(np.allclose(np.std(X_scaled, axis=0),
                      np.std(X_sklearn_scaled, axis=0)))  # Should be True

    # ---- Part (c) ---- #

    X_train, X_test, t_train, t_test = train_test_split(
        X, t, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_sk_standardized = scaler.fit_transform(X)
    # Element-wise gap between the manual and the sklearn standardization.
    print(X_scaled - X_sk_standardized)

    # ---- Part (d) ---- #

    # NOTE: inside main() this local class shadows the RidgeRegression
    # imported from the `regression` module.
    class RidgeRegression:
        """Closed-form ridge regression, w = (XᵀX + λI)⁻¹ Xᵀt (no intercept)."""

        def __init__(self, lambda_):
            self.lambda_ = lambda_

        def fit(self, X, t):
            # Add the regularization term (lambda * I)
            I = np.identity(X.shape[1])
            # Solve the normal equations directly; this is cheaper and
            # numerically safer than forming an explicit inverse.
            self.w = np.linalg.solve(X.T @ X + self.lambda_ * I, X.T @ t)

        def predict(self, X):
            return X @ self.w

    # Model building
    linreg = LinearRegression()
    linreg.fit(X_train, t_train)

    # Predictions and evaluation for Linear Regression
    t_pred_linreg = linreg.predict(X_test)
    rmse_linreg = root_mean_squared_error(t_test, t_pred_linreg)
    r2_linreg = r2_score(t_test, t_pred_linreg)
    print('Linear Regression results')
    print(f'RMSE: {rmse_linreg}')
    print(f'R2: {r2_linreg}')

    rr = RidgeRegression(lambda_=1.0)
    rr.fit(X_train, t_train)
    t_pred = rr.predict(X_test)
    print('Ridge Regression results')
    print(f'RMSE: {root_mean_squared_error(t_test, t_pred)}')
    print(f'R2: {r2_score(t_test, t_pred)}')

    # ---- Part (g, h) ---- #

    # (g) Plotting true vs predicted for both models
    plt.figure(figsize=(12, 6))

    # Plot for Ridge Regression
    plt.subplot(1, 2, 1)
    plt.scatter(t_test, t_pred, color='blue')
    plt.plot([t_test.min(), t_test.max()], [t_test.min(), t_test.max()],
             color='red')
    plt.title('Ridge Regression: True vs Predicted')

    # Plot for Linear Regression
    plt.subplot(1, 2, 2)
    plt.scatter(t_test, t_pred_linreg, color='green')
    plt.plot([t_test.min(), t_test.max()], [t_test.min(), t_test.max()],
             color='red')
    plt.title('Linear Regression: True vs Predicted')

    plt.show()

    # (h) Try different values of regularization lambda for Ridge regression
    ridge_2 = RidgeRegression(lambda_=10.0)
    ridge_2.fit(X_train, t_train)
    t_pred_ridge_2 = ridge_2.predict(X_test)

    # Evaluate new Ridge regression
    rmse_ridge_2 = root_mean_squared_error(t_test, t_pred_ridge_2)
    r2_ridge_2 = r2_score(t_test, t_pred_ridge_2)

    print(f'Ridge Regression (lambda=10) RMSE: {rmse_ridge_2}')
    print(f'Ridge Regression (lambda=10) R²: {r2_ridge_2}')

    # ---- Part (i) ---- #

    # (i) Ridge Regression for different lambda values
    lambdas = [0.1, 1.0, 10.0, 100.0]
    ridge_models = []
    rmse_values = []
    r2_values = []

    for lambda_val in lambdas:
        ridge_model = RidgeRegression(lambda_=lambda_val)
        ridge_model.fit(X_train, t_train)
        t_pred_ridge = ridge_model.predict(X_test)

        ridge_models.append(ridge_model)
        rmse_values.append(root_mean_squared_error(t_test, t_pred_ridge))
        r2_values.append(r2_score(t_test, t_pred_ridge))

    lr_sk = skLinearRegression()
    lr_sk.fit(X_train, t_train)
    y_lr_sk = lr_sk.predict(X_test)
    print('Sklearn Linear Regression results')
    print(f'RMSE: {root_mean_squared_error(t_test, y_lr_sk)}')
    print(f'R2: {r2_score(t_test, y_lr_sk)}')

    rr_sk = skRidge(alpha=1.0)
    rr_sk.fit(X_train, t_train)
    y_rr_sk = rr_sk.predict(X_test)
    print('Sklearn Ridge Regression results')
    print(f'RMSE: {root_mean_squared_error(t_test, y_rr_sk)}')
    print(f'R2: {r2_score(t_test, y_rr_sk)}')

    # ---- Part (j) ---- #

    plt.figure(figsize=(12, 6))

    # Plot RMSE
    plt.subplot(1, 2, 1)
    plt.plot(lambdas, rmse_values, marker='o', linestyle='-', color='b')
    plt.xscale('log')
    plt.title('RMSE for Ridge Regression with different lambda values')
    plt.xlabel('Lambda')
    plt.ylabel('RMSE')

    # Plot R²
    plt.subplot(1, 2, 2)
    plt.plot(lambdas, r2_values, marker='o', linestyle='-', color='r')
    plt.xscale('log')
    plt.title('R² for Ridge Regression with different lambda values')
    plt.xlabel('Lambda')
    plt.ylabel('R²')
    plt.show()

    # Plot the results: one true-vs-predicted panel per fitted model.
    panels = [
        ('Linear Regression', t_pred_linreg),
        ('Ridge Regression (lambda=1.0)', t_pred),
        ('Sklearn Linear Regression', y_lr_sk),
        ('Sklearn Ridge Regression (alpha=1.0)', y_rr_sk),
    ]
    diagonal = [t_test.min(), t_test.max()]

    plt.figure(figsize=(12, 6))
    for position, (title, predictions) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        plt.scatter(t_test, predictions)
        # Points on the red diagonal are perfect predictions.
        plt.plot(diagonal, diagonal, color='red')
        plt.xlabel('True value')
        plt.ylabel('Predicted value')
        plt.title(title)

    plt.tight_layout()
    plt.show()

    # ---- Part (k) ---- #

    print(f'Linear Regression RMSE: {rmse_linreg}')
    print(f'Linear Regression R²: {r2_linreg}')

    # Compare the performance with the best Ridge model (with lambda = 1.0)
    best_ridge_model = ridge_models[lambdas.index(1.0)]
    t_pred_best_ridge = best_ridge_model.predict(X_test)
    rmse_best_ridge = root_mean_squared_error(t_test, t_pred_best_ridge)
    r2_best_ridge = r2_score(t_test, t_pred_best_ridge)

    print(f'Best Ridge Regression (lambda=1.0) RMSE: {rmse_best_ridge}')
    print(f'Best Ridge Regression (lambda=1.0) R²: {r2_best_ridge}')

    # Comparing the results
    print("\nComparison of Linear Regression and Ridge Regression (lambda=1.0):")
    print(f"Linear Regression RMSE: {rmse_linreg}, R²: {r2_linreg}")
    print(f"Ridge Regression (lambda=1.0) RMSE: {rmse_best_ridge}, R²: {r2_best_ridge}")

# Run the experiments only when this file is executed as a script.
if __name__ == "__main__":
    main()

Intermediate Statistics For Dummies 1st Edition Rumsey Instant Download
No ratings yet
Intermediate Statistics For Dummies 1st Edition Rumsey Instant Download
52 pages
Employee Salary Prediction Slides
No ratings yet
Employee Salary Prediction Slides
21 pages
Linear Regression
No ratings yet
Linear Regression
25 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
9 pages
ML Minimized Programs
No ratings yet
ML Minimized Programs
9 pages
(Ebook PDF) Principles of Econometrics, 5th Editioninstant Download
100% (4)
(Ebook PDF) Principles of Econometrics, 5th Editioninstant Download
49 pages
I Implementation of Regression
No ratings yet
I Implementation of Regression
6 pages
Assignment 3
No ratings yet
Assignment 3
5 pages
Assignment 2
No ratings yet
Assignment 2
3 pages
ML Lab Record
No ratings yet
ML Lab Record
17 pages
Bayesian Inference For Generalized Linear Mixed Models A Comparison of Different Statistical Software Procedures
No ratings yet
Bayesian Inference For Generalized Linear Mixed Models A Comparison of Different Statistical Software Procedures
17 pages
Lasso Regression Aim: Roll Number: 160122733094 Date
No ratings yet
Lasso Regression Aim: Roll Number: 160122733094 Date
8 pages
Ib+Dp+Math+Aa+Ai+Test+1+Review+Sl+4.1 4.6
No ratings yet
Ib+Dp+Math+Aa+Ai+Test+1+Review+Sl+4.1 4.6
8 pages
F 11
No ratings yet
F 11
3 pages
Lab ML
No ratings yet
Lab ML
26 pages
22 Practice Polynomial Regression
No ratings yet
22 Practice Polynomial Regression
6 pages
5th Session Forecasting Business
No ratings yet
5th Session Forecasting Business
13 pages
BDS-Homework-1-Submission - Ipynb - Colab
No ratings yet
BDS-Homework-1-Submission - Ipynb - Colab
11 pages
ML Journal External
No ratings yet
ML Journal External
14 pages
Argha's ML LAB - 240927 - 121838
No ratings yet
Argha's ML LAB - 240927 - 121838
13 pages
Realistic Conversational Question Answering With Answer Selection Based On Calibrated Confidence and Uncertainty Measurement
No ratings yet
Realistic Conversational Question Answering With Answer Selection Based On Calibrated Confidence and Uncertainty Measurement
14 pages
ML Manual
No ratings yet
ML Manual
9 pages
Message
No ratings yet
Message
2 pages
Ridge Regression
No ratings yet
Ridge Regression
3 pages
Probability and Statistics (Final Sample)
0% (1)
Probability and Statistics (Final Sample)
25 pages
ML Exp 3-7 Manuval
No ratings yet
ML Exp 3-7 Manuval
21 pages
Mlalllabprgs
No ratings yet
Mlalllabprgs
17 pages
ML Lab....... 3-Converted New
No ratings yet
ML Lab....... 3-Converted New
27 pages
Rockchip Trouble Shooting RKNN Toolkit V1.2.1 EN
No ratings yet
Rockchip Trouble Shooting RKNN Toolkit V1.2.1 EN
22 pages
ML Record
No ratings yet
ML Record
19 pages
ML Manual
No ratings yet
ML Manual
30 pages
Python File
No ratings yet
Python File
5 pages
Assignment8 22051899-1
No ratings yet
Assignment8 22051899-1
2 pages
CP4252 Lab Manual
No ratings yet
CP4252 Lab Manual
13 pages
1
No ratings yet
1
13 pages
LAB5 Regularization
No ratings yet
LAB5 Regularization
6 pages
Da 012307
No ratings yet
Da 012307
8 pages
TUGAS STATISTIK EKONOMI-AGNES - SPV (Document1)
No ratings yet
TUGAS STATISTIK EKONOMI-AGNES - SPV (Document1)
26 pages
11 CORRELATION Point-Biserial
No ratings yet
11 CORRELATION Point-Biserial
26 pages
INSY446 - 3 - Linear Model Part 2
No ratings yet
INSY446 - 3 - Linear Model Part 2
27 pages
Print Out ML - Finallllllllllllllll
No ratings yet
Print Out ML - Finallllllllllllllll
11 pages
Zerox Ready
No ratings yet
Zerox Ready
21 pages
ML
No ratings yet
ML
17 pages
Data Science Record - 05
No ratings yet
Data Science Record - 05
20 pages
SML - Week 3
No ratings yet
SML - Week 3
5 pages
ML Lab Prgms Split
No ratings yet
ML Lab Prgms Split
3 pages
Big Data Assignment - 4
No ratings yet
Big Data Assignment - 4
6 pages
Machine Learning
No ratings yet
Machine Learning
10 pages
Tpe 2 Mixed Methods Portfolio (Grupal)
No ratings yet
Tpe 2 Mixed Methods Portfolio (Grupal)
7 pages
Docu 4
No ratings yet
Docu 4
3 pages
Machine
100% (1)
Machine
45 pages
Data Analytics
No ratings yet
Data Analytics
10 pages
Linear Regression - Numpy and Sklearn
No ratings yet
Linear Regression - Numpy and Sklearn
7 pages
Test Preparation C 1000 1642 1.642 0.230066 Math Score 1000 66089 66.089 229.919
No ratings yet
Test Preparation C 1000 1642 1.642 0.230066 Math Score 1000 66089 66.089 229.919
63 pages
L3 Hypothesis Test
No ratings yet
L3 Hypothesis Test
17 pages
Experiment Number: 3: Aim:-Study of The Linear Regression in The Machine Learning Using The Boston Housing Dataset. 1)
No ratings yet
Experiment Number: 3: Aim:-Study of The Linear Regression in The Machine Learning Using The Boston Housing Dataset. 1)
14 pages
Aiml Practicals
No ratings yet
Aiml Practicals
22 pages
ML Remaining
No ratings yet
ML Remaining
17 pages
Regression Analysis - Cheatsheet
No ratings yet
Regression Analysis - Cheatsheet
9 pages
Week 2 Test Statistics
No ratings yet
Week 2 Test Statistics
61 pages
Mlda - Lab
No ratings yet
Mlda - Lab
35 pages
Regression: Finding The Equation of The Line of Best Fit: Background and General Principle
No ratings yet
Regression: Finding The Equation of The Line of Best Fit: Background and General Principle
6 pages
Econometrics Beat - Dave Giles' Blog - ARDL Modelling in EViews 9
No ratings yet
Econometrics Beat - Dave Giles' Blog - ARDL Modelling in EViews 9
26 pages
Boston Housing Kaggle Challenge With Linear Regression
No ratings yet
Boston Housing Kaggle Challenge With Linear Regression
3 pages
External
No ratings yet
External
11 pages
ML Shristi File
No ratings yet
ML Shristi File
49 pages
Linear Regression
No ratings yet
Linear Regression
4 pages
Linear Regression
No ratings yet
Linear Regression
15 pages
Assignment2 Dhairya - Shah
No ratings yet
Assignment2 Dhairya - Shah
7 pages
Multiple Regression Analysis: DR Hédi Essid
No ratings yet
Multiple Regression Analysis: DR Hédi Essid
23 pages
Final ML File
No ratings yet
Final ML File
34 pages
Exploratory Data Analysis PDF
100% (4)
Exploratory Data Analysis PDF
791 pages
Data Mining Practicals
No ratings yet
Data Mining Practicals
22 pages
Student
No ratings yet
Student
42 pages
Group Work Assignment Supervised and Unsupervised Learning
No ratings yet
Group Work Assignment Supervised and Unsupervised Learning
10 pages
Message
No ratings yet
Message
2 pages
Assignment No.4 - (20-Ele-68)
No ratings yet
Assignment No.4 - (20-Ele-68)
17 pages
Machine Learnin
100% (2)
Machine Learnin
23 pages
Unit 4 Test Review Answers
No ratings yet
Unit 4 Test Review Answers
3 pages
Chapter 10
No ratings yet
Chapter 10
35 pages
Cs 7265 Big Data Analytics Regularization On Linear Model: Mingon Kang, PH.D Computer Science, Kennesaw State University
No ratings yet
Cs 7265 Big Data Analytics Regularization On Linear Model: Mingon Kang, PH.D Computer Science, Kennesaw State University
24 pages
Wa0002.
No ratings yet
Wa0002.
5 pages
Ridge - Lasso - Regression (1) .Ipynb - Colaboratory
No ratings yet
Ridge - Lasso - Regression (1) .Ipynb - Colaboratory
4 pages
Chi-Square Test For Association: Cramer's V Correlation
No ratings yet
Chi-Square Test For Association: Cramer's V Correlation
23 pages
MULTIPLE CHOICE. Choose The One Alternative That Best Completes The Statement or Answers The
No ratings yet
MULTIPLE CHOICE. Choose The One Alternative That Best Completes The Statement or Answers The
13 pages
An Introduction To Sequential Monte Carlo: Nicolas Chopin Omiros Papaspiliopoulos
No ratings yet
An Introduction To Sequential Monte Carlo: Nicolas Chopin Omiros Papaspiliopoulos
390 pages
Lab 3 - Linear Regression
No ratings yet
Lab 3 - Linear Regression
15 pages
SME6024 Teaching Statistics & Probability
No ratings yet
SME6024 Teaching Statistics & Probability
18 pages
Topic 16 Identifying Parameters For Testing in Given Real-Life Problems
No ratings yet
Topic 16 Identifying Parameters For Testing in Given Real-Life Problems
4 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet

Message

Uploaded by

Message

Uploaded by

# Homework 3 - main file

# COMP.4220 Machine Learning

import itertools, functools

def train_test_split(X, t, test_size=0.2, random_state=None):

# ---- Part (d) ---- #

# 2. Split the data

return X_train, X_test, t_train, t_test

def standardscalar(x: np.ndarray):

return (x - np.mean(x, axis=0)) / np.std(x, axis=0)

def transform(self, x):

# ---- Part (b) ---- #

# Standardize the data using sklearn's StandardScaler

# ---- Part (c) ---- #

# ---- Part (d) ---- #

def fit(self, X, t):

def predict(self, X):

# ---- Part (k) ---- #

# Predictions and evaluation for Linear Regression

print(f'Linear Regression RMSE: {rmse_linreg}')

print(f'Best Ridge Regression (lambda=1.0) RMSE: {rmse_best_ridge}')

# Comparing the results

# ---- Part (g, h) ---- #

# Plot for Ridge Regression

# Plot for Linear Regression

# (h) Try different values of regularization lambda for Ridge regression

# Evaluate new Ridge regression

print(f'Ridge Regression (lambda=10) RMSE: {rmse_ridge_2}')

# ---- Part (i) ---- #

for lambda_val in lambdas:

rmse_ridge = np.sqrt(mean_squared_error(t_test, t_pred_ridge))

# ---- Part (j) ---- #

# Plot the results

You might also like