0% found this document useful (0 votes)
11 views8 pages

Lasso Regression Aim: Roll Number: 160122733094 Date

The document outlines the implementation and evaluation of Lasso and Ridge regression models on the California Housing dataset, analyzing performance metrics such as MSE, RMSE, R², and MAE, and visualizing results through scatter plots and heatmaps. Additionally, it describes the application of a Bernoulli Naive Bayes classifier for text classification, including the creation of a binary term-document matrix and evaluation of model metrics like accuracy and F1 score. A comparison table of regression metrics for Lasso, Ridge, and Linear regression is also included.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
11 views8 pages

Lasso Regression Aim: Roll Number: 160122733094 Date

The document outlines the implementation and evaluation of Lasso and Ridge regression models on the California Housing dataset, analyzing performance metrics such as MSE, RMSE, R², and MAE, and visualizing results through scatter plots and heatmaps. Additionally, it describes the application of a Bernoulli Naive Bayes classifier for text classification, including the creation of a binary term-document matrix and evaluation of model metrics like accuracy and F1 score. A comparison table of regression metrics for Lasso, Ridge, and Linear regression is also included.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 8

Roll Number: 160122733094 Date:

LASSO REGRESSION
Aim: To implement and evaluate a Lasso regression model on the California Housing dataset.
To analyze model performance using MSE, RMSE, R², and MAE metrics.
To visualize feature correlations, predicted vs. actual values, and residuals using heatmaps and
scatter plots.
Code:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from math import sqrt

# Fetch the California Housing data and separate the feature matrix from the target
california_housing = fetch_california_housing()
X, y = california_housing.data, california_housing.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling (important for Lasso): the scaler learns its statistics from
# the training rows only and then applies them to both partitions
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Page Number: Signature: ……………………


Roll Number: 160122733094 Date:

# Fit a Lasso (L1-regularized) linear model on the standardized training features
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)

# Predict on the held-out test rows
y_pred = lasso.predict(X_test_scaled)

# Evaluation metrics, all computed on the test split
mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")

# Predicted vs. actual scatter; the dashed red diagonal marks perfect predictions
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([y.min(), y.max()], [y.min(), y.max()], color='red', linestyle='--')
plt.title('Predicted vs Actual Values')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.show()

# Pairwise correlation between the input features
df_features = pd.DataFrame(X, columns=california_housing.feature_names)
correlation_matrix = df_features.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', linewidths=0.5)
plt.title("Feature Correlation Heatmap")
plt.show()

# Calculate residuals (difference between actual and predicted values)
residuals = y_test - y_pred
residuals_df = pd.DataFrame(residuals, columns=['Residuals'])

# The transposed frame is a single row with one cell per test sample, so
# per-cell annotations, cell borders and x tick labels would overlap into an
# unreadable (and slow to render) strip; they are disabled here. center=0
# anchors the diverging colormap so colour encodes the sign of the residual.
plt.figure(figsize=(8, 6))
sns.heatmap(
    residuals_df.T,
    annot=False,
    cmap='coolwarm',
    center=0,
    linewidths=0,
    xticklabels=False,
)

Page Number: Signature: ……………………


Roll Number: 160122733094 Date:

# Title the current figure and render it
plt.title("Residual Heatmap")
plt.show()
Output:

Page Number: Signature: ……………………


Roll Number: 160122733094 Date:

RIDGE REGRESSION
Aim: To implement and evaluate a Ridge regression model on the California Housing dataset.
To analyze model performance using MSE, RMSE, R², and MAE metrics.
To visualize the predicted vs. actual values to assess the model’s accuracy.
Code:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from math import sqrt
# Load the California Housing dataset
california_housing = fetch_california_housing()

Page Number: Signature: ……………………


Roll Number: 160122733094 Date:

# Separate the feature matrix from the regression target
X = california_housing.data
y = california_housing.target

# Sanity check: per-column count of missing feature values
print("\nChecking for missing values in the features (X):")
print(pd.DataFrame(X).isnull().sum()) # No missing values expected

# Split BEFORE scaling so the scaler never sees the test rows. Fitting the
# scaler on the full dataset would leak test-set statistics into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn mean/std from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics
# Kept so the name X_scaled still exists for any later code; it now uses the
# training-fitted scaler instead of one fitted on the whole dataset.
X_scaled = scaler.transform(X)

# Fit a Ridge (L2-regularized) linear model and predict on the held-out rows
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

# Evaluation metrics, all computed on the test split
mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"\nMean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")

# Predicted vs. actual scatter; the dashed red diagonal marks perfect predictions
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([y.min(), y.max()], [y.min(), y.max()], color='red', linestyle='--')
plt.title('Predicted vs Actual Values (Ridge Regression)')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.show()
Output:

Page Number: Signature: ……………………


Roll Number: 160122733094 Date:

COMPARISON TABLE

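| Metric                         | Lasso Regression | Ridge Regression | Linear Regression |
|--------------------------------|------------------|------------------|-------------------|
| Mean Squared Error (MSE)       | 0.6796           | 0.5559           | 0.5559            |
| Root Mean Squared Error (RMSE) | 0.8244           | 0.7456           | 0.7456            |
| R-Squared (R²)                 | 0.4814           | 0.5758           | 0.5758            |
| Mean Absolute Error (MAE)      | 0.6222           | 0.5332           | 0.5332            |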

Page Number: Signature: ……………………


Roll Number: 160122733094 Date:

Bernoulli Naive Bayes Algorithm


Aim: To implement a Bernoulli Naïve Bayes classifier for text classification using a binary term-
document matrix and predict the class of a new test document based on the trained model.
Code:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Small review corpus with one class label ('+' or '-') per document
documents = [
    "The rooms were good and I liked the location since it was good",
    "The hotel was very bad and the stay was unpleasant",
    "Liked the huge play area and the food was nice",
    "The stay was good and pleasant",
    "The location was good but was bad overall because the staff were rude",
]
labels = ['+', '-', '+', '+', '-']

# Restrict the vocabulary to a fixed word list; binary=True records only
# whether a word occurs in a document, not how often
features = ['good', 'liked', 'bad', 'unpleasant', 'nice', 'pleasant', 'rude']
vectorizer = CountVectorizer(vocabulary=features, binary=True)
X = vectorizer.fit_transform(documents)

# Show the term-document matrix with one labelled row per document
tdm_df = pd.DataFrame(
    X.toarray(),
    columns=features,
    index=[f"Doc {n}" for n in range(1, len(documents) + 1)],
)
print("\nBinary Term-Document Matrix:")
print(tdm_df)

# Stratified split with 60% of the documents held out for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.6, stratify=labels, random_state=42
)

# Train Bernoulli Naive Bayes (alpha=1.0 smoothing) and predict the held-out documents
model = BernoulliNB(alpha=1.0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scores treat '+' as the positive class; zero_division=1 reports 1 for an
# undefined ratio instead of emitting a warning
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label='+', zero_division=1)
recall = recall_score(y_test, y_pred, pos_label='+', zero_division=1)

Page Number: Signature: ……………………


Roll Number: 160122733094 Date:

# F1 for the '+' class, with the same zero-division convention as precision/recall
f1 = f1_score(y_test, y_pred, pos_label='+', zero_division=1)

# Report every metric rounded to two decimals
print("\nModel Evaluation Metrics:")
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value:.2f}")

# Predicting on a new sample: vectorize it with the same fixed vocabulary,
# then classify it with the trained model
test_doc = ["I'm bad"]
test_X = vectorizer.transform(test_doc)
predicted_class = model.predict(test_X)
print("\nPredicted class for test document:", predicted_class[0])


Output:

Page Number: Signature: ……………………

You might also like