
Digital Assignment-1

Fall Semester 2024-2025

Name: Sparsh Verma
Reg. No.: 21BIT0706
Course Code: BITE405L
Course Title: Soft Computing
Slot: G2+TG2

Code:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d mssmartypants/paris-housing-price-prediction
!unzip /content/paris-housing-price-prediction.zip
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
df = pd.read_csv('/content/ParisHousing.csv')
df.head()
df.isnull().sum()
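The LinearRegression and metric imports above are never exercised in this block; a minimal sketch of the presumably intended continuation is shown below, assuming the dataset's target column is named 'price' (as in the Kaggle ParisHousing data).

# Sketch: fit and evaluate a baseline linear regression
# (assumes the target column is 'price')
X = df.drop('price', axis=1)
y = df['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print("MAE :", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R2  :", r2_score(y_test, y_pred))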

Gradient Descent

Code:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Load the Iris dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
data = pd.read_csv(url, delimiter=',', names=column_names)

print(data.head())

# Encode the target variable


le = LabelEncoder()
data['species'] = le.fit_transform(data['species'])

# Split the data into features and target variable


X = data.drop('species', axis=1)
y = data['species']

# Split the dataset into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

"""**Gradient Descent Implementation**"""

class LogisticRegressionGD:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations

    def fit(self, X, y):
        self.m, self.n = X.shape
        self.theta = np.zeros(self.n)
        self.bias = 0
        self.cost_history = []  # recorded per iteration for the loss plot below

        for i in range(self.n_iterations):
            y_pred = self.predict(X)
            self.cost_history.append(self._compute_cost(y_pred, y))
            self._update_weights(X, y, y_pred)

    def predict(self, X):
        # Sigmoid of the linear model: probabilities in (0, 1)
        linear_model = np.dot(X, self.theta) + self.bias
        return 1 / (1 + np.exp(-linear_model))

    def _compute_cost(self, y_pred, y):
        # Binary cross-entropy cost
        m = len(y)
        return -(1 / m) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

    def _update_weights(self, X, y, y_pred):
        m = len(y)
        d_theta = (1 / m) * np.dot(X.T, (y_pred - y))
        d_bias = (1 / m) * np.sum(y_pred - y)
        self.theta -= self.learning_rate * d_theta
        self.bias -= self.learning_rate * d_bias
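Note that this class is a binary logistic-regression formulation applied directly to the three integer Iris labels, so rounding the sigmoid output can only ever produce classes 0 and 1. Separately, np.log emits runtime warnings if the sigmoid saturates to exactly 0 or 1; a hedged, drop-in refinement of the _compute_cost method above is sketched below (eps is an assumed tolerance, not part of the original code).

# Drop-in replacement for LogisticRegressionGD._compute_cost (sketch)
def _compute_cost(self, y_pred, y, eps=1e-12):
    # Clip probabilities away from 0 and 1 so np.log never receives an exact 0
    y_pred = np.clip(y_pred, eps, 1 - eps)
    m = len(y)
    return -(1 / m) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))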

# Train the model


model_gd = LogisticRegressionGD(learning_rate=0.01, n_iterations=1000)
model_gd.fit(X_train_scaled, y_train)

# Predict on the test set


y_pred_gd_prob = model_gd.predict(X_test_scaled)
y_pred_gd = np.round(y_pred_gd_prob).astype(int)  # probabilities -> hard 0/1 labels

"""**Performance Measurement of GD**"""

from sklearn.metrics import confusion_matrix, accuracy_score

# Define the performance measures for classification


def classification_performance(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    # labels=... keeps the report aligned with all three class names even if a
    # model never predicts one of the classes
    report = classification_report(y_true, y_pred, labels=list(range(len(le.classes_))),
                                   target_names=le.classes_, zero_division=0)
    return accuracy, report

# Calculate performance for Gradient Descent model


accuracy_gd, report_gd = classification_performance(y_test, y_pred_gd)
print("Gradient Descent Performance:")
print(f"Accuracy: {accuracy_gd}")
print("Classification Report:")
print(report_gd)
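confusion_matrix is imported above but never used; a short sketch of how it could round out the evaluation of the gradient-descent model:

# Confusion matrix for the gradient-descent model (rows: true class, columns: predicted)
cm_gd = confusion_matrix(y_test, y_pred_gd, labels=list(range(len(le.classes_))))
print("Confusion Matrix (GD):")
print(cm_gd)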

"""**Backpropagation Neural Network (BPNN)**"""

# Build the BPNN model for classification


model_bpnn = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')  # Output layer for 3 classes
])

# Compile the model
model_bpnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])
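sparse_categorical_crossentropy is chosen because y_train holds integer class labels (0, 1, 2) rather than one-hot vectors. A hedged sketch of the equivalent one-hot formulation, shown only for contrast and not used in the rest of this notebook:

# Equivalent one-hot alternative (illustrative only; the integer-label setup above is what is trained below)
y_train_oh = tf.keras.utils.to_categorical(y_train, num_classes=3)
# With one-hot targets the model would instead be compiled with:
# model_bpnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])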

# Train the model


history = model_bpnn.fit(X_train_scaled, y_train, epochs=100,
                         validation_data=(X_test_scaled, y_test), verbose=1)

# Predict on the test set


y_pred_bpnn_prob = model_bpnn.predict(X_test_scaled)
y_pred_bpnn = np.argmax(y_pred_bpnn_prob, axis=1)

"""**Performance for BPNN model**"""

# Calculate performance for BPNN model


accuracy_bpnn, report_bpnn = classification_performance(y_test, y_pred_bpnn)
print("\nBPNN Performance:")
print(f"Accuracy: {accuracy_bpnn}")
print("Classification Report:")
print(report_bpnn)

"""**Performance metric bar charts and loss history plots for both Models**"""

# Define performance metrics for classification models


metrics = ['Accuracy']
gd_values = [accuracy_gd]
bpnn_values = [accuracy_bpnn]

x = range(len(metrics))

plt.figure(figsize=(12, 6))
plt.bar(x, gd_values, width=0.4, label='Gradient Descent', align='center', color='b')
plt.bar([i + 0.4 for i in x], bpnn_values, width=0.4, label='BPNN', align='center',
        color='g')
plt.xlabel('Performance Metrics')
plt.ylabel('Values')
plt.title('Performance Metrics Comparison')
plt.xticks([i + 0.2 for i in x], metrics)
plt.legend()
plt.show()

# Plot the cost history for Gradient Descent and the loss history for BPNN in the same chart
plt.figure(figsize=(12, 6))
plt.plot(model_gd.cost_history, label='Gradient Descent Training Cost', color='b')
plt.plot(history.history['loss'], label='BPNN Training Loss', color='g')
plt.plot(history.history['val_loss'], label='BPNN Validation Loss', color='r')
plt.xlabel('Iterations/Epochs')
plt.ylabel('Cost/Loss')
plt.title('Training Cost/Loss History')
plt.legend()
plt.show()
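Since the model was compiled with metrics=['accuracy'], history also records per-epoch accuracy; a short sketch of the corresponding plot:

# BPNN training vs. validation accuracy per epoch
plt.figure(figsize=(12, 6))
plt.plot(history.history['accuracy'], label='BPNN Training Accuracy', color='g')
plt.plot(history.history['val_accuracy'], label='BPNN Validation Accuracy', color='r')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('BPNN Accuracy History')
plt.legend()
plt.show()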

Linear Regression

Code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load the Ames Housing Dataset


housing = fetch_openml(name="house_prices", as_frame=True)
data = housing.frame

# Display the first few rows of the dataset


print("Dataset Sample:")
print(data.head())

# Step 2: Data Preprocessing


# Select numeric features and target variable
features = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'FullBath',
            'BedroomAbvGr', 'TotRmsAbvGrd']
X = data[features]
y = data['SalePrice']

# Handle missing values


# Assign rather than modify in place, to avoid SettingWithCopyWarning on a slice
X = X.fillna(X.median())
y = y.fillna(y.median())

# Standardize the features


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2,
                                                    random_state=42)

# Add intercept term to X_train and X_test


X_train_b = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# Initialize Parameters for Gradient Descent


theta = np.zeros(X_train_b.shape[1]) # Initialize weights
alpha = 0.01 # Learning rate
iterations = 1000 # Number of iterations

# Step 3: Implement Gradient Descent


def gradient_descent(X, y, theta, alpha, iterations):
    m = len(y)
    cost_history = np.zeros(iterations)

    for i in range(iterations):
        # Gradient of the MSE cost: (1/m) * X^T (X theta - y)
        gradients = X.T.dot(X.dot(theta) - y) / m
        theta -= alpha * gradients
        cost_history[i] = mean_squared_error(y, X.dot(theta))

    return theta, cost_history

theta, cost_history = gradient_descent(X_train_b, y_train, theta, alpha, iterations)
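As a sanity check, the gradient-descent weights can be compared against the closed-form least-squares solution theta = (X^T X)^(-1) X^T y; a hedged sketch using np.linalg.lstsq for numerical stability:

# Closed-form least-squares solution for comparison with the gradient-descent theta
theta_closed, *_ = np.linalg.lstsq(X_train_b, y_train, rcond=None)
print("Max |theta_GD - theta_closed|:", np.max(np.abs(theta - theta_closed)))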

# Step 4: Make Predictions


y_pred = X_test_b.dot(theta)

# Step 5: Evaluate the Model


mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")


print(f"R-squared Score: {r2:.2f}")

# Step 6: Plotting Results

# Plot 1: Cost Function Over Iterations


plt.figure(figsize=(10, 6))
plt.plot(range(iterations), cost_history, color='blue')
plt.xlabel("Number of Iterations")
plt.ylabel("Cost (Mean Squared Error)")
plt.title("Cost Function over Iterations")
plt.grid(True)
plt.show()

# Plot 2: Actual vs Predicted House Prices


plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.5, color='blue', label='Predicted vs Actual')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red',
         linewidth=2, label='Ideal Prediction')
plt.xlabel("Actual House Prices")
plt.ylabel("Predicted House Prices")
plt.title("Actual vs Predicted House Prices")
plt.legend()
plt.grid(True)
plt.show()

# Plot 3: Residuals Plot (Errors)


residuals = y_test - y_pred
plt.figure(figsize=(10, 6))
plt.scatter(y_pred, residuals, alpha=0.5, color='purple', label='Residuals')
plt.hlines(y=0, xmin=min(y_pred), xmax=max(y_pred), color='red', linewidth=2,
           label='Zero Error Line')
plt.xlabel("Predicted House Prices")
plt.ylabel("Residuals (Actual - Predicted)")
plt.title("Residuals of Predictions")
plt.legend()
plt.grid(True)
plt.show()

# (Optional) Step 7: Display Final Theta Values


print("\nFinal Theta Values:")
print(theta)
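The raw theta vector is easier to interpret when each weight is paired with its feature name; theta[0] corresponds to the intercept column added earlier:

# Map each learned weight to its feature name (theta[0] is the intercept)
for name, weight in zip(['Intercept'] + features, theta):
    print(f"{name}: {weight:.2f}")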
