LAB-4 Report
In [ ]:
import numpy as np
import pandas as pd
import plotly.express as px
In [ ]:
iris.head()
In [ ]:
In [ ]:
fig.show()
In [ ]:
fig.show()
In [ ]:
fig.show()
In [ ]:
class KNN:
    """
    K-Nearest Neighbors (KNN) classification algorithm.
    Parameters:
    -----------
    n_neighbors : int, optional (default=5)
        Number of neighbors to use in the majority vote.
    Methods:
    --------
    fit(X_train, y_train):
        Stores the values of X_train and y_train.
    predict(X):
        Predicts the class labels for each example in X.
    """
    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors

    def euclidean_distance(self, x1, x2):
        """
        Compute the Euclidean distance between two points.
        Parameters:
        -----------
        x1 : numpy.ndarray, shape (n_features,)
            A data point in the dataset.
        x2 : numpy.ndarray, shape (n_features,)
            Another data point in the dataset.
        Returns:
        --------
        distance : float
            The Euclidean distance between x1 and x2.
        """
        return np.linalg.norm(x1 - x2)

    def fit(self, X_train, y_train):
        """
        Store the training data.
        Parameters:
        -----------
        X_train : numpy.ndarray, shape (n_samples, n_features)
            The training dataset.
        y_train : numpy.ndarray, shape (n_samples,)
            The training labels.
        """
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X):
        """
        Predict the class labels for each example in X.
        Parameters:
        -----------
        X : numpy.ndarray, shape (n_samples, n_features)
            The test dataset.
        Returns:
        --------
        predictions : numpy.ndarray, shape (n_samples,)
            The predicted class labels for each example in X.
        """
        # Create empty list to store the predictions
        predictions = []
        # Loop over X examples
        for x in X:
            # Get prediction using the prediction helper function
            prediction = self._predict(x)
            # Append the prediction to the predictions list
            predictions.append(prediction)
        return np.array(predictions)

    def _predict(self, x):
        """
        Predict the class label for a single example.
        Parameters:
        -----------
        x : numpy.ndarray, shape (n_features,)
            A data point in the test dataset.
        Returns:
        --------
        most_occuring_value : int
            The predicted class label for x.
        """
        # Create empty list to store distances
        distances = []
        # Compute the distance between x and all the training examples
        for x_train in self.X_train:
            distance = self.euclidean_distance(x, x_train)
            distances.append(distance)
        distances = np.array(distances)
        # Indices of the k nearest neighbors
        k_indices = np.argsort(distances)[:self.n_neighbors]
        # Labels of the k nearest neighbors
        k_nearest_labels = self.y_train[k_indices]
        # Majority vote: return the most frequent label among the neighbors
        most_occuring_value = np.bincount(k_nearest_labels).argmax()
        return most_occuring_value
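As a quick sanity check, here is a minimal usage sketch on toy data (the points and labels below are illustrative, not from the lab dataset):
In [ ]:
# Two well-separated classes; KNN should recover the labels exactly
X_toy = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.2, 4.9]])
y_toy = np.array([0, 0, 1, 1])
toy_model = KNN(n_neighbors=3)
toy_model.fit(X_toy, y_toy)
toy_model.predict(np.array([[0.05, 0.1], [5.1, 5.0]]))  # expected: array([0, 1])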
In [ ]:
def train_test_split(X, y, random_state=42, test_size=0.2):
    """
    Split the features and target arrays into train and test sets.
    Parameters:
        X (numpy.ndarray): Features array of shape (n_samples, n_features).
        y (numpy.ndarray): Target array of shape (n_samples,).
        random_state (int): Seed for the random number generator. Default is 42.
        test_size (float): Proportion of samples to include in the test set. Default is 0.2.
    Returns:
        Tuple[numpy.ndarray]: A tuple containing X_train, X_test, y_train, y_test.
    """
    # Get number of samples
    n_samples = X.shape[0]
    # Shuffle the sample indices with a fixed seed for reproducibility
    rng = np.random.default_rng(random_state)
    indices = rng.permutation(n_samples)
    # Compute the size of the test set and partition the indices
    n_test = int(n_samples * test_size)
    test_indices, train_indices = indices[:n_test], indices[n_test:]
    # Split the features and target arrays into test and train
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    return X_train, X_test, y_train, y_test
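With the helper in place (the name train_test_split is assumed from its sklearn-style signature), the split is a single call, assuming X and y hold the iris features and integer-encoded labels prepared earlier:
In [ ]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)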
In [ ]:
In [ ]:
model = KNN(7)
model.fit(X_train, y_train)
In [ ]:
def compute_accuracy(y_true, y_pred):
    """
    Compute the accuracy of a classification model.
    Parameters:
        y_true (numpy array): A numpy array of true labels for each data point.
        y_pred (numpy array): A numpy array of predicted labels for each data point.
    Returns:
        float: The accuracy of the model, expressed as a fraction between 0 and 1.
    """
    y_true = y_true.flatten()
    total_samples = len(y_true)
    correct_predictions = np.sum(y_true == y_pred)
    return correct_predictions / total_samples
In [ ]:
X_test
In [ ]:
predictions = model.predict(X_test)
accuracy = compute_accuracy(y_test, predictions)
print(f" our model got accuracy score of : {accuracy}")
our model got accuracy score of : 0.9666666666666667
In [ ]:
from sklearn.neighbors import KNeighborsClassifier
In [ ]:
# Fit sklearn's KNN with the same number of neighbors for comparison
skmodel = KNeighborsClassifier(n_neighbors=7)
skmodel.fit(X_train, y_train)
In [ ]:
sk_predictions = skmodel.predict(X_test)
sk_accuracy = compute_accuracy(y_test, sk_predictions)
print(f" sklearn-model got accuracy score of : {sk_accuracy}")
OUTPUT:
LAB-3
LINEAR REGRESSION:
OBSERVATION:
CODE:
import math
import numpy as np
import pandas as pd
import plotly.express as px
import pickle
In [ ]:
In [ ]:
train_data.head()
In [ ]:
px.scatter(x=train_data['x'], y=train_data['y'], template='seaborn')
In [ ]:
In [ ]:
# Standardization helper (function name assumed)
def standardize_data(X_train, X_test):
    """
    Standardize the data using the mean and standard deviation of the training data.
    Parameters:
        X_train (numpy.ndarray): Training data.
        X_test (numpy.ndarray): Testing data.
    Returns:
        Tuple of standardized training and testing data.
    """
    # Calculate the mean and standard deviation using the training data
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    # Apply the same transformation to both splits to avoid test-set leakage
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std
    return X_train, X_test
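A typical call, assuming X_train and X_test were produced by a train/test split of the data above:
In [ ]:
X_train, X_test = standardize_data(X_train, X_test)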
In [ ]:
In [ ]:
class LinearRegression:
"""
Linear Regression Model with Gradient Descent
Parameters:
learning_rate (float): The learning rate used in gradient descent.
convergence_tol (float, optional): The tolerance for convergence
(stopping criterion). Defaults to 1e-6.
Attributes:
W (numpy.ndarray): Coefficients (weights) for the linear regression
model.
b (float): Intercept (bias) for the linear regression model.
Methods:
initialize_parameters(n_features): Initialize model parameters.
forward(X): Compute the forward pass of the linear regression
model.
compute_cost(predictions): Compute the mean squared error cost.
backward(predictions): Compute gradients for model parameters.
fit(X, y, iterations, plot_cost=True): Fit the linear regression
model to training data.
predict(X): Predict target values for new input data.
save_model(filename=None): Save the trained model to a file using
pickle.
load_model(filename): Load a trained model from a file using
pickle.
Examples:
>>> from linear_regression import LinearRegression
>>> model = LinearRegression(learning_rate=0.01)
>>> model.fit(X_train, y_train, iterations=1000)
>>> predictions = model.predict(X_test)
"""
    def __init__(self, learning_rate, convergence_tol=1e-6):
        self.learning_rate = learning_rate
        self.convergence_tol = convergence_tol

    def initialize_parameters(self, n_features):
        """
        Initialize model parameters with small random weights and zero bias.
        Parameters:
            n_features (int): The number of features in the input data.
        """
self.W = np.random.randn(n_features) * 0.01
self.b = 0
def forward(self, X):
"""
Compute the forward pass of the linear regression model.
Parameters:
X (numpy.ndarray): Input data of shape (m, n_features).
Returns:
numpy.ndarray: Predictions of shape (m,).
"""
return np.dot(X, self.W) + self.b
    def compute_cost(self, predictions):
        """
        Compute the mean squared error cost.
        Parameters:
            predictions (numpy.ndarray): Predictions of shape (m,).
        Returns:
            float: Mean squared error cost.
        """
m = len(predictions)
cost = np.sum(np.square(predictions - self.y)) / (2 * m)
return cost
    def backward(self, predictions):
        """
        Compute gradients for the model parameters.
        Parameters:
            predictions (numpy.ndarray): Predictions of shape (m,).
        Updates:
            self.dW (numpy.ndarray): Gradient of W.
            self.db (float): Gradient of b.
        """
m = len(predictions)
self.dW = np.dot(predictions - self.y, self.X) / m
self.db = np.sum(predictions - self.y) / m
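        # Derivation note: with cost J = sum((y_hat - y)^2) / (2m), the gradients are
        #   dW = X^T (y_hat - y) / m  and  db = sum(y_hat - y) / m,
        # which is exactly what the two lines above compute.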
def fit(self, X, y, iterations, plot_cost=True):
"""
Fit the linear regression model to the training data.
Parameters:
X (numpy.ndarray): Training input data of shape (m,
n_features).
y (numpy.ndarray): Training labels of shape (m,).
iterations (int): The number of iterations for gradient
descent.
plot_cost (bool, optional): Whether to plot the cost during
training. Defaults to True.
Raises:
AssertionError: If input data and labels are not NumPy arrays
or have mismatched shapes.
Plots:
Plotly line chart showing cost vs. iteration (if plot_cost is
True).
"""
assert isinstance(X, np.ndarray), "X must be a NumPy array"
assert isinstance(y, np.ndarray), "y must be a NumPy array"
        assert X.shape[0] == y.shape[0], "X and y must have the same number of samples"
assert iterations > 0, "Iterations must be greater than 0"
self.X = X
self.y = y
self.initialize_parameters(X.shape[1])
costs = []
for i in range(iterations):
predictions = self.forward(X)
cost = self.compute_cost(predictions)
self.backward(predictions)
self.W -= self.learning_rate * self.dW
self.b -= self.learning_rate * self.db
            costs.append(cost)
            if i % 100 == 0:
                print(f'Iteration: {i}, Cost: {cost}')
            # Stop early once the improvement in cost falls below convergence_tol
            if i > 0 and abs(costs[-2] - costs[-1]) < self.convergence_tol:
                break
if plot_cost:
            fig = px.line(y=costs, title="Cost vs Iteration", template="plotly_dark")
fig.update_layout(
title_font_color="#41BEE9",
xaxis=dict(color="#41BEE9", title="Iterations"),
yaxis=dict(color="#41BEE9", title="Cost")
)
fig.show()
def predict(self, X):
"""
Predict target values for new input data.
Parameters:
X (numpy.ndarray): Input data of shape (m, n_features).
Returns:
numpy.ndarray: Predicted target values of shape (m,).
"""
return self.forward(X)
    def save_model(self, filename=None):
        """
        Save the trained model to a file using pickle.
        Parameters:
            filename (str): The name of the file to save the model to.
        """
        model_data = {
            'learning_rate': self.learning_rate,
            'convergence_tol': self.convergence_tol,
            'W': self.W,
            'b': self.b
        }
        with open(filename, 'wb') as file:
            pickle.dump(model_data, file)
@classmethod
def load_model(cls, filename):
"""
Load a trained model from a file using pickle.
Parameters:
filename (str): The name of the file to load the model from.
Returns:
LinearRegression: An instance of the LinearRegression class
with loaded parameters.
"""
        with open(filename, 'rb') as file:
            model_data = pickle.load(file)
        # Rebuild the model instance from the saved parameters
        loaded_model = cls(model_data['learning_rate'], model_data['convergence_tol'])
        loaded_model.W = model_data['W']
        loaded_model.b = model_data['b']
        return loaded_model
In [ ]:
lr = LinearRegression(0.01)
lr.fit(X_train, y_train, 10000)
In [ ]:
lr.save_model('model.pkl')
In [ ]:
model = LinearRegression.load_model("model.pkl")
In [ ]:
class RegressionMetrics:
@staticmethod
def mean_squared_error(y_true, y_pred):
"""
Calculate the Mean Squared Error (MSE).
Args:
y_true (numpy.ndarray): The true target values.
y_pred (numpy.ndarray): The predicted target values.
Returns:
float: The Mean Squared Error.
"""
        assert len(y_true) == len(y_pred), "Input arrays must have the same length."
mse = np.mean((y_true - y_pred) ** 2)
return mse
@staticmethod
def root_mean_squared_error(y_true, y_pred):
"""
Calculate the Root Mean Squared Error (RMSE).
Args:
y_true (numpy.ndarray): The true target values.
y_pred (numpy.ndarray): The predicted target values.
Returns:
float: The Root Mean Squared Error.
"""
        assert len(y_true) == len(y_pred), "Input arrays must have the same length."
mse = RegressionMetrics.mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
return rmse
@staticmethod
def r_squared(y_true, y_pred):
"""
Calculate the R-squared (R^2) coefficient of determination.
Args:
y_true (numpy.ndarray): The true target values.
y_pred (numpy.ndarray): The predicted target values.
Returns:
float: The R-squared (R^2) value.
"""
        assert len(y_true) == len(y_pred), "Input arrays must have the same length."
mean_y = np.mean(y_true)
ss_total = np.sum((y_true - mean_y) ** 2)
ss_residual = np.sum((y_true - y_pred) ** 2)
r2 = 1 - (ss_residual / ss_total)
return r2
In [ ]:
y_pred = model.predict(X_test)
mse_value = RegressionMetrics.mean_squared_error(y_test, y_pred)
rmse_value = RegressionMetrics.root_mean_squared_error(y_test, y_pred)
r_squared_value = RegressionMetrics.r_squared(y_test, y_pred)
In [ ]:
model.predict([[2]])
OUTPUT:
MULTIPLE LINEAR REGRESSION:
OBSERVATION:
CODE:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Suppress warnings to keep the output clean
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn import linear_model, metrics
X = df[['Weight', 'Volume']]
y = df['CO2']
In [9]:
fig, axs = plt.subplots(2, figsize=(5, 5))
plt1 = sns.boxplot(df['Weight'], ax=axs[0])
plt2 = sns.boxplot(df['Volume'], ax=axs[1])
plt.tight_layout()
sns.histplot(df['CO2'], kde=True)  # sns.distplot is deprecated in recent seaborn
In [11]:
sns.pairplot(df, x_vars=['Weight', 'Volume'], y_vars='CO2', height=4, aspect=1, kind='scatter')
plt.show()
In [12]:
# Create the correlation matrix and represent it as a heatmap.
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.show()
In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)
In [14]:
y_train.shape
y_test.shape
Out[15]:
(11,)
In [16]:
reg_model = linear_model.LinearRegression()
In [17]:
# Fitting the Multiple Linear Regression model
reg_model.fit(X_train, y_train)
In [18]:
# Printing the model coefficients
print('Intercept:', reg_model.intercept_)
# Pair the feature names with the coefficients
list(zip(X, reg_model.coef_))
Intercept: 74.33882836589245
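The fitted relationship therefore has the form CO2 ≈ 74.34 + w1·Weight + w2·Volume, where w1 and w2 are the two coefficients paired with the feature names above.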
# Predicting the test and train set results
y_pred = reg_model.predict(X_test)
x_pred = reg_model.predict(X_train)
In [20]:
print("Prediction for test set: {}".format(y_pred))
Prediction for test set: [ 90.41571939 102.16323413  99.56363213 104.56661845 101.54657652
  95.94770019 108.64011848 102.22654214  92.80374837  97.27327129  97.57074463]
In [21]:
# Actual value and the predicted value
reg_model_diff = pd.DataFrame({'Actual value': y_test, 'Predicted value': y_pred})
In [22]:
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)
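A cell like the following would display the three scores (variable names as defined above):
In [ ]:
print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('R-squared:', r2)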
OUTPUT: