Btech1007022 Lab5

The document contains three separate programs focused on data analysis and machine learning. Program 1 implements linear regression to predict salary based on experience, Program 2 uses linear regression on an insurance dataset, and Program 3 applies logistic regression to classify species in the Iris dataset. Each program includes data loading, preprocessing, model training, and evaluation steps.


Name - Ankit Kumar

Roll - BTECH/10066/22

LAB-5

Program 1:

import csv
import numpy as np
import matplotlib.pyplot as plt

# Load the data
data = []
with open('Salary_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    for row in reader:
        data.append([float(row[0]), float(row[1])])

# Separate the data into Experience (X) and Salary (Y)
X = np.array([row[0] for row in data])
Y = np.array([row[1] for row in data])

# Plot Experience vs. Salary
plt.scatter(X, Y, color='blue')
plt.xlabel('Experience (years)')
plt.ylabel('Salary')
plt.title('Experience vs. Salary')
plt.show()

# Initialize parameters
m = 0  # Slope
b = 0  # Intercept
learning_rate = 0.01
iterations = 1000
n = len(X)

# Function to compute Mean Squared Error
def compute_mse(X, Y, m, b):
    total_error = 0
    for i in range(len(X)):
        total_error += (Y[i] - (m * X[i] + b)) ** 2
    return total_error / n

# Gradient Descent
errors = []
for _ in range(iterations):
    m_grad = 0
    b_grad = 0
    for i in range(len(X)):
        m_grad += -2 * X[i] * (Y[i] - (m * X[i] + b))
        b_grad += -2 * (Y[i] - (m * X[i] + b))
    m -= (m_grad / n) * learning_rate
    b -= (b_grad / n) * learning_rate
    mse = compute_mse(X, Y, m, b)
    errors.append(mse)

print(f"Final Parameters: m = {m}, b = {b}")

# Plot Training Error at Each Iteration
plt.plot(range(iterations), errors, color='red')
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error')
plt.title('Training Error at Each Iteration')
plt.show()

# Plot Experience vs. Salary with Best Fit Line
plt.scatter(X, Y, color='blue')
plt.plot(X, [m * x + b for x in X], color='red')  # Best fit line
plt.xlabel('Experience (years)')
plt.ylabel('Salary')
plt.title('Experience vs. Salary with Best Fit Line')
plt.show()
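Once training has finished, predicting a salary for an unseen experience value is just the fitted line evaluated at that point. A small illustrative example (the value of 5 years is hypothetical, not taken from the dataset):

# Hypothetical example: predict the salary for 5 years of experience using the fitted line
years = 5
predicted_salary = m * years + b
print(f"Predicted salary for {years} years of experience: {predicted_salary:.2f}")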

OUTPUT:
Program 2:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset
data = pd.read_csv('insurance.csv')

# Display the top 10 samples of the dataset
print(data.head(10))

# Display the features and label
features = data.columns[:-1]
label = data.columns[-1]
print("Features (Independent Variables):", features.tolist())
print("Label (Dependent Variable):", label)

# Remove missing value samples
data = data.dropna()
print("Number of samples after removing missing values:", len(data))

# Convert categorical variables to numeric using one-hot encoding
data = pd.get_dummies(data, columns=['sex', 'smoker', 'region'], drop_first=True)
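
# Illustrative aside (not part of the original lab code): what get_dummies with
# drop_first=True does on a tiny made-up frame. Each categorical column is replaced
# by indicator columns for all but its first category.
toy = pd.DataFrame({'sex': ['male', 'female'], 'smoker': ['yes', 'no'], 'charges': [200.0, 150.0]})
print(pd.get_dummies(toy, columns=['sex', 'smoker'], drop_first=True))  # columns: charges, sex_male, smoker_yes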

# Update the features to reflect the one-hot encoded columns
# (take every column except the label, since get_dummies appends the new
# indicator columns after the original numeric columns)
features = data.columns.drop(label)

# Normalize the feature set
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])
print("Normalized feature set:")
print(data.head(10))

# Split the data into training and testing sets
X = data[features]
y = data[label]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Number of training samples:", len(X_train))
print("Number of testing samples:", len(X_test))

# Train the regression model
model = LinearRegression()
model.fit(X_train, y_train)
print("Model coefficients:", model.coef_)
print("Model intercept:", model.intercept_)

# Predict the test data
y_pred = model.predict(X_test)

# Calculate and display the testing error (Mean Squared Error)
mse = mean_squared_error(y_test, y_pred)
print("Testing Error (Mean Squared Error):", mse)

OUTPUT:
Program 3:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load the Iris dataset from the local CSV file
data = pd.read_csv('iris.csv')

# Display the top 10 samples of the dataset
print(data.head(10))

# Check the column names to identify the target variable
print("Column names:", data.columns)

# The target variable column name is 'Species'
target_variable = 'Species'

# Encode the class labels into numeric values
label_encoder = LabelEncoder()
data[target_variable] = label_encoder.fit_transform(data[target_variable])

# Split the data into features (X) and labels (y)
X = data.iloc[:, 1:-1]  # Exclude the 'Id' column and the target variable column
y = data.iloc[:, -1]

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Regression implementation
class LogisticRegression:
    def __init__(self, learning_rate=0.01, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        self.m, self.n = X.shape
        self.weights = np.zeros(self.n)
        self.bias = 0
        self.errors = []
        epsilon = 1e-7  # Small epsilon value to avoid log(0)

        for _ in range(self.iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(linear_model)

            # Gradients of the binary cross-entropy loss
            dw = (1 / self.m) * np.dot(X.T, (y_pred - y))
            db = (1 / self.m) * np.sum(y_pred - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Binary cross-entropy loss for this iteration
            loss = -(1 / self.m) * np.sum(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon))
            self.errors.append(loss)

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_model)
        return [1 if i > 0.5 else 0 for i in y_pred]
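The fit method above performs gradient descent on the binary cross-entropy loss. With predictions $\hat{y} = \sigma(Xw + b)$, the quantities computed in the loop correspond to

$$
L = -\frac{1}{m}\sum_{i=1}^{m}\Bigl[y_i\log\hat{y}_i + (1-y_i)\log(1-\hat{y}_i)\Bigr],
\qquad
\nabla_w L = \frac{1}{m}X^{\top}(\hat{y}-y),
\qquad
\frac{\partial L}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}(\hat{y}_i - y_i).
$$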

# Train the logistic regression model
log_reg = LogisticRegression(learning_rate=0.01, iterations=1000)
log_reg.fit(X_train, y_train)
print("Model weights:", log_reg.weights)
print("Model bias:", log_reg.bias)

# Predict the test data
y_pred = log_reg.predict(X_test)

# Calculate and display the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

OUTPUT:
