Btech1007022 Lab5.1

The document contains three programs demonstrating data analysis and machine learning techniques using Python. Program 1 performs linear regression on salary data based on experience, Program 2 applies linear regression to an insurance dataset, and Program 3 implements logistic regression on the Iris dataset. Each program includes data loading, preprocessing, model training, and evaluation steps.


Name-Md Arbab

Roll - BTECH/10070/22

LAB-5

Program1:

import csv
import numpy as np
import matplotlib.pyplot as plt

# Load the data
data = []
with open('Salary_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    for row in reader:
        data.append([float(row[0]), float(row[1])])

# Separate the data into Experience (X) and Salary (Y)
X = np.array([row[0] for row in data])
Y = np.array([row[1] for row in data])

# Plot Experience vs. Salary
plt.scatter(X, Y, color='blue')
plt.xlabel('Experience (years)')
plt.ylabel('Salary')
plt.title('Experience vs. Salary')
plt.show()
# Initialize parameters
m = 0  # Slope
b = 0  # Intercept
learning_rate = 0.01
iterations = 1000
n = len(X)

# Function to compute Mean Squared Error
def compute_mse(X, Y, m, b):
    total_error = 0
    for i in range(len(X)):
        total_error += (Y[i] - (m * X[i] + b)) ** 2
    return total_error / n

# Gradient Descent
errors = []
for _ in range(iterations):
    m_grad = 0
    b_grad = 0
    for i in range(len(X)):
        m_grad += -2 * X[i] * (Y[i] - (m * X[i] + b))
        b_grad += -2 * (Y[i] - (m * X[i] + b))
    m -= (m_grad / n) * learning_rate
    b -= (b_grad / n) * learning_rate
    mse = compute_mse(X, Y, m, b)
    errors.append(mse)

print(f"Final Parameters: m = {m}, b = {b}")

# Plot Training Error at Each Iteration
plt.plot(range(iterations), errors, color='red')
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error')
plt.title('Training Error at Each Iteration')
plt.show()

# Plot Experience vs. Salary with Best Fit Line
plt.scatter(X, Y, color='blue')
plt.plot(X, [m * x + b for x in X], color='red')  # Best fit line
plt.xlabel('Experience (years)')
plt.ylabel('Salary')
plt.title('Experience vs. Salary with Best Fit Line')
plt.show()
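As a quick sanity check on the gradient-descent result (not part of the lab code), the same line can be fitted in closed form with NumPy's standard np.polyfit, assuming the X and Y arrays defined above:

# Closed-form least-squares fit for comparison; np.polyfit returns the
# coefficients of a degree-1 polynomial, highest power first: [slope, intercept]
m_ols, b_ols = np.polyfit(X, Y, 1)
print(f"Closed-form OLS: m = {m_ols}, b = {b_ols}")

If gradient descent has converged, m and b should be close to these values.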

OUTPUT:

Program2:

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Load the dataset
data = pd.read_csv('insurance.csv')

# Display the top 10 samples of the dataset
print(data.head(10))

# Display the features and label
features = data.columns[:-1]
label = data.columns[-1]
print("Features (Independent Variables):", features.tolist())
print("Label (Dependent Variable):", label)

# Remove missing value samples
data = data.dropna()
print("Number of samples after removing missing values:", len(data))

# Convert categorical variables to numeric using one-hot encoding
data = pd.get_dummies(data, columns=['sex', 'smoker', 'region'], drop_first=True)

# Update the features to reflect the one-hot encoded columns.
# Note: get_dummies appends the new columns at the end, so the label is no
# longer the last column; select every column except the label instead of
# columns[:-1], which would leak the label into the feature set.
features = data.columns.drop(label)

# Normalize the feature set
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])
print("Normalized feature set:")
print(data.head(10))
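To see what drop_first=True does, here is a small illustration on a made-up toy frame (not part of the lab data):

# Toy example: a 'smoker' column with values yes/no becomes one 0/1 column
toy = pd.DataFrame({'smoker': ['yes', 'no', 'yes']})
print(pd.get_dummies(toy, columns=['smoker'], drop_first=True))
# Only 'smoker_yes' remains; the dropped level ('no') acts as the baseline,
# which avoids redundant, perfectly correlated dummy columns.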

# Split the data into training and testing sets
X = data[features]
y = data[label]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Number of training samples:", len(X_train))
print("Number of testing samples:", len(X_test))

# Train the regression model
model = LinearRegression()
model.fit(X_train, y_train)
print("Model coefficients:", model.coef_)
print("Model intercept:", model.intercept_)

# Predict the test data
y_pred = model.predict(X_test)

# Calculate and display the testing error (Mean Squared Error)
mse = mean_squared_error(y_test, y_pred)
print("Testing Error (Mean Squared Error):", mse)

OUTPUT:
Program3:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load the Iris dataset from the local CSV file
data = pd.read_csv('iris.csv')

# Display the top 10 samples of the dataset
print(data.head(10))

# Check the column names to identify the target variable
print("Column names:", data.columns)

# The target variable column name is 'Species'
target_variable = 'Species'

# Encode the class labels into numeric values
label_encoder = LabelEncoder()
data[target_variable] = label_encoder.fit_transform(data[target_variable])

# Split the data into features (X) and labels (y)
X = data.iloc[:, 1:-1]  # Exclude the 'Id' column and the target variable column
y = data.iloc[:, -1]

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Logistic Regression implementation
# Note: this is a binary (single sigmoid output) logistic regression
# written from scratch and trained by gradient descent.
class LogisticRegression:

    def __init__(self, learning_rate=0.01, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        self.m, self.n = X.shape
        self.weights = np.zeros(self.n)
        self.bias = 0
        self.errors = []
        epsilon = 1e-7  # Small epsilon value to avoid log(0)
        for _ in range(self.iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(linear_model)
            dw = (1 / self.m) * np.dot(X.T, (y_pred - y))
            db = (1 / self.m) * np.sum(y_pred - y)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
            loss = -(1 / self.m) * np.sum(y * np.log(y_pred + epsilon)
                                          + (1 - y) * np.log(1 - y_pred + epsilon))
            self.errors.append(loss)
    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_model)
        return [1 if i > 0.5 else 0 for i in y_pred]
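For reference, the quantity tracked in self.errors is the average binary cross-entropy, and the dw and db expressions in fit are its gradients, with \(\hat{y} = \sigma(Xw + b)\):

\[
L(w, b) = -\frac{1}{m}\sum_{i=1}^{m}\left[y_i \log \hat{y}_i + (1 - y_i)\log(1 - \hat{y}_i)\right], \qquad
\frac{\partial L}{\partial w} = \frac{1}{m} X^{\top}(\hat{y} - y), \qquad
\frac{\partial L}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}(\hat{y}_i - y_i)
\]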

# Train the logistic regression model
log_reg = LogisticRegression(learning_rate=0.01, iterations=1000)
log_reg.fit(X_train, y_train)
print("Model weights:", log_reg.weights)
print("Model bias:", log_reg.bias)

# Predict the test data
y_pred = log_reg.predict(X_test)

# Calculate and display the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

OUTPUT:
