Assignment 9
Assignment 9
In [1]:
# Step 1: Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
In [2]:
# Step 2: Load the dataset
# Read the Social Network Ads CSV into a DataFrame named `data`.
data = pd.read_csv(filepath_or_buffer='/home/pict/Downloads/Social_Network_Ads.csv')
# Show the leading rows (head() returns the first 5 by default) so the
# column names and value types can be inspected before modelling.
print(data.head(n=5))
In [3]:
# Step 3: Select the features (X) and target (y)
# Assumes the dataset exposes 'Age' and 'EstimatedSalary' as the model inputs
# and 'Purchased' as the label to predict.
# Both selections are converted to NumPy arrays for scikit-learn.
X = data.loc[:, ['Age', 'EstimatedSalary']].to_numpy()
y = data.loc[:, 'Purchased'].to_numpy()
In [4]:
# Step 4: Split the data into training and testing sets
# test_size=0.2 holds out 20% of the samples for evaluation (80% train);
# random_state=42 fixes the seed handed to the splitter.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
In [5]:
# Step 5: Feature Scaling (standardize features)
# The scaler is fitted on the training split only, and that same fitted
# scaler is then applied to both splits, so the test split is transformed
# with the training statistics rather than its own.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
In [6]:
# Step 6: Train the Logistic Regression model
# Build the estimator with a fixed random_state, then fit it on the
# standardized training features and their labels.
classifier = LogisticRegression(random_state=42)
classifier.fit(X=X_train, y=y_train)
LogisticRegression(random_state=42)
In [7]:
# Step 7: Make predictions on the test set
# X_test was scaled with the training-fitted scaler in Step 5.
y_pred = classifier.predict(X=X_test)
In [8]:
# Step 8: Compute the Confusion Matrix
# Compare the held-out true labels against the model's predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
localhost:8888/notebooks/Untitled19.ipynb?kernel_name=python3 1/2
05/03/2025, 12:35 Untitled19 - Jupyter Notebook
In [9]:
# Step 9: Calculate Accuracy, Precision, Recall, F1 Score, and Error Rate
accuracy = accuracy_score(y_test, y_pred)
# Error rate is the complement of accuracy.
error_rate = 1 - accuracy
# The remaining scorers share the (y_true, y_pred) call shape, so evaluate
# them in one pass and unpack in the same order they are listed.
precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (precision_score, recall_score, f1_score)
)

# Print confusion matrix and metrics
# Each argument is emitted on its own line via sep="\n".
print("Confusion Matrix:", cm, "\nPerformance Metrics:", sep="\n")
print(
    f"Accuracy: {accuracy:.4f}",
    f"Error Rate: {error_rate:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)
Confusion Matrix:
[[50 2]
[ 9 19]]
Performance Metrics:
Accuracy: 0.8625
Error Rate: 0.1375
Precision: 0.9048
Recall: 0.6786
F1 Score: 0.7755
In [10]:
# Optional: Visualize the confusion matrix (for better understanding)
# The y axis is labelled 'True' and the x axis 'Predicted'; both axes use the
# same pair of class names for their tick labels.
class_labels = ['Not Purchased', 'Purchased']

# Draw on an explicit Axes so the labels and title attach to this figure
# rather than to whatever pyplot considers the "current" one.
fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,      # write the count inside each cell
    fmt='d',         # format the annotations as integers
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()
localhost:8888/notebooks/Untitled19.ipynb?kernel_name=python3 2/2