Ashutosh Anand
DL Assignment Lab 3 Employee
202318035
DATASET 3: EMPLOYEE
# In [ ]:
# Cell 1: load the employee dataset, encode/drop columns, and draw a few
# exploratory plots.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
# NOTE(review): the original file name was lost when this notebook was
# exported; 'Employee.csv' is assumed -- confirm against the actual upload.
file_path = '/content/Employee.csv'
employee_data = pd.read_csv(file_path)

# Display the first few rows of the dataset.
# print() is used because a bare expression is only rendered by a notebook
# when it is the last statement of the cell.
print(employee_data.head())

# Data preprocessing: encode the two text columns as 0/1 and drop the
# columns that are not used below.
employee_data['Gender'] = employee_data['Gender'].map({'Male': 1, 'Female': 0})
employee_data['EverBenched'] = employee_data['EverBenched'].map({'Yes': 1, 'No': 0})
employee_data = employee_data.drop(columns=['Education', 'City', 'JoiningYear'])

# Display the cleaned dataset
print(employee_data.head())

# Gender distribution plot (after the mapping above: 1 = Male, 0 = Female)
sns.countplot(x='Gender', data=employee_data)
plt.title('Gender Distribution')
plt.show()

# Correlation matrix heatmap
sns.heatmap(employee_data.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation Matrix')
plt.show()

# Age distribution by gender
sns.histplot(data=employee_data, x='Age', hue='Gender', multiple='stack', kde=True)
plt.title('Age Distribution by Gender')
plt.show()

# Experience in current domain by gender
# NOTE(review): the plotting function name was lost in the export; a box
# plot is assumed from the x/y arguments -- confirm.
sns.boxplot(data=employee_data, x='Gender', y='ExperienceInCurrentDomain')
plt.title('Experience in Current Domain by Gender')
plt.show()

# Payment tier distribution by gender
sns.countplot(x='PaymentTier', data=employee_data, hue='Gender')
plt.title('Payment Tier Distribution by Gender')
plt.show()
# In [ ]:
# Cell 2: logistic-regression baseline that predicts 'Gender' from the
# remaining columns of employee_data.
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Define features and target
X = employee_data.drop(columns=['Gender'])
y = employee_data['Gender']

# Split the data into training and test sets.
# NOTE(review): the original line was cut off after "random_state=4";
# 42 is assumed -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling.
# The scaler is fitted on the training rows only, so that the test rows do
# not influence the mean/variance used for standardisation (the original
# fitted it on the full data before splitting). Metrics may therefore
# differ slightly from a run that scaled before splitting.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Scaled copy of the full feature matrix, kept under its original name for
# any code that still refers to it.
X_scaled = scaler.transform(X)

# Initialize and train the logistic regression model
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

# Predict on the test set
predictions = logistic_model.predict(X_test)

# Evaluate the model
model_accuracy = accuracy_score(y_test, predictions)
classification_report_result = classification_report(y_test, predictions)
print(f'Accuracy: {model_accuracy:.4f}')
print('Classification Report:')
print(classification_report_result)
Accuracy: 0.6584317937701396
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.29      0.42       388
           1       0.65      0.92      0.76       543

    accuracy                           0.66       931
   macro avg       0.68      0.61      0.59       931
weighted avg       0.68      0.66      0.62       931
# In [ ]:
# Cell 3: feed-forward network for the same Gender classification task.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, classification_report

# Convert features and target to PyTorch tensors.
# X_train / X_test are used as produced by the previous cell; y_train /
# y_test are accessed through .values, i.e. they are expected to be pandas
# objects.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# unsqueeze(1) adds a trailing dimension so the targets have one column,
# matching the single output unit of the network.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create datasets and dataloaders (only the training data is shuffled, so
# test predictions stay in the same order as y_test).
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
# Define the neural network model
class ANNModel(nn.Module):
    """Two-hidden-layer feed-forward network for binary classification.

    Layout: input -> Linear(64) -> ReLU -> Linear(64) -> ReLU -> Linear(1)
    -> Sigmoid. The forward pass returns one value in [0, 1] per input row.

    Args:
        input_dim: Number of input features. When omitted, it is read from
            the module-level ``X_train_tensor`` (its second dimension), which
            is what the original notebook cell did.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: size the first layer from the
            # training tensor defined earlier in the notebook.
            input_dim = X_train_tensor.shape[1]
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc_out = nn.Linear(64, 1)

    def forward(self, x):
        """Return the sigmoid output, one column per row of ``x``."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc_out(x))  # Output layer for binary classification
        return x
# Initialize the model, loss function, and optimizer
model = ANNModel()
criterion = nn.BCELoss()  # Binary Cross Entropy Loss
# NOTE(review): the optimizer class name was lost in the export; Adam is
# assumed -- confirm.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for batch_inputs, batch_targets in train_loader:
        optimizer.zero_grad()
        # Named batch_outputs (not "predictions") so the logistic-regression
        # predictions from the earlier cell are not overwritten.
        batch_outputs = model(batch_inputs)
        loss = criterion(batch_outputs, batch_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Optionally print training progress (mean batch loss for this epoch)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}')
# Evaluation
model.eval()
predictions_list = []
with torch.no_grad():  # no gradients are needed for inference
    for batch_inputs, _ in test_loader:
        batch_predictions = model(batch_inputs)
        predictions_list.append(batch_predictions)

# Concatenate all predictions and convert to numpy array
all_predictions = torch.cat(predictions_list).numpy()
# Convert probabilities to binary class labels (threshold 0.5) and flatten
# the single-column model output to 1-D so it has the same shape as y_test
# when passed to the sklearn metrics.
all_predictions = (all_predictions > 0.5).astype(int).ravel()

# Evaluate the model
accuracy = accuracy_score(y_test, all_predictions)
report = classification_report(y_test, all_predictions)
print(f'Accuracy: {accuracy:.4f}')
print(f'Classification Report:\n{report}')
Accuracy: 0.6702470461868958
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.34      0.46       388
           1       0.66      0.90      0.76       543

    accuracy                           0.67       931
   macro avg       0.69      0.62      0.61       931
weighted avg       0.68      0.67      0.64       931