# Import the required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix
# Step 1: Load the dataset (Bank_data.csv)
data = pd.read_csv("Bank_data.csv")
# Step 2: Preprocess the dataset
# Drop the leftover index column, if present
if 'Unnamed: 0' in data.columns:
    data = data.drop(columns=['Unnamed: 0'])
# Encode the target variable 'y' ('no' to 0 and 'yes' to 1)
data['y'] = data['y'].map({'no': 0, 'yes': 1})
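# Optional sanity check (an addition, not part of the original steps): if the raw
# 'y' column contains any value other than 'no'/'yes', .map() silently produces NaN,
# which would break model fitting later. Failing fast here makes that visible.
if data['y'].isna().any():
    raise ValueError("Unexpected labels in 'y'; expected only 'no' and 'yes'.")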
# Step 3: Define the Dependent and Independent variables
X = data[['interest_rate', 'credit', 'march', 'may', 'previous', 'duration']] # Independent variables
y = data['y'] # Dependent variable
# Step 4: Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
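# Optional check (an addition): after StandardScaler, each column of X_scaled
# should have mean ~0 and standard deviation ~1.
print("Scaled feature means:", X_scaled.mean(axis=0).round(3))
print("Scaled feature stds: ", X_scaled.std(axis=0).round(3))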
# Step 5: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
# Step 6: Build the Model using SVM (Linear kernel)
model = SVC(kernel='linear', random_state=42)
model.fit(X_train, y_train)
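# Optional inspection (an addition, not required for the task): with a linear kernel,
# SVC exposes one weight per feature via coef_, which gives a rough sense of how
# strongly each standardized feature drives the decision boundary.
for feature, weight in zip(X.columns, model.coef_[0]):
    print(f"{feature}: {weight:.3f}")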
# Step 7: Predict the test data
y_pred = model.predict(X_test)
# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Results on test data:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)
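# Optional, more detailed summary (an addition): classification_report prints
# per-class precision/recall/F1 alongside the aggregate metrics above.
# Imported here only to keep this optional block self-contained.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred, target_names=['no', 'yes']))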
# Step 8: Load the new dataset (Bank_data_testing.csv)
new_data = pd.read_csv("Bank_data_testing.csv")
# Step 9: Apply necessary data preprocessing
# Drop the leftover index column, if present
if 'Unnamed: 0' in new_data.columns:
    new_data = new_data.drop(columns=['Unnamed: 0'])
# Encode the target variable 'y' ('no' to 0 and 'yes' to 1)
new_data['y'] = new_data['y'].map({'no': 0, 'yes': 1})
# Define Independent and Dependent variables
X_new = new_data[['interest_rate', 'credit', 'march', 'may', 'previous', 'duration']]
y_new = new_data['y']
# Scale the new data, reusing the scaler fitted in Step 4 (transform only, no re-fitting)
X_new_scaled = scaler.transform(X_new)
# Predict the results with the new dataset
y_new_pred = model.predict(X_new_scaled)
# Calculate metrics for the new dataset
new_accuracy = accuracy_score(y_new, y_new_pred)
new_recall = recall_score(y_new, y_new_pred)
new_f1 = f1_score(y_new, y_new_pred)
new_conf_matrix = confusion_matrix(y_new, y_new_pred)
print("\nResults on new dataset:")
print(f"Accuracy: {new_accuracy:.2f}")
print(f"Recall: {new_recall:.2f}")
print(f"F1 Score: {new_f1:.2f}")
print("Confusion Matrix:")
print(new_conf_matrix)
# Expected results for the new dataset:
# Accuracy: 0.86
# Recall: 0.91
# F1 Score: 0.87
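# Optional side-by-side comparison (an addition): collecting both sets of metrics in
# a small DataFrame makes any drop in performance on the new dataset easy to spot.
comparison = pd.DataFrame(
    {'test_split': [accuracy, recall, f1],
     'new_dataset': [new_accuracy, new_recall, new_f1]},
    index=['accuracy', 'recall', 'f1'])
print("\nMetric comparison:")
print(comparison.round(2))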