0% found this document useful (0 votes)
15 views · 6 pages

Customer Churn Prediction Project

The document outlines a Customer Churn Prediction Project consisting of multiple Python scripts for data loading, preprocessing, model training, and evaluation. It includes functions for loading data from a CSV file, preprocessing it for machine learning, training logistic regression, decision tree, and random forest models, and evaluating their performance using various metrics. The main script orchestrates the entire process, ensuring that models are trained and evaluated on the customer churn dataset.

Uploaded by

Fayaz Basha
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
15 views · 6 pages

Customer Churn Prediction Project

The document outlines a Customer Churn Prediction Project consisting of multiple Python scripts for data loading, preprocessing, model training, and evaluation. It includes functions for loading data from a CSV file, preprocessing it for machine learning, training logistic regression, decision tree, and random forest models, and evaluating their performance using various metrics. The main script orchestrates the entire process, ensuring that models are trained and evaluated on the customer churn dataset.

Uploaded by

Fayaz Basha
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

Customer Churn Prediction Project

1. data_loader.py

import pandas as pd

def load_data(file_path):
    """Read the CSV file at *file_path* into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist.
    """
    import sys

    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        # Callers branch on ``None``; report the problem so a wrong path
        # does not make the whole script exit without any output.
        print(f"Data file not found: {file_path}", file=sys.stderr)
        return None

def explore_data(df):
    """Print a quick overview of the churn dataset.

    Prints the first rows, the column/dtype summary, descriptive
    statistics and the value counts of the ``Churn`` column.

    Args:
        df: DataFrame to inspect, or ``None`` (in which case nothing is
            printed).
    """
    if df is None:
        return

    print(df.head())
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would add a stray "None" line to the output.
    df.info()
    print(df.describe())
    print(df['Churn'].value_counts())

if __name__ == "__main__":
    # Script entry point: load the churn CSV and, when loading succeeded,
    # print its overview.
    frame = load_data('customer_churn.csv')
    if frame is not None:
        explore_data(frame)

2. data_preprocessing.py

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

from sklearn.compose import ColumnTransformer

from sklearn.pipeline import Pipeline


def preprocess_data(df):
    """Prepare the churn DataFrame for model training.

    Works on a copy of *df*:

    1. ``TotalCharges`` is converted to numeric; values that cannot be
       parsed become NaN and those rows are dropped.
    2. Feature columns are split by dtype into numerical (``int64`` /
       ``float64``) and categorical (``object``) lists, leaving out the
       ``customerID`` column and the ``Churn`` target.
    3. An unfitted ``ColumnTransformer`` is built that standard-scales the
       numerical columns and one-hot encodes the categorical ones.
    4. The target is label-encoded and the data is split 80/20, stratified
       on the target, with a fixed random seed.

    Args:
        df: Raw DataFrame containing at least ``TotalCharges`` and ``Churn``.

    Returns:
        Tuple ``(preprocessor, X_train, X_test, y_train, y_test)``.
    """
    target_variable = 'Churn'
    # Neither the identifier nor the target may be used as a feature. They are
    # excluded from BOTH lists so that an already-numeric target does not end
    # up in the numerical list while being absent from X.
    excluded = {'customerID', target_variable}

    df = df.copy()
    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
    df = df.dropna(subset=['TotalCharges'])

    categorical_features = [
        column
        for column in df.select_dtypes(include='object').columns
        if column not in excluded
    ]
    numerical_features = [
        column
        for column in df.select_dtypes(include=['int64', 'float64']).columns
        if column not in excluded
    ]

    # Columns named in neither list are not passed to any transformer.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ]
    )

    label_encoder = LabelEncoder()
    df[target_variable] = label_encoder.fit_transform(df[target_variable])

    X = df.drop(target_variable, axis=1)
    y = df[target_variable]

    # Stratify so both splits keep the same churn / no-churn proportions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    return preprocessor, X_train, X_test, y_train, y_test

if __name__ == "__main__":
    data_file = 'customer_churn.csv'

    # pd.read_csv never returns None: a missing file raises FileNotFoundError,
    # so that is the condition that has to be handled here.
    try:
        churn_df = pd.read_csv(data_file)
    except FileNotFoundError:
        churn_df = None
        print(f"Data file not found: {data_file}")

    if churn_df is not None:
        preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)


3. model_training.py

from sklearn.linear_model import LogisticRegression

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.pipeline import Pipeline

def train_logistic_regression(preprocessor, X_train, y_train):
    """Fit a preprocessing + logistic-regression pipeline.

    Args:
        preprocessor: Transformer applied to the features before the
            classifier (e.g. the ColumnTransformer built during preprocessing).
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    from sklearn.base import clone

    # Pipeline.fit fits its steps in place. Cloning gives this model its own
    # preprocessor, so fitting another pipeline with the same *preprocessor*
    # object later cannot change this model's transformation.
    model = Pipeline(steps=[
        ('preprocessor', clone(preprocessor)),
        ('classifier', LogisticRegression(random_state=42)),
    ])
    model.fit(X_train, y_train)
    return model

def train_decision_tree(preprocessor, X_train, y_train):
    """Fit a preprocessing + decision-tree pipeline.

    Args:
        preprocessor: Transformer applied to the features before the
            classifier (e.g. the ColumnTransformer built during preprocessing).
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    from sklearn.base import clone

    # Pipeline.fit fits its steps in place. Cloning gives this model its own
    # preprocessor, so fitting another pipeline with the same *preprocessor*
    # object later cannot change this model's transformation.
    model = Pipeline(steps=[
        ('preprocessor', clone(preprocessor)),
        ('classifier', DecisionTreeClassifier(random_state=42)),
    ])
    model.fit(X_train, y_train)
    return model

def train_random_forest(preprocessor, X_train, y_train):
    """Fit a preprocessing + random-forest pipeline.

    Args:
        preprocessor: Transformer applied to the features before the
            classifier (e.g. the ColumnTransformer built during preprocessing).
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    from sklearn.base import clone

    # Pipeline.fit fits its steps in place. Cloning gives this model its own
    # preprocessor, so fitting another pipeline with the same *preprocessor*
    # object later cannot change this model's transformation.
    model = Pipeline(steps=[
        ('preprocessor', clone(preprocessor)),
        ('classifier', RandomForestClassifier(random_state=42)),
    ])
    model.fit(X_train, y_train)
    return model

if __name__ == "__main__":
    # Script entry point: load and preprocess the churn data, then train the
    # three classifiers on the training split.
    from data_loader import load_data
    from data_preprocessing import preprocess_data

    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)

    # load_data returns None when the CSV is missing; nothing to train then.
    if churn_df is not None:
        preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)

        logistic_model = train_logistic_regression(preprocessor, X_train, y_train)
        decision_tree_model = train_decision_tree(preprocessor, X_train, y_train)
        random_forest_model = train_random_forest(preprocessor, X_train, y_train)

4. model_evaluation.py

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)

import matplotlib.pyplot as plt

import seaborn as sns

def evaluate_model(model, X_test, y_test, model_name):
    """Print classification metrics for *model* and plot its confusion matrix.

    Predicts on *X_test*, prints accuracy, precision, recall, F1-score and
    the confusion matrix, then shows the matrix as a seaborn heatmap.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True test labels; assumed to be encoded as 0 (no churn) and
            1 (churn), matching the heatmap tick labels below.
        model_name: Name used in the printed heading and the plot title.
    """
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Pin the label order so the matrix is always 2x2 and lines up with the
    # two hard-coded tick labels, even if one class is missing from the data.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

    print(f"\n--- {model_name} Evaluation ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print("\nConfusion Matrix:")
    print(conf_matrix)

    class_names = ['No Churn', 'Churn']
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix - {model_name}')
    plt.show()

if __name__ == "__main__":
    # Script entry point: train the three classifiers and evaluate each one
    # on the held-out test split.
    from data_loader import load_data
    from data_preprocessing import preprocess_data
    from model_training import train_logistic_regression, train_decision_tree, train_random_forest

    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)

    # load_data returns None when the CSV is missing; nothing to evaluate then.
    if churn_df is not None:
        preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)

        logistic_model = train_logistic_regression(preprocessor, X_train, y_train)
        decision_tree_model = train_decision_tree(preprocessor, X_train, y_train)
        random_forest_model = train_random_forest(preprocessor, X_train, y_train)

        evaluate_model(logistic_model, X_test, y_test, "Logistic Regression")
        evaluate_model(decision_tree_model, X_test, y_test, "Decision Tree")
        evaluate_model(random_forest_model, X_test, y_test, "Random Forest")

5. main.py

from data_loader import load_data

from data_preprocessing import preprocess_data

from model_training import train_logistic_regression, train_decision_tree, train_random_forest

from model_evaluation import evaluate_model

def main():
    """Run the full churn workflow: load, preprocess, train and evaluate.

    Loads ``customer_churn.csv``, splits and preprocesses it, trains the
    logistic-regression, decision-tree and random-forest pipelines, and then
    evaluates each of them on the test split. Returns without doing anything
    further when the data could not be loaded.
    """
    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
    if churn_df is None:
        # load_data signals a missing file with None; nothing to train on.
        return

    preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)

    # Train everything first, then evaluate, so the blocking plot windows of
    # the evaluation step only appear once all models are ready.
    logistic_model = train_logistic_regression(preprocessor, X_train, y_train)
    decision_tree_model = train_decision_tree(preprocessor, X_train, y_train)
    random_forest_model = train_random_forest(preprocessor, X_train, y_train)

    evaluate_model(logistic_model, X_test, y_test, "Logistic Regression")
    evaluate_model(decision_tree_model, X_test, y_test, "Decision Tree")
    evaluate_model(random_forest_model, X_test, y_test, "Random Forest")


if __name__ == "__main__":
    main()

You might also like