Open navigation menu

Scribd

0% found this document useful (0 votes)

15 views6 pages

Customer Churn Prediction Project

The document outlines a Customer Churn Prediction Project consisting of multiple Python scripts for data loading, preprocessing, model training, and evaluation. It includes functions for loading data from a CSV file, preprocessing it for machine learning, training logistic regression, decision tree, and random forest models, and evaluating their performance using various metrics. The main script orchestrates the entire process, ensuring that models are trained and evaluated on the customer churn dataset.

Uploaded by

Copyright

© All Rights Reserved

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

15 views6 pages

Customer Churn Prediction Project

The document outlines a Customer Churn Prediction Project consisting of multiple Python scripts for data loading, preprocessing, model training, and evaluation. It includes functions for loading data from a CSV file, preprocessing it for machine learning, training logistic regression, decision tree, and random forest models, and evaluating their performance using various metrics. The main script orchestrates the entire process, ensuring that models are trained and evaluated on the customer churn dataset.

Uploaded by

Copyright

© All Rights Reserved

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 6

Customer Churn Prediction Project

1. data_loader.py

import pandas as pd

def load_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file; handed straight to
            ``pd.read_csv``.

    Returns:
        The DataFrame read from ``file_path``, or ``None`` when the file
        does not exist. Callers must check for ``None`` before use.

    Only ``FileNotFoundError`` is handled here; any other error raised by
    ``pd.read_csv`` propagates to the caller.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        # A missing file is reported as None instead of an exception so
        # the calling scripts can simply skip the rest of their workflow.
        return None

def explore_data(df):
    """Print a quick overview of the churn DataFrame to stdout.

    Shows the first rows, the column/dtype summary, descriptive statistics
    and the class distribution of the ``'Churn'`` column.

    Args:
        df: DataFrame to inspect, or ``None`` (in which case nothing is
            printed).

    Raises:
        KeyError: If ``df`` has no ``'Churn'`` column.
    """
    if df is None:
        return

    print(df.head())
    # DataFrame.info() writes its report itself and returns None, so it
    # must not be wrapped in print() (that would emit a stray "None" line).
    df.info()
    print(df.describe())
    print(df['Churn'].value_counts())

if __name__ == "__main__":
    # Script entry point: load the churn CSV from the working directory
    # and print an overview of it.
    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
    # load_data returns None when the file is missing; skip exploring then.
    if churn_df is not None:
        explore_data(churn_df)

2. data_preprocessing.py

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

from sklearn.compose import ColumnTransformer

from sklearn.pipeline import Pipeline

def preprocess_data(df):
    """Clean the churn DataFrame and build the pieces needed for training.

    Steps performed:
      1. Work on a copy so the caller's DataFrame is left unmodified.
      2. Convert ``'TotalCharges'`` to numeric (unparseable values become
         NaN) and drop the rows where it is NaN.
      3. Split the remaining columns into categorical (``object`` dtype)
         and numerical (``int64``/``float64``) feature lists, excluding
         ``'customerID'`` and the target.
      4. Build an *unfitted* ``ColumnTransformer`` that standard-scales the
         numerical features and one-hot encodes the categorical ones.
      5. Label-encode the ``'Churn'`` target and make a stratified 80/20
         train/test split with ``random_state=42``.

    Args:
        df: Raw DataFrame containing at least the ``'TotalCharges'`` and
            ``'Churn'`` columns.

    Returns:
        Tuple ``(preprocessor, X_train, X_test, y_train, y_test)``. The
        feature frames still contain every non-target column (including
        ``'customerID'`` if present); columns not listed in the
        transformer are not passed to its scaler or encoder.

    Raises:
        KeyError: If ``'TotalCharges'`` or ``'Churn'`` is missing.
    """
    target_variable = 'Churn'
    # Columns that must never be treated as model features, regardless of
    # the dtype they happen to be stored with.
    excluded_columns = {'customerID', target_variable}

    df = df.copy()

    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
    df.dropna(subset=['TotalCharges'], inplace=True)

    categorical_features = [
        column
        for column in df.select_dtypes(include='object').columns
        if column not in excluded_columns
    ]
    # The exclusion is applied here as well: a numerically stored target
    # (e.g. 0/1) would otherwise be listed as a feature even though it is
    # dropped from X below, making the transformer fail on a missing column.
    numerical_features = [
        column
        for column in df.select_dtypes(include=['int64', 'float64']).columns
        if column not in excluded_columns
    ]

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ]
    )

    label_encoder = LabelEncoder()
    df[target_variable] = label_encoder.fit_transform(df[target_variable])

    X = df.drop(target_variable, axis=1)
    y = df[target_variable]

    # Stratify on the target so both splits keep the same class ratio.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    return preprocessor, X_train, X_test, y_train, y_test

if __name__ == "__main__":
    # pd.read_csv never returns None (it raises FileNotFoundError), so the
    # None check below only works when the file is read through load_data,
    # which is how the other scripts of this project load the CSV.
    from data_loader import load_data

    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
    if churn_df is not None:
        preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)

3. model_training.py

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.pipeline import Pipeline

def train_logistic_regression(preprocessor, X_train, y_train):
    """Fit a preprocessing + logistic-regression pipeline.

    Args:
        preprocessor: Transformer used as the first pipeline step (for
            example the ColumnTransformer returned by ``preprocess_data``).
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``Pipeline`` with the steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    from sklearn.base import clone

    model = Pipeline(steps=[
        # Pipeline.fit fits its steps in place. The same preprocessor
        # object is handed to several training functions, so each pipeline
        # gets its own unfitted copy instead of sharing mutable state.
        # safe=False falls back to a deep copy for non-estimator steps.
        ('preprocessor', clone(preprocessor, safe=False)),
        ('classifier', LogisticRegression(random_state=42)),
    ])
    model.fit(X_train, y_train)
    return model

def train_decision_tree(preprocessor, X_train, y_train):
    """Fit a preprocessing + decision-tree pipeline.

    Args:
        preprocessor: Transformer used as the first pipeline step (for
            example the ColumnTransformer returned by ``preprocess_data``).
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``Pipeline`` with the steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    from sklearn.base import clone

    model = Pipeline(steps=[
        # Pipeline.fit fits its steps in place. The same preprocessor
        # object is handed to several training functions, so each pipeline
        # gets its own unfitted copy instead of sharing mutable state.
        # safe=False falls back to a deep copy for non-estimator steps.
        ('preprocessor', clone(preprocessor, safe=False)),
        ('classifier', DecisionTreeClassifier(random_state=42)),
    ])
    model.fit(X_train, y_train)
    return model

def train_random_forest(preprocessor, X_train, y_train):
    """Fit a preprocessing + random-forest pipeline.

    Args:
        preprocessor: Transformer used as the first pipeline step (for
            example the ColumnTransformer returned by ``preprocess_data``).
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``Pipeline`` with the steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    from sklearn.base import clone

    model = Pipeline(steps=[
        # Pipeline.fit fits its steps in place. The same preprocessor
        # object is handed to several training functions, so each pipeline
        # gets its own unfitted copy instead of sharing mutable state.
        # safe=False falls back to a deep copy for non-estimator steps.
        ('preprocessor', clone(preprocessor, safe=False)),
        ('classifier', RandomForestClassifier(random_state=42)),
    ])
    model.fit(X_train, y_train)
    return model

if __name__ == "__main__":
    # Script entry point: load the data, preprocess it and train all three
    # models. The project modules are imported here so that importing this
    # module as a library does not pull them in.
    from data_loader import load_data
    from data_preprocessing import preprocess_data

    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
    # load_data returns None when the CSV is missing; nothing to train then.
    if churn_df is not None:
        preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)
        logistic_model = train_logistic_regression(preprocessor, X_train, y_train)
        decision_tree_model = train_decision_tree(preprocessor, X_train, y_train)
        random_forest_model = train_random_forest(preprocessor, X_train, y_train)

4. model_evaluation.py

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)

import matplotlib.pyplot as plt

import seaborn as sns

def evaluate_model(model, X_test, y_test, model_name):
    """Print classification metrics for a model and plot its confusion matrix.

    Accuracy, precision, recall and F1 are computed from the model's
    predictions on ``X_test`` and printed with four decimals, followed by
    the raw confusion matrix. The matrix is then drawn as a seaborn heatmap
    and displayed with ``plt.show()``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features passed to ``model.predict``.
        y_test: True labels for ``X_test``.
            NOTE(review): assumed to be encoded as 0 = no churn and
            1 = churn, as the hard-coded tick labels imply; confirm
            against the label encoding used upstream.
        model_name: Display name used in the printed header and plot title.

    Returns:
        None.
    """
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Pin the label order so the matrix is always 2x2 and lines up with the
    # two hard-coded tick labels, even if a class is absent from the test
    # labels and the predictions.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

    print(f"\n--- {model_name} Evaluation ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print("\nConfusion Matrix:")
    print(conf_matrix)

    class_names = ['No Churn', 'Churn']
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix - {model_name}')
    plt.show()

if __name__ == "__main__":
    # Script entry point: run the whole workflow and evaluate each model.
    # The project modules are imported here so that importing this module
    # as a library does not pull them in.
    from data_loader import load_data
    from data_preprocessing import preprocess_data
    from model_training import (
        train_logistic_regression,
        train_decision_tree,
        train_random_forest,
    )

    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
    # load_data returns None when the CSV is missing; nothing to evaluate then.
    if churn_df is not None:
        preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)

        logistic_model = train_logistic_regression(preprocessor, X_train, y_train)
        decision_tree_model = train_decision_tree(preprocessor, X_train, y_train)
        random_forest_model = train_random_forest(preprocessor, X_train, y_train)

        evaluate_model(logistic_model, X_test, y_test, "Logistic Regression")
        evaluate_model(decision_tree_model, X_test, y_test, "Decision Tree")
        evaluate_model(random_forest_model, X_test, y_test, "Random Forest")

5. main.py

from data_loader import load_data

from data_preprocessing import preprocess_data

from model_training import train_logistic_regression, train_decision_tree, train_random_forest
from model_evaluation import evaluate_model

def main():
    """Run the churn workflow end to end: load, preprocess, train, evaluate.

    Reads ``customer_churn.csv`` from the working directory. When the file
    cannot be loaded (``load_data`` returns ``None``) the function returns
    without doing anything else. Otherwise a logistic regression, a
    decision tree and a random forest are trained on the training split
    and each one is evaluated on the test split, in that order.
    """
    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
    if churn_df is None:
        # Nothing to do without data.
        return

    preprocessor, X_train, X_test, y_train, y_test = preprocess_data(churn_df)

    logistic_model = train_logistic_regression(preprocessor, X_train, y_train)
    decision_tree_model = train_decision_tree(preprocessor, X_train, y_train)
    random_forest_model = train_random_forest(preprocessor, X_train, y_train)

    evaluate_model(logistic_model, X_test, y_test, "Logistic Regression")
    evaluate_model(decision_tree_model, X_test, y_test, "Decision Tree")
    evaluate_model(random_forest_model, X_test, y_test, "Random Forest")

if __name__ == "__main__":
    # Run the workflow only when executed as a script, not on import.
    main()

You might also like

Model Evaluation and Selection Cheatsheet 1708023215
No ratings yet
Model Evaluation and Selection Cheatsheet 1708023215
7 pages
Stats Tools Package
100% (1)
Stats Tools Package
44 pages
MCQ On Experimental Design
88% (8)
MCQ On Experimental Design
4 pages
Predictive Modelling
67% (3)
Predictive Modelling
64 pages
16BCB0126 VL2018195002535 Pe003
No ratings yet
16BCB0126 VL2018195002535 Pe003
40 pages
Spearman
No ratings yet
Spearman
9 pages
Chapter - AR and MA 7. Dec. 2020
No ratings yet
Chapter - AR and MA 7. Dec. 2020
60 pages
Hanoi - 2021: (Document Title)
No ratings yet
Hanoi - 2021: (Document Title)
19 pages
Home Work
No ratings yet
Home Work
12 pages
Naive Bayes Model With Python 1684166563
No ratings yet
Naive Bayes Model With Python 1684166563
9 pages
Karmbir 19 ML
No ratings yet
Karmbir 19 ML
20 pages
Siti Khairani (Hal 305-314) - 0
No ratings yet
Siti Khairani (Hal 305-314) - 0
10 pages
All Classifair
No ratings yet
All Classifair
3 pages
Peterson 2009 RFS - Mitchell A - Double Cluster
No ratings yet
Peterson 2009 RFS - Mitchell A - Double Cluster
46 pages
Quiz 4 - Practice PDF
100% (2)
Quiz 4 - Practice PDF
8 pages
Forecasting
No ratings yet
Forecasting
14 pages
Forec
No ratings yet
Forec
6 pages
Econ423 HAC Estimation
No ratings yet
Econ423 HAC Estimation
16 pages
A Brief Guide To Decisions at Each Step of The Propensity Score M
No ratings yet
A Brief Guide To Decisions at Each Step of The Propensity Score M
12 pages
Flight Price Predection 2
No ratings yet
Flight Price Predection 2
6 pages
Econ 231 Chapter 10 HW Solutions
No ratings yet
Econ 231 Chapter 10 HW Solutions
8 pages
Correction
No ratings yet
Correction
3 pages
Hasil Desktiptif
No ratings yet
Hasil Desktiptif
11 pages
Untitled Document
No ratings yet
Untitled Document
8 pages
Coe Projects
No ratings yet
Coe Projects
7 pages
Random Forest
No ratings yet
Random Forest
2 pages
Gridding Report - : Data Source
No ratings yet
Gridding Report - : Data Source
7 pages
2321-Article Text-8649-1-10-20200130
No ratings yet
2321-Article Text-8649-1-10-20200130
17 pages
Bank Customer Churn Analysis - Jupyter Notebook
No ratings yet
Bank Customer Churn Analysis - Jupyter Notebook
11 pages
Kabir Khan 1147 - 4
No ratings yet
Kabir Khan 1147 - 4
4 pages
Polynomial Regression
No ratings yet
Polynomial Regression
6 pages
Evaluation Metrics
No ratings yet
Evaluation Metrics
10 pages
Machine Learning Assignment 1
No ratings yet
Machine Learning Assignment 1
4 pages
Imbalanced Dataset Customer Churn
No ratings yet
Imbalanced Dataset Customer Churn
9 pages
Simple Linear Regression
No ratings yet
Simple Linear Regression
11 pages
MLT 1 - 7 Kanish
No ratings yet
MLT 1 - 7 Kanish
24 pages
Machine Learning Lab Mannual R20
No ratings yet
Machine Learning Lab Mannual R20
26 pages
ML Lab 146
No ratings yet
ML Lab 146
50 pages
Asssiment 3
No ratings yet
Asssiment 3
3 pages
New Chat: 1. Predicting Uber Ride Prices
No ratings yet
New Chat: 1. Predicting Uber Ride Prices
16 pages
Assignment 1
No ratings yet
Assignment 1
2 pages
MlLabManualdocx 2024 09 04 22 02 58
No ratings yet
MlLabManualdocx 2024 09 04 22 02 58
19 pages
One-Way Analysis of Variance by Abhishek Vijayvargiya
No ratings yet
One-Way Analysis of Variance by Abhishek Vijayvargiya
2 pages
Hasnain Saeed Lab Task # 11
No ratings yet
Hasnain Saeed Lab Task # 11
11 pages
Najir Shaikh Practical 4
No ratings yet
Najir Shaikh Practical 4
4 pages
Py - Customer Churn Classification - Actuaries' Analytical Cookbook
No ratings yet
Py - Customer Churn Classification - Actuaries' Analytical Cookbook
76 pages
Unit 2
No ratings yet
Unit 2
5 pages
# Use This Cell To Write Your Code
No ratings yet
# Use This Cell To Write Your Code
2 pages
Notebook - Main Code
No ratings yet
Notebook - Main Code
4 pages
Session 15-Logistic Regression
No ratings yet
Session 15-Logistic Regression
16 pages
AI ML - Cycle 2 Programs
No ratings yet
AI ML - Cycle 2 Programs
15 pages
ML Manual With Outputs
No ratings yet
ML Manual With Outputs
30 pages
ML Project Part B
No ratings yet
ML Project Part B
8 pages
CH 03
No ratings yet
CH 03
54 pages
Program
No ratings yet
Program
2 pages
Aiml Practicals
No ratings yet
Aiml Practicals
22 pages
Capstone Project - Jaro-Prof. Babji
No ratings yet
Capstone Project - Jaro-Prof. Babji
5 pages
ML External Xerox
No ratings yet
ML External Xerox
1 page
ML Lab Programs
No ratings yet
ML Lab Programs
9 pages
Decision Tree
No ratings yet
Decision Tree
6 pages
22-CP-63 ML Assignment Report
No ratings yet
22-CP-63 ML Assignment Report
5 pages
23BCE7092 ML Lab Assignment
No ratings yet
23BCE7092 ML Lab Assignment
14 pages
DS Food
No ratings yet
DS Food
23 pages
Da 012307
No ratings yet
Da 012307
8 pages
Ann Experiential Learning
No ratings yet
Ann Experiential Learning
43 pages
Expt 1 - Curve Fitting
No ratings yet
Expt 1 - Curve Fitting
29 pages
PYHTONPRACT
No ratings yet
PYHTONPRACT
4 pages
SAC 1 Revision - Transformations
No ratings yet
SAC 1 Revision - Transformations
5 pages
HW 1
No ratings yet
HW 1
7 pages
Ensembles Models and Decision Tree
No ratings yet
Ensembles Models and Decision Tree
21 pages
F 11
No ratings yet
F 11
3 pages
ML Adv
No ratings yet
ML Adv
51 pages
Sample Code
No ratings yet
Sample Code
9 pages
Ds Assign 33
No ratings yet
Ds Assign 33
7 pages
ASM Question Paper
No ratings yet
ASM Question Paper
2 pages
AI
No ratings yet
AI
16 pages
Shobit Sharma (2124399) ML Lab File PDF
No ratings yet
Shobit Sharma (2124399) ML Lab File PDF
19 pages
Methods Ecol Evol - 2012 - Nakagawa - A General and Simple Method For Obtaining R2 From Generalized Linear Mixed Effects
No ratings yet
Methods Ecol Evol - 2012 - Nakagawa - A General and Simple Method For Obtaining R2 From Generalized Linear Mixed Effects
10 pages
Classification
No ratings yet
Classification
3 pages
Exp 3
No ratings yet
Exp 3
5 pages
AI Assignment-6
No ratings yet
AI Assignment-6
7 pages
Da Lab Mannual
No ratings yet
Da Lab Mannual
25 pages
Varshini Phase 3
No ratings yet
Varshini Phase 3
12 pages
Varshini Phase 2
No ratings yet
Varshini Phase 2
19 pages
Phase 3
No ratings yet
Phase 3
12 pages
ML Lab-1
No ratings yet
ML Lab-1
32 pages
Car Mock - ML Ans
No ratings yet
Car Mock - ML Ans
6 pages
DWM Lab 07 Saif Bodu
No ratings yet
DWM Lab 07 Saif Bodu
100 pages
Practicalpgm ML
No ratings yet
Practicalpgm ML
33 pages
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet