Customer Churn Prediction Project
Customer Churn Prediction Project
1. data_loader.py
import pandas as pd
def load_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Path of the CSV file to read (anything accepted by
            ``pandas.read_csv``).

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        # Keep the "None means missing" contract that explore_data checks
        # for, but say what went wrong instead of failing silently.
        print(f"Error: file not found: {file_path}")
        return None
def explore_data(df):
    """Print a quick overview of a DataFrame.

    Shows the first rows, the column/dtype summary, descriptive statistics
    and, when a ``Churn`` column is present, its value counts.

    Args:
        df: DataFrame to summarise, or ``None`` (in which case nothing is
            printed).
    """
    if df is None:
        return
    print(df.head())
    # DataFrame.info() writes its report itself and returns None; wrapping it
    # in print() would add a stray "None" line to the output.
    df.info()
    print(df.describe())
    # Skip the target breakdown instead of raising KeyError when the frame
    # has no 'Churn' column.
    if 'Churn' in df.columns:
        print(df['Churn'].value_counts())
if __name__ == "__main__":
    # Script entry point: read the churn dataset and print its summary.
    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
    explore_data(churn_df)
2. data_preprocessing.py
import pandas as pd
# NOTE(review): the enclosing function header and the imports for
# StandardScaler, OneHotEncoder, ColumnTransformer and LabelEncoder
# (presumably scikit-learn) are not visible in this listing — confirm
# against the original data_preprocessing.py.
# Work on a copy so the in-place dropna below does not modify the caller's frame.
df = df.copy()
# Discard rows whose TotalCharges value is missing.
df.dropna(subset=['TotalCharges'], inplace=True)
# Object-dtype columns are treated as the categorical features, excluding
# the customerID column and the target column.
categorical_features = df.select_dtypes(include='object').columns.tolist()
if 'customerID' in categorical_features:
categorical_features.remove('customerID')
target_variable = 'Churn'
if target_variable in categorical_features:
categorical_features.remove(target_variable)
# Scaler for numeric columns; the one-hot encoder ignores categories it did
# not see during fitting instead of raising.
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
# NOTE(review): the transformers list below is never closed in this listing;
# its entries and closing brackets appear to be missing.
preprocessor = ColumnTransformer(
transformers=[
# Encode the target column as integer labels.
label_encoder = LabelEncoder()
df[target_variable] = label_encoder.fit_transform(df[target_variable])
# Split into the feature frame (everything but the target) and the target series.
X = df.drop(target_variable, axis=1)
y = df[target_variable]
if __name__ == "__main__":
    # Script entry point: read the raw churn CSV straight into a DataFrame.
    data_file = 'customer_churn.csv'
    churn_df = pd.read_csv(data_file)
('classifier', LogisticRegression(random_state=42))])
model.fit(X_train, y_train)
return model
('classifier', DecisionTreeClassifier(random_state=42))])
model.fit(X_train, y_train)
return model
('classifier', RandomForestClassifier(random_state=42))])
model.fit(X_train, y_train)
return model
if __name__ == "__main__":
    # Script entry point: load the churn dataset via load_data.
    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
4. model_evaluation.py
# NOTE(review): the enclosing function header and its imports are not
# visible in this listing — confirm against the original model_evaluation.py.
# Predict labels for X_test with the fitted model.
y_pred = model.predict(X_test)
# NOTE(review): only f1 is assigned in the visible lines; accuracy,
# precision, recall and conf_matrix are used below without a visible
# assignment.
f1 = f1_score(y_test, y_pred)
# Report each metric to four decimal places.
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
# Create a figure with figsize (6, 5) and label its axes.
# NOTE(review): no plotting or show/save call is visible in this listing.
plt.figure(figsize=(6, 5))
plt.xlabel('Predicted')
plt.ylabel('Actual')
if __name__ == "__main__":
    # Script entry point: load the churn dataset via load_data.
    data_file = 'customer_churn.csv'
    churn_df = load_data(data_file)
5. main.py
def main():
    """Read the churn dataset from ``customer_churn.csv``.

    The loaded frame is only bound locally; nothing is returned.
    """
    churn_df = load_data('customer_churn.csv')
if __name__ == "__main__":
    # Run main() only when executed as a script, not on import.
    main()