Python Analysis
Python Analysis
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Load the customer churn dataset from a CSV file in the working directory.
df = pd.read_csv('customer churn.csv')
# Bare expression: presumably a notebook cell result that previews the frame.
df
Churn
0 No
1 No
2 Yes
3 No
4 Yes
... ...
7038 No
7039 No
7040 No
7041 Yes
7042 No
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 customerID 7043 non-null object
1 gender 7043 non-null object
2 SeniorCitizen 7043 non-null int64
3 Partner 7043 non-null object
4 Dependents 7043 non-null object
5 tenure 7043 non-null int64
6 PhoneService 7043 non-null object
7 MultipleLines 7043 non-null object
8 InternetService 7043 non-null object
9 OnlineSecurity 7043 non-null object
10 OnlineBackup 7043 non-null object
11 DeviceProtection 7043 non-null object
12 TechSupport 7043 non-null object
13 StreamingTV 7043 non-null object
14 StreamingMovies 7043 non-null object
15 Contract 7043 non-null object
16 PaperlessBilling 7043 non-null object
17 PaymentMethod 7043 non-null object
18 MonthlyCharges 7043 non-null float64
19 TotalCharges 7043 non-null object
20 Churn 7043 non-null object
dtypes: float64(1), int64(2), object(18)
memory usage: 1.1+ MB
# Count missing values per column.
df.isna().sum()
customerID 0
gender 0
SeniorCitizen 0
Partner 0
Dependents 0
tenure 0
PhoneService 0
MultipleLines 0
InternetService 0
OnlineSecurity 0
OnlineBackup 0
DeviceProtection 0
TechSupport 0
StreamingTV 0
StreamingMovies 0
Contract 0
PaperlessBilling 0
PaymentMethod 0
MonthlyCharges 0
TotalCharges 0
Churn 0
dtype: int64
# Summary statistics of the DataFrame's columns.
df.describe()
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()
# Number of customerID values that repeat an earlier one.
# NOTE(review): the `0` that follows in the export is presumably this result.
df["customerID"].duplicated().sum()
0
def conv(value):
    """Translate a 0/1 flag into a "Yes"/"No" label.

    Args:
        value: The flag to translate.

    Returns:
        ``"Yes"`` when ``value == 1``; ``"No"`` for every other value.
    """
    return "Yes" if value == 1 else "No"
# Replace each SeniorCitizen value with its "Yes"/"No" label, element by element.
df["SeniorCitizen"] = df["SeniorCitizen"].map(conv)
# Preview the first 25 rows to check the conversion.
df.head(n=25)
# From ChatGPT
# Contingency table: one row per SeniorCitizen value, one column per Churn value.
cross_tab = pd.crosstab(df['SeniorCitizen'], df['Churn'])
# Calculate percentages: divide each row by its own total so every row sums
# to 100.
cross_tab_percentage = cross_tab.div(cross_tab.sum(axis=1), axis=0) * 100
# NOTE(review): cross_tab_percentage is not used in the visible code —
# presumably the plot that consumed it was lost; confirm.
# Histogram of tenure in 72 bins, coloured by Churn value.
plt.figure(figsize=(9, 4))
sns.histplot(data=df, x="tenure", hue="Churn", bins=72)
plt.show()
# Customer counts per Contract value, split by Churn value.
plt.figure(figsize=(5, 6))
ax = sns.countplot(x="Contract", data=df, hue="Churn")
# Label every hue series, not only the first, so all bars show their count.
for container in ax.containers:
    ax.bar_label(container)
# The x-axis variable is Contract, so the title names that variable.
plt.title("Count of Customer by Contract")
plt.show()
# Show the DataFrame's column labels as an array.
df.columns.values
# NOTE(review): `columns` is not defined anywhere in the visible code —
# presumably the list of column names to plot, one per subplot; confirm.
# Number of columns for the subplot grid (you can change this)
n_cols = 3
# Ceiling division: the number of rows needed to fit len(columns) subplots.
n_rows = (len(columns) + n_cols - 1) // n_cols
# Create subplots; the figure height grows by 4 per row (adjust as needed).
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, n_rows * 4))
# Flatten the axes array for easy iteration (handles both 1D and 2D arrays)
axes = axes.flatten()
# NOTE(review): nothing is drawn on `axes` in the visible code, so the figure
# shows empty subplots — presumably the plotting loop was lost; confirm.
plt.tight_layout()
plt.show()
# Customer counts per PaymentMethod value, split by Churn value.
plt.figure(figsize=(9, 6))
ax = sns.countplot(data=df, x="PaymentMethod", hue="Churn")
# Annotate the bars of the first two hue series with their counts.
for series_idx in (0, 1):
    ax.bar_label(ax.containers[series_idx])
plt.title("Count of Customer by PaymentMethod")
plt.show()