0% found this document useful (0 votes)
4 views

Code

Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views

Code

Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

# Load the client table once; every section below reads from `data`.
data = pd.read_csv('Banking_data.csv')

# 1. Age of clients: histogram with a KDE curve overlaid.
ax = sns.histplot(data=data, x='age', kde=True)
ax.set(
    title='Distribution of Age Among Clients',
    xlabel='Age',
    ylabel='Frequency',
)
plt.show()

# 2. Clients per job type (horizontal bars: the category is on the y axis).
ax = sns.countplot(data=data, y='job')
ax.set(
    title='Job Type Distribution Among Clients',
    xlabel='Count',
    ylabel='Job Type',
)
plt.show()

# 3. Clients per marital status.
ax = sns.countplot(data=data, x='marital')
ax.set(
    title='Marital Status Distribution',
    xlabel='Marital Status',
    ylabel='Count',
)
plt.show()

# 4. Clients per education level.
ax = sns.countplot(data=data, x='education')
ax.set(
    title='Education Level Distribution',
    xlabel='Education Level',
    ylabel='Count',
)
plt.show()

# 5. Clients with credit in default, shown as raw counts per category.
ax = sns.countplot(data=data, x='default')
ax.set(
    title='Credit in Default',
    xlabel='Default',
    ylabel='Count',
)
plt.show()

# 6. Average yearly balance: histogram with a KDE curve overlaid.
ax = sns.histplot(data=data, x='balance', kde=True)
ax.set(
    title='Distribution of Average Yearly Balance',
    xlabel='Balance',
    ylabel='Frequency',
)
plt.show()

# 7. Clients with / without a housing loan.
ax = sns.countplot(data=data, x='housing')
ax.set(
    title='Housing Loans Distribution',
    xlabel='Housing Loan',
    ylabel='Count',
)
plt.show()

# 8. Clients with / without a personal loan.
_, ax = plt.subplots(figsize=(7, 5))
sns.countplot(data=data, x='loan', ax=ax)
ax.set(
    title='Distribution of Personal Loans Among Clients',
    xlabel='Personal Loan',
    ylabel='Count',
)
plt.show()
# 9. Communication channel used for contact during the campaign.
_, ax = plt.subplots(figsize=(7, 5))
sns.countplot(data=data, x='contact', ax=ax)
ax.set(
    title='Communication Types Used During Campaign',
    xlabel='Type of Communication',
    ylabel='Count',
)
plt.show()

# 10. Distribution of the last contact day of the month
#
# Day-of-month is presumably an integer in 1..31 (confirm against the CSV),
# so the histogram uses discrete bins: one unit-width bar centred on each day.
# A fixed `bins=31` splits [min, max] into 31 equal slices, which only lines
# up with calendar days when the data spans exactly 1..31, and even then the
# bars are drawn off-centre from their tick positions.
plt.figure(figsize=(10, 6))

sns.histplot(data['day'], discrete=True)

plt.title('Distribution of Last Contact Day of the Month')
plt.xlabel('Day of the Month')
plt.ylabel('Frequency')

plt.show()

# 11. Month of last contact, with bars forced into calendar order via `order`.
order_months = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]

_, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x='month', order=order_months, ax=ax)
ax.set(
    title='Distribution of Last Contact Months',
    xlabel='Month',
    ylabel='Frequency',
)
plt.show()

# 12. Duration of the last contact: 30-bin histogram with a KDE curve.
_, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=data, x='duration', bins=30, kde=True, ax=ax)
ax.set(
    title='Duration of Last Contact',
    xlabel='Duration (seconds)',
    ylabel='Frequency',
)
plt.show()

# 13. Number of contacts made to each client during this campaign.
_, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x='campaign', ax=ax)
ax.set(
    title='Number of Contacts Performed During Campaign',
    xlabel='Number of Contacts',
    ylabel='Frequency',
)
# Turn the x tick labels vertical.
plt.xticks(rotation=90)
plt.show()

# 14. Days elapsed since the client was last contacted in a previous campaign.
# Rows with pdays == -1 (clients not contacted before) are excluded first.
filtered_data = data.loc[data['pdays'].ne(-1)]

_, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=filtered_data, x='pdays', bins=30, kde=True, ax=ax)
ax.set(
    title='Days Passed Since Last Contact from Previous Campaign',
    xlabel='Days',
    ylabel='Frequency',
)
plt.show()

# 15. Number of contacts made to each client before the current campaign.
_, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x='previous', ax=ax)
ax.set(
    title='Number of Contacts Before Current Campaign',
    xlabel='Number of Previous Contacts',
    ylabel='Frequency',
)
# Turn the x tick labels vertical.
plt.xticks(rotation=90)
plt.show()
# 16. Outcome recorded for the previous marketing campaign.
_, ax = plt.subplots(figsize=(7, 5))
sns.countplot(data=data, x='poutcome', ax=ax)
ax.set(
    title='Outcomes of Previous Marketing Campaigns',
    xlabel='Outcome',
    ylabel='Count',
)
plt.show()

# 17. Clients who subscribed to a term deposit vs. those who did not
# (counts per value of the `y` column).
_, ax = plt.subplots(figsize=(7, 5))
sns.countplot(data=data, x='y', ax=ax)
ax.set(
    title='Subscription to Term Deposit',
    xlabel='Subscribed',
    ylabel='Count',
)
plt.show()

# 18. Correlations between different attributes and the likelihood of subscribing to a term deposit

# One-hot encode the frame so every column can enter the correlation matrix;
# drop_first=True drops the first level of each encoded column.
data_numeric = pd.get_dummies(data, drop_first=True)

# Full pairwise correlation heatmap (cell annotations switched off).
_, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(data_numeric.corr(), annot=False, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix with Subscription Indicator')
plt.show()

# To focus on correlation with term deposit subscription

plt.figure(figsize=(2, 8))

sns.heatmap(data_numeric.corr()[['y_yes']].sort_values(by='y_yes', ascending=False),
annot=True, cmap='coolwarm

You might also like