0% found this document useful (0 votes)
7 views5 pages

Code

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
7 views5 pages

Code

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

# Load the banking dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='Banking_data.csv')

# 1. Age of the clients: histogram of the 'age' column with a KDE curve overlaid.
sns.histplot(data['age'], kde=True).set(
    title='Distribution of Age Among Clients',
    xlabel='Age',
    ylabel='Frequency',
)
plt.show()

# 2. Job types: one bar per 'job' value, with the categories on the y-axis.
sns.countplot(data=data, y='job').set(
    title='Job Type Distribution Among Clients',
    xlabel='Count',
    ylabel='Job Type',
)
plt.show()

# 3. Marital status: number of clients per 'marital' value.
sns.countplot(data=data, x='marital').set(
    title='Marital Status Distribution',
    xlabel='Marital Status',
    ylabel='Count',
)
plt.show()

# 4. Education level: number of clients per 'education' value.
sns.countplot(data=data, x='education').set(
    title='Education Level Distribution',
    xlabel='Education Level',
    ylabel='Count',
)
plt.show()

# 5. Credit in default: number of clients per 'default' value
# (the bars show raw counts, not normalised proportions).
sns.countplot(data=data, x='default').set(
    title='Credit in Default',
    xlabel='Default',
    ylabel='Count',
)
plt.show()

# 6. Average yearly balance: histogram of the 'balance' column with a KDE curve.
sns.histplot(data['balance'], kde=True).set(
    title='Distribution of Average Yearly Balance',
    xlabel='Balance',
    ylabel='Frequency',
)
plt.show()

# 7. Housing loans: number of clients per 'housing' value.
sns.countplot(data=data, x='housing').set(
    title='Housing Loans Distribution',
    xlabel='Housing Loan',
    ylabel='Count',
)
plt.show()

# 8. Personal loans: number of clients per 'loan' value, drawn on a 7x5 figure.
plt.figure(figsize=(7, 5))
sns.countplot(data=data, x='loan').set(
    title='Distribution of Personal Loans Among Clients',
    xlabel='Personal Loan',
    ylabel='Count',
)
plt.show()
# 9. Communication types used during the campaign: counts per 'contact' value.
plt.figure(figsize=(7, 5))
sns.countplot(data=data, x='contact').set(
    title='Communication Types Used During Campaign',
    xlabel='Type of Communication',
    ylabel='Count',
)
plt.show()

# 10. Last contact day of the month: 31-bin histogram of 'day', no KDE curve.
plt.figure(figsize=(10, 6))
sns.histplot(data['day'], bins=31, kde=False).set(
    title='Distribution of Last Contact Day of the Month',
    xlabel='Day of the Month',
    ylabel='Frequency',
)
plt.show()

# 11. Last contact month: counts per 'month' value.
plt.figure(figsize=(10, 6))
# Explicit calendar order for the bars, passed to countplot via `order`.
order_months = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]
sns.countplot(data=data, x='month', order=order_months).set(
    title='Distribution of Last Contact Months',
    xlabel='Month',
    ylabel='Frequency',
)
plt.show()

# 12. Duration of the last contact: 30-bin histogram of 'duration' with a KDE curve.
plt.figure(figsize=(10, 6))
sns.histplot(data['duration'], bins=30, kde=True).set(
    title='Duration of Last Contact',
    xlabel='Duration (seconds)',
    ylabel='Frequency',
)
plt.show()

# 13. Contacts performed during this campaign: one bar per distinct 'campaign' value.
plt.figure(figsize=(10, 6))
sns.countplot(data=data, x='campaign').set(
    title='Number of Contacts Performed During Campaign',
    xlabel='Number of Contacts',
    ylabel='Frequency',
)
# Draw the x tick labels rotated by 90 degrees.
plt.xticks(rotation=90)
plt.show()

# 14. Days passed since the client was last contacted in a previous campaign.
plt.figure(figsize=(10, 6))
# pdays == -1 marks clients who were not contacted before; exclude those rows.
filtered_data = data.loc[data['pdays'] != -1]
sns.histplot(filtered_data['pdays'], bins=30, kde=True).set(
    title='Days Passed Since Last Contact from Previous Campaign',
    xlabel='Days',
    ylabel='Frequency',
)
plt.show()

# 15. Contacts performed before the current campaign: one bar per 'previous' value.
plt.figure(figsize=(10, 6))
sns.countplot(data=data, x='previous').set(
    title='Number of Contacts Before Current Campaign',
    xlabel='Number of Previous Contacts',
    ylabel='Frequency',
)
# Draw the x tick labels rotated by 90 degrees.
plt.xticks(rotation=90)
plt.show()
# 16. Outcomes of previous marketing campaigns: counts per 'poutcome' value.
plt.figure(figsize=(7, 5))
sns.countplot(data=data, x='poutcome').set(
    title='Outcomes of Previous Marketing Campaigns',
    xlabel='Outcome',
    ylabel='Count',
)
plt.show()

# 17. Term deposit subscription: clients who subscribed vs. those who did not,
# counted per value of the 'y' column.
plt.figure(figsize=(7, 5))
sns.countplot(data=data, x='y').set(
    title='Subscription to Term Deposit',
    xlabel='Subscribed',
    ylabel='Count',
)
plt.show()

# 18. Correlations between different attributes and the likelihood of
# subscribing to a term deposit.
# (The heading used to wrap onto a second line, leaving a bare `deposit`
# expression that raised NameError when the script ran.)

# Convert the categorical columns to indicator columns so that corr() can
# include them; drop_first=True omits the first level of each encoded column.
data_numeric = pd.get_dummies(data, drop_first=True)

# Full pairwise correlation matrix, drawn without per-cell annotations.
plt.figure(figsize=(15, 10))
sns.heatmap(data_numeric.corr(), annot=False, cmap='coolwarm')
plt.title('Correlation Matrix with Subscription Indicator')
plt.show()

# To focus on correlation with term deposit subscription

plt.figure(figsize=(2, 8))

sns.heatmap(data_numeric.corr()[['y_yes']].sort_values(by='y_yes', ascending=False),
annot=True, cmap='coolwarm

You might also like