Assignment1
Assignment1
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import numpy as np
import pandas as pd
# Read the training CSV from the mounted Drive folder, then show (rows, columns).
df = pd.read_csv(
    r'/content/drive/MyDrive/Colab Notebooks/training_set.csv'
)
df.shape
(614, 13)
# Preview the first rows of the DataFrame (head() defaults to five rows).
df.head()
Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term
3 LP001006 Male Yes 0 Not Graduate No 2583.0 2358.0 120.0 360.0
# Print each column's non-null count and dtype, plus overall memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Loan_ID 614 non-null object
1 Gender 599 non-null object
2 Married 611 non-null object
3 Dependents 599 non-null object
4 Education 613 non-null object
5 Self_Employed 582 non-null object
6 ApplicantIncome 612 non-null float64
7 CoapplicantIncome 613 non-null float64
8 LoanAmount 592 non-null float64
9 Loan_Amount_Term 600 non-null float64
10 Credit_History 564 non-null float64
11 property_Area 614 non-null object
12 Loan_Status 614 non-null object
dtypes: float64(5), object(8)
memory usage: 62.5+ KB
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
max 81000.000000 41667.000000 700.000000 480.00000 1.000000
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 1/12
1/14/25, 10:45 PM CPC Notebook - Colab
Dependents: 15 missing values
Education: 1 missing values
Self_Employed: 32 missing values
ApplicantIncome: 2 missing values
CoapplicantIncome: 1 missing values
LoanAmount: 22 missing values
Loan_Amount_Term: 14 missing values
Credit_History: 50 missing values
property_Area: 0 missing values
Loan_Status: 0 missing values
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
# Count rows that exactly repeat an earlier row.
# NOTE(review): if both lines share one notebook cell, only the last result is displayed.
df.duplicated().sum()
# Count missing values in each column (axis=0 sums down the rows).
df.isnull().sum(axis=0)
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 2/12
1/14/25, 10:45 PM CPC Notebook - Colab
Loan_ID 0
Gender 15
Married 3
Dependents 15
Education 1
Self_Employed 32
ApplicantIncome 2
CoapplicantIncome 1
LoanAmount 22
Loan_Amount_Term 14
Credit_History 50
property_Area 0
Loan_Status 0
# Tally how often each Credit_History value occurs; value_counts() leaves out NaN by default.
df['Credit_History'].value_counts()
count
Credit_History
1.0 475
0.0 89
# Scatter plots: one 8x6 figure per feature, with Loan_Status on the y-axis.
for col in (
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
    'Credit_History',
):
    plt.figure(figsize=(8, 6))
    # seaborn draws on the current axes and returns them, so the title is set there.
    sns.scatterplot(data=df, x=col, y='Loan_Status').set_title(
        f'Scatter Plot of Loan Status vs {col}'
    )
    plt.show()
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 3/12
1/14/25, 10:45 PM CPC Notebook - Colab
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 4/12
1/14/25, 10:45 PM CPC Notebook - Colab
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 5/12
1/14/25, 10:45 PM CPC Notebook - Colab
# Box plots: distribution of each amount column, split by Loan_Status.
for col in ('ApplicantIncome', 'CoapplicantIncome', 'LoanAmount'):
    plt.figure(figsize=(8, 6))
    # seaborn draws on the current axes and returns them, so the title is set there.
    sns.boxplot(data=df, x='Loan_Status', y=col).set_title(
        f'Box Plot of {col} by Loan Status'
    )
    plt.show()
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 6/12
1/14/25, 10:45 PM CPC Notebook - Colab
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 7/12
1/14/25, 10:45 PM CPC Notebook - Colab
# Histograms: one 8x6 figure per column, each with a KDE curve overlaid.
for col in (
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
):
    plt.figure(figsize=(8, 6))
    # seaborn draws on the current axes and returns them, so the title is set there.
    sns.histplot(df[col], kde=True).set_title(f'Histogram of {col}')
    plt.show()
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 8/12
1/14/25, 10:45 PM CPC Notebook - Colab
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 9/12
1/14/25, 10:45 PM CPC Notebook - Colab
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 10/12
1/14/25, 10:45 PM CPC Notebook - Colab
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 11/12
1/14/25, 10:45 PM CPC Notebook - Colab
https://colab.research.google.com/drive/1oCT8fK2XOY_ErDytEOuTb6Lmvua88V6Y#scrollTo=ZgWQraYJz1rL&printMode=true 12/12