import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the loan records from the CSV file into a DataFrame.
loandata = pd.read_csv('loansData-Copy1.csv')
# Show the column names as a NumPy array.
loandata.columns.values
array(['Amount.Requested', 'Amount.Funded.By.Investors',
'Interest.Rate',
'Loan.Length', 'Loan.Purpose', 'Debt.To.Income.Ratio', 'State',
'Home.Ownership', 'Monthly.Income', 'FICO.Range',
'Open.CREDIT.Lines', 'Revolving.CREDIT.Balance',
'Inquiries.in.the.Last.6.Months', 'Employment.Length'],
dtype=object)
# Preview the first rows of the dataset (head() returns five rows by default)
loandata.head()
Amount.Requested Amount.Funded.By.Investors Interest.Rate
Loan.Length \
81174 20000 20000.0 8.90% 36
months
99592 19200 19200.0 12.12% 36
months
80059 35000 35000.0 21.98% 60
months
15825 10000 9975.0 9.99% 36
months
33182 12000 12000.0 11.71% 36
months
Loan.Purpose Debt.To.Income.Ratio State Home.Ownership \
81174 debt_consolidation 14.90% SC MORTGAGE
99592 debt_consolidation 28.36% TX MORTGAGE
80059 debt_consolidation 23.81% CA MORTGAGE
15825 debt_consolidation 14.30% KS MORTGAGE
33182 credit_card 18.78% NJ RENT
Monthly.Income FICO.Range Open.CREDIT.Lines
Revolving.CREDIT.Balance \
81174 6541.67 735-739 14.0
14272.0
99592 4583.33 715-719 12.0
11140.0
80059 11500.00 690-694 14.0
21977.0
15825 3833.33 695-699 10.0
9346.0
33182 3195.00 695-699 11.0
14469.0
Inquiries.in.the.Last.6.Months Employment.Length
81174 2.0 < 1 year
99592 1.0 2 years
80059 1.0 2 years
15825 0.0 5 years
33182 0.0 9 years
# First five Monthly Income values, selected by position
loandata['Monthly.Income'].iloc[:5]
81174 6541.67
99592 4583.33
80059 11500.00
15825 3833.33
33182 3195.00
Name: Monthly.Income, dtype: float64
# Histogram of Monthly Income, drawn with the pandas Series.hist() helper
inc = loandata['Monthly.Income']

plt.figure()
plt.title('Histogram of Monthly Income')
h = inc.hist()
plt.show()
# Same histogram of Monthly Income, this time drawn directly with matplotlib.
plt.title('Histogram of Monthly Income')
# plt.hist returns a (bin counts, bin edges, bar container) tuple.
plt.hist(inc)
(array([2.337e+03, 1.510e+02, 8.000e+00, 1.000e+00, 0.000e+00,
0.000e+00,
1.000e+00, 0.000e+00, 0.000e+00, 1.000e+00]),
array([ 588.5 , 10804.65, 21020.8 , 31236.95, 41453.1 ,
51669.25,
61885.4 , 72101.55, 82317.7 , 92533.85, 102750. ]),
<BarContainer object of 10 artists>)
# Converting Monthly Income to log
import math  # not used by this step any more; kept in case other code in the file relies on it

# Vectorised natural log of the income column. The result is a Series aligned
# on the DataFrame index, and missing incomes stay NaN.
# NOTE: unlike math.log, np.log does not raise on zero or negative values; it
# yields -inf / NaN and emits a RuntimeWarning instead.
loandata['Monthly.Logincome'] = np.log(inc)
loginc = loandata['Monthly.Logincome']

# Histogram of the log-transformed income
h2 = loginc.hist()
plt.show()

# Summary statistics for the numeric columns, including the new log-income column
loandata.describe()
Amount.Requested Amount.Funded.By.Investors Monthly.Income \
count 2500.000000 2500.000000 2499.000000
mean 12406.500000 12001.573236 5688.931321
std 7801.544872 7745.320754 3963.118185
min 1000.000000 -0.010000 588.500000
25% 6000.000000 6000.000000 3500.000000
50% 10000.000000 10000.000000 5000.000000
75% 17000.000000 16000.000000 6800.000000
max 35000.000000 35000.000000 102750.000000
Open.CREDIT.Lines Revolving.CREDIT.Balance \
count 2498.000000 2498.000000
mean 10.075661 15244.559648
std 4.508644 18308.549795
min 2.000000 0.000000
25% 7.000000 5585.750000
50% 9.000000 10962.000000
75% 13.000000 18888.750000
max 38.000000 270800.000000
Inquiries.in.the.Last.6.Months Monthly.Logincome
count 2498.000000 2499.000000
mean 0.906325 8.501915
std 1.231036 0.523019
min 0.000000 6.377577
25% 0.000000 8.160518
50% 0.000000 8.517193
75% 1.000000 8.824678
max 9.000000 11.540054
# Summary statistics for the Monthly.Income column only
loandata['Monthly.Income'].describe()
count 2499.000000
mean 5688.931321
std 3963.118185
min 588.500000
25% 3500.000000
50% 5000.000000
75% 6800.000000
max 102750.000000
Name: Monthly.Income, dtype: float64