0% found this document useful (0 votes)

62 views7 pages

Credit Modelling in R

Uploaded by

Seshendra Vemuri

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

62 views7 pages

Credit Modelling in R

Uploaded by

Seshendra Vemuri

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 7

Credit Modelling in R

Now let us start using R for Credit Modelling:

The first thing we need to do is to load the R packages into the library:
# Load R packages into the library

# Data management packages

library(DescTools)

library(skimr)

library(plyr)

library(dplyr)

library(aod)

library(readxl)

# Visualization packages

library(Deducer)

library(ggplot2)

# Machine learning method packages

library(ROCR)

library(pROC)

library(caret)

library(MASS)

Now it is time to load the dataset and do some data management. We will work with
the Lending Club loan dataset. The code below performs the data management:
# Import dataset
# NOTE(review): "/loan.csv" points at the filesystem root; adjust the path to
# wherever the loan export actually lives.
loan_data <- read.csv("/loan.csv")

# Selecting the relevant variables in the dataset.
# Every column name must be one unbroken string: a name wrapped across two
# lines embeds a newline in the literal, so the column is not found and the
# subset fails.
relevant_vars <- c(
  "grade", "sub_grade", "term", "loan_amnt", "issue_d", "loan_status",
  "emp_length", "home_ownership", "annual_inc", "verification_status",
  "purpose", "dti", "delinq_2yrs", "addr_state", "int_rate",
  "inq_last_6mths", "mths_since_last_delinq", "mths_since_last_record",
  "open_acc", "pub_rec", "revol_bal", "revol_util", "total_acc"
)
loan_data <- loan_data[, relevant_vars]

# Data management for missing observations

# A missing "months since last delinquency / public record" is recoded to 0
# instead of dropping the row.
loan_data$mths_since_last_delinq[is.na(loan_data$mths_since_last_delinq)] <- 0
loan_data$mths_since_last_record[is.na(loan_data$mths_since_last_record)] <- 0

# Per-column flag: does the column still contain any NA?
var.has.na <- lapply(loan_data, function(x) any(is.na(x)))

# Number of missing values in each column that has at least one NA.
# (Previously this held the column *positions* returned by which(), so the
# "share" below was column index / row count rather than a missing share.)
na_counts <- vapply(loan_data, function(x) sum(is.na(x)), integer(1))
num_na <- na_counts[na_counts > 0]

# Share of rows that are missing, per affected column.
per_na <- num_na / nrow(loan_data)

# Keep only the rows without any remaining missing value.
loan_data <- loan_data[complete.cases(loan_data), ]

Although visualization is the second step of a credit modelling analysis and is
covered in my previous article, let us do a minimum of visualization here in case the
reader only reads this article:
# Visualization of the data

# Histogram of the loan amount in bins of 1,000 from 0 to 35,000, with the
# bars coloured by their count.
# Every layer is chained with `+`; without the `+` after ggplot() the object
# would be an empty plot and the layers would be evaluated on their own.
# The column is referenced by name inside aes() rather than via loan_data$...
loanamount_barchart <- ggplot(data = loan_data, aes(x = loan_amnt)) +
  geom_histogram(
    breaks = seq(0, 35000, by = 1000),
    colour = "black",
    # after_stat(count) is the current spelling of the deprecated ..count..
    aes(fill = after_stat(count))
  ) +
  scale_fill_gradient("Count", low = "green1", high = "yellowgreen") +
  labs(title = "Loan Amount", x = "Amount", y = "Number of Loans")

loanamount_barchart

# Interactive version. ggplotly() lives in plotly, which the script never
# attaches, so it is called through its namespace.
plotly::ggplotly(p = loanamount_barchart)

# Box plot of loan amount by loan status

box_plot_stat <- ggplot(loan_data, aes(loan_status, loan_amnt))

# The x-axis tick labels are blanked; the fill legend identifies each status.
# Labels are passed to labs() as named arguments: wrapping them in list() is
# not supported by current ggplot2 releases.
box_plot_stat +
  geom_boxplot(aes(fill = loan_status)) +
  theme(axis.text.x = element_blank()) +
  labs(title = "Loan amount by status", x = "Loan Status", y = "Amount")

# Interactive version of the plot just drawn. ggplotly() lives in plotly,
# which the script never attaches, so it is called through its namespace.
plotly::ggplotly(p = ggplot2::last_plot())
The above code gives us the following two visualizations:

Let's see some descriptive statistics of the data as well:

# Print skimr's per-variable summary statistics for the cleaned loan data
skim(loan_data)

Skim summary statistics

n obs: 886877
n variables: 23

-- Variable type:factor
--------------------------------------------------------

variable missing complete n n_unique

top_counts ordered

addr_state 0 886877 886877 51 CA: 129456, NY:

74033, TX: 71100, FL: 60901 FALSE

emp_length 0 886877 886877 12 10+: 291417, 2 y:

78831, < 1: 70538, 3 y: 69991 FALSE

grade 0 886877 886877 7 B: 254445, C:

245721, A: 148162, D: 139414 FALSE

home_ownership 0 886877 886877 6 MOR: 443319, REN:

355921, OWN: 87408, OTH: 180 FALSE

issue_d 0 886877 886877 103 Oct: 48619, Jul:

45938, Dec: 44323, Oct: 38760 FALSE

loan_status 0 886877 886877 8 Cur: 601533, Ful:

209525, Cha: 45956, Lat: 11582 FALSE

purpose 0 886877 886877 14 deb: 524009, cre:

206136, hom: 51760, oth: 42798 FALSE

sub_grade 0 886877 886877 35 B3: 56301, B4:

55599, C1: 53365, C2: 52206 FALSE

term 0 886877 886877 2

36: 620739, 60: 266138, NA: 0 FALSE

verification_status 0 886877 886877 3 Sou: 329393,

Ver: 290896, Not: 266588, NA: 0 FALSE

-- Variable type:numeric
-------------------------------------------------------

variable missing complete n mean sd p0

p25 p50 p75 p100 hist

annual_inc 0 886877 886877 75019.4 64687.38 0

45000 65000 90000 9500000 ????????

delinq_2yrs 0 886877 886877 0.31 0.86 0

0 0 0 39 ????????

dti 0 886877 886877 18.16 17.19 0

11.91 17.66 23.95 9999 ????????
inq_last_6mths 0 886877 886877 0.69 1 0
0 0 1 33 ????????

int_rate 0 886877 886877 13.25 4.38 5.32

9.99 12.99 16.2 28.99 ????????

loan_amnt 0 886877 886877 14756.97 8434.43 500

8000 13000 20000 35000 ????????

mths_since_last_delinq 0 886877 886877 16.62 22.89 0

0 0 30 188 ????????

mths_since_last_record 0 886877 886877 10.83 27.65 0

0 0 0 129 ????????

open_acc 0 886877 886877 11.55 5.32 1

8 11 14 90 ????????

pub_rec 0 886877 886877 0.2 0.58 0

0 0 0 86 ????????

revol_bal 0 886877 886877 16924.56 22414.33 0

6450 11879 20833 2904836 ????????

revol_util 0 886877 886877 55.07 23.83 0

37.7 56 73.6 892.3 ????????

total_acc 0 886877 886877 25.27 11.84 1

17 24 32 169 ????????
Next we need to do some more data management to prepare the dataset for the machine
learning analysis:
# Focus on the historical loans: drop every loan whose status is "Current"
loan_data <- as.data.frame(loan_data[loan_data$loan_status != "Current", ])

# Decile limits of annual income (0%, 10%, ..., 100%)
limits_inc <- quantile(loan_data$annual_inc, seq(0, 1, 0.1))

# Create binary variables for the logistic regression analysis

# Annual_inc: 1 for the four highest income deciles, 0 otherwise.
# The original built text labels for the ten decile bins and compared them
# with hard-coded strings for the top four bins. Those strings are
# whitespace-sensitive and only valid for one particular dataset, so a
# mismatch silently codes every loan as 0. Bins 7-10 of
# cut(..., include.lowest = TRUE) are exactly the values strictly above the
# 60% quantile (the 7th limit), so the threshold is applied directly.
loan_data$annual_inc <- as.numeric(loan_data$annual_inc > limits_inc[[7]])

# Home_ownership: "1" for owners ("OWN") and mortgage holders ("MORTGAGE"),
# "0" for every other category. The column stays a character vector.
ownership_chr <- as.character(loan_data$home_ownership)
is_owner <- ownership_chr == "OWN" | ownership_chr == "MORTGAGE"
loan_data$home_ownership <- ifelse(is_owner, "1", "0")

# Delinq_2yrs: "0" when the value is zero, "1" otherwise.
# The column stays a character vector.
delinq_chr <- as.character(loan_data$delinq_2yrs)
loan_data$delinq_2yrs <- ifelse(delinq_chr == "0", "0", "1")

# Verification status: 1 if "Verified" or "Source Verified"; otherwise 0
# (stored as numeric)
verification_chr <- as.character(loan_data$verification_status)
is_verified <- verification_chr == "Verified" |
  verification_chr == "Source Verified"
loan_data$verification_status <- as.numeric(is_verified)

# Dti: bin the debt-to-income ratio into deciles, then flag selected bins.

# Decile limits of dti (0%, 10%, ..., 100%)
dti_quant <- quantile(loan_data$dti, seq(0, 1, 0.1))

# Text labels "lower-upper" for the ten bins; the first lower bound is written
# as 0 and the last upper bound as "+Inf".
labels <- c(0, prettyNum(dti_quant[2:10], big.mark = ","), "+Inf")
labels <- paste(labels[1:10], labels[2:11], sep = "-")

dti_bin <- as.character(
  cut(loan_data$dti, breaks = dti_quant, labels = labels,
      include.lowest = TRUE)
)

# Bins coded as 1; everything else is coded 0. The column stays character.
# Each label must be a single unbroken string: a literal wrapped across lines
# contains a newline and can never equal a bin label.
# NOTE(review): these labels are tied to the quantiles of one particular
# dataset; confirm they match `labels` for the data actually loaded.
flagged_bins <- c("0-6.57", "12.13-14.32", "14.32-16.49")
loan_data$dti <- ifelse(dti_bin %in% flagged_bins, "1", "0")

# Status: "1" for loans that are "Charged Off" or "Default", "0" for every
# other status. Still a character vector at this point.
status_chr <- as.character(loan_data$loan_status)
is_bad_loan <- status_chr == "Charged Off" | status_chr == "Default"
loan_data$loan_status <- ifelse(is_bad_loan, "1", "0")

# Absolute and relative frequencies of the two classes
table(loan_data$loan_status)

PercTable(loan_data$loan_status)

# Change to numeric variables:

# Remove the first literal "%" (if any), convert to a number and divide by 100.
percent_to_fraction <- function(x) {
  as.numeric(sub("%", "", x, fixed = TRUE)) / 100
}

loan_data[, "revol_util"] <- percent_to_fraction(loan_data$"revol_util")
loan_data[, "int_rate"] <- percent_to_fraction(loan_data$"int_rate")

# The status dummy becomes a numeric 0/1 response
loan_data$loan_status <- as.numeric(loan_data$loan_status)

# Grouping variables: collapse loan purpose into three groups stored as a
# factor with levels "0", "1" and "2".
purpose_chr <- as.character(loan_data$purpose)

group_two <- c("car", "major_purchase", "home_improvement", "credit_card")
group_zero <- c("moving", "small_business", "renewable_energy")

# Every purpose not listed above falls into group "1"
purpose_group <- rep("1", length(purpose_chr))
purpose_group[purpose_chr %in% group_two] <- "2"
purpose_group[purpose_chr %in% group_zero] <- "0"
# Missing purposes stay missing
purpose_group[is.na(purpose_chr)] <- NA

loan_data$purpose <- as.factor(purpose_group)

Now it is time to run the machine learning regression analysis. We will work with
multiple logistic regression. Logistic regression is applied when the variable to be
explained (y) is binary. The model uses the logistic function (the cumulative
distribution function of the logistic distribution) to map a linear combination of the
explanatory variables (the x's) to the probability that y equals 1. We will use a
stepwise procedure to arrive at a final model for the logistic regression. The code
below runs the multiple logistic regression analysis:

Principles: Life and Work
From Everand
Principles: Life and Work
Ray Dalio
4/5 (643)
The Gifts of Imperfection: Let Go of Who You Think You're Supposed to Be and Embrace Who You Are
From Everand
The Gifts of Imperfection: Let Go of Who You Think You're Supposed to Be and Embrace Who You Are
Brené Brown
4/5 (1175)
The Glass Castle: A Memoir
From Everand
The Glass Castle: A Memoir
Jeannette Walls
4.5/5 (1856)
The Perks of Being a Wallflower
From Everand
The Perks of Being a Wallflower
Stephen Chbosky
4.5/5 (4103)
Sing, Unburied, Sing: A Novel
From Everand
Sing, Unburied, Sing: A Novel
Jesmyn Ward
4/5 (1267)
Her Body and Other Parties: Stories
From Everand
Her Body and Other Parties: Stories
Carmen Maria Machado
4/5 (903)
Shoe Dog: A Memoir by the Creator of Nike
From Everand
Shoe Dog: A Memoir by the Creator of Nike
Phil Knight
4.5/5 (629)
Steve Jobs
From Everand
Steve Jobs
Walter Isaacson
4.5/5 (1139)
The Emperor of All Maladies: A Biography of Cancer
From Everand
The Emperor of All Maladies: A Biography of Cancer
Siddhartha Mukherjee
4.5/5 (298)
The Yellow House: A Memoir (2019 National Book Award Winner)
From Everand
The Yellow House: A Memoir (2019 National Book Award Winner)
Sarah M. Broom
4/5 (100)
Angela's Ashes: A Memoir
From Everand
Angela's Ashes: A Memoir
Frank McCourt
4.5/5 (943)
The World Is Flat 3.0: A Brief History of the Twenty-first Century
From Everand
The World Is Flat 3.0: A Brief History of the Twenty-first Century
Thomas L. Friedman
3.5/5 (2289)
The Outsider: A Novel
From Everand
The Outsider: A Novel
Stephen King
4/5 (2885)
A Heartbreaking Work Of Staggering Genius: A Memoir Based on a True Story
From Everand
A Heartbreaking Work Of Staggering Genius: A Memoir Based on a True Story
Dave Eggers
3.5/5 (233)
Team of Rivals: The Political Genius of Abraham Lincoln
From Everand
Team of Rivals: The Political Genius of Abraham Lincoln
Doris Kearns Goodwin
4.5/5 (244)
Rise of ISIS: A Threat We Can't Ignore
From Everand
Rise of ISIS: A Threat We Can't Ignore
Jay Sekulow
3.5/5 (144)
Manhattan Beach: A Novel
From Everand
Manhattan Beach: A Novel
Jennifer Egan
3.5/5 (919)
Mra Project: Prepared By: Deepak Batabyal Date:-09 Feb 2020
100% (2)
Mra Project: Prepared By: Deepak Batabyal Date:-09 Feb 2020
32 pages
Café Chain Restaurant Project
100% (2)
Café Chain Restaurant Project
21 pages
Heavy Equipment
88% (8)
Heavy Equipment
78 pages
Fear: Trump in the White House
From Everand
Fear: Trump in the White House
Bob Woodward
3.5/5 (836)
John Adams
From Everand
John Adams
David McCullough
4.5/5 (2546)
Use Plotly
No ratings yet
Use Plotly
4 pages
Credit Modelling in R
No ratings yet
Credit Modelling in R
7 pages
FIGURE 2.10: Diamond Price by Clarity and Cut. For The Interactive, See
No ratings yet
FIGURE 2.10: Diamond Price by Clarity and Cut. For The Interactive, See
5 pages
2.2 Intro To Plotly - Js
No ratings yet
2.2 Intro To Plotly - Js
3 pages
Introduction To Credit Modelling: First Step in This Article
No ratings yet
Introduction To Credit Modelling: First Step in This Article
1 page
2.1 Intro To: Plot - Ly
No ratings yet
2.1 Intro To: Plot - Ly
1 page
Guide The Recruiter To
No ratings yet
Guide The Recruiter To
1 page
I 'Black' Colors 'Set2': Layout Plot - Ly
No ratings yet
I 'Black' Colors 'Set2': Layout Plot - Ly
2 pages
If We Assign Variable Names
No ratings yet
If We Assign Variable Names
4 pages
Vivek Dubey - Marketing & Retail Analytics
100% (2)
Vivek Dubey - Marketing & Retail Analytics
20 pages
MRA CafeChain Analysis
No ratings yet
MRA CafeChain Analysis
23 pages
Marketing and Retail Analytics - Assignment1
100% (1)
Marketing and Retail Analytics - Assignment1
24 pages
Project of TRNG & Dvlpmnt-1
No ratings yet
Project of TRNG & Dvlpmnt-1
76 pages
The Unwinding: An Inner History of the New America
From Everand
The Unwinding: An Inner History of the New America
George Packer
4/5 (45)
The Light Between Oceans: A Novel
From Everand
The Light Between Oceans: A Novel
M.L. Stedman
4.5/5 (815)
Little Women
From Everand
Little Women
Louisa May Alcott
4.5/5 (2369)
Detailed LP Cookery 6
No ratings yet
Detailed LP Cookery 6
8 pages
Language Learning Reflection Website
No ratings yet
Language Learning Reflection Website
4 pages
CRM Project Report
No ratings yet
CRM Project Report
47 pages
O Captain! My Captain!-Walt Whitman
100% (1)
O Captain! My Captain!-Walt Whitman
3 pages
Libby Chapter 13 JH Notes JH 8th
No ratings yet
Libby Chapter 13 JH Notes JH 8th
12 pages
Republic v. Court of Appeals, G.R. No. 108998, August 24, 1994
No ratings yet
Republic v. Court of Appeals, G.R. No. 108998, August 24, 1994
18 pages
12signs Astrotwins
100% (1)
12signs Astrotwins
15 pages
Understanding The Self
No ratings yet
Understanding The Self
7 pages
Mis List
No ratings yet
Mis List
47 pages
VSR 411 QB Anaesthesia
No ratings yet
VSR 411 QB Anaesthesia
7 pages
Indonesia and Malay Archipelago
No ratings yet
Indonesia and Malay Archipelago
5 pages
The $5 Trillion Cold War Hoax - Eustace Mullins
100% (4)
The $5 Trillion Cold War Hoax - Eustace Mullins
16 pages
Hatchet Draft 1
No ratings yet
Hatchet Draft 1
2 pages
Presented By-Anupriya Srivastava Mds III RD Year
No ratings yet
Presented By-Anupriya Srivastava Mds III RD Year
93 pages
Candida-Associated Denture Stomatitis
No ratings yet
Candida-Associated Denture Stomatitis
5 pages
Aloe Vera Proposal
No ratings yet
Aloe Vera Proposal
20 pages
Lesson 13. Death
No ratings yet
Lesson 13. Death
22 pages
Ultimate Beneficial Ownership Self Declaration Form 2025
No ratings yet
Ultimate Beneficial Ownership Self Declaration Form 2025
2 pages
IT-Technical Recruiter
No ratings yet
IT-Technical Recruiter
3 pages
Law 515 Jurisprudence and Legal Theory I
No ratings yet
Law 515 Jurisprudence and Legal Theory I
71 pages
Use of Motivation in The Teaching-Learning Process Intrinsic and Extrinsic Motivation
No ratings yet
Use of Motivation in The Teaching-Learning Process Intrinsic and Extrinsic Motivation
14 pages
The Prelude Emigree Effects of Nature Essay
No ratings yet
The Prelude Emigree Effects of Nature Essay
2 pages
Sokolova2019 PDF
No ratings yet
Sokolova2019 PDF
9 pages
He Pushed His Wife To Save Himself From A Sinking Cruise Ship
No ratings yet
He Pushed His Wife To Save Himself From A Sinking Cruise Ship
2 pages
Kaveh Afrasiabi - Vilification of A Scholar
No ratings yet
Kaveh Afrasiabi - Vilification of A Scholar
4 pages
General Mathematics: Quarter 1 - Module 26: Domain and Range of Logarithmic Functions
No ratings yet
General Mathematics: Quarter 1 - Module 26: Domain and Range of Logarithmic Functions
19 pages
Notes For Assignment
No ratings yet
Notes For Assignment
1 page
108 & Iob
No ratings yet
108 & Iob
75 pages
Embroidery
No ratings yet
Embroidery
3 pages
Syllabus and Previous Year Paper Analysis (FOR UPSC ALC - 2022)
No ratings yet
Syllabus and Previous Year Paper Analysis (FOR UPSC ALC - 2022)
5 pages