Supervised Learning in R: Classification

These notes work through four supervised classification techniques in R: k-nearest neighbors (kNN) for recognizing road signs, naive Bayes for predicting a person's location, logistic regression for modeling charitable donors, and decision trees and random forests for predicting loan outcomes. Each section loads a dataset, explores it, builds a model, makes predictions, evaluates accuracy, and visualizes the results.


Recognizing a road sign with kNN

# Load the 'class' package
library(class)

# Create a vector of labels
sign_types <- signs$sign_type

# Classify the next sign observed
knn(train = signs[-1], test = next_sign, cl = sign_types)
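
kNN classifies by distance, so predictors measured on larger scales dominate the vote. A minimal sketch of min-max rescaling before calling knn(); the normalize() helper is hypothetical, and it assumes every column of signs except the first is numeric:

# Rescale each numeric predictor to the [0, 1] range (hypothetical helper)
normalize <- function(x) (x - min(x)) / (max(x) - min(x))
signs_scaled <- as.data.frame(lapply(signs[-1], normalize))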


Exploring the traffic sign dataset


# Examine the structure of the signs dataset
str(signs)

# Count the number of signs of each type
table(signs$sign_type)

# Check r10's average red level by sign type
aggregate(r10 ~ sign_type, data = signs, mean)

Classifying a collection of road signs


# Use kNN to identify the test road signs
sign_types <- signs$sign_type
signs_pred <- knn(train = signs[-1], test = test_signs[-1], cl = sign_types)

# Create a confusion matrix of the predicted versus actual values
signs_actual <- test_signs$sign_type
table(signs_pred, signs_actual)

# Compute the accuracy
mean(signs_pred == signs_actual)
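
Overall accuracy can hide class-specific mistakes. A quick sketch using the same predictions: normalizing each column of the confusion matrix shows what fraction of each actual sign type was classified correctly.

# Column-wise proportions: per-class recall for each actual sign type
prop.table(table(signs_pred, signs_actual), margin = 2)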


Testing other 'k' values


# Compute the accuracy of the baseline model (default k = 1)
k_1 <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types)
mean(signs_actual == k_1)

# Modify the above to set k = 7
k_7 <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types, k = 7)
mean(signs_actual == k_7)

# Set k = 15 and compare to the above
k_15 <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types, k = 15)
mean(signs_actual == k_15)

Seeing how the neighbors voted


# Use the prob parameter to get the proportion of votes for the winning class
sign_pred <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types,
                 k = 7, prob = TRUE)

# Get the "prob" attribute from the predicted classes
sign_prob <- attr(sign_pred, "prob")

# Examine the first several predictions
head(sign_pred)

# Examine the proportion of votes for the winning class
head(sign_prob)
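
Rather than rerunning knn() by hand for each k, the comparison can be automated. A sketch assuming the same signs, signs_test, sign_types, and signs_actual objects as above; accuracy_for_k() is a hypothetical helper:

# Hypothetical helper: test-set accuracy of kNN for a given k
accuracy_for_k <- function(k) {
  pred <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types, k = k)
  mean(pred == signs_actual)
}

# Compare several candidate values of k
sapply(c(1, 5, 7, 11, 15), accuracy_for_k)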


Computing probabilities

# Compute P(A)
p_A <- nrow(subset(where9am, location == "office")) / nrow(where9am)

# Compute P(B)
p_B <- nrow(subset(where9am, daytype == "weekday")) / nrow(where9am)

# Compute the observed P(A and B)
p_AB <- nrow(subset(where9am, location == "office" & daytype == "weekday")) /
  nrow(where9am)

# Compute P(A | B) and print its value
p_A_given_B <- p_AB / p_B
p_A_given_B
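
As a sanity check, the same conditional probability can be read off a contingency table, assuming the daytype and location columns contain the levels "weekday" and "office":

# P(office | weekday) from a cross-tabulation; should equal p_A_given_B
tab <- table(where9am$daytype, where9am$location)
tab["weekday", "office"] / sum(tab["weekday", ])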


A simple Naive Bayes location model


# Load the naivebayes package
library(naivebayes)

# Build the location prediction model
locmodel <- naive_bayes(location ~ daytype, data = where9am)

# Predict Thursday's 9am location
predict(locmodel, thursday9am)

# Predict Saturday's 9am location
predict(locmodel, saturday9am)

Examining "raw" probabilities


# The 'naivebayes' package is loaded into the workspace
# and the Naive Bayes 'locmodel' has been built

# Examine the location prediction model
locmodel

# Obtain the predicted probabilities for Thursday at 9am
predict(locmodel, thursday9am, type = "prob")

# Obtain the predicted probabilities for Saturday at 9am
predict(locmodel, saturday9am, type = "prob")


A more sophisticated location model


# The 'naivebayes' package is loaded into the workspace already

# Build a NB model of location
locmodel <- naive_bayes(location ~ daytype + hourtype, data = locations)

# Predict Brett's location on a weekday afternoon
predict(locmodel, weekday_afternoon)

# Predict Brett's location on a weekday evening
predict(locmodel, weekday_evening)

Preparing for unforeseen circumstances


# The 'naivebayes' package is loaded into the workspace already
# The Naive Bayes location model (locmodel) has already been built

# Observe the predicted probabilities for a weekend afternoon
predict(locmodel, weekend_afternoon, type = "prob")

# Build a new model using the Laplace correction
locmodel2 <- naive_bayes(location ~ daytype + hourtype, data = locations,
                         laplace = 1)

# Observe the new predicted probabilities for a weekend afternoon
predict(locmodel2, weekend_afternoon, type = "prob")
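
The correction matters because naive Bayes multiplies conditional probabilities: a feature/class combination never seen in training contributes a zero that wipes out the entire posterior. A sketch for spotting such empty cells, assuming these column names in locations:

# Any zero cell here can zero out a posterior without the Laplace correction
table(locations$hourtype, locations$location)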


Building simple logistic regression models


# Examine the dataset to identify potential independent variables
str(donors)

# Explore the dependent variable
table(donors$donated)

# Build the donation model
donation_model <- glm(donated ~ bad_address + interest_religion + interest_veterans,
                      data = donors, family = "binomial")

# Summarize the model results
summary(donation_model)

Making a binary prediction


# Estimate the donation probability
donors$donation_prob <- predict(donation_model, type = "response")

# Find the donation probability of the average prospect
mean(donors$donated)

# Predict a donation if probability of donation is greater than average
donors$donation_pred <- ifelse(donors$donation_prob > 0.0504, 1, 0)

# Calculate the model's accuracy
mean(donors$donated == donors$donation_pred)
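
With only about 5% of prospects donating, accuracy alone is a weak yardstick: predicting "no donation" for everyone would score roughly 95%. A short sketch of the confusion matrix behind the accuracy figure:

# Rows: predicted outcome; columns: actual outcome
table(donors$donation_pred, donors$donated)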


Calculating ROC Curves and AUC


# Load the pROC package
library(pROC)

# Create a ROC curve
ROC <- roc(donors$donated, donors$donation_prob)

# Plot the ROC curve
plot(ROC, col = "blue")

# Calculate the area under the curve (AUC)
auc(ROC)
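
For context, an uninformative model should land near AUC = 0.5. A quick sketch; the seed value is arbitrary:

# A random score yields a baseline AUC of roughly 0.5
set.seed(123)
auc(roc(donors$donated, runif(nrow(donors))))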


Coding categorical features


# Convert the wealth rating to a factor
donors$wealth_levels <- factor(donors$wealth_rating, levels = c(0, 1, 2, 3),
                               labels = c("Unknown", "Low", "Medium", "High"))

# Use relevel() to change reference category
donors$wealth_levels <- relevel(donors$wealth_levels, ref = "Medium")

# See how our factor coding impacts the model
summary(glm(donated ~ wealth_levels, data = donors, family = "binomial"))

Handling missing data


# Find the average age among non-missing values
summary(donors$age)

# Impute missing age values with the mean age
donors$imputed_age <- ifelse(is.na(donors$age),
                             round(mean(donors$age, na.rm = TRUE), 2),
                             donors$age)

# Create missing value indicator for age
donors$missing_age <- ifelse(is.na(donors$age), 1, 0)
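
The indicator lets the model treat "age unknown" as information in its own right. A sketch of how both variables might enter a model together; whether this improves the fit depends on the data:

# Use the imputed age alongside the missingness flag as predictors
summary(glm(donated ~ imputed_age + missing_age,
            data = donors, family = "binomial"))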


Building a more sophisticated model


# Build a recency, frequency, and money (RFM) model ('*' adds the interaction)
rfm_model <- glm(donated ~ recency * frequency + money,
                 data = donors, family = "binomial")

# Summarize the RFM model to see how the parameters were coded
summary(rfm_model)

# Compute predicted probabilities for the RFM model
# (predict.glm() takes 'newdata', not 'data')
rfm_prob <- predict(rfm_model, newdata = donors, type = "response")

# Plot the ROC curve for the new model
library(pROC)
ROC <- roc(donors$donated, rfm_prob)
plot(ROC, col = "red")
auc(ROC)


Building a stepwise regression model


# Specify a null model with no predictors
null_model <- glm(donated ~ 1, data = donors, family = "binomial")

# Specify the full model using all of the potential predictors
full_model <- glm(donated ~ ., data = donors, family = "binomial")

# Use a forward stepwise algorithm to build a parsimonious model
step_model <- step(null_model,
                   scope = list(lower = null_model, upper = full_model),
                   direction = "forward")

# Estimate the stepwise donation probability
step_prob <- predict(step_model, type = "response")

# Plot the ROC of the stepwise model
library(pROC)
ROC <- roc(donors$donated, step_prob)
plot(ROC, col = "red")
auc(ROC)

Building a simple decision tree


# Load the rpart package
library(rpart)

# Build a lending model predicting loan outcome from loan amount and credit score
loan_model <- rpart(outcome ~ loan_amount + credit_score, data = loans,
                    method = "class", control = rpart.control(cp = 0))

# Make a prediction for someone with good credit
predict(loan_model, good_credit, type = "class")

# Make a prediction for someone with bad credit
predict(loan_model, bad_credit, type = "class")
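
rpart can also return class probabilities rather than hard labels, which is what an ROC analysis like the one above would need. A brief sketch:

# Per-class probabilities instead of a single predicted label
predict(loan_model, good_credit, type = "prob")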

Visualizing classification trees


# Examine the loan_model object
loan_model

# Load the rpart.plot package
library(rpart.plot)

# Plot the loan_model with default settings
rpart.plot(loan_model)

# Plot the loan_model with customized settings
rpart.plot(loan_model, type = 3, box.palette = c("red", "green"),
           fallen.leaves = TRUE)


Creating random test datasets


# Determine the number of rows for training
nrow(loans) * 0.75

# Create a random sample of row IDs
sample_rows <- sample(nrow(loans), nrow(loans) * 0.75)

# Create the training dataset
loans_train <- loans[sample_rows, ]

# Create the test dataset
loans_test <- loans[-sample_rows, ]
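
Because sample() is random, every run produces a different split. For a reproducible partition, fix the RNG seed first; the seed value here is arbitrary:

# Reproducible 75/25 split
set.seed(42)
sample_rows <- sample(nrow(loans), nrow(loans) * 0.75)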

Building and evaluating a larger tree


# Grow a tree using all of the available applicant data
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0))

# Make predictions on the test dataset
loans_test$pred <- predict(loan_model, loans_test, type = "class")

# Examine the confusion matrix
table(loans_test$pred, loans_test$outcome)

# Compute the accuracy on the test dataset
mean(loans_test$pred == loans_test$outcome)


Preventing overgrown trees


# Grow a tree with maxdepth of 6
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0, maxdepth = 6))

# Make a class prediction on the test set
loans_test$pred <- predict(loan_model, loans_test, type = "class")

# Compute the accuracy of the simpler tree
mean(loans_test$pred == loans_test$outcome)

# Swap maxdepth for a minimum split of 500
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0, minsplit = 500))

# Run this. How does the accuracy change?
loans_test$pred <- predict(loan_model, loans_test, type = "class")
mean(loans_test$pred == loans_test$outcome)

Creating a nicely pruned tree


# Grow an overly complex tree
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0))

# Examine the complexity plot
plotcp(loan_model)

# Prune the tree
loan_model_pruned <- prune(loan_model, cp = 0.0014)

# Compute the accuracy of the pruned tree
loans_test$pred <- predict(loan_model_pruned, loans_test, type = "class")
mean(loans_test$pred == loans_test$outcome)
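
Instead of reading cp off the plot by eye, the value minimizing cross-validated error can be pulled from the model's cp table. A sketch using rpart's cptable component:

# Select the cp with the lowest cross-validated error (xerror)
best_cp <- loan_model$cptable[which.min(loan_model$cptable[, "xerror"]), "CP"]
loan_model_pruned <- prune(loan_model, cp = best_cp)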


Building a random forest model


# Load the randomForest package
library(randomForest)

# Build a random forest model
loan_model <- randomForest(outcome ~ ., data = loans_train)

# Compute the accuracy of the random forest
loans_test$pred <- predict(loan_model, loans_test)
mean(loans_test$pred == loans_test$outcome)
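
randomForest's main tuning knobs are the number of trees (ntree) and the number of predictors tried at each split (mtry). A sketch with illustrative values, not tuned ones:

# A larger forest with an explicitly chosen mtry (values are illustrative)
loan_model <- randomForest(outcome ~ ., data = loans_train,
                           ntree = 500, mtry = 4)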
