0% found this document useful (0 votes)
39 views4 pages

R Code - Session 11

The document shows how to build decision tree models in R using various packages and techniques. It loads several packages, separates data into training and testing sets, builds decision tree models on the training set, prunes the trees, evaluates the models on the testing set, and plots the trees.

Uploaded by

raj
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
39 views4 pages

R Code - Session 11

The document shows how to build decision tree models in R using various packages and techniques. It loads several packages, separates data into training and testing sets, builds decision tree models on the training set, prunes the trees, evaluates the models on the testing set, and plots the trees.

Uploaded by

raj
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

library(rpart)# tree models

library(caret) # feature selection


library(rpart.plot) # plot dtree
library(ROCR) # model evaluation
library(e1071) # tuning model
library(RColorBrewer)
library(rattle)# optional, if you can't install it, it's okay
library(tree)
library(ISLR)

############################## 1 way of doing


########################################

## Working directory for the class data files.
## NOTE(review): hard-coded absolute path -- adjust for your machine.
setwd("C:\\Users\\Subhojit\\Desktop\\R class July 2017\\Class 9 Decision Tree")

Carseats <- read.csv("data.csv")

## Train/test split: with a fixed seed for reproducibility, half the rows
## form the training set and the remainder the testing set.
set.seed(2)
train <- sample(1:nrow(Carseats), nrow(Carseats)/2)
test <- -train
training <- Carseats[train, ]
testing <- Carseats[test, ]
test_high <- testing$high

## Separating the independent and dependent variables.
## BUG FIX: these two assignments originally ran BEFORE `training` was
## created above, which errors on a fresh session (object not found) or,
## worse, silently reuses a stale `training` from a previous run.
ind_var <- training[, -11]   # every predictor column except column 11
dep_var <- training[, 11]    # column 11 is the target (presumably `high`
                             # -- confirm against data.csv's layout)

# Decision Tree Model (tree package) ----

## Fit a classification tree predicting `high` from all other columns
## of the training set.
mtree <- tree(high ~ ., training)

## Plot the tree skeleton and label the splits.
## BUG FIX: the argument was misspelled `preety = 0`; `text.tree` has no
## such formal, so the typo was silently swallowed by `...` and the
## category labels were abbreviated instead of printed in full.
plot(mtree)
text(mtree, pretty = 0)

## Misclassification (error) rate on the held-out testing set.
predictions <- predict(mtree, testing, type = "class")
mean(predictions != test_high)  # error rate

#testing$predictions <- predictions
#setwd("C:\\Users\\Subhojit\\Desktop\\R class July 2017\\Class 9 Decision Tree")
#write.csv(testing,"bhul.csv")

## Confusion matrix on the TRAINING predictors -- this is in-sample
## performance, so expect it to look better than the test error above.
predictions <- predict(mtree, ind_var, type = "class")
confusionMatrix(data = predictions, reference = dep_var, positive = "Yes")

############################
########Pruning#############
############################

## Cross-validation to check where to stop pruning.
set.seed(3)
cv_tree <- cv.tree(mtree, FUN = prune.misclass)
names(cv_tree)

## CV error (`dev` counts misclassifications under prune.misclass)
## against candidate tree size.
plot(cv_tree$size, cv_tree$dev, type = "b")
table(cv_tree$size, cv_tree$dev)

## Prune to the size with the lowest cross-validated error.
## IMPROVEMENT: the original hard-coded `best = 9`, ignoring the CV run
## above; reading the size out of `cv_tree` makes the choice data-driven.
best_size <- cv_tree$size[which.min(cv_tree$dev)]
pruned <- prune.misclass(mtree, best = best_size)
plot(pruned)
text(pruned, pretty = 0)

## Held-out error rate of the pruned tree.
predictions <- predict(pruned, testing, type = "class")
mean(predictions != test_high)

## In-sample confusion matrix of the pruned tree.
predictions <- predict(pruned, ind_var, type = "class")
confusionMatrix(data = predictions, reference = dep_var, positive = "Yes")

############################################################ 2nd way of doing


########

############################## 2nd way of doing


########################################

## Working directory for the class data files.
## NOTE(review): hard-coded absolute path -- adjust for your machine.
setwd("C:\\Users\\Subhojit\\Desktop\\R class July 2017\\Class 9 Decision Tree")

Carseats <- read.csv("data.csv")

## Train/test split: same seed as way 1, so both approaches see the
## identical partition of the data.
set.seed(2)
train <- sample(1:nrow(Carseats), nrow(Carseats)/2)
test <- -train
training <- Carseats[train, ]
testing <- Carseats[test, ]
test_high <- testing$high

## Separating the independent and dependent variables.
## BUG FIX: these two assignments originally ran BEFORE `training` was
## created above (use-before-definition); they must follow the split.
ind_var <- training[, -11]   # every predictor column except column 11
dep_var <- training[, 11]    # column 11 is the target (presumably `high`
                             # -- confirm against data.csv's layout)

library(rpart)

## Fit a classification tree with rpart, with the growth controls made
## explicit in a named control object.
tree_ctrl <- rpart.control(
  minsplit = 20,     # minimum observations in a node before a split is tried
  minbucket = 7,     # minimum observations allowed in a terminal leaf
  maxdepth = 10,
  usesurrogate = 2,
  xval = 10          # folds of cross-validation behind the cp table
)
mtree <- rpart(high ~ ., data = training, method = "class",
               control = tree_ctrl)

# Beautify tree
# view 1: basic plot with full factor labels
prp(mtree, faclen = 0, cex = 0.8, extra = 1)

# view 2: annotate every node with its total observation count
node_with_count <- function(x, labs, digits, varlen) {
  paste(labs, "\n\nn =", x$frame$n)
}
prp(mtree, faclen = 0, cex = 0.8, node.fun = node_with_count)

# view 3: fancy plot (needs the optional rattle package; opens its GUI)
rattle()
fancyRpartPlot(mtree)

## Misclassification rate of the rpart model on the held-out testing set.
predictions <- predict(mtree, testing, type = "class")
mean(predictions != test_high)

# (optional) persist the test-set predictions for inspection:
#testing$predictions <- predictions
#setwd("C:\\Users\\Subhojit\\Desktop\\R class July 2017\\Class 9 Decision Tree")
#write.csv(testing,"bhul.csv")

## Confusion matrix on the TRAINING predictors (in-sample performance).
predictions <- predict(mtree, ind_var, type = "class")
confusionMatrix(data = predictions, reference = dep_var, positive = "Yes")

############################
########Pruning#############
############################

## Inspect the cp table: we want the row with the lowest cross-validated
## error (`xerror`).
printcp(mtree)

## Pull the complexity parameter belonging to that lowest-xerror row.
xerr <- mtree$cptable[, "xerror"]
bestcp <- mtree$cptable[which.min(xerr), "CP"]

# Prune the tree using the best cp, then plot the result.
pruned <- prune(mtree, cp = bestcp)
prp(pruned, faclen = 0, cex = 0.8, extra = 1)

## Held-out error rate of the pruned tree.
predictions <- predict(pruned, testing, type = "class")
mean(predictions != test_high)

## In-sample confusion matrix of the pruned tree.
predictions <- predict(pruned, ind_var, type = "class")
confusionMatrix(data = predictions, reference = dep_var, positive = "Yes")

# Advanced Plot: a fully customised rendering of the pruned tree.
# All arguments are named, so their order is cosmetic only.
prp(pruned,
    main = "Beautiful Tree",
    # node content and layout
    extra = 106,               # extra per-node detail (see ?prp for codes)
    nn = TRUE,                 # display node numbers
    fallen.leaves = TRUE,      # align all leaves at the bottom
    branch = .5,
    branch.lty = 3,            # dotted branch lines
    faclen = 0,                # don't abbreviate factor levels
    shadow.col = "gray",
    trace = 1,                 # print layout info while drawing
    # split-label styling
    split.cex = 1.2,
    split.prefix = "is ",
    split.suffix = "?",
    split.box.col = "lightgray",
    split.border.col = "darkgray",
    split.round = .5)

You might also like