# CarSeats: Classification Tree
# Attach the library
# Loading the library
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.3.3
library(tree)
## Warning: package 'tree' was built under R version 3.3.3
# Attach the data set
# NOTE(review): attach() is kept only in case other code relies on bare column
# names; the statements below reference the columns explicitly.
attach(Carseats)
# Creating the dummy variable for Sales: "Yes" when Sales exceeds 8, else "No".
# The response is stored as a factor: tree() needs a factor response to fit a
# classification tree, and since R 4.0 data.frame() no longer converts
# character columns to factors by default.
High <- factor(ifelse(Carseats$Sales <= 8, "No", "Yes"))
# Assign by name so that re-running the script overwrites the column instead
# of appending a duplicate (High.1) that would leak the response into `High ~ .`.
Carseats$High <- High
# Creating the Training and Test Data set
# Fix the RNG seed so the random split is reproducible.
set.seed(2)
# Draw 200 row indices for training; the index range is taken from the data
# itself rather than a hard-coded 1:400.
train <- sample(seq_len(nrow(Carseats)), 200)
# Every row that was not drawn for training goes to the test set.
Carseats.test <- Carseats[-train, ]
High.test <- High[-train]
# Fit a classification tree for High on the training rows, using every
# column except Sales as a predictor (High itself was derived from Sales).
tree.carseats <- tree(
  formula = High ~ . - Sales,
  data = Carseats,
  subset = train
)
# Draw the fitted tree, then label its nodes; pretty = 0 leaves the factor
# level names unabbreviated in the split labels.
plot(tree.carseats)
text(tree.carseats, pretty = 0)
# Classify the held-out test rows with the unpruned tree
tree.pred <- predict(
  tree.carseats,
  newdata = Carseats.test,
  type = "class"
)
# Confusion matrix: predicted class (rows) against actual class (columns)
table(tree.pred, High.test)
## High.test
## tree.pred No Yes
## No 86 27
## Yes 30 57
# Choose the tree size by cross-validation, pruning on misclassification
# Seed the RNG first so the cross-validation run is reproducible
set.seed(3)
cv.carseats <- cv.tree(object = tree.carseats, FUN = prune.misclass)
# Components of the cross-validation result
names(cv.carseats)
## [1] "size" "dev" "k" "method"
# Full result: candidate sizes, their CV error (dev) and the cost-complexity k
cv.carseats
## $size
## [1] 19 17 14 13 9 7 3 2 1
##
## $dev
## [1] 55 55 53 52 50 56 69 65 80
##
## $k
## [1] -Inf 0.0000000 0.6666667 1.0000000 1.7500000 2.0000000
## [7] 4.2500000 5.0000000 23.0000000
##
## $method
## [1] "misclass"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
# Plotting Cross-Validation Error against Size
plot(cv.carseats$size, cv.carseats$dev, type = "b")
# Pruning the Tree
# Take the target size from the cross-validation result instead of
# hard-coding it: the smallest tree among those with the lowest CV error
# (9 terminal nodes for the cross-validation output recorded in this script).
best.size <- min(cv.carseats$size[cv.carseats$dev == min(cv.carseats$dev)])
prune.carseats <- prune.misclass(tree.carseats, best = best.size)
# Draw and label the pruned tree
plot(prune.carseats)
text(prune.carseats, pretty = 0)
# Prediction for the test data set using the pruned tree
tree.pred <- predict(
  prune.carseats,
  newdata = Carseats.test,
  type = "class"
)
# Confusion matrix: predicted class (rows) against actual class (columns)
table(tree.pred, High.test)
## High.test
## tree.pred No Yes
## No 94 24
## Yes 22 60
# Predicted probability of the "Yes" class for the test data set, from the
# pruned tree. The column is selected by class name rather than by position.
tree.prob <- predict(prune.carseats, Carseats.test, type = "vector")[, "Yes"]
# ROC Plot
library(pROC)
## Warning: package 'pROC' was built under R version 3.3.3
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Build the ROC object once and reuse it for both the printed summary and
# the plot, instead of computing it twice.
roc.carseats <- roc(High.test, tree.prob)
roc.carseats
##
## Call:
## roc.default(response = High.test, predictor = tree.prob)
##
## Data: tree.prob in 116 controls (High.test No) < 84 cases (High.test Yes).
## Area under the curve: 0.7788
# legacy.axes = TRUE puts 1 - specificity on the x axis
plot(roc.carseats, col = "blue", legacy.axes = TRUE)