Introduction To Statistical Learning R Labs and Exercises Code

This document provides an overview of simple linear regression, multiple linear regression, and other regression techniques using the Boston housing data set. It demonstrates how to fit linear regression models in R, interpret outputs, analyze residuals and leverage, compare models, and handle different predictor variable types like qualitative variables. Exercises at the end apply these techniques to other data sets and explore additional methods like transformations, interactions and adding polynomial terms.

#3.6.2 Simple Linear Regression
fix(Boston)
names(Boston)
attach(Boston)
#medv response, lstat predictor
lm.fit=lm(medv~lstat)
#basic information
lm.fit
#detailed information
summary(lm.fit)
#pieces of information stored in lm.fit
names(lm.fit)
terms(lm.fit)
residuals(lm.fit)
plot(residuals(lm.fit))
#confidence intervals
confint(lm.fit)
#predict() is used to compute confidence/prediction intervals for a given value of the predictor
predict(lm.fit,data.frame(lstat=c(5,10,15)),interval="confidence")
predict(lm.fit,data.frame(lstat=c(5,10,15)),interval="prediction") #both centered on the same value but the latter is wider
#plot the response (medv), predictor(lstat), and regression line
plot(lstat,medv)
abline(lm.fit)
#diagnostic plots
par(mfrow=c(2,2))
plot(lm.fit)
#residuals() returns the residuals while rstudent() returns the studentized residuals, which we plot against the fitted values [predict(lm.fit)]
plot(predict(lm.fit),residuals(lm.fit))
plot(predict(lm.fit),rstudent(lm.fit))
#leverage statistics
plot(hatvalues(lm.fit))
which.max(hatvalues(lm.fit))
#3.6.3 Multiple Linear Regression
lm.fit=lm(medv~lstat+age,data=Boston)
summary(lm.fit)
lm.fit=lm(medv~.,data=Boston)
summary(lm.fit)
library(car)
#variance inflation factor
vif(lm.fit)
#all predictors except age
lm.fit=lm(medv~.-age,data=Boston)
summary(lm.fit)
#3.6.4 Interaction Terms
#lstat*age=lstat+age+lstat:age <- interaction term
summary(lm(medv~lstat*age,data=Boston))
#3.6.5 Non-Linear Transformation of the Predictors
lm.fit2=lm(medv~lstat+I(lstat^2),data=Boston)
summary(lm.fit2)
#comparing models using anova

anova(lm.fit,lm.fit2)
lm.fit5=lm(medv~poly(lstat,5),data=Boston)
#log transformation of the predictors [useful for heteroscedasticity and other problems]
summary(lm(medv~log(rm),data=Boston))
#3.6.6 Qualitative Predictors
fix(Carseats)
names(Carseats)
#lm with interaction terms
lm.fit=lm(Sales~.+Income:Advertising+Price:Age,data=Carseats) #all predictors plus the two interaction terms
summary(lm.fit)
#contrasts() returns the coding done for dummy vars.
attach(Carseats)
contrasts(ShelveLoc)

################ EXERCISES ##############


attach(Auto)
summary(Auto)
##8
#a
lm.fit=lm(mpg~horsepower,data=Auto)
summary(lm.fit)
#i: Since the F-statistic is far larger than 1 and the p-value of the F-statistic is very small, we can reject the null hypothesis.
#ii: We measure the residual error relative to the response using the mean and the RSE; mean(mpg) is about 23.4 and the RSE of
#    lm.fit is 4.906, giving a relative error of roughly 4.906/23.4*100 = 21%. R-squared is about 0.6, meaning roughly 60% of the
#    variance in mpg is explained by the model.
#iii: The relationship between mpg and horsepower is negative.
#iv.
predict (lm.fit ,data.frame(horsepower=c(98)),interval ="prediction")
predict (lm.fit ,data.frame(horsepower=c(98)),interval ="confidence")
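# A minimal sketch (an addition, not part of the original answer) verifying the quantities quoted in ii above,
# using values stored in the summary object.
rse=summary(lm.fit)$sigma   #residual standard error
rse/mean(mpg)*100           #error relative to the mean response, in percent
summary(lm.fit)$r.squared   #proportion of variance in mpg explained by the model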
#b
plot(horsepower,mpg)
abline(lm.fit)
#c
par(mfrow=c(2,2))
plot(lm.fit)
##9
#a
pairs(Auto)
#b
cor(subset(Auto,select=-name))

#c
lm.fit=lm(mpg~.-name,data=Auto)
summary(lm.fit)
#i. yes: the F-statistic is far from 1 and its p-value is very small
#iii. for each one-unit increase in year, mpg increases by the value of the year coefficient, holding the other predictors fixed
#d
plot(lm.fit)
par(mfrow=c(1,1))
plot(predict(lm.fit),rstudent(lm.fit))
#e
lm.fit1=lm(mpg~cylinders*displacement+displacement:weight)
summary(lm.fit1)
#f
lm.fit=lm(mpg~sqrt(weight)+log(displacement)+I(cylinders^2),data=Auto)
summary(lm.fit)
##10
#a
attach(Carseats)
names(Carseats)
summary(Carseats)
fix(Carseats)
lm.fit=lm(Sales~Price+Urban+US,data=Carseats)
summary(lm.fit)
#b
# as Price increases, Sales decrease; the effect is significant (small p-value)
# the model suggests there is no relationship between the Urban location of the store and Sales
# being located in the US is significant; Sales increase by about 1200 units
#c
#sales=13.04-0.05*price-0.02*urbanYES+1.2*USyes
#d
#for price and usyes
#e
lm.fit2=lm(Sales~Price+US,data=Carseats)
summary(lm.fit)
#f
#similar values for R Squared and RSE
#g
confint(lm.fit2)
#h
plot(predict(lm.fit2),rstudent(lm.fit2)) # all between -3 and 3, so no outliers
plot(lm.fit2) #points that exceed (p+1) / n have high leverage
dim(Carseats)
(2+1)/400
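# A small sketch (an addition) using the (p+1)/n threshold just computed: hatvalues() gives the
# leverage statistics for lm.fit2, and we count/locate the points that exceed the threshold.
sum(hatvalues(lm.fit2)>(2+1)/400)
which.max(hatvalues(lm.fit2))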
##11
#a
set.seed(1)
x=rnorm(100)
y=2*x+rnorm(100)
lm.fit=lm(y~x+0)
summary(lm.fit)
#b

lm.fit1=lm(x~y+0)
summary(lm.fit1)
#c
plot(x,y)
abline(lm.fit)
abline(lm.fit1)
#d
sqrt(length(x)-1)*sum(x*y)/sqrt(sum(x^2)*sum(y^2)-sum(x*y)^2)
# t statistic computed by the formula above
#f
lm.fit1=lm(x~y)
lm.fit2=lm(y~x)
summary(lm.fit1)
summary(lm.fit2)

##15
#a
library(MASS)
attach(Boston)
names(Boston)
lm.fit1=lm(crim~zn,data=Boston)
summary(lm.fit1)
#...
#b
lm.fit.all=lm(crim~.,data=Boston)
summary(lm.fit.all) # zn, age, dis, black, medv
################################################################################
#######################################################
#the chapter with the boxplot summary, then the lab and exercises
#Logistic regression, LDA, QDA, KNN
################################################################################
######################################################
# PAGE 151. !!
#Both logistic regression and LDA produce linear decision boundaries. The only difference lies in
# the fact that B0 and B1 are estimated using maximum likelihood, whereas c0 and c1 are computed
# using the estimated mean and variance from a normal distribution.
#LDA assumes that the observations are drawn from a Gaussian distribution with a common covariance
# matrix in each class, and provides improvements over logistic regression when this assumption
# holds. When the Gaussian assumptions are not met, logistic regression outperforms LDA.
#On the other hand, KNN is expected to dominate logistic regression and LDA when the boundary is
# highly non-linear, because it is non-parametric and makes no assumption about the boundary's
# shape. KNN does not tell us which predictors are important and does not give a coefficient table.
#QDA serves as an intermediary between LDA/logistic regression and KNN because it assumes a
# quadratic decision boundary, and is therefore more flexible.
# ##############################################################################
#######################################################
# Scenario 1 (linear): 20 observations in each class; uncorrelated random normal predictors with a
#   different mean in each class. LDA performed best because it assumes exactly this setting, with
#   logistic regression close behind because it is similar. KNN performed poorly because its
#   increase in variance was not offset by a reduction in bias. QDA performed worse than LDA and
#   logistic regression but better than KNN, because it assumes a quadratic boundary.
# Scenario 2 (linear): same as Scenario 1 but with a correlation of -0.5 between the variables.
#   Same ordering of results.
# Scenario 3 (linear): x1 and x2 drawn from the t-distribution, with 50 observations per class.
#   Because the t-distribution yields more extreme values, even though the decision boundary is
#   still linear, the setup violates the assumptions of LDA (which assumes observations are drawn
#   from a normal distribution). Logistic regression performed best, QDA worst.
# Scenario 4 (non-linear): normal distributions with a correlation of 0.5 in the first class and
#   -0.5 in the second. This corresponds to the QDA assumptions, so QDA outperformed the others.
# Scenario 5 (non-linear): two classes with uncorrelated normal predictors; the responses were
#   sampled from the logistic function using x1^2, x2^2 and x1*x2 as predictors. This corresponds
#   to a quadratic boundary, so QDA performed best, followed by KNN-CV, with the linear methods
#   performing worst.
# Scenario 6 (non-linear): same as Scenario 5 but sampled from a more complicated non-linear
#   function. KNN-CV performed best, followed by QDA and then the linear methods.
## Using transformations of the predictors, we can create more flexible versions of these methods:
## e.g. a more flexible version of logistic regression uses x^2, x^3, etc. as predictors.
################################################################################
######################################################
#4.6 Lab: Logistic Regression, LDA, QDA, and KNN

library(ISLR)
names(Smarket)
dim(Smarket)
summary(Smarket)
pairs(Smarket)
cor(Smarket)
cor(Smarket[,-9])
attach(Smarket)
plot(Volume)
#4.6.2 Logistic Regression
glm.fit=glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume,data=Smarket,family=binomial)
summary(glm.fit)
coef(glm.fit)
summary(glm.fit)$coef

#no newdata argument supplied, so predict() returns probabilities for the training data


glm.probs=predict(glm.fit,type="response")
glm.probs[1:10]
#see how the qualitative var is split
contrasts(Direction)
#convert the probabilities into class labels Up/Down using a 0.5 threshold
glm.pred=rep("Down",dim(Smarket)[1])
glm.pred[glm.probs>.5]="Up"
table(glm.pred,Direction)
(145+507)/(145+507+457+141)
mean(glm.pred==Direction)
#create a train set and a test set
train=(Year<2005)
Smarket.2005=Smarket[!train,]
dim(Smarket.2005)
Direction.2005=Direction[!train]
glm.fit=glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume,data=Smarket,family=binomial,subset=train)
glm.probs=predict(glm.fit,Smarket.2005,type="response")#Smarket.2005 test set
dim(Smarket[!train,])
glm.pred=rep("Down",252)
glm.probs[glm.pred>.5]="Up"
table(glm.pred,Direction.2005)
mean(glm.pred==Direction.2005)
mean(glm.pred!=Direction.2005)
#we remove variables that are not helpful, because keeping them increases variance without a compensating reduction in bias
glm.fit=glm(Direction~Lag1+Lag2,data=Smarket,subset = train,family=binomial)
glm.probs=predict(glm.fit,Smarket.2005,type="response")
glm.pred=rep("Down",252)
glm.pred[glm.probs>.5]="Up"
table(glm.pred,Direction.2005)
mean(glm.pred==Direction.2005)
#predict value of Direction for values of the two vars
predict(glm.fit,newdata=data.frame(Lag1=c(1.2,1.5),Lag2=c(1.1,-0.8)),type="response")
#4.6.3 LDA
library(MASS)
lda.fit=lda(Direction~Lag1+Lag2,data=Smarket,subset=train)
lda.fit
#group means are the averages of each predictor within each class
#they suggest that the previous days' returns tend to be negative when the market goes up
# and positive when the market goes down
#the coefficients are used to form the linear combination that defines the LDA decision rule
#predict() returns three elements:
#   class = LDA's prediction about the movement of the market
#   posterior = the k-th column contains the posterior probability that the observation belongs to the k-th class
#   x = the linear discriminants
lda.pred=predict(lda.fit,Smarket.2005)
names(lda.pred)
lda.pred$class
lda.pred$posterior
lda.pred$x

lda.class=lda.pred$class
table(lda.class,Direction.2005)
mean(lda.class==Direction.2005)
#apply a 50% threshold to the posterior probabilities to recreate the predictions in lda.pred$class
sum(lda.pred$posterior[,1]>=.5)
sum(lda.pred$posterior[,1]<.5)
lda.pred$posterior[1:20,1]
lda.class[1:20]
#different probability threshold
sum(lda.pred$posterior[,1]>.9)
#4.6.4 QDA
library(MASS)
qda.fit=qda(Direction~Lag1+Lag2,data=Smarket,subset=train)
qda.fit #contains the group means but not the coefficients, because QDA involves a quadratic function of the predictors
qda.class=predict(qda.fit,Smarket.2005)$class #same approach as with LDA
table(qda.class,Direction.2005)
(30+121)/(30+121+81+20)
mean(qda.class==Direction.2005)
#4.6.5 KNN
#knn() requires four inputs:
# a matrix containing the predictors associated with the training data, labeled train.x
# a matrix containing the predictors associated with the test data, labeled test.x
# a vector containing the class labels for the training observations, labeled train.Direction
# a value for K, the number of nearest neighbours
library(class)
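# A minimal sketch (an addition, following the inputs described above) applying knn() to the
# Smarket train/test split created earlier, with Lag1 and Lag2 as predictors.
train.x=cbind(Lag1,Lag2)[train,]
test.x=cbind(Lag1,Lag2)[!train,]
train.Direction=Direction[train]
set.seed(1)
knn.pred=knn(train.x,test.x,train.Direction,k=1)
table(knn.pred,Direction.2005)
mean(knn.pred==Direction.2005)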
#4.6.6 Application to caravan insurance data
attach(Caravan)
standardized.x=scale(Caravan[,-86])
test=1:1000
train.x=standardized.x[-test,]
test.x=standardized.x[test,]
train.y=Purchase[-test]
test.y=Purchase[test]
set.seed(1)
knn.pred=knn(train.x,test.x,train.y,k=1)
mean(test.y!=knn.pred)
mean(test.y!="No")
table(knn.pred,test.y)
#log reg with .25 threshold
glm.fit=glm(Purchase~.,data=Caravan,family=binomial,subset=-test)
glm.probs=predict(glm.fit,Caravan[test,],type="response")
glm.pred=rep("No",1000)
glm.pred[glm.probs>.25]="Yes"
table(glm.pred,test.y)
#4.7 Exercises

#9
library(ISLR)
attach(Weekly)
#a
pairs(Weekly) #year and volume
cor(Weekly)
summary(Weekly)
cor(Weekly[,-9])
#b
glm.fit=glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume,data=Weekly,family=binomial)
summary(glm.fit) #lag2
#c
glm.probs=predict(glm.fit,type="response")
glm.pred=rep("Down",length(glm.probs))
glm.pred[glm.probs>.5]="Up"
table(glm.pred,Direction)
(54+557)/(54+557+48+430)
mean(glm.pred==Direction)
557/(48+557) #up
54/(54+430) #down
#d
train=(Year<2009)
Weekly.test=Weekly[!train,]
glm.fit=glm(Direction~Lag2,data=Weekly,subset=train,family=binomial)
glm.probs=predict(glm.fit,Weekly.test,type="response")
glm.pred=rep("Down",length(glm.probs))
glm.pred[glm.probs>.5]="Up"
Direction.test=Direction[!train]
table(glm.pred,Direction.test)
train = (Year < 2009)
Weekly.0910 = Weekly[!train, ]
glm.fit = glm(Direction ~ Lag2, data = Weekly, family = binomial, subset = train)
glm.probs = predict(glm.fit, Weekly.0910, type = "response")
glm.pred = rep("Down", length(glm.probs))
glm.pred[glm.probs > 0.5] = "Up"
Direction.0910 = Direction[!train]
table(glm.pred, Direction.0910)
(9+56)/(9+56+34+5)
mean(glm.pred==Direction.0910)
#e
library(MASS)
lda.fit=lda(Direction~Lag2,data=Weekly,subset=train)
lda.pred=predict(lda.fit,Weekly.0910)
lda.class=lda.pred$class
table(lda.class,Direction.0910)
(9+56)/(9+5+34+56)
mean(lda.class==Direction.0910)
#f
qda.fit=qda(Direction~Lag2,data=Weekly,subset = train)
qda.class=predict(qda.fit,Weekly.0910)$class
table(qda.class,Direction.0910)
mean(qda.class==Direction.0910)
#g
library(class)

train.x=as.matrix(Lag2[train])
test.x=as.matrix(Lag2[!train])
train.direction=Direction[train]
set.seed(1)
knn.pred=knn(train.x,test.x,train.direction,k=1)
table(knn.pred,Direction.0910)
(21+31)/(21+31+22+30)
mean(knn.pred==Direction.0910)
#h
#logistic regression and LDA give the best (identical) test accuracy, about 62.5%
#i

library(class)
train.x=as.matrix(Lag2[train])
test.x=as.matrix(Lag2[!train])
train.direction=Direction[train]
set.seed(1)
knn.pred=knn(train.x,test.x,train.direction,k=15)
mean(knn.pred==Direction.0910)
#11
#a
attach(Auto)
dim=dim(Auto)[1]
mpg01=rep(0,dim)
mpg01
mpg01[mpg>median(mpg)]=1
mpg01
Auto=data.frame(Auto,mpg01)
fix(Auto)
#b
summary(Auto)
cor(Auto[,-9]) #drop the qualitative name column
pairs(Auto)
#c
train=(year%%2==0)
test=!train
Auto.train=Auto[train,]
Auto.test=Auto[test,]
mpg01.test=mpg01[test]
#cylinders + weight + displacement + horsepower
#d
library(MASS)
lda.fit=lda(mpg01~cylinders+weight+displacement+horsepower,data=Auto,subset=train)
lda.pred=predict(lda.fit,Auto.test)
mean(lda.pred$class!=mpg01.test) #about 12% test error rate
#e
qda.fit=qda(mpg01~cylinders+weight+displacement+horsepower,data=Auto,subset=train)
qda.class=predict(qda.fit,Auto.test)$class
mean(qda.class!=mpg01.test) #about 13% test error rate


#f
glm.fit=glm(mpg01~cylinders+weight+displacement+horsepower,data=Auto,family=binomial,subset=train)
glm.probs=predict(glm.fit,Auto.test,type="response")
glm.pred=rep(0,length(glm.probs))
glm.pred[glm.probs>.5]=1
mean(glm.pred!=mpg01.test)
#g
library(class)
train.x=cbind(cylinders,weight,displacement,horsepower)[train,]
test.x=cbind(cylinders,weight,displacement,horsepower)[test,]
train.mpg=mpg01[train]
knn.pred=knn(train.x,test.x,train.mpg,k=1)
mean(knn.pred!=mpg01.test)#15
knn.pred=knn(train.x,test.x,train.mpg,k=10)
mean(knn.pred!=mpg01.test)
knn.pred=knn(train.x,test.x,train.mpg,k=20)
mean(knn.pred!=mpg01.test)
knn.pred=knn(train.x,test.x,train.mpg,k=100)
mean(knn.pred!=mpg01.test)#14 best
#12
#a
power=function()
{2^3}
print(power())
#b
power2=function(x,a)
{x^a}
power2(2,3)
power2(3,8)
#c
#d
power3=function(x,a)
{
result=x^a
return(result)
}
power3(2,4)
#e
x=1:10
plot(x,power3(x,2))
#f
PlotPower = function(x, a) {
plot(x, power3(x, a))
}
PlotPower(1:10, 3)

#13
library(MASS)
attach(Boston)
summary(Boston)
#??
################################################################################
#################################
#5.3.1 Validation Set Approach
library(ISLR)
set.seed(1)
train=sample(392,196)
train
attach(Auto)
lm.fit=lm(mpg~horsepower,data=Auto,subset=train)
mean((mpg-predict(lm.fit,Auto))[-train]^2)
lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)
mean((mpg-predict(lm.fit2,Auto))[-train]^2)
lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)
mean((mpg-predict(lm.fit3,Auto))[-train]^2)
set.seed(2)
train=sample(392,196)
lm.fit=lm(mpg~horsepower,data=Auto,subset=train)
mean((mpg-predict(lm.fit,Auto))[-train]^2)
lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)
mean((mpg-predict(lm.fit2,Auto))[-train]^2)
lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)
mean((mpg-predict(lm.fit3,Auto))[-train]^2)
#5.3.2 LOOCV
#we use glm() rather than lm() because we can then use cv.glm() for cross-validation
glm.fit=glm(mpg~horsepower,data=Auto)
coef(glm.fit)
lm.fit=lm(mpg~horsepower,data=Auto)
coef(lm.fit)
#they are the same
library(boot)
glm.fit=glm(mpg~horsepower,data=Auto)
cv.err=cv.glm(Auto,glm.fit)
cv.err$delta
#delta contains the cross-validation results: the first component is the raw CV estimate of prediction error, the second is a bias-corrected version
#for loop for polynomial fits
cv.error=rep(0,5)
for(i in 1:5)
{glm.fit=glm(mpg~poly(horsepower,i),data=Auto)
cv.error[i]=cv.glm(Auto,glm.fit)$delta[1]
}
cv.error

#5.3.3 K-fold Cross-Validation


set.seed(17)
cv.error.10=rep(0,10)
for(i in 1:10)
{
glm.fit=glm(mpg~poly(horsepower,i),data=Auto)
cv.error.10[i]=cv.glm(Auto,glm.fit,K=10)$delta[1]
}
cv.error.10
#5.3.4 The Bootstrap
alpha.fn=function(data,index)
{
X=data$X[index]
Y=data$Y[index]
return( (var(Y)-cov(X,Y)) / (var(X)+var(Y)-2*cov(X,Y)) )
}
alpha.fn(Portfolio,1:100)
#this is automated using the boot() function
boot(Portfolio,alpha.fn,R=1000)
#create a function that takes a set and indices and returns slope and intercept
boot.fn=function(data,index)
{
return(coef(lm(mpg~horsepower,data=data,subset=index)))
}
boot.fn(Auto,1:392)
boot.fn(Auto,sample(392,392,replace=T))
boot(Auto,boot.fn,1000)
summary(lm(mpg~horsepower,data=Auto))$coef #compare with the bootstrap standard errors
boot.fn=function(data,index)
{ coefficients(lm(mpg~horsepower+I(horsepower^2),data=data,subset=index)) }
set.seed (1)
boot(Auto ,boot.fn ,1000)
##5.4 exercises
#5
#a
library(ISLR)
attach(Default)
glm.fit=glm(default~income+balance,data=Default,family = binomial)
#b
FiveB=function()
{
train=sample(dim(Default)[1],dim(Default)[1]/2)
glm.fit=glm(default~income+balance,data=Default,family=binomial,subset=train)
glm.pred=rep("No",dim(Default)[1]/2)
glm.probs=predict(glm.fit,Default[-train,],type="response")
glm.pred[glm.probs>0.5]="Yes"
return( mean(glm.pred!=Default[-train,]$default) )
}
FiveB()
#c
#d
FiveB=function()
{
train=sample(dim(Default)[1],dim(Default)[1]/2)
glm.fit=glm(default~income+balance+student,data=Default,family=binomial,subset=train)
glm.pred=rep("No",dim(Default)[1]/2)
glm.probs=predict(glm.fit,Default[-train,],type="response")
glm.pred[glm.probs>0.5]="Yes"
return( mean(glm.pred!=Default[-train,]$default) )
}
FiveB()
#6
#a
library(ISLR)
attach(Default)
set.seed(1)
glm.def=glm(default~income+balance,data=Default,family = binomial)
summary(glm.def)
#b
boot.fn = function(data, index) return(coef(glm(default ~ income + balance, data = data, family = binomial, subset = index)))

#c
library(boot)
boot(Default,boot.fn,50)
#d
#same
#8
#a
set.seed (1)
y=rnorm (100)
x=rnorm (100)
y=x-2* x^2+ rnorm (100)
#b
plot(x,y)
#c
Data=data.frame(x,y)
glm.fit=glm(y~x,data=Data)
cv.glm(Data,glm.fit)$delta
glm.fit=glm(y~poly(x,2),data=Data)
cv.glm(Data,glm.fit)$delta
#d
set.seed(2)
Data=data.frame(x,y)
glm.fit=glm(y~x,data=Data)
cv.glm(Data,glm.fit)$delta
glm.fit=glm(y~poly(x,2),data=Data)
cv.glm(Data,glm.fit)$delta
#9
#a
attach(Boston)
################################################################################
###################################
#6.5 Lab 1: Subset Selection Methods
library(ISLR)
attach(Hitters)
sum(is.na(Hitters))
Hitters=na.omit(Hitters)
sum(is.na(Hitters))
#regsubsets() used for best subset selection using RSS [same syntax as lm]
library(leaps)
regfit.full=regsubsets(Salary~.,data=Hitters)
summary(regfit.full)
#the function reports the best model up to eight variables, but the number of variables can be changed using nvmax=...
regfit.full=regsubsets(Salary~.,data=Hitters,nvmax = 19)
reg.summary=summary(regfit.full)
reg.summary
#we can use R squared, RSS, adjusted R squared, Cp and BIC to assess the model
names(reg.summary)
reg.summary$rsq
#plot RSS, adjusted R squared, Cp and BIC at once to assess the model
par(mfrow=c(2,2))
plot(reg.summary$rss,xlab="no of var",ylab="RSS",type="l")
plot(reg.summary$adjr2,xlab="no of var",ylab="adj r sq",type="l")
which.max (reg.summary$adjr2)
points(11,reg.summary$adjr2[11],col="red",cex=2,pch=20) #mark the best point on the plot
plot(reg.summary$cp,xlab="no of var",ylab="CP",type="l")
which.min(reg.summary$cp)
points(10,reg.summary$cp[10],col="blue",cex=3,pch=21)
plot(reg.summary$bic,xlab="no of var",ylab="BIC",type="l")
which.min(reg.summary$bic)
points(6,reg.summary$bic[6],col="blue",cex=3,pch=21)
plot(regfit.full,scale="r2")
plot(regfit.full,scale="adjr2")
plot(regfit.full,scale="Cp")
plot(regfit.full,scale="bic")
coef(regfit.full,6)
coef(regfit.full,8)
#6.5.2 Forward and Backward Stepwise Selection
#the best models with up to six variables are identical for best subset and forward selection; the seven-variable models differ (see the coef() calls below)
#forward selection
regfit.fwd=regsubsets(Salary~.,data=Hitters,nvmax=19,method="forward")
summary(regfit.fwd)
#backward selection
regfit.bwd=regsubsets(Salary~.,data=Hitters,nvmax = 19,method="backward")
summary(regfit.bwd)
coef(regfit.full,7)
coef(regfit.fwd,7)
coef(regfit.bwd,7)
#6.5.3 Choosing Among Models Using the Validation Set Approach and Cross-Validation
set.seed(1)
train=sample(c(TRUE,FALSE),nrow(Hitters),rep=T)
train
test=!train
#now apply regsubsets() to perform best subset selection
regfit.best=regsubsets(Salary~.,data=Hitters[train,],nvmax=19)
#we now compute MSE for the best model of each size
# to do this we run a loop, and for each size i we extract the coefficients from regfit.best for the model of that size,
# multiply them into the appropriate columns of the test model matrix [model.matrix()], THEN compute the MSE
test.mat=model.matrix(Salary~.,data=Hitters[test,])
val.errors=rep(NA,19)
for (i in 1:19)
{
coefi=coef(regfit.best,id=i)
pred=test.mat[,names(coefi)]%*%coefi
val.errors[i]=mean( (Hitters$Salary[test]-pred)^2
)
}
val.errors
which.min(val.errors)
coef(regfit.full,10)
####################
coefi=coef(regfit.best,id=2)
coefi
names(coefi)
pred=test.mat[,names(coefi)]%*%coefi
a<-test.mat[,names(coefi)]
fix(a)
fix(pred)
#####################

#finally we perform best subset selection on the full data set, and select the best 10-variable model!
reg.full=regsubsets(Salary~.,data=Hitters,nvmax=19)
coef(reg.full,10)
##CV
#we create a vector that allocates each observation to one of k=10 folds, and create a matrix to store the results
k=10
set.seed(1)
folds=sample(1:k,nrow(Hitters),replace=T)
folds
cv.errors=matrix(NA,k,19,dimnames = list(NULL,paste(1:19)))
cv.errors
#
cv.errors=matrix(NA,k,19)
cv.errors
#
#we write a loop that performs cross validation.
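# regsubsets objects have no built-in predict() method, so the loop below relies on a
# predict.regsubsets() helper; this is the same function that is defined near the end of this
# script (in the chapter 6 exercises), repeated here so the loop runs as written.
predict.regsubsets=function(object,newdata,id,...){
form=as.formula(object$call[[2]])
mat=model.matrix(form,newdata)
coefi=coef(object,id=id)
mat[,names(coefi)]%*%coefi
}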
for(j in 1:k)
{
best.fit=regsubsets(Salary~.,data=Hitters[folds!=j,],nvmax=19)
for(i in 1:19)
{
pred=predict(best.fit,Hitters[folds==j,],id=i)
cv.errors[j,i]=mean((Hitters$Salary[folds==j]-pred )^2)
}
}
#
#
cv.errors
best.fit=regsubsets(Salary~.,data=Hitters,nvmax=19) #refit best subset selection on the full data set
coef(best.fit,11)
##6.6 Lab 2: Ridge Regression and the Lasso
#we use the glmnet() function to fit ridge and lasso models; the function needs x as a matrix and y as a vector
#we remove missing values
x=model.matrix(Salary~.,Hitters)[,-1]
y=Hitters$Salary

#model.matrix() produces a matrix corresponding to the 19 predictors and also turns all qualitative
# variables into dummy variables, because glmnet() takes only quantitative inputs
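# A quick check (an addition): x should have 263 rows and 19 columns, with factors such as League
# expanded into dummy variables.
dim(x)
colnames(x)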
#6.6.1 RIDGE
#glmnet() has an arg. alpha that for 0 performs ridge and lasso for 1
library(glmnet)
grid=10^seq(10,-2,length=100)
ridge.mod=glmnet(x,y,alpha=0,lambda=grid)
#glmnet() performs ridge regression for an automatically selected range of lambda; here we have
# chosen instead to supply a grid of values ranging from the null model (intercept only) to the
# least squares fit
# !! [a very high value of lambda = null model / lambda=0 is least squares (add the argument exact=T)]
#with each value of lambda there is a vector of ridge regression coefficients, stored in a matrix;
# here it is a 20x100 matrix, with 20 rows (one for each predictor plus the intercept) and 100
# columns (one for each value of lambda)
dim(coef(ridge.mod))
#the coefficient estimates are much smaller in terms of their l2 norm when a larger value of lambda
# is used, and bigger when a small value of lambda is used
ridge.mod$lambda[50]
coef(ridge.mod)[,50]
ridge.mod$lambda[60]
coef(ridge.mod)[,60]
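# A small sketch (an addition, following the book's approach) computing the l2 norm of the
# coefficients, excluding the intercept, to illustrate the shrinkage: the norm is smaller for the
# larger lambda (index 50) than for the smaller lambda (index 60).
sqrt(sum(coef(ridge.mod)[-1,50]^2))
sqrt(sum(coef(ridge.mod)[-1,60]^2))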
#we can use the predict function to obtain ridge regression coefficients for a new value of lambda, say 50
predict(ridge.mod,s=50,type="coefficients")[1:20,]
# we split the data into a training set and a test set; there are two methods to do this:
# 1. produce a random vector of TRUE and FALSE, and select the elements corresponding to TRUE for the training data
# 2. randomly choose a subset of numbers between 1 and n and use them as indices for the training observations
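# A minimal sketch of method 1 (an addition; the names train.bool, x.tr, y.tr are illustrative and
# are not used below, where method 2 is applied instead):
train.bool=sample(c(TRUE,FALSE),nrow(x),replace=TRUE)
x.tr=x[train.bool,]
y.tr=y[train.bool]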
set.seed(1)
train=sample(1:nrow(x),nrow(x)/2)
train
test=-train
y.test=y[test]
#we fit a ridge regression on the training set and evaluate it on the test set, using lambda=4
# in the predict() call we replace type="coefficients" with the newx argument
ridge.mod=glmnet(x[train,],y[train],alpha=0,lambda=grid,thresh = 1e-12)
ridge.pred=predict(ridge.mod,s=4,newx = x[test,])
mean( (ridge.pred-y.test)^2 )
#fit a model with just the intercept (very large value of lambda 1e10 = 10^10)
ridge.pred=predict(ridge.mod,s=1e10,newx=x[test,])
mean ( (ridge.pred-y.test)^2 )
# lambda = 4 leads to much lower MSE than just an intercept
# we now check whether there is any benefit to using lambda=4 instead of a least squares regression

ridge.pred=predict(ridge.mod,s=0,newx=x[test,],exact=T)
mean ( (ridge.pred-y.test)^2 )
predict(ridge.mod,s=0,exact=T,type="coefficients")[1:20,]
#instead of choosing lambda by hand we can use cross-validation with cv.glmnet(), which performs
# ten-fold validation [the number of folds can be changed using the nfolds argument]
set.seed(1)
cv.out=cv.glmnet(x[train,],y[train],alpha=0)
plot(cv.out)
bestlam=cv.out$lambda.min
bestlam
#212 is best lambda value
#MSE for this value of lambda is
ridge.pred=predict(ridge.mod,s=bestlam,newx=x[test,])
mean( (ridge.pred-y.test)^2 )
#fit model on full data set and get coeffs
out=glmnet(x,y,alpha=0)
predict(out,type="coefficients",s=bestlam)[1:20,]
## 6.6.2 The Lasso
library(glmnet)
lasso.mod=glmnet(x[train,],y[train],alpha=1,lambda = grid)
plot(lasso.mod)
#Cv lasso
set.seed(1)
cv.out=cv.glmnet(x[train,],y[train],alpha=1)
plot(cv.out)
bestlam=cv.out$lambda.min
lasso.pred=predict(lasso.mod,s=bestlam,newx=x[test,])
mean(
(lasso.pred-y.test)^2
)
#apply on full set
out=glmnet(x,y,alpha=1,lambda=grid)
lasso.coef=predict(out,type="coefficients",s=bestlam)[1:20,]
lasso.coef
#6.7 Lab 3: PCR and PLS Regression
#6.7.1 Principal Components Regression
library(pls)
set.seed(2)
pcr.fit=pcr(Salary~.,data=Hitters,scale=T,validation="CV") #scale=T standardises the predictors; validation="CV" gives ten-fold CV
summary(pcr.fit) #38.31% of the variance in the predictors is explained by the first component, ...
## !!!!! pcr reports the root mean squared error, so we have to square this quantity to obtain the usual MSE
validationplot(pcr.fit,val.type="MSE") #CV scores
#perform PCR on the training data and evaluate its test performance
set.seed(1)
pcr.fit=pcr(Salary~.,data=Hitters,scale=T,subset=train,validation="CV")
validationplot(pcr.fit,val.type = "MSEP")
pcr.pred=predict(pcr.fit,x[test,],ncomp = 7)
mean( (pcr.pred-y.test)^2 )

#pcr on the full data set


pcr.fit=pcr(y~x,scale=T,ncomp=7)
summary(pcr.fit)
#6.7.2 Partial Least Squares
set.seed(1)
pls.fit=plsr(Salary~.,data=Hitters,subset=train,scale=T,validation="CV")
summary(pls.fit)
#lowest MSE when M=2
pls.pred=predict(pls.fit,x[test,],ncomp=2)
mean( (pls.pred-y.test)^2 )
#PLS on full data set with M=2
pls.fit=plsr(Salary~.,data=Hitters,subset=train,scale=T,ncomp=2)
summary(pls.fit)
###### EXERCISES ###############
##8
#a
set.seed(1)
x=rnorm(100)
eps=rnorm(100)
x
eps
#b: beta0=3, beta1=2, beta2=-3 and beta3=0.3
beta0=3
beta1=2
beta2=-3
beta3=0.3
y=beta0+beta1*x+beta2*x^2+beta3*x^3+eps
#c
data=data.frame(x,y)
fix(data)
library(leaps)
regfit.full=regsubsets(y~poly(x,10,raw=T),data=data,nvmax = 10)
reg.summary=summary(regfit.full)
#adj r sq
plot(reg.summary$adjr2,xlab="no of var",ylab="adj r sq",type="l")
which.max(reg.summary$adjr2)
points(3,reg.summary$adjr2[3],cex=4,pch=33)
#CP
plot(reg.summary$cp,xlab="no of var",ylab="cp",type="l")
which.min(reg.summary$cp)
points(3,reg.summary$cp[3],cex=4,pch=33)
#BIC
plot(reg.summary$bic,xlab="no of var",ylab="BIC",type="l")
which.min(reg.summary$bic)
points(3,reg.summary$bic[3],cex=4,pch=33)
coefficients(regfit.full,id=3)
#d
regfit.fwd=regsubsets(y~poly(x,10,raw=T),data=data,nvmax=10,method="forward")
regfit.sum=summary(regfit.fwd)
#adj r sq
plot(regfit.sum$adjr2,xlab="no of var",ylab="adj r sq",type="l")
which.max(regfit.sum$adjr2)

points(3,regfit.sum$adjr2[3],cex=4,pch=33)
#CP
plot(regfit.sum$cp,xlab="no of var",ylab="CP",type="l")
which.min(regfit.sum$cp)
points(3,regfit.sum$cp[3],cex=4,pch=33)
#BIC
plot(regfit.sum$bic,xlab="no of var",ylab="bic",type="l")
which.min(regfit.sum$bic)
points(3,regfit.sum$bic[3],cex=4,pch=33)
coefficients(regfit.fwd,id=3)
regfit.bwd=regsubsets(y~poly(x,10,raw=T),data=data,nvmax=10,method="backward")
regfit.sum1=summary(regfit.bwd)
#adj r sq
plot(regfit.sum1$adjr2,xlab="no of var",ylab="adj r sq",type="l")
which.max(regfit.sum1$adjr2)
points(4,regfit.sum1$adjr2[4],cex=4,pch=33)
#CP
plot(regfit.sum1$cp,xlab="no of var",ylab="CP",type="l")
which.min(regfit.sum1$cp)
points(3,regfit.sum1$cp[3],cex=4,pch=33)
#BIC
plot(regfit.sum1$bic,xlab="no of var",ylab="bic",type="l")
which.min(regfit.sum1$bic)
points(3,regfit.sum1$bic[3],cex=4,pch=33)
coefficients(regfit.bwd,id=3)
coefficients(regfit.bwd,id=4)
#e
library(glmnet)
xmat=model.matrix(y~poly(x,10,raw=T),data=data)[,-1]
fix(data)
cv.out=cv.glmnet(xmat,y,alpha=1)
best.lam=cv.out$lambda.min
plot(cv.out)
#fit model on the full data using best lambda found
cv.out=cv.glmnet(xmat,y,alpha=1)
predict(cv.out,s=best.lam,type="coefficients")
#f
beta7 = 7
y = beta0 + beta7 * x^7 + eps
data=data.frame(x,y)
regfit.full=regsubsets(y~poly(x,10,raw=T),data=data,nvmax = 10)
reg.summary=summary(regfit.full)
which.max(reg.summary$adjr2)
which.min(reg.summary$cp)
which.min(reg.summary$bic)
coefficients(regfit.full,id=4)
coefficients(regfit.full,id=2)
coefficients(regfit.full,id=1)

xmat=model.matrix(y~poly(x,10,raw=T),data=data)[,-1]
mod.lasso=cv.glmnet(xmat,y,alpha=1)
best.lam=mod.lasso$lambda.min
best.lam
best.model=cv.glmnet(xmat,y,alpha=1)
predict(best.model,s=best.lam,type="coefficients")
##9
#a
library(ISLR)
rm(College)
fix(College)
sum(is.na(College))
set.seed(11)
train.size=dim(College)[1]/2
train=sample(1:dim(College)[1],train.size)
dim(College)
train.size
dim(College)[1]
college.train=College[train,]
college.test=College[-train,]
#b
lm.fit=lm(Apps~.,data=college.train)
lm.pred=predict(lm.fit,college.test)
mean( (college.test[,"Apps"]-lm.pred)^2 )
#c
library(glmnet)
train.mat=model.matrix(Apps~.,data=college.train)
test.mat=model.matrix(Apps~.,data=college.test)
grid=10^seq(4,-2,length=100)
ridge.mod=cv.glmnet(train.mat,college.train[,"Apps"],alpha=0,lambda=grid,thresh=1e-12)
lambda.best=ridge.mod$lambda.min
lambda.best
ridge.pred=predict(ridge.mod,s=lambda.best,newx=test.mat)
mean( (college.test[,"Apps"]-ridge.pred)^2 )

#d
mod.lasso=cv.glmnet(train.mat,college.train[,"Apps"],alpha=1,lambda=grid,thresh=1e-12)
lambda.best=mod.lasso$lambda.min
lambda.best
lasso.pred=predict(mod.lasso,newx=test.mat,s=lambda.best)
mean( (college.test[,"Apps"]-lasso.pred)^2 )
#coefs
mod.lasso=glmnet(model.matrix(Apps~.,data=College),College[,"Apps"],alpha=1)
predict(mod.lasso,s=lambda.best,type="coefficients")
#e
library(pls)
pcr.fit=pcr(Apps~.,data=college.train,scale=T,validation="CV")
validationplot(pcr.fit,val.type="MSEP")
pcr.pred=predict(pcr.fit,college.test,ncomp=10)
mean( (college.test[,"Apps"]-data.frame(pcr.pred))^2 )
#f
pls.fit=plsr(Apps~.,data=college.train,scale=T,validation="CV")
validationplot(pls.fit,val.type="MSEP")
pls.pred=predict(pls.fit,college.test,ncomp=10)
mean( (college.test[,"Apps"]-data.frame(pls.pred))^2 )
#g
test.avg=mean(college.test[,"Apps"])
lm.test.r2=1-mean((college.test[,"Apps"]-lm.pred)^2)/mean((college.test[,"Apps"]-test.avg)^2)
ridge.test.r2=1-mean((college.test[,"Apps"]-ridge.pred)^2)/mean((college.test[,"Apps"]-test.avg)^2)
lasso.test.r2=1-mean((college.test[,"Apps"]-lasso.pred)^2)/mean((college.test[,"Apps"]-test.avg)^2)
pcr.test.r2=1-mean((college.test[,"Apps"]-data.frame(pcr.pred))^2)/mean((college.test[,"Apps"]-test.avg)^2)
pls.test.r2=1-mean((college.test[,"Apps"]-data.frame(pls.pred))^2)/mean((college.test[,"Apps"]-test.avg)^2)
barplot(c(lm.test.r2,ridge.test.r2,lasso.test.r2,pcr.test.r2,pls.test.r2),col="red",names.arg=c("OLS","Ridge","Lasso","PCR","PLS"),main="Test R-squared")
##10
#a
set.seed(1)
n=1000
p=20
x=matrix(rnorm(n*p),n,p)
x
b=rnorm(p)
b[3]=0
b[4]=0
b[9]=0
b[19]=0
b[10]=0
eps=rnorm(n)
y=x%*%b+eps
plot(x)
#b
set.seed(1)
train=sample(seq(1000),100,replace=F)
seq(1000)
y.train=y[train]
y.test=y[-train]
x.train=x[train,]
x.test=x[-train,]
#c
#Perform best subset selection on the training set, and plot the
#training set MSE associated with the best model of each size.

library(leaps)
regfit.full = regsubsets(y ~ ., data = data.frame(x = x.train, y = y.train), nvmax = p)
val.errors = rep(NA, p)
x_cols = colnames(x, do.NULL = FALSE, prefix = "x.")
x_cols
for (i in 1:p) {
coefi = coef(regfit.full, id = i)
pred = as.matrix(x.train[, x_cols %in% names(coefi)]) %*% coefi[names(coefi) %in% x_cols]
val.errors[i] = mean((y.train - pred)^2)
}
plot(val.errors, ylab = "Training MSE", pch = 19, type = "b")
#d
val.errors = rep(NA, p)
for (i in 1:p) {
coefi = coef(regfit.full, id = i)
pred = as.matrix(x.test[, x_cols %in% names(coefi)]) %*% coefi[names(coefi) %in% x_cols]
val.errors[i] = mean((y.test - pred)^2)
}
plot(val.errors, ylab = "Test MSE", pch = 19, type = "b")
#e
which.min(val.errors)
#f
coef(regfit.full,id=16)
#g
##11
#a
#best subset
set.seed(1)
library(MASS)
library(leaps)
library(glmnet)
rm(Boston)
fix(Boston)
predict.regsubsets = function(object, newdata, id, ...) {
form = as.formula(object$call[[2]])
mat = model.matrix(form, newdata)
coefi = coef(object, id = id)
mat[, names(coefi)] %*% coefi
}
k = 10
p = ncol(Boston) - 1
folds = sample(rep(1:k, length = nrow(Boston)))
cv.errors = matrix(NA, k, p)
for (i in 1:k) {
best.fit = regsubsets(crim ~ ., data = Boston[folds != i, ], nvmax = p)
for (j in 1:p) {
pred = predict(best.fit, Boston[folds == i, ], id = j)
cv.errors[i, j] = mean((Boston$crim[folds == i] - pred)^2)
}
}
rmse.cv = sqrt(apply(cv.errors, 2, mean))
plot(rmse.cv, pch = 19, type = "b")
#b
#lasso
attach(Boston)
xmat=model.matrix(crim~.-1,data=Boston)
cv.out=cv.glmnet(xmat,Boston$crim,type.measure = "mse")
plot(cv.out)
coef(cv.out)
sqrt(cv.out$cvm[cv.out$lambda==cv.out$lambda.1se] )
#ridge
cv.ridge=cv.glmnet(xmat,Boston$crim,type.measure = "mse",alpha=0)
plot(cv.ridge)
coef(cv.ridge)
sqrt( cv.ridge$cvm[cv.ridge$lambda==cv.ridge$lambda.1se] )
#pcr
library(pls)
pcr.fit=pcr(crim~.,data=Boston,scale=T,validation="CV")
summary(pcr.fit)
################################################################################
###################################

#POLYNOMIAL REGRESSION: extends the linear model by adding extra predictors, obtained by raising
# each of the original predictors to a power. For example, a cubic regression uses the three
# variables x, x^2, x^3 as predictors.
#STEP FUNCTIONS: cut the range of a variable into K distinct regions in order to produce a
# qualitative variable.
#REGRESSION SPLINES: an extension of polynomial and step functions. They involve dividing the range
# of X into K distinct regions; within each region a polynomial function is fit, but the polynomials
# are constrained so that they join smoothly at the region boundaries (knots). Provided the range is
# divided into enough regions, this can produce an extremely flexible fit.
#SMOOTHING SPLINES: similar to regression splines, but they result from minimizing an RSS criterion
# subject to a smoothness penalty (the sum of squared residuals plus lambda times the integral of
# the squared second derivative of the fitted function).
#LOCAL REGRESSION: similar to splines, but the regions are allowed to overlap.
#GAMs: allow the extension of the above methods to deal with multiple predictors.
################################################################################
###################################
# 7.8 Lab: Non-linear Modeling
library(ISLR)
attach(Wage)
#7.8.1 Polynomial Regression and Step Functions
fit=lm(wage~poly(age,4),data=Wage)
coef(summary(fit))
fit2=lm(wage~cbind(age,age^2,age^3,age^4),data=Wage)
#create a grid of values for age at which we want predictions
agelims=range(age)
agelims
age.grid=seq(from=agelims[1],to=agelims[2])
age.grid
preds=predict(fit,newdata = list(age=age.grid),se=T)
se.bands=cbind(preds$fit+2*preds$se.fit,preds$fit-2*preds$se.fit)
se.bands
plot(age,wage,col="red")
title("Degree 4 polynomial",outer=T)
lines(age.grid,preds$fit,lwd=2,col="blue")
matlines(age.grid,se.bands,col="blue")
fit.1=lm(wage~age,data=Wage)
fit.2=lm(wage~poly(age,2),data=Wage)
fit.3=lm(wage~poly(age,3),data=Wage)
fit.4=lm(wage~poly(age,4),data=Wage)
fit.5=lm(wage~poly(age,5),data=Wage)
anova(fit.1,fit.2,fit.3,fit.4,fit.5)
fit.1= lm(wage~education +age ,data=Wage)
fit.2= lm(wage~education +poly(age ,2) ,data=Wage)
fit.3= lm(wage~education +poly(age ,3) ,data=Wage)
anova(fit.1, fit.2, fit.3)
#STEP Functions
# Next we consider the task of predicting whether an individual earns more
# than $250,000 per year.
fit=glm(I(wage>250)~poly(age,4),data=Wage,family = binomial)
preds=predict(fit,newdata = list(age=age.grid),se=T)
pfit=exp(preds$fit)/(1+exp(preds$fit))

se.bands.logit=cbind(preds$fit+2*preds$se.fit,preds$fit-2*preds$se.fit)
se.bands=exp(se.bands.logit)/(1+exp(se.bands.logit))
plot(age,I(wage>250),xlim=agelims,ylim=c(0,.2),type="n")
points(jitter(age),I((wage>250)/5),pch="|",col="darkgrey")
lines(age.grid,pfit,col="blue")
matlines(age.grid,se.bands,col="blue")
# We have drawn the age values corresponding to the observations with wage
# values above 250 as gray marks on the top of the plot, and those with wage
# values below 250 are shown as gray marks on the bottom of the plot.
table(cut(age,4))
fit=lm(wage~cut(age,4),data=Wage)
coef(summary(fit))
#The age<33.5 category is left out, so the intercept coefficient of
#$94,160 can be interpreted as the average salary for those under 33.5 years
#of age, and the other coefficients can be interpreted as the average additional
#salary for those in the other age groups.
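# A quick check (an addition): the intercept should match the average wage in the baseline age bin.
mean(wage[age<=33.5])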

#7.8.2 Splines
#fit wage to age using a regression spline; by default, cubic splines are produced
attach(Wage)
library(splines)
fit=lm(wage~bs(age,knots=c(25,40,60)),data=Wage)
pred=predict(fit,newdata = list(age=age.grid),se=T)
plot(age,wage,col="red")
lines(age.grid,pred$fit,lwd=4)
lines(age.grid,pred$fit+2*pred$se,lty="dashed")
lines(age.grid,pred$fit-2*pred$se,lty="dashed")
#here we have placed knots at 25, 40 and 60, which produces a spline with six basis functions;
# a cubic spline with three knots has seven degrees of freedom: one for the intercept plus six basis functions
attr(bs(age,df=6),"knots")
#with df=6, R chooses the knots itself (at uniform quantiles of age)
#bs() also has a degree argument, so we can fit splines of any degree rather than the default cubic
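# A quick check (an addition) consistent with the degrees-of-freedom comment above: a cubic spline
# with knots at 25, 40, 60 generates six basis functions (a 3000 x 6 basis matrix), so the fitted
# model has seven degrees of freedom including the intercept.
dim(bs(age,knots=c(25,40,60)))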
#NATURAL SPLINES
#ns with 4 df
fit2=lm(wage~ns(age,df=4),data=Wage)
pred2=predict(fit2,newdata=list(age=age.grid),se=T)
lines(age.grid,pred2$fit,col="blue",lwd=4)
#as with bs(), we could have specified the knots directly using the knots argument
#SMOOTHING SPLINE
plot(age,wage,xlim=agelims,cex=.5,col="darkgrey")
title("Smoothing Spline")
fit=smooth.spline(age,wage,df=16)
fit1=smooth.spline(age,wage,cv=T)
fit1$df
lines(fit,col="red")

lines(fit1,col="blue")
legend("topright",legend=c("16 DF","6.8 DF"),col=c("red","blue"),lty=1,lwd=2,cex
=.8)
#when we specify df=16, the function determines the value of lambda that yields 16 degrees of freedom
#with cv=T the level of smoothness is instead chosen by cross-validation, which yields about 6.8 degrees of freedom
#LOCAL REGRESSION
plot(age,wage,xlim=agelims,col="darkgrey")
title("Local Regression")
fit=loess(wage~age,span=.2,data=Wage)
fit1=loess(wage~age,span=.5,data=Wage)
lines(age.grid,predict(fit,newdata=data.frame(age=age.grid)))
lines(age.grid,predict(fit1,newdata=data.frame(age=age.grid)))
#local regression with spans .2 and .5: each neighbourhood consists of 20% or 50% of the observations;
# the larger the span, the smoother the fit
#GAMS
#we fit a GAM to predict wage using natural spline functions of year and age, treating education as a qualitative predictor
gam1=lm(wage~ns(year,4)+ns(age,5)+education,data=Wage)
#we now fit a model that uses smoothing splines, rather than natural splines .
#we need to use the gam() function
library(gam)
# s() is used for smooothing spline
gam.m3=gam(wage~s(year,4)+s(age,4)+education,data=Wage)
par(mfrow=c(1,3))
plot(gam.m3,se=T)
plot(gam1)
plot.gam(gam1,se=T)
#m1 gam that excludes year
#m2 gam that uses linear funct of year
#m3 gam that uses a spline function
gam.m1=gam(wage~s(age,5)+education,data=Wage)
gam.m2=gam(wage~year+s(age,5)+education,data=Wage)
gam.m3=gam(wage~s(year,4)+s(age,5)+education,data=Wage)
anova(gam.m1,gam.m2,gam.m3,test="F")
summary(gam.m3)
#the p-values of the model reinforce the idea that a linear function is adequate for year and a non-linear one is needed for age
#predictions on the training set
preds=predict(gam.m2,newdata=Wage)
# we can also use local regression as a building block of a GAM, with the lo() function
gam.lo=gam(wage~s(year,df=4)+lo(age,span=0.7)+education,data=Wage)
plot.gam(gam.lo,se=T)
gam.lo.i=gam(wage~lo(year,age,span=0.5),data=Wage)
library(akima)
plot(gam.lo.i)

par(mfrow=c(1,1))
#gams with log reg
gam.lr=gam(I(wage>250)~year+s(age,df=5)+education,family=binomial,data=Wage)
par(mfrow =c(1,3))
plot(gam.lr,se=T)
table(education,I(wage>250))
gam.lr=gam(I(wage>250)~year+s(age,df=5)+education,family=binomial,data=Wage,subset=(education!="1. < HS Grad"))
plot(gam.lr,se=T)
################################################################################
#####################################
#7.9 Exercises
#6
#a
set.seed(1)
library(boot)
all.deltas=rep(NA,10)
for(i in 1:10)
{
glm.fit=glm(wage~poly(age,i),data=Wage)
all.deltas[i]=cv.glm(Wage,glm.fit,K=10)$delta[2]
}
all.deltas
plot(1:10,all.deltas,xlab="degree",ylab="CV error",type="b",ylim=c(1590,1700))
min.point=min(all.deltas)
sd.point=sd(all.deltas)
abline(h=min.point+0.2*sd.point,lty="dashed")
abline(h=min.point-0.2*sd.point,lty="dashed")
legend("topright","0.2 sd line ",lty="dashed")
# a degree-3 polynomial is chosen
agelims=range(age)
agelims
age.grid=seq(from=agelims[1],to=agelims[2])
age.grid
lm.fit=lm(wage~poly(age,3),data=Wage)
preds=predict(lm.fit,data.frame(age=age.grid))
plot(wage~age,data=Wage)
lines(age.grid,preds,col="blue",lwd=3)

fit.1=lm(wage~poly(age,1),data=Wage)
fit.2=lm(wage~poly(age,2),data=Wage)
fit.3=lm(wage~poly(age,3),data=Wage)
fit.4=lm(wage~poly(age,4),data=Wage)
fit.5=lm(wage~poly(age,5),data=Wage)
fit.6=lm(wage~poly(age,6),data=Wage)
fit.7=lm(wage~poly(age,7),data=Wage)
fit.8=lm(wage~poly(age,8),data=Wage)
fit.9=lm(wage~poly(age,9),data=Wage)
fit.10=lm(wage~poly(age,10),data=Wage)

anova(fit.1,fit.2,fit.3,fit.4,fit.5,fit.6,fit.7,fit.8,fit.9,fit.10)
#b
all.cvs=rep(NA,10)
for(i in 2:10)
{
Wage$age.cut=cut(Wage$age,i)
lm.fit=glm(wage~age.cut,data=Wage)
all.cvs[i]=cv.glm(Wage,lm.fit,K=10)$delta[2]
}
all.cvs
plot(2:10,all.cvs[-1],xlab="no of cuts",ylab="cv err",type="b")
#8 cuts
lm.fit=glm(wage~cut(age,8),data=Wage)
agelims=range(age)
agelims
age.grid=seq(from=agelims[1],to=agelims[2])
age.grid
lm.pred=predict(lm.fit,data.frame(age=age.grid))
plot(wage~age,data=Wage)
lines(age.grid,lm.pred,col="red",lwd=4)
##7
#a
set.seed(1)
summary(Wage$maritl)
plot(Wage$maritl)
summary(Wage$jobclass)
plot(Wage$jobclass)
par(mfrow=c(1,2))
plot(Wage$maritl,Wage$wage)
plot(Wage$jobclass,Wage$wage)
fit=lm(wage~maritl,data=Wage)
deviance(fit)
fit=lm(wage~jobclass,data=Wage)
deviance(fit)
fit=lm(wage~maritl+jobclass,data=Wage)
deviance(fit)
#gam
fit=gam(wage~maritl+jobclass+s(age,4),data=Wage)
deviance(fit)
##8
pairs(Auto)
#mpg is inversely related to cylinders, displacement, horsepower and weight
cv.errs=rep(NA,10)
for (i in 1:10)
{
fit=glm(mpg~poly(displacement,i),data=Auto)

cv.errs[i]=cv.glm(Auto,fit,K=10)$delta[2]
}
cv.errs
which.min(cv.errs)
#a 10th-degree polynomial gives the lowest CV error
attach(Auto)
plot(displacement,mpg)
lm.fit.poly=glm(mpg~poly(displacement,10),data=Auto)
summary(displacement)
disprange=range(displacement)
disprange
disp.grid=seq(from=disprange[1],to=disprange[2])
preds=predict(lm.fit.poly,data.frame(displacement=disp.grid))
lines(disp.grid,preds,col="red",lwd=5)
pol1=lm(mpg~poly(displacement,1),data=Auto)
pol2=lm(mpg~poly(displacement,2),data=Auto)
pol3=lm(mpg~poly(displacement,3),data=Auto)
pol4=lm(mpg~poly(displacement,4),data=Auto)
pol5=lm(mpg~poly(displacement,5),data=Auto)
pol6=lm(mpg~poly(displacement,6),data=Auto)
pol7=lm(mpg~poly(displacement,7),data=Auto)
pol8=lm(mpg~poly(displacement,8),data=Auto)
pol9=lm(mpg~poly(displacement,9),data=Auto)
pol10=lm(mpg~poly(displacement,10),data=Auto)
anova(pol1,pol2,pol3,pol4,pol5,pol6,pol7,pol8,pol9,pol10)
#step f
all.cvs=rep(NA,10)
for(i in 2:10)
{
Auto$dis.cut=cut(Auto$displacement,i)
lm.fit=glm(mpg~dis.cut,data=Auto)
all.cvs[i]=cv.glm(Auto,lm.fit,K=10)$delta[2]
}
all.cvs
plot(1:10,all.cvs,xlab="degree",ylab="CV error",type="b")
which.min(all.cvs)
#9 cuts
disprange=range(displacement)
disprange
disp.grid=seq(from=disprange[1],to=disprange[2])
preds=predict(lm.fit,data.frame(displacement=disp.grid))
plot(displacement,mpg)
lines(disp.grid,preds,col="red",lwd=5)
#splines
library(splines)
cv.errs=rep(NA,10)
for(df in 3:10)
{
fit=glm(mpg~ns(displacement,df=df),data=Auto)
cv.errs[df]=cv.glm(Auto,fit,K=10)$delta[2]
}
cv.errs
which.min(cv.errs)

plot(displacement,mpg)
fit2=lm(mpg~ns(displacement,df=10),data=Auto)
pred2=predict(fit2,newdata=list(displacement=disp.grid),se=T)
lines(disp.grid,pred2$fit,col="blue",lwd=4)
#gams
fit = gam(mpg ~ s(displacement, 4) + s(horsepower, 4), data = Auto)
summary(fit)
##9
#a
poly.fit=lm(nox~poly(dis,3),data=Boston)
attach(Boston)
dis.range=range(dis)
dis.range
dis.grid=seq(from=dis.range[1],to=dis.range[2])
preds=predict(poly.fit,data.frame(dis=dis.grid))
plot(dis,nox)
lines(dis.grid,preds,col="red",lwd=5)
title("Pen-Pineapple-Apple-Pen")
#b
all.rss=rep(NA,10)
for (i in 1:10)
{
poly.fit=lm(nox~poly(dis,i),data=Boston)
all.rss[i]=sum(poly.fit$residuals^2)
}
all.rss
plot(1:10,all.rss,type="b")
#c
library(boot)
all.rss=rep(NA,10)
for (i in 1:10)
{
poly.fit=glm(nox~poly(dis,i),data=Boston)
all.rss[i]=cv.glm(Boston,poly.fit,K=10)$delta[2]
}
all.rss
plot(1:10,all.rss,xlab="no of deg",ylab="cv err",type="b")
which.min(all.rss)
#a degree-4 polynomial is chosen
#d
library(splines)
sp.fit=lm(nox~bs(dis,df=4,knots=c(4,7,11)),data=Boston)
summary(sp.fit)
sp.pred=predict(sp.fit,list(dis=dis.grid))
plot(nox~dis,data=Boston)
plot(dis,nox)
lines(dis.grid,sp.pred,col="blue",lwd=3)

#e
all.cvs=rep(NA,16)
for(i in 3:16)
{
lm.fit=lm(nox~bs(dis,df=i),data=Boston)
all.cvs[i]=sum(lm.fit$residuals^2)
}
all.cvs
which.min(all.cvs)
#f
all.cv = rep(NA, 16)
for (i in 3:16) {
lm.fit = glm(nox ~ bs(dis, df = i), data = Boston)
all.cv[i] = cv.glm(Boston, lm.fit, K = 10)$delta[2]
}
all.cv
plot(3:16,all.cv[-c(1,2)],xlab="no of var",ylab="cv err",type="b")
##10
#a
set.seed(1)
library(ISLR)
attach(College)
fix(College)
train=sample(length(Outstate),length(Outstate)/2)
test=-train
college.train=College[train,]
college.test=College[test,]
#fwd stepwise selection on training set
dim(College)
library(leaps)
regfit.fwd=regsubsets(Outstate~.,data=college.train,nvmax=17,method="forward")
reg.summary=summary(regfit.fwd)
plot(reg.summary$adjr2,xlab="no of var",ylab="adj r sq",type="l",ylim=c(0.4,0.84))
which.max(reg.summary$adjr2)
points(13,reg.summary$adjr2[13],cex=2.5,pch=15)
max.adjr2=max(reg.summary$adjr2)
sd.adjr2=sd(reg.summary$adjr2)
abline(h=max.adjr2+0.2*sd.adjr2,lty="dashed")
abline(h=max.adjr2-0.2*sd.adjr2,lty="dashed")
max.adjr2-0.2*sd.adjr2
max.adjr2+0.2*sd.adjr2
plot(reg.summary$bic,xlab="no of var",ylab="bic",type="l")
which.min(reg.summary$bic)
points(6,reg.summary$bic[6],cex=2.5,pch=15)
sd.bic=sd(reg.summary$bic)
max.bic=max(reg.summary$bic)
abline(h=max.bic+0.2*sd.bic,lty="dashed")
abline(h=max.bic-0.2*sd.bic,lty="dashed")
max.bic+0.2*sd.bic
max.bic-0.2*sd.bic

plot(reg.summary$cp, xlab = "Number of Variables", ylab = "Cp", type = "l")


which.min(reg.summary$cp)
min.cp = min(reg.summary$cp)
std.cp = sd(reg.summary$cp)
abline(h = min.cp + 0.2 * std.cp, col = "red", lty = 2)
abline(h = min.cp - 0.2 * std.cp, col = "red", lty = 2)
#6 vars so id = 6
reg.fit=regsubsets(Outstate~.,data=college.train,method="forward")
coefi=coef(reg.fit,id=6)
names(coefi)
#b
library(gam)
gam.fit=gam(Outstate~Private+s(Room.Board,df=2)+s(PhD,df=2)+s(perc.alumni,df=2)+s(Expend,df=2)+s(Grad.Rate,df=2),data=college.train)
par(mfrow = c(2, 3))
plot(gam.fit, se = T, col = "blue")
#c
gam.pred=predict(gam.fit,college.test)
gam.err=mean( (college.test$Outstate-gam.pred)^2 )
gam.err
gam.tss=mean( (college.test$Outstate-mean(college.test$Outstate))^2 )
test.rss=1-gam.err/gam.tss
test.rss
#d
summary(gam.fit)
################################################################################
########################################
