0% found this document useful (0 votes)
281 views · 5 pages

Code Segmented Regression SAS

This document contains SAS and R code for time series analysis of traffic data. The code loads the traffic data and cleans it by removing outliers and creating additional variables. In SAS, segmented regression and autoregressive integrated moving average (ARIMA) models are fitted to the data. In R, the data is plotted, stationarity is checked with the augmented Dickey–Fuller test, and the number of differences needed to make the time series stationary is determined.

Uploaded by

Sowmya Tatavarty
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
281 views · 5 pages

Code Segmented Regression SAS

This document contains SAS and R code for time series analysis of traffic data. The code loads the traffic data and cleans it by removing outliers and creating additional variables. In SAS, segmented regression and autoregressive integrated moving average (ARIMA) models are fitted to the data. In R, the data is plotted, stationarity is checked with the augmented Dickey–Fuller test, and the number of differences needed to make the time series stationary is determined.

Uploaded by

Sowmya Tatavarty
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/5

SAS Code

/* Set the graphics environment */

goptions reset=all border cback=white htitle=12pt htext=10pt;

/* Fileref for the raw CSV; the import data step reads it via INFILE REFFILE. */
FILENAME REFFILE "/home/suryachaitanya1/MyFolder/Switch_Data_Case.csv";

/* Segmented regression analysis in SAS */

/*
 * Read the comma-delimited file behind fileref REFFILE into WORK.TRAFFIC.
 * Reading starts at record 11 (firstobs=11), so the first 10 records of the
 * file are skipped. Five columns are read as VAR1-VAR5; VAR1 is parsed as a
 * datetime.
 *
 * NOTE(review): VAR2-VAR5 are declared with the numeric informat/format 18.
 * but are listed with the character modifier ($) on the INPUT statement.
 * Confirm which type is intended; later steps add Inbound + Outbound
 * arithmetically.
 */
data WORK.TRAFFIC;
    %let _EFIERR_ = 0; /* set the ERROR detection macro variable */
    infile REFFILE delimiter=',' MISSOVER DSD firstobs=11;
    informat VAR1 anydtdtm40.;
    informat VAR2 18.;
    informat VAR3 18.;
    informat VAR4 18.;
    informat VAR5 18.;
    format VAR1 datetime.;
    format VAR2 18.;
    format VAR3 18.;
    format VAR4 18.;
    format VAR5 18.;
    input
        VAR1
        VAR2 $
        VAR3 $
        VAR4 $
        VAR5 $
    ;
    /* Flag any read error in the macro variable so it can be checked afterwards. */
    if _ERROR_ then call symputx('_EFIERR_', 1); /* set ERROR detection macro variable */
run;

/*
 * Give the imported columns meaningful names and derive the total traffic.
 * The rename is applied as the data set is read, so the new names are
 * available inside the step and a separate rename-only step is not needed.
 */
data traffic;
    set traffic (rename=(VAR1=Date
                         VAR2=Inbound
                         VAR3=col2_cdefa
                         VAR4=Outbound
                         VAR5=col4_cdeff));
    total_traffic = Inbound + Outbound;
run;


/*
 * Copy traffic to traffic2, blanking total_traffic on the two dates treated
 * as outliers so they do not influence the models. Four-digit years are used
 * in the date literals so the result does not depend on the YEARCUTOFF option.
 */
data traffic2;
    set traffic;
    if datepart(Date) in ('13AUG2015'd, '05NOV2015'd) then total_traffic = .;
run;

/*
 * Add the segmented-regression covariates:
 *   time  - running observation counter, 1 on the first row, +1 per row;
 *   event - 1 on and after the intervention date (27 Aug 2015), else 0.
 */
data traffic2;
    set traffic2;
    retain time; /* RETAIN is declarative: one statement keeps the counter across rows */
    if _n_ = 1 then time = 1;
    else time = time + 1;
    /* A comparison evaluates to 1 (true) or 0 (false) in SAS. */
    event = (datepart(Date) ge '27AUG2015'd);
run;

/*
 * Add time_from_event: 0 up to and including the intervention date
 * (27 Aug 2015), then incremented by 1 for every observation after it.
 */
data traffic2;
    set traffic2;
    retain time_from_event; /* carry the post-event counter across rows */
    if datepart(date) eq '27AUG2015'd then time_from_event = 0;
    else if datepart(date) gt '27AUG2015'd then time_from_event = time_from_event + 1;
    else time_from_event = 0; /* before the event (a missing date also sorts low) */
run;

/*
 * Add time2: signed number of days between the observation date and the
 * intervention date (negative before 27 Aug 2015, 0 on it, positive after).
 */
data traffic2;
    set traffic2;
    time2 = datepart(date) - '27AUG2015'd;
run;

/*
 * %traffic(n=) - fit the segmented regression for one response variable.
 *
 *   n = name of the response variable in traffic2 (e.g. total_traffic).
 *
 * Steps:
 *   1. PROC AUTOREG fit of &n on time2, event and time_from_event, with
 *      Durbin-Watson p-values (DWPROB).
 *   2. PROC ARIMA IDENTIFY with a Dickey-Fuller stationarity test on &n.
 *   3. PROC AUTOREG refit by maximum likelihood with up to 6 autoregressive
 *      lags and BACKSTEP elimination; predicted values (pvar) and residuals
 *      (rvar) are written to out1.
 * Both AUTOREG calls write parameter estimates to param_estimates_autoreg,
 * so the second call overwrites the first.
 */
%macro traffic(n=);

    Proc Autoreg Data=traffic2 outest=param_estimates_autoreg;
        model &n = time2 event time_from_event / dwprob;
    run;

    proc arima data=traffic2;
        identify var=&n stationarity=(dickey=0);
    quit;
    run;

    Proc Autoreg Data=traffic2 outest=param_estimates_autoreg;
        model &n = time2 event time_from_event /
              method=ml NLAG=6 Backstep DWPROB LOGLIKL;
        output out=out1 p=pvar r=rvar;
    run;

%mend;

%traffic(n=total_traffic);

/* Disabled runs. Block comments are used because the macro processor still
   expands a macro call placed inside an asterisk-style comment. */
/* %traffic(n=InBound); */
/* %traffic(n=OutBound); */

R Code

install.packages("zoo")
install.packages("fpp")
install.packages("forecast")
library(plyr)
library(zoo)
library(fpp)
library(forecast)

# Load data into R.
# file.choose() opens an interactive file picker. The file is read without a
# header row, so read.csv() names the columns V1, V2, ...
switchcase <- read.csv(file.choose(), header = FALSE)

# This is just the first 9 rows.
switchcase_1 <- switchcase[1:9, ]

# This is the data without the leading rows. A negative index drops rows;
# rows 1-10 are dropped, which matches the "- 10" row offset applied later in
# this script.
# NOTE(review): the earlier comment said "first 9 rows" - confirm whether row
# 10 is a column-header row that should also be dropped.
switchcase2 <- switchcase[-(1:10), ]

# Renaming columns. Each name must be a single-line string: a line break
# inside "V4" would stop it matching the column, leaving Outbound unnamed.
switchcase2 <- plyr::rename(
  switchcase2,
  c(
    "V1" = "Date",
    "V2" = "Inbound",
    "V3" = "col2cdefa",
    "V4" = "Outbound",
    "V5" = "col4cdeff"
  )
)
str(switchcase2)

# Total traffic = inbound + outbound. Going through as.character() means a
# factor column would be converted by its labels, not its integer level codes.
switchcase2$total <- as.numeric(as.character(switchcase2$Inbound)) +
  as.numeric(as.character(switchcase2$Outbound))

# Just for plotting: keep only the date and the total (i.e. drop columns 2-5
# of switchcase2). Selecting by name keeps sc_plot a data frame, which the
# `$` accesses below require.
sc_plot <- switchcase2[, c("Date", "total")]

# Working copy of the total under the name total2.
# NOTE(review): an earlier comment here mentioned converting to log values,
# but no log transform is applied - total2 is a plain copy of total.
sc_plot$total2 <- sc_plot$total
sc_plot$total <- NULL # leaves the columns Date and total2

sc_plot2 <- ts(sc_plot$total2)

# Date-indexed series of the total, including the spike observations.
Zoo1 <- zoo(
  sc_plot$total2,
  order.by = as.Date(as.character(sc_plot$Date), format = "%m/%d/%Y")
)
ts(Zoo1)

# Remove the spike observations before building the second series.
# Row names still carry the position in the raw file, so subtracting the 10
# dropped leading rows gives a 1-based observation number.
sc_plot$row <- as.numeric(row.names(sc_plot)) - 10

# Blank out observations 175 and 259.
# NOTE(review): presumably these are the same outlier days that the SAS code
# sets to missing (13 Aug 2015 and 05 Nov 2015) - confirm against the data.
sc_plot$total2[sc_plot$row %in% c(175, 259)] <- NA

# Using the zoo library to turn it into a date-indexed time series object.
Zoo2 <- zoo(
  sc_plot$total2,
  order.by = as.Date(as.character(sc_plot$Date), format = "%m/%d/%Y")
)
ts(Zoo2)

# Plot with the intervention marked as a vertical line.
# The date is written in ISO form (YYYY-MM-DD) because as.Date() without a
# format argument cannot parse "20150827".
plot(
  Zoo1,
  type = "b", xlab = "Time", ylab = "total inbound outbound", main = "Traffic"
)
abline(v = as.Date("2015-08-27"), col = "red", lwd = 2)
text(
  x = as.Date("2015-08-27"), y = 1, "known intervention",
  col = "red", pos = 3
)

# Plotting now without the spike.
plot(
  Zoo2,
  type = "b", xlab = "Time", ylab = "total inbound outbound", main = "Traffic"
)
abline(v = as.Date("2015-08-27"), col = "red", lwd = 2)
text(
  x = as.Date("2015-08-27"), y = 1, "known intervention",
  col = "red", pos = 3
)

# Stationarity test

# Augmented Dickey-Fuller test; missing values are dropped first.
# NOTE(review): this previously referenced `Zoo`, which is not defined in this
# script. Zoo2 (the series with the spikes set to NA) is assumed because of
# the na.omit() call - confirm whether Zoo1 was intended instead.
stationarity <- adf.test(na.omit(Zoo2), alternative = "stationary")
# attributes(stationarity)
print(stationarity$p.value)
# The author reported a p-value of 0.01, which is less than the threshold of
# 0.05. This suggests that we reject the null hypothesis that the series is
# non-stationary. We also validate that using the ndiffs function.
ndiffs(na.omit(Zoo2), alpha = 0.05, test = "adf")

You might also like