# Stukas ML Loop Rev.r
#set library
library(xlsx)
library(skimr)
library(forecastML)
library(forecast)
library(glmnet)
library(forecast)
library(randomForest)
library(TTR)
library(graphics)
library(tseries)
library(e1071)
#------------------------------------------------------------------------------
# IMPORT DATA FROM SCRATCH & CLEANING
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# IMPORT THE ALREADY-CLEANED DATA
#------------------------------------------------------------------------------
# To re-run the analysis, this statement alone is enough: it reloads the
# cleaned dataset stored as "datafix.rds" in the working directory.
datafix <- readRDS("datafix.rds")
#------------------------------------------------------------------------------
# FUNCTIONS USED BELOW
#------------------------------------------------------------------------------
# MODEL FUNCTION FOR LASSO
#
# Fits a cross-validated glmnet model on a training data set, using one column
# as the outcome and every other column as a predictor.
#
# Arguments:
#   data           data.frame (or matrix) holding the outcome and predictors.
#   my_outcome_col position (or column name) of the outcome column. Defaults
#                  to 1L, so callers that pass only `data` -- such as the
#                  train_model() call in this script -- get the same split as
#                  before.
#
# Returns: the fitted object returned by glmnet::cv.glmnet().
#
# Side effect: calls set.seed(2345), so the global RNG state is reset on every
# call; this keeps the fold assignment reproducible across runs.
model_function <- function(data, my_outcome_col = 1L) {
  # Resolve the outcome column to a single valid column position.
  outcome_idx <- if (is.character(my_outcome_col)) {
    match(my_outcome_col, colnames(data))
  } else {
    my_outcome_col
  }
  stopifnot(
    length(outcome_idx) == 1L,
    !is.na(outcome_idx),
    outcome_idx >= 1L,
    outcome_idx <= ncol(data)
  )

  # glmnet needs matrices; drop = FALSE keeps single-column selections 2-D.
  x <- as.matrix(data[, -outcome_idx, drop = FALSE])
  y <- as.matrix(data[, outcome_idx, drop = FALSE])

  set.seed(2345)
  # One fold per observation (leave-one-out). grouped = FALSE is set
  # explicitly because each fold then holds a single observation.
  glmnet::cv.glmnet(x, y, nfolds = nrow(y), grouped = FALSE)
}
#------------------------------------------------------------------------------
# Date index for the whole data set, stepping three months at a time
date_frequency <- "3 months"
dates <- seq(
  from = as.Date("2009-09-01"),
  to = as.Date("2019-12-01"),
  by = date_frequency
)
# Number of dates in the index
length(dates)
#------------------------------------------------------------------------------
# LOOP OVER THE 26 OBSERVATIONS TO BE NOWCAST
#------------------------------------------------------------------------------
# NOTE(review): the loop header that defines `i` and opens the brace closed
# near the end of this section is not visible here. `data_train`,
# `data_forecast`, `ypredict` and `model_function_2` .. `model_function_5` are
# also used below without a visible definition -- confirm against the complete
# script before running.
#split data
#training set for this iteration: every row of datafix except the last
#(27 - i), i.e. the last 26 rows are held out when i = 1 and only the last row
#when i = 26
training<-datafix[1:(nrow(datafix)-(27-i)),]
#windows
# NOTE(review): `data_train` is presumably built from `training`; per the
# forecastML docs window_length = 0 should mean no held-out validation window
# -- TODO confirm.
windows<- forecastML::create_windows(data_train, window_length = 0)
#------------------------------------------------------------------------------
#FINAL MODEL FOR EACH METHOD
# Every call below trains on the same data_train / windows pair; only the
# model label and the user-supplied model function differ.
model_results <- forecastML::train_model(data_train,
windows = windows,
model_name = "LASSO",
model_function = model_function)
model_results_2 <- forecastML::train_model(data_train,
windows = windows,
model_name = "RF",
model_function = model_function_2)
model_results_3 <- forecastML::train_model(data_train,
windows = windows,
model_name = "RIDGE",
model_function = model_function_3)
model_results_4 <- forecastML::train_model(data_train,
windows = windows,
model_name = "ENET",
model_function = model_function_4)
model_results_5 <- forecastML::train_model(data_train,
windows = windows,
model_name = "SVM",
model_function = model_function_5)
#------------------------------------------------------------------------------
# Forecast with FINAL MODEL
# Append the model label and predicted growth held in data_forecast to the
# results accumulated so far in ypredict.
ypredict<-rbind(ypredict,data.frame(model=data_forecast$model,
growth_pred=data_forecast$growth_pred))
}
# Show the accumulated forecasts (auto-printed when run at top level)
ypredict
#------------------------------------------------------------------------------
# EVALUATION OF THE MACHINE LEARNING MODELS
#------------------------------------------------------------------------------
metode <- c("LASSO", "RF", "RIDGE", "ENET", "SVM", "NN")
# Observed growth over the last 26 rows of datafix
actual <- datafix$growth[(nrow(datafix) - 26 + 1):nrow(datafix)]
# Pattern similarity: correlate columns 1-6 of df.all with its 7th column
# (column 7 is `actual` provided `df` has six columns)
df.all <- df
df.all$actual <- actual
(korelasi <- cor(df.all[, 1:6], df.all[, 1:7])[, 7])
(eval.ml <- data.frame(eval, MAD, korelasi))
# Export df.all
saveRDS(df.all, file = "df.result.rds")
#------------------------------------------------------------------------------
# AR(1) MODELING
#------------------------------------------------------------------------------
# Series to model: first column of datafix, all rows
y <- datafix[1:nrow(datafix), 1]
# Plot of the series
plot.ts(y, main = "PDB Current Price Growth", xlab = "time", ylab = "%")
# First difference: plot, ADF test and autocorrelation function
ydiff1 <- diff(y)
plot.ts(ydiff1, main = "diff1 PDB Growth", xlab = "time", ylab = "%")
adf.test(ydiff1)
acf(ydiff1, main = "ACF PDB CuP Gr", lag.max = 20)
#model AR(1)
# NOTE(review): the loop header that opens the brace closed below is not
# visible in this section; `training` and the initial `y.ar` are presumably
# set up in that missing part -- confirm against the complete script.
# Fit an ARIMA model of order (1, 0, 0) with method "ML"
model<-arima(training,order=c(1,0,0),method="ML")
#forecast from the fitted model
duga<-forecast(model)
#keep only the first forecast step (one quarter)
pred<-duga$mean[1]
#append the forecast as a new row of the results data frame
y.ar<-rbind(y.ar,data.frame(ar=pred))
}
# Show the accumulated AR forecasts (auto-printed when run at top level)
y.ar
# Evaluation of the AR forecasts against the observed values
ar1 <- accuracy(ts(y.ar), actual)
MAD <- colMeans(abs(actual - y.ar))
(eval.ar <- data.frame(ar1, MAD, korelasi = cor(y.ar, actual)))
# Plots
# NOTE(review): the title reads "ARIMA(1,1,0)" while the arima() call in this
# script uses order = c(1, 0, 0) -- confirm which one is intended.
plot.ts(
  actual,
  main = "ARIMA(1,1,0) vs Actual",
  ylab = "GDP Growth",
  ylim = c(-6, 7)
)
lines(y.ar, col = "red")
plot(actual, t(y.ar), main = "AR(1)", ylab = "forecast")
#------------------------------------------------------------------------------
# TIME SERIES PLOT: ACTUAL VS NOWCAST
#------------------------------------------------------------------------------
par()
# Multi-panel figure: six method panels on the first two rows, one wide
# legend strip on the third row
m <- matrix(c(1, 2, 3, 4, 5, 6, 7, 7, 7), nrow = 3, ncol = 3, byrow = TRUE)
m
nf <- layout(mat = m, heights = c(0.4, 0.4, 0.2))
layout.show(nf)
par(mar = c(5, 5, 5, 5))
time <- c("2014", "2015", "2016", "2017", "2018", "2019")

# Draws one panel: the observed series, the nowcast overlaid as a dotted red
# line, and a custom x axis labelled with the values in `time`.
plot_nowcast_panel <- function(method_name, nowcast) {
  plot.ts(
    actual,
    ylab = "GDP Growth",
    ylim = c(-6, 7),
    main = method_name,
    xaxt = "n"
  )
  lines(nowcast, col = "red", lty = 3)
  axis(
    1,
    at = c(3, 7, 11, 15, 19, 23),
    labels = time,
    las = 1,
    srt = 315,
    cex.axis = 0.8
  )
  invisible(NULL)
}

# One panel per method, in the original drawing order
plot_nowcast_panel("RF", df$RF)
plot_nowcast_panel("LASSO", df$LASSO)
plot_nowcast_panel("RIDGE", df$RIDGE)
plot_nowcast_panel("ENET", df$ENET)
plot_nowcast_panel("SVM", df$SVM)
plot_nowcast_panel("NN", df$NN)

# Legend strip: an empty plot that only carries the shared legend
par(mar = c(1, 1, 1.5, 1))
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
legend(
  x = "top",
  inset = 0,
  legend = c("Actual", "Forecast"),
  col = c("black", "red"),
  lty = c(1, 3),
  lwd = 3,
  cex = 0.9,
  horiz = TRUE
)
# Back to a single-panel device
par(mfrow = c(1, 1))
#------------------------------------------------------------------------------
# SCATTER PLOT: ACTUAL VS NOWCAST
#------------------------------------------------------------------------------
# 2 x 3 grid, one scatter panel per method. `heights` carries one entry per
# layout row; the earlier three-entry vector was left over from the three-row
# layout and its last value had no row to apply to.
m <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3, byrow = TRUE)
layout(mat = m, heights = c(0.5, 0.5))
par(mar = c(5, 5, 5, 5))
# Observed values on the x axis, each method's forecast on the y axis
plot(actual, df$RF, main = "RF", ylab = "Forecast", xlab = "Actual")
plot(actual, df$LASSO, main = "LASSO", ylab = "Forecast", xlab = "Actual")
plot(actual, df$RIDGE, main = "RIDGE", ylab = "Forecast", xlab = "Actual")
plot(actual, df$ENET, main = "ENET", ylab = "Forecast", xlab = "Actual")
plot(actual, df$SVM, main = "SVM", ylab = "Forecast", xlab = "Actual")
plot(actual, df$NN, main = "NN", ylab = "Forecast", xlab = "Actual")
# Back to a single-panel device
par(mfrow = c(1, 1))
#------------------------------------------------------------------------------
# ENSEMBLE: EQUAL WEIGHTING
#------------------------------------------------------------------------------
# Row-wise mean of three methods (RF, ENET, NN), i.e. equal weights
ensemble1 <- rowMeans(df[, c("RF", "ENET", "NN")])
# Accuracy measures, mean absolute deviation and correlation with the
# observed values, side by side
cbind(
  accuracy(ensemble1, actual),
  MAD = mean(abs(actual - ensemble1)),
  korelasi = cor(ensemble1, actual)
)
#------------------------------------------------------------------------------
# ENSEMBLE: LASSO REGRESSION
#------------------------------------------------------------------------------
head(df.all)
# Predictors: every column of df.all except the 7th; response: the 7th column
x <- as.matrix(df.all[, -7])
y <- as.matrix(df.all[, 7])
# Fixed seed so the cross-validation folds are reproducible
set.seed(1234)
cv_glm <- cv.glmnet(
  x,
  y,
  alpha = 1,
  standardize = TRUE,
  nfolds = 5,
  type.measure = "mse"
)
#------------------------------------------------------------------------------
# PLOT OF THE ENSEMBLE RESULTS
#------------------------------------------------------------------------------
# 3 x 2 layout: four ensemble panels on the first two rows, one wide legend
# strip on the third row
m <- matrix(c(1, 2, 3, 4, 5, 5), nrow = 3, ncol = 2, byrow = TRUE)
layout(mat = m, heights = c(0.4, 0.4, 0.2))
par(mar = c(5, 5, 5, 5))
time <- c("2014", "2015", "2016", "2017", "2018", "2019")

# Draws one panel: the observed series, the combined forecast overlaid as a
# dotted red line, and a custom x axis labelled with the values in `time`.
# Extra arguments are forwarded to plot.ts().
plot_ensemble_panel <- function(panel_title, combined, ...) {
  plot.ts(
    actual,
    main = panel_title,
    ylab = "GDP Growth",
    ylim = c(-6, 7),
    xaxt = "n",
    ...
  )
  lines(combined, col = "red", lty = 3)
  axis(
    1,
    at = c(3, 7, 11, 15, 19, 23),
    labels = time,
    las = 1,
    srt = 315,
    cex.axis = 0.9
  )
  invisible(NULL)
}

# NOTE(review): `ensemble2`, `ensemble3` and `lasso.fit` are not defined in
# the visible part of the script -- confirm they are created before this point.
plot_ensemble_panel("RF-ENET-NN", ensemble1)
plot_ensemble_panel("RF-ENET", ensemble2)
plot_ensemble_panel("RF-NN", ensemble3)
plot_ensemble_panel("Lasso Reg", lasso.fit, xlab = "")

# Legend strip: an empty plot that only carries the shared legend
par(mar = c(1, 1, 1.5, 1))
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
legend(
  x = "top",
  inset = 0,
  legend = c("Actual", "Forecast"),
  col = c("black", "red"),
  lty = c(1, 3),
  lwd = 3,
  cex = 0.9,
  horiz = TRUE
)
# Back to a single-panel device
par(mfrow = c(1, 1))