source("dbd-regression.R")



# experimental parameters
# npt: number of data points generated for each repetition
npt <- 400
# nexperiments: number of independent repetitions of the experiment
nexperiments <- 300
# nlabeleds: labeled-sample sizes, six values log-spaced from 5 to 320
nlabeleds <- round(exp(seq(log(5), log(320), length.out = 6)))
# alphas: values handed to dbd.graph(); one distance matrix (and one curve) per value
alphas <- c(0, 8, 16, 32, 96, 192)

# value of k to use in k-nn density estimator; square root of N is reasonable
# (sqrt(400) = 20 for the npt chosen above)
k.nn <- 20


##################################
# skip the following code segment (ending after the loop) in order to use pre-computed
# MSE values; the loop itself takes a while to run
##################################

# Result array indexed as [alpha index, labeled-size index, repetition];
# each cell receives the MSE on the points that were treated as unlabeled.
ress.300 <- array(dim = c(length(alphas), length(nlabeleds), nexperiments))

# seq_len()/seq_along() are used so that an empty parameter set yields zero
# iterations instead of the c(1, 0) sequence produced by 1:0.
for (nrepi in seq_len(nexperiments)) {
	# progress indicator
	cat(nrepi, "/", nexperiments, "\n")

	# generate data: first element is used as the inputs, second as the responses
	tmp <- getdata_spa_swissroll(npt)
	X <- tmp[[1]]
	Y <- tmp[[2]]

	# compute DBD distances (one distance matrix per alpha, in graphs$dbd.dmats)
	graphs <- dbd.graph(X, k = k.nn, alphas)
	for (ai in seq_along(alphas)) {
		for (nli in seq_along(nlabeleds)) {
			# randomly select subset to be used as labeled
			# (a fresh subset is drawn for every alpha / sample-size combination)
			labd <- sort(sample.int(npt, size = nlabeleds[nli]))

			# find kernel regression bandwidth through leave one out cross-validation,
			# using only the labeled points
			hinv <- dkern.loocv(graphs$dbd.dmats[[ai]][labd, labd], Y[labd])

			# hide the responses of the "unlabeled" points before regressing
			Yftrl <- Y
			Yftrl[-labd] <- NA

			# estimate labels of "unlabeled" data
			Yr <- dkern.regress(graphs$dbd.dmats[[ai]], Yftrl, hinv = hinv)

			# compute and store MSE over the unlabeled points only
			mserstyi <- mean((Yr - Y)[-labd]^2)
			ress.300[ai, nli, nrepi] <- mserstyi
		}
	}
}; cat("\n")


##################################
# execute from here to avoid re-running the experiments
##################################

# save raw experimental results, since the above code takes a while to run
#save(ress.300 ,file="ress.300.dat")

# Use the stored results when the file is present. As before, loading replaces
# any ress.300 computed above (assuming the file holds an object of that name,
# as the commented-out save() call would produce). When the file is absent,
# fall back to a ress.300 already in memory instead of aborting in load().
if (file.exists("ress.300.dat")) {
	load("ress.300.dat")
} else if (!exists("ress.300")) {
	stop("no results available: 'ress.300.dat' not found and ress.300 has not been computed",
	     call. = FALSE)
}
ress <- ress.300

# compute mean MSEs and error bars
# Restrict to the cells addressed by the current parameter vectors, keeping
# all three dimensions even when a vector has length one.
ress.cells <- ress[seq_along(alphas), seq_along(nlabeleds), , drop = FALSE]

# Number of replicates actually present in the results; used for the standard
# error so that loaded results stay correct if nexperiments was changed.
ress.nrep <- dim(ress.cells)[3]

# means.gk.abs[i, j]: mean MSE for alphas[i] and nlabeleds[j]
means.gk.abs <- apply(ress.cells, c(1, 2), mean)

# sds.gk.abs[i, j]: half-width of the normal-approximation 95% confidence
# interval of that mean (standard error times qnorm(0.975)), not a raw sd
sds.gk.abs <- apply(ress.cells, c(1, 2), sd) / sqrt(ress.nrep) * qnorm(0.975)

# plot: mean MSE against labeled sample size (log x-axis), one curve per alpha,
# with 95% CI error bars, written to errplot.pdf
nlabeleds.pl <- nlabeleds

# upper / lower ends of the error bars for every (alpha, sample size) cell
ci.upper <- means.gk.abs + sds.gk.abs
ci.lower <- means.gk.abs - sds.gk.abs

pdf("errplot.pdf")

# first alpha sets up the axes; default axes are suppressed and drawn below
plot(nlabeleds.pl, means.gk.abs[1, ],
     ylim = c(0, max(ci.upper)),
     type = 'o', lty = 1, pch = 1, col = 1, lwd = 2, cex = 1,
     xlab = "Labeled sample size", ylab = "MSE (95% CI)",
     log = "x", main = "Swiss roll regression",
     xaxt = "n", yaxt = "n")
errbar(nlabeleds.pl, means.gk.abs[1, ], ci.upper[1, ], ci.lower[1, ],
       type = 'p', lty = 1, pch = 1, lwd = 2, cex = 1, add = TRUE)

# remaining alphas; seq_along(...)[-1] is empty when there is only one alpha,
# whereas 2:length(alphas) would iterate over c(2, 1)
for (i in seq_along(alphas)[-1]) {
	lines(nlabeleds.pl, means.gk.abs[i, ],
	      type = 'l', lty = i, pch = i, col = i, lwd = 2, cex = 1)
	errbar(nlabeleds.pl, means.gk.abs[i, ], ci.upper[i, ], ci.lower[i, ],
	       type = 'p', lty = 1, lwd = 2, cex = 1, add = TRUE,
	       errbar.col = i, pch = i, col = i)
}

# x ticks at the sample sizes used; y ticks at fixed values plus the top of the range
axis(1, at = nlabeleds)
axis(2, at = c(0, 5, 10, 15, floor(max(ci.upper))))

# legend anchored 30% of the way along the x range, at the largest mean MSE
legend(max(nlabeleds.pl) - 0.7 * (max(nlabeleds.pl) - min(nlabeleds.pl)),
       max(means.gk.abs),
       legend = paste("alpha =", alphas),
       lwd = 2, cex = 1,
       lty = seq_along(alphas), pch = seq_along(alphas), col = seq_along(alphas))
dev.off()


