
library(Hmisc)
library(igraph)


# generate n random points on a 3-d swiss roll
# returns an unnamed list of length 2:
#   [[1]] n x 3 coordinate matrix with columns x1, x3, x2 (in that order)
#   [[2]] the roll parameter z1 of every point, used as the response y
getdata_spa_swissroll <- function(n) {
	# square root of a uniform draw on [0,(5*pi)^2] puts the roll parameter in [0,5*pi]
	# z1 is drawn before z2; keep this order so seeded runs stay reproducible
	z1 <- runif(n,0,(5*pi)^2)^0.5
	z2 <- runif(n,6,10)

	# spiral coordinates in the x1/x2 plane
	x1 <- z1*sin(z1)/4
	x2 <- z1*cos(z1)/4
	# position across the roll, shifted from [6,10] to [-2,2]
	x3 <- z2-8

	x <- cbind(x1,x3,x2)
	y <- z1
	return(list(x,y))
}

# transductive kernel regression on a precomputed distance matrix
# dmat should be square, Y should be a vector with one entry per row of dmat
# hinv is the inverse bandwidth, passed to kernel.f as its first argument
# every NA entry of Y is replaced by the kernel-weighted average of the observed entries
# returns Y unchanged when nothing is missing, and NULL (after printing a message)
# when every entry is missing
dkern.regress <- function(dmat,Y,hinv,kernel.f=kernel.gaussian) {
	missing.idx <- is.na(Y)
	n.missing <- sum(missing.idx)
	if(n.missing == 0) return(Y)
	if(n.missing == length(Y)) {
		print("need at least one observation")
		return(NULL)
	}
	observed.idx <- !missing.idx

	# kernel weights: one row per missing point, one column per observed point
	weights <- kernel.f(hinv,dmat[missing.idx,observed.idx,drop=FALSE])
	fitted <- (weights %*% Y[observed.idx]) / rowSums(weights)
	# a row whose weights are all zero gives 0/0 = NaN; such predictions become 0
	fitted[is.nan(fitted)] <- 0

	Y[missing.idx] <- fitted
	return(Y)
}


# calculate approximate pairwise density based distances using a k-nn density estimator
# X is the data matrix (rows are data points), k is the number of neighbours (integer >= 1),
# alphas is a vector of non-negative values for the parameter alpha (may be empty)
# returns list(alphas=, dbd.dmats=), where dbd.dmats[[i]] is the npt x npt distance
# matrix for alphas[i]; for alpha == 0 this is the plain euclidean distance matrix
# on invalid input a message is printed and NULL is returned
dbd.graph <- function(X, k,alphas) {
	if(length(dim(X))!=2) {
		print("X should be numeric matrix")
		return(NULL)
	}
	if(k<1 || k%%1!=0) {
		print("k should be integer >=1")
		return(NULL)
	}
	if(any(alphas<0)) {
		print("alphas should be non-negative")
		return(NULL)
	}
	ndim <- ncol(X)
	npt <- nrow(X)

	Xselfdists <- as.matrix(dist(X))
	dimnames(Xselfdists) <- NULL

	# 0/1 indicator: entry [i,j] is 1 when j is among the k+1 closest points to i
	# (k+1 because every point is at distance 0 from itself)
	knn.ind.mat <- Xselfdists
	for(i in seq_len(npt))
		knn.ind.mat[i,] <- rank(Xselfdists[i,]) <= (k + 1)

	# k-nn density estimate: k over (npt * (largest distance among the flagged neighbours)^ndim)
	knn.density.est <- k/(npt*apply(Xselfdists*knn.ind.mat,1,max)^ndim)

	# entry [i,j] is the average of the density estimates at points i and j
	mean.densities <- outer(knn.density.est,knn.density.est,"+")/2

	# one result slot per alpha; seq_along keeps an empty alphas from entering the loop
	DBD.dmats <- vector("list",length(alphas))
	for(i in seq_along(alphas)) {
		if(alphas[i]==0) {
			DBD.dmats[[i]] <- Xselfdists
			next
		}
		# flagged neighbour pairs are shrunk by exp(-alpha * mean density);
		# all other pairs keep their euclidean length (the exponent is 0 there)
		raw.dmat <- Xselfdists*exp(-alphas[i] * knn.ind.mat * mean.densities)
		# NOTE(review): graph.adjacency / shortest.paths are the older igraph names
		# (newer releases call them graph_from_adjacency_matrix / distances);
		# kept as-is so the file keeps working with the igraph version it was written for
		Ag <- graph.adjacency(raw.dmat, mode="min", weighted=TRUE, diag=FALSE)
		DBD.dmats[[i]] <- shortest.paths(Ag, mode = "all")
	}
	return(list(alphas=alphas,dbd.dmats=DBD.dmats))
}



# mean squared error between the true values and their estimates
loss.sqerror<-function(truths,estimates) {
	residuals <- truths-estimates
	mean(residuals^2)
}
# gaussian kernel weight exp(-hinv * dist^2), applied elementwise to dist
# hinv multiplies the squared distance, so a larger hinv gives a narrower kernel
kernel.gaussian <- function(hinv, dist) {
	scaled.sq <- dist^2*hinv
	exp(-scaled.sq)
}

# leave-one-out cross-validation of the inverse bandwidth for kernel regression
# on a custom distance matrix
# dmat should be square, Y should be a vector with one entry per row of dmat
# loss.f(truths, estimates) scores the leave-one-out predictions,
# kernel.f(hinv, dist) turns distances into weights
# if hinv.range is NULL the search interval is 3 divided by the 99% and 1% quantiles
# of the non-zero entries of dmat
# returns the inverse bandwidth found by optimize() on that interval,
# or NULL (after printing a message) on invalid input
dkern.loocv <- function(dmat,Y,loss.f=loss.sqerror,kernel.f=kernel.gaussian,hinv.range=NULL) {
	if(is.null(hinv.range)) {
		nonzero.dists <- dmat[which(dmat!=0)]
		hinv.range <- as.numeric(3/quantile(nonzero.dists,probs=c(0.99,0.01)))
	}

	dims.ok <- nrow(dmat)==ncol(dmat) && ncol(dmat)==length(Y)
	if(!dims.ok) {
		print("incorrect input dimensions")
		return(NULL)
	}
	if(any(hinv.range<0)) {
		print("range of inverse bandwidths should be non-negative")
		return(NULL)
	}
	# accept the interval in either order
	if(hinv.range[2]<hinv.range[1]) hinv.range <- rev(hinv.range)

	# leave-one-out loss for one candidate inverse bandwidth
	loo.loss <- function(hinv) {
		weights <- kernel.f(hinv, dmat)
		# zero self-weight: a point must not contribute to its own prediction
		diag(weights) <- 0

		preds <- (weights %*% Y) / rowSums(weights)
		# rows whose weights are all zero give 0/0 = NaN; predict 0 there
		preds[is.nan(preds)] <- 0
		loss.f(Y, preds)
	}

	fit <- optimize(loo.loss, interval = hinv.range)
	return(fit$minimum)
}

