# List of Functions
#
# List of functions intended as support for Exploratory Data Analysis
# compute correlation coefficient
# plot scatterplot and graphics
# Check for normality
# --------------------------------------------------------------------
# 7. Read csv - function to read csv file data from any location
#------------------------------------------------------------
# Function to compute Pearson Correlation Using the formula
# using two vectors as input data
# -----------------------------------------------------------
#' Pearson correlation coefficient from the computational (raw-sums) formula
#'
#' r = (n * sum(xy) - sum(x) * sum(y)) /
#'     sqrt((n * sum(x^2) - sum(x)^2) * (n * sum(y^2) - sum(y)^2))
#'
#' Missing values are not removed: an `NA` in either input propagates through
#' the sums and the result is `NA`.
#'
#' @param datX Numeric vector.
#' @param datY Numeric vector with the same length as `datX`.
#' @return A single number; `NaN` when either input has zero variance
#'   (the denominator is then 0).
correlation <- function(datX, datY) {
  # Without this check a shorter vector would be silently recycled in
  # datX * datY and the result would be meaningless.
  stopifnot(length(datX) == length(datY))

  n <- length(datX)
  sumX <- sum(datX)
  sumY <- sum(datY)
  sumXY <- sum(datX * datY)
  sumxsqr <- sum(datX * datX)
  sumysqr <- sum(datY * datY)

  numerator <- n * sumXY - sumX * sumY
  denominator <- sqrt((n * sumxsqr - sumX^2) * (n * sumysqr - sumY^2))
  numerator / denominator
}
# -----------------------------------------------------------
# Function to compute Pearson Correlation Using a dataframe
# the dataframe may consist of several columns
# -----------------------------------------------------------
#' Pearson correlation for every pair of columns in a data frame
#'
#' Calls `correlation()` on each column pair (i, j) with i < j and collects the
#' results in the order (1,2), (1,3), ..., (1,p), (2,3), ..., (p-1,p).
#'
#' @param data Data frame (or matrix) whose columns are the variables.
#' @return Numeric vector of length `choose(ncol(data), 2)`; an empty numeric
#'   vector when `data` has fewer than two columns.
Correcorre <- function(data) {
  # The loop bounds were previously read from `limit1` / `limit2`, which are
  # not defined in this function; derive them from the data instead.
  numcol <- ncol(data)
  if (is.null(numcol) || numcol < 2L) {
    return(numeric(0))
  }

  # Preallocate instead of growing the vector one element at a time.
  result <- numeric(choose(numcol, 2))
  counter <- 0L
  for (i in seq_len(numcol - 1L)) {
    # i + 1 <= numcol is guaranteed here, so this range never counts down.
    for (j in (i + 1L):numcol) {
      counter <- counter + 1L
      result[counter] <- correlation(data[, i], data[, j])
    }
  }
  result
}
# --------------------------------------------------------------------------
# Function to compute Pearson Correlation using two sets of variables
# -------------------------------------------------------------------------
# -----------------------------------------------------------------------
# Function to compute pairwise correlations using a single set of variables
# -----------------------------------------------------------------------
# NOTE(review): no `function(...)` header precedes this loop in the file as it
# stands, and `numcol` is never assigned in the visible code, so it must
# already exist when these lines run -- confirm against the original script.
# The loops visit every index pair (i, k) with i < k <= numcol and only
# increment `counter`; nothing else is computed per pair.
# NOTE(review): `1:(numcol-1)` counts down (1, 0) when numcol is 1.
counter =0
for(i in 1:(numcol-1)){
# first partner index for column i
j= i+1
for (k in j:numcol){
counter = counter +1
}
# a bare name inside a `for` body is evaluated but not auto-printed
counter
}
# -----------------------------------------------------------
# function to compute pairwise correlation using R packages
# -----------------------------------------------------------
# NOTE(review): the `function(...) {` header for this body is not present in
# the file as it stands; the closing brace on the last line has no opener in
# this block. `data`, `corrtype` and `mtitle` are not assigned in the visible
# code -- presumably they were the parameters of the lost header; confirm.
library(sjPlot)
# Build the correlation table with tab_corr(): pairwise NA deletion, the
# method taken from `corrtype`, the title from `mtitle`, p-values shown
# (show.p = TRUE, p.numeric = FALSE), 3 digits, lower triangle only, no output
# file (file = NULL) and display in the viewer (use.viewer = TRUE).
tab_corr(data,
na.deletion = "pairwise",
corr.method = corrtype,
title = mtitle,
var.labels = NULL,
wrap.labels = 40,
show.p = TRUE,
p.numeric = FALSE,
fade.ns = TRUE,
val.rm = NULL,
digits = 3,
triangle = "lower",
string.diag = NULL,
CSS = NULL,
encoding = NULL,
file = NULL,
use.viewer = TRUE,
remove.spaces = TRUE)
}
# ----------------------------------------------------
# function to draw Correlations using R packages
# ---------------------------------------------------
# Sets a 2 x 3 base-graphics panel layout (with custom inner and outer
# margins) and then returns the result of a ggpubr::ggscatter() call made with
# cor.coef = TRUE and add = "reg.line".
# NOTE(review): `cordata` is not referenced in the body, while `data`, `X`,
# `Y`, `color`, `corrtype`, `Xlab` and `Ylab` are neither parameters nor
# locals and are therefore looked up outside this function -- confirm where
# they are meant to come from.
DrawCorrelations <- function(cordata) {
  require(mvtnorm)

  # Inner margins: the usual c(4, 4, 1, 1) plus 0.1 on every side.
  panel_margins <- 0.1 + c(4, 4, 1, 1)
  par(
    mfrow = c(2, 3),
    mar = panel_margins,
    oma = c(0, 0, 2, 0)
  )

  library(ggpubr)
  ggscatter(
    data,
    x = X,
    y = Y,
    color = color,
    cor.coef = TRUE,
    cor.method = corrtype,
    xlab = Xlab,
    ylab = Ylab,
    add = "reg.line"
  )
}
#' Q-Q plots with confidence bands as a visual check for normality
#'
#' Reshapes the numeric columns of `dat` to long form (columns `variable` and
#' `value`) and draws one Q-Q panel per variable with a bootstrap confidence
#' band, a reference line and the sample points.
#'
#' NOTE(review): the previous body held an unterminated
#' `for (k in 1:nrow(df)){` loop over an undefined `df`, never created
#' `data_long`, and discarded the ggplot object before returning, so nothing
#' was drawn. The reshaping step below is a reconstruction inferred from the
#' `value` / `variable` aesthetics -- confirm it matches the lost code.
#' NOTE(review): `stat_qq_band()`, `stat_qq_line(identity = )` and
#' `stat_qq_point()` are presumably from the qqplotr package; as before, this
#' function does not attach it, so the caller must.
#'
#' @param dat Data frame (or object coercible to one) with one variable per
#'   column; non-numeric columns are ignored.
#' @return `NULL`, invisibly. Called for the side effect of drawing the plot.
CheckforNormality <- function(dat) {
  library(dplyr)
  library(ggplot2)

  dat <- as.data.frame(dat)
  is_num <- vapply(dat, is.numeric, logical(1))
  if (!any(is_num)) {
    stop("`dat` has no numeric columns to plot", call. = FALSE)
  }

  # utils::stack() yields columns `values` and `ind`; rename them to the names
  # the aesthetics below expect.
  data_long <- utils::stack(dat[, is_num, drop = FALSE])
  names(data_long) <- c("value", "variable")

  qq_plot <- ggplot(
    data = data_long,
    mapping = aes(sample = value, color = variable, fill = variable)
  ) +
    stat_qq_band(alpha = 0.5, conf = 0.95, qtype = 1, bandType = "boot", B = 500) +
    stat_qq_line(identity = TRUE) +
    stat_qq_point(col = "black") +
    ggtitle(paste0(
      "Quantile-Quantile Test for Normality \n Normally distributed if points\n",
      "are inside the confidence band"
    )) +
    facet_wrap(~ variable, scales = "free") +
    labs(x = "Theoretical Quantiles", y = "Sample Quantiles") +
    theme_bw()

  # A ggplot object created inside a function is only drawn when printed.
  print(qq_plot)
  invisible(NULL)
}
#' Index plot and histogram with a fitted normal curve, per column
#'
#' For each of the first three columns of `data_wide` (fewer when the data has
#' fewer columns) draws the values against their index and, next to it, a count
#' histogram overlaid with a normal curve built from the column's mean and
#' standard deviation. Panels are placed on a 3 x 2 grid and the layout is put
#' back to a single panel afterwards, also when a plotting call fails.
#'
#' @param vars Not used by the body; kept so existing calls keep working.
#' @param data_wide Data frame or matrix with one variable per column.
#' @return Invisibly, the value of the final `par()` call. Called for its
#'   plots.
PlotHistDensity <- function(vars, data_wide) {
  par(mfrow = c(3, 2))
  on.exit(par(mfrow = c(1, 1)), add = TRUE)

  # Was a fixed 1:3, which fails on data with fewer than three columns.
  for (i in seq_len(min(3L, ncol(data_wide)))) {
    # Keep the name `dat`: plot() and hist() derive default axis labels from it.
    dat <- na.omit(data_wide[, i, drop = TRUE])
    if (!is.numeric(dat) || length(dat) < 2L) {
      warning("PlotHistDensity: skipping column ", i,
              " (needs at least two non-missing numeric values)",
              call. = FALSE)
      next
    }

    plot(dat)
    bins <- hist(dat, main = colnames(data_wide)[i])

    # Turn the density into expected counts using the bin width hist() really
    # chose; the earlier range/20 factor assumed 20 bins, which hist() does not
    # use by default, so the curve was off-scale.
    bin_width <- diff(bins$breaks)[1]
    x <- seq(from = min(dat), to = max(dat), length.out = 50)
    norm_dist <- dnorm(x, mean = mean(dat), sd = sd(dat)) * bin_width * length(dat)
    lines(x, norm_dist, col = 'violet', lwd = 4)
  }

  par(mfrow = c(1, 1))
}