0% found this document useful (0 votes)

28 views7 pages

List of Functions

The document describes 12 functions for exploratory data analysis including computing correlations, plotting scatterplots and histograms, and checking for normality. It provides the code for functions to compute Pearson correlation using vectors, dataframes, and multiple variables. Additional functions allow plotting correlations, reading CSV files, performing correlation plots, and checking for normality.

Uploaded by

Cyd Duque

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

28 views7 pages

List of Functions

Uploaded by

Cyd Duque

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 7

# --------------------------------------------------------------------

# Developed by Prof. Carlito O. Daarol

# Math Department
# Mindanao State University
# General Santos City
# September 12, 2023

#
# List of functions intended as support for Exploratory Data Analysis
# compute correlation coefficient
# plot scatterplot and graphics
# Check for normality
# --------------------------------------------------------------------

# 1. correlation - function to compute Pearson r correlation using the
#    summation formula, using two vectors or arrays X and Y

# 2. Correcorre - function to compute correlation using a dataframe of any
#    size as input

# 3. pairwiseCor - function to compute pairwise correlations from two sets of
#    variables

# 4. singlesetCor - function to compute pairwise correlation from 1 set of
#    variables

# 5. CorrsjPlot - function to compute pairwise correlation using a dataframe
#    and the sjPlot package

# 6. DrawCorrelations - function to draw correlation image or heat map using
#    an R package

# 7. readcsv - function to read csv file data from any location

# 8. CorrePlotXY - function to perform a correlation plot for two variables

# 9. CheckforNormality - function to check for normality of any numeric
#    variable using the Shapiro-Wilk test

# 10. QQNormality_Plot - function to check and plot normality of distribution

# 11. QQPlot - function to check and plot normality using an R package

# 12. PlotHistDensity - function to plot histogram and density using a
#     dataframe as input

#------------------------------------------------------------
# Function to compute Pearson Correlation Using the formula
# using two vectors as input data
# -----------------------------------------------------------

# Compute the Pearson correlation coefficient between two numeric vectors
# using the computational (raw sums) formula:
#
#   r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))
#
# Arguments:
#   datX, datY  numeric vectors of paired observations (same length).
# Returns:
#   A single numeric value. If either vector is constant the denominator is
#   zero and the result is not a finite number.
correlation <- function(datX, datY) {
  # Unequal lengths would be silently recycled by `datX * datY`.
  stopifnot(length(datX) == length(datY))

  n <- length(datX)
  sumX <- sum(datX)
  sumY <- sum(datY)
  sumXY <- sum(datX * datY)
  sumxsqr <- sum(datX * datX)
  sumysqr <- sum(datY * datY)

  numerator <- n * sumXY - sumX * sumY
  denominator <- (n * sumxsqr - sumX * sumX) * (n * sumysqr - sumY * sumY)

  numerator / sqrt(denominator)
}

# -----------------------------------------------------------
# Function to compute Pearson Correlation Using a dataframe
# the dataframe may consist of several columns
# -----------------------------------------------------------

# Compute the Pearson correlation for every pair of columns of `data`,
# delegating each pair to correlation().
#
# Arguments:
#   data  data frame (or matrix) whose columns are numeric variables.
# Returns:
#   Numeric vector of length choose(ncol(data), 2) with the coefficients in
#   the order (1,2), (1,3), ..., (1,p), (2,3), ..., (p-1,p). An empty numeric
#   vector is returned when `data` has fewer than two columns.
Correcorre <- function(data) {
  n_cols <- ncol(data)

  # `1:(n_cols - 1)` would count down to 0 here and index a missing column.
  if (is.null(n_cols) || n_cols < 2) {
    return(numeric(0))
  }

  # Preallocate instead of growing the vector one element at a time.
  result <- numeric(choose(n_cols, 2))
  counter <- 0
  for (i in seq_len(n_cols - 1)) {
    for (j in (i + 1):n_cols) {
      counter <- counter + 1
      result[counter] <- correlation(data[, i], data[, j])
    }
  }
  result
}

# --------------------------------------------------------------------------
# Function to compute Pearson Correlation using two sets of variables
# -------------------------------------------------------------------------

# Correlate every variable of one set with every variable of a second set.
#
# Arguments:
#   depvar    data frame holding the first set of numeric variables.
#   indepvar  data frame holding the second set of numeric variables.
#   corrtype  correlation method, used for both the coefficient (cor) and the
#             significance test (cor.test), e.g. "pearson", "kendall" or
#             "spearman".
# Returns:
#   Data frame with one row per (depvar column, indepvar column) pair and the
#   columns VariableX, VariableY, AbsCor, Cor, PValue (all rounded to two
#   decimals) and "Assessment (Pvalue <0.05)".
pairwiseCor <- function(depvar, indepvar, corrtype) {
  # number of possible pairs
  n_pairs <- length(depvar) * length(indepvar)

  # holders for the per-pair results
  var_x <- character(n_pairs)
  var_y <- character(n_pairs)
  cor_val <- numeric(n_pairs)
  p_raw <- numeric(n_pairs)

  k <- 0
  for (i in seq_along(depvar)) {
    for (j in seq_along(indepvar)) {
      k <- k + 1
      x <- depvar[, i]
      y <- indepvar[, j]
      var_x[k] <- names(depvar)[i]
      var_y[k] <- names(indepvar)[j]
      # Same method for coefficient and test, so the reported r matches the
      # p-value.
      cor_val[k] <- cor(x, y, method = corrtype)
      p_raw[k] <- cor.test(x, y, method = corrtype)$p.value
    }
  }

  msg1 <- "Reject Ho. Linear Relationship is true"
  msg2 <- "Not correlated variables"

  df <- data.frame(
    VarX = var_x,
    VarY = var_y,
    AbsCor = round(abs(cor_val), 2),
    Cor = round(cor_val, 2),
    PValue = round(p_raw, 2),
    # Decide on the unrounded p-value: e.g. 0.046 rounds to 0.05 and would
    # otherwise be reported as not significant.
    Assessment = ifelse(p_raw < 0.05, msg1, msg2),
    stringsAsFactors = FALSE
  )
  colnames(df) <- c(
    "VariableX", "VariableY", "AbsCor", "Cor", "PValue",
    "Assessment (Pvalue <0.05)"
  )
  df
}

# -----------------------------------------------------------------------
# Function to compute pairwise correlation using a single set of variables
# -----------------------------------------------------------------------

# function to compute pairwise correlation from a single set

# Correlate every pair of distinct columns within a single set of variables.
#
# Arguments:
#   singleset  data frame of numeric variables.
#   corrtype   correlation method, used for both the coefficient (cor) and
#              the significance test (cor.test), e.g. "pearson", "kendall" or
#              "spearman".
# Returns:
#   Data frame with one row per column pair (i < j) and the columns VarX,
#   VarY, AbsCor, Cor, PValue (rounded to two decimals) and Assessment.
#   Zero rows are returned when there are fewer than two columns.
singlesetCor <- function(singleset, corrtype) {
  numcol <- ncol(singleset)

  # number of possible pairs; none when fewer than two columns
  n_pairs <- if (is.null(numcol) || numcol < 2) 0 else choose(numcol, 2)

  # holders for the per-pair results
  var_x <- character(n_pairs)
  var_y <- character(n_pairs)
  cor_val <- numeric(n_pairs)
  p_raw <- numeric(n_pairs)

  if (n_pairs > 0) {
    k <- 0
    for (i in seq_len(numcol - 1)) {
      for (j in (i + 1):numcol) {
        k <- k + 1
        x <- singleset[, i]
        y <- singleset[, j]
        var_x[k] <- names(singleset)[i]
        var_y[k] <- names(singleset)[j]
        # Same method for coefficient and test, so the reported r matches
        # the p-value.
        cor_val[k] <- cor(x, y, method = corrtype)
        p_raw[k] <- cor.test(x, y, method = corrtype)$p.value
      }
    }
  }

  data.frame(
    VarX = var_x,
    VarY = var_y,
    AbsCor = round(abs(cor_val), 2),
    Cor = round(cor_val, 2),
    PValue = round(p_raw, 2),
    # Decide on the unrounded p-value: e.g. 0.046 rounds to 0.05 and would
    # otherwise be reported as not significant.
    Assessment = ifelse(p_raw < 0.05, "<0.05 (sig)", "> 0.05 (not sig"),
    stringsAsFactors = FALSE
  )
}

# -----------------------------------------------------------
# function to compute pairwise correlation using R packages
# -----------------------------------------------------------

# Build a correlation table for `data` by delegating to tab_corr() from the
# sjPlot package.
#
# Arguments:
#   data      passed unchanged as the first argument of tab_corr().
#   corrtype  forwarded as `corr.method`.
#   mtitle    forwarded as `title`.
# Returns:
#   Whatever tab_corr() returns (it is the last expression evaluated).
# NOTE(review): presumably `data` is a data frame of numeric columns and the
# table is shown in the viewer (use.viewer = TRUE) - confirm against the
# sjPlot documentation.
CorrsjPlot <- function(data,corrtype,mtitle){

# attaches sjPlot on every call; library() errors if it is not installed
library(sjPlot)
# every option other than corr.method and title is fixed here
tab_corr(data,
na.deletion = "pairwise",
corr.method = corrtype,
title = mtitle,
var.labels = NULL,
wrap.labels = 40,
show.p = TRUE,
p.numeric = FALSE,
fade.ns = TRUE,
val.rm = NULL,
digits = 3,
triangle = "lower",
string.diag = NULL,
CSS = NULL,
encoding = NULL,
file = NULL,
use.viewer = TRUE,
remove.spaces = TRUE)
}

# ----------------------------------------------------
# function to draw Correlations using R packages
# ---------------------------------------------------
# Illustrate what given correlation values look like: for each value in
# `cordata`, simulate 100 bivariate-normal points with that correlation and
# draw their scatterplot with a fitted regression line, in a 2 x 3 grid.
#
# Arguments:
#   cordata  numeric vector of target correlation coefficients.
#            NOTE(review): each value presumably has to lie strictly between
#            -1 and 1 so that the 2 x 2 matrix is a valid covariance matrix -
#            confirm against mvtnorm::rmvnorm.
# Returns:
#   NULL, invisibly. Called for its plots. Output is random (no seed is set).
DrawCorrelations <- function(cordata) {
  # library() fails immediately if mvtnorm is missing; require() only warned
  # and the error surfaced later at the rmvnorm() call.
  library(mvtnorm)

  # Restore the caller's graphics settings (mfrow, mar, oma) on exit, also
  # when an error interrupts the loop.
  old_par <- par(
    mfrow = c(2, 3),
    mar = 0.1 + c(4, 4, 1, 1),
    oma = c(0, 0, 2, 0)
  )
  on.exit(par(old_par), add = TRUE)

  for (i in seq_along(cordata)) {
    # unit variances, so the off-diagonal entry is the correlation itself
    S <- matrix(c(1, cordata[i], cordata[i], 1), 2, 2)
    AB <- rmvnorm(n = 100, mean = c(0, 0), sigma = S)
    U <- pnorm(AB) # each column of U is now uniform on (0, 1)

    # map the uniforms back to normal margins with mean 1 and sd 0.02
    x <- qnorm(U[, 1], 1, .02)
    y <- qnorm(U[, 2], 1, .02)

    corval <- cor(x, y)

    plot(x, y, main = paste0(" Pearson r = ", round(corval, 3)))
    fit <- lm(y ~ x)
    abline(fit, col = "red")
  }
  invisible(NULL)
}

# Read csv data from any location

# Read a csv file given its folder and file name.
#
# Arguments:
#   datapath  folder containing the file (without a trailing slash).
#   dataname  file name, including the extension.
# Returns:
#   The data frame produced by read.csv() with its default settings.
readcsv <- function(datapath, dataname) {
  full_path <- paste0(datapath, "/", dataname)
  read.csv(full_path)
}

## Perform correlation plot for two variables

# Scatter plot of two variables of `data`, delegating to ggscatter() from the
# ggpubr package with a regression line requested (add = "reg.line") and the
# correlation coefficient switched on (cor.coef = TRUE).
#
# Arguments:
#   data      passed unchanged as the first argument of ggscatter().
#   X, Y      forwarded as `x` and `y`.
#             NOTE(review): presumably column names given as strings - confirm.
#   color     forwarded as `color`.
#   Xlab      forwarded as `xlab`.
#   Ylab      forwarded as `ylab`.
#   corrtype  forwarded as `cor.method`.
# Returns:
#   Whatever ggscatter() returns (it is the last expression evaluated).
CorrePlotXY <- function(data,X,Y,color,Xlab,Ylab,corrtype){

# attaches ggpubr on every call; library() errors if it is not installed
library(ggpubr)
ggscatter(data, x = X, y = Y,
color = color, cor.coef = TRUE,
cor.method = corrtype,
xlab = Xlab, ylab = Ylab,
add= "reg.line")
}

# Run the Shapiro-Wilk normality test (shapiro.test) once per group.
#
# Arguments:
#   dat  long-format data frame with a grouping column named `variable` and a
#        numeric column named `Score` (both names are hard-coded below).
# Returns:
#   Data frame with one row per value of `variable` and the columns
#   `variable`, `W Statistic`, `p-value` (both rounded to three decimals) and
#   `Variable Distribution Assessment` (a text verdict at the 0.05 level).
CheckforNormality <- function(dat) {
  library(dplyr)

  df <- dat %>%
    group_by(variable) %>%
    summarise(
      `W Statistic` = shapiro.test(Score)$statistic,
      `p-value` = shapiro.test(Score)$p.value
    )
  df <- as.data.frame(df)

  # Keep the unrounded p-values for the decision: rounding first turns e.g.
  # 0.047 into 0.05, which would then be reported as normal.
  raw_p <- df[["p-value"]]
  df[["W Statistic"]] <- round(df[["W Statistic"]], 3)
  df[["p-value"]] <- round(raw_p, 3)

  verdict <- ifelse(
    raw_p < 0.05,
    " is not normal(pval<.05)",
    " distribution is normal"
  )
  df$`Variable Distribution Assessment` <- paste0(df[["variable"]], verdict)
  df
}

# Function call: Normality Test

# Build a faceted normal QQ plot, one panel per value of `variable`, made of a
# confidence band, a reference line and the sample points.
#
# Arguments:
#   data_long  long-format data frame; its columns are renamed to
#              "variable" and "value" below.
#              NOTE(review): assumes exactly two columns, group label first
#              and numeric value second - confirm with callers.
# Returns:
#   The ggplot object (last expression evaluated); it is drawn only when the
#   result is printed.
# NOTE(review): nothing is attached here. ggplot(), aes(), facet_wrap() etc.
# need ggplot2, and stat_qq_band()/stat_qq_line()/stat_qq_point() are not
# defined in this file - presumably they come from the qqplotr package.
# Confirm both are loaded before this is called.
QQNormality_Plot = function(data_long){

# overwrite the column names so the aesthetics below can refer to them
colnames(data_long) <- c("variable","value")

# band settings are fixed: conf = 0.95, bandType = "boot", B = 5000
ggplot(data = data_long, mapping = aes(sample = value, color = variable, fill =
variable)) +
stat_qq_band(alpha=0.5, conf=0.95, qtype=1, bandType = "boot", B=5000) +
stat_qq_line(identity=TRUE) +
stat_qq_point(col="black") +
ggtitle("QQ Plot for Normality of Distribution \n Points must lie inside the
confidence band")+
facet_wrap(~ variable, scales = "free") +
labs(x = "Theoretical Quantiles", y = "Sample Quantiles") + theme_bw()
}

# Function call: Normality Test

# Draw a faceted normal QQ plot, one panel per value of `variable`, made of a
# confidence band, a reference line and the sample points.
#
# Arguments:
#   data_long  long-format data frame with columns named `variable` (group
#              label) and `value` (numeric sample).
#   Toword     not used by this function; kept so existing calls still work.
#   fileout    not used by this function; kept so existing calls still work.
# Returns:
#   The ggplot object, invisibly, after printing it.
# NOTE(review): stat_qq_band()/stat_qq_line()/stat_qq_point() are not defined
# in this file and are not attached here - presumably they come from the
# qqplotr package; confirm it is loaded before this is called.
QQPlot <- function(data_long, Toword, fileout) {
  library(ggplot2)

  qq_plot <- ggplot(
    data = data_long,
    mapping = aes(sample = value, color = variable, fill = variable)
  ) +
    stat_qq_band(alpha = 0.5, conf = 0.95, qtype = 1, bandType = "boot", B = 500) +
    stat_qq_line(identity = TRUE) +
    stat_qq_point(col = "black") +
    ggtitle(paste0(
      "Quantile-Quantile Test for Normality \n ",
      "Normally distributed if points are inside the confidence band"
    )) +
    facet_wrap(~ variable, scales = "free") +
    labs(x = "Theoretical Quantiles", y = "Sample Quantiles") +
    theme_bw()

  # A ggplot object built inside a function is not drawn unless it is printed
  # explicitly; previously it was discarded and nothing appeared.
  print(qq_plot)
  invisible(qq_plot)
}
# For each of the first three columns of `data_wide` (fewer if the data has
# fewer columns) draw an index plot of the values next to a histogram with a
# fitted normal curve overlaid, in a 3 x 2 grid.
#
# Arguments:
#   vars       not used by this function; kept so existing calls still work.
#   data_wide  data frame (or matrix) with one numeric variable per column.
# Returns:
#   NULL, invisibly. Called for its plots.
PlotHistDensity <- function(vars, data_wide) {
  # Restore the caller's layout on exit, also when an error interrupts.
  old_par <- par(mfrow = c(3, 2))
  on.exit(par(old_par), add = TRUE)

  # Never index past the last available column.
  n_plot <- min(3L, ncol(data_wide))

  for (i in seq_len(n_plot)) {
    dat <- na.omit(data_wide[, i])
    plot(dat)

    h <- hist(dat, main = colnames(data_wide)[i])

    # Scale the density to the count axis with the histogram's real bin
    # width (n * width * density) rather than assuming 20 bins.
    bin_width <- diff(h$breaks)[1]
    x <- seq(from = min(dat), to = max(dat), length.out = 50)
    norm_dist <- dnorm(x, mean = mean(dat), sd = sd(dat)) *
      bin_width * length(dat)
    lines(x, norm_dist, col = "violet", lwd = 4)
  }
  invisible(NULL)
}

Discovery Service Manual Rev 04
100% (4)
Discovery Service Manual Rev 04
198 pages
Sap HCM User Manual Organizational Management1
100% (1)
Sap HCM User Manual Organizational Management1
29 pages
Correlation Analysis in Python
100% (1)
Correlation Analysis in Python
6 pages
CORRELATION AND COVARIANCE in R
100% (1)
CORRELATION AND COVARIANCE in R
24 pages
Assignment 1
No ratings yet
Assignment 1
12 pages
Pearson and Spearman Correlation
No ratings yet
Pearson and Spearman Correlation
50 pages
Scoring Rubrics1
100% (2)
Scoring Rubrics1
2 pages
DRDO Books PDF
No ratings yet
DRDO Books PDF
2 pages
R-Unit 5
No ratings yet
R-Unit 5
76 pages
Rallfun v37
No ratings yet
Rallfun v37
1,294 pages
Exploratory Data Analysis
No ratings yet
Exploratory Data Analysis
5 pages
Correlation
No ratings yet
Correlation
26 pages
Bivariate Analysis
No ratings yet
Bivariate Analysis
24 pages
Correlation and Regression Analysis Using SPSS
No ratings yet
Correlation and Regression Analysis Using SPSS
102 pages
SSRN Id3512994
No ratings yet
SSRN Id3512994
34 pages
Topic 6 Correlation and Regression
100% (1)
Topic 6 Correlation and Regression
25 pages
Topic4 Linear Models
No ratings yet
Topic4 Linear Models
72 pages
8.5-8.6 Exponential Growth and Decay Models
No ratings yet
8.5-8.6 Exponential Growth and Decay Models
28 pages
Regression Analysis Script
No ratings yet
Regression Analysis Script
24 pages
DA Lab Manual
No ratings yet
DA Lab Manual
60 pages
Practical 02 - Pca
No ratings yet
Practical 02 - Pca
14 pages
Co4 (10) Sem R
No ratings yet
Co4 (10) Sem R
12 pages
Name: Badigi Shivakumar Reg - No: 20MIS0173 Lab - Slot: L9+L10 Date: 02-09-2021
No ratings yet
Name: Badigi Shivakumar Reg - No: 20MIS0173 Lab - Slot: L9+L10 Date: 02-09-2021
10 pages
Lab Exercise 1
No ratings yet
Lab Exercise 1
16 pages
Big Data Analytics
No ratings yet
Big Data Analytics
13 pages
Adhithyan
No ratings yet
Adhithyan
22 pages
Practical No 12 SML
No ratings yet
Practical No 12 SML
6 pages
Task 1
No ratings yet
Task 1
9 pages
AMDA Practical - A048
No ratings yet
AMDA Practical - A048
35 pages
Ww2 Coastal Edu Kingw Statistics R Tutorials Simplelinear HT
No ratings yet
Ww2 Coastal Edu Kingw Statistics R Tutorials Simplelinear HT
15 pages
Nthu Bacs-Hw
No ratings yet
Nthu Bacs-Hw
4 pages
8 - Cia 3 Key
No ratings yet
8 - Cia 3 Key
3 pages
CB161 (R Lab Manual)
No ratings yet
CB161 (R Lab Manual)
32 pages
Correlation and Regression
No ratings yet
Correlation and Regression
2 pages
Commands For Data Analysis Using R
No ratings yet
Commands For Data Analysis Using R
11 pages
Stats C183 P4
No ratings yet
Stats C183 P4
6 pages
Intro To R Software
No ratings yet
Intro To R Software
7 pages
Practicals R Codes
No ratings yet
Practicals R Codes
3 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
R Code
No ratings yet
R Code
9 pages
Maths Lab
No ratings yet
Maths Lab
17 pages
Ds
No ratings yet
Ds
2 pages
Cost Practical
No ratings yet
Cost Practical
13 pages
Correlation Diploma
No ratings yet
Correlation Diploma
10 pages
Correlation and Regration
No ratings yet
Correlation and Regration
57 pages
R
No ratings yet
R
6 pages
R Code For Canonical Correlation Analysis
No ratings yet
R Code For Canonical Correlation Analysis
10 pages
Data Mining and Warehousing
No ratings yet
Data Mining and Warehousing
18 pages
Module - 4 (R Training) - Basic Stats & Modeling
No ratings yet
Module - 4 (R Training) - Basic Stats & Modeling
15 pages
R Programming End Term
No ratings yet
R Programming End Term
4 pages
Pearson Correlation Coefficient
No ratings yet
Pearson Correlation Coefficient
7 pages
R Console
No ratings yet
R Console
6 pages
BONGGA Statistics-and-Probability 4Q SLM8
No ratings yet
BONGGA Statistics-and-Probability 4Q SLM8
10 pages
Machine Learning-Intro
No ratings yet
Machine Learning-Intro
7 pages
Practice Questions On Symmetry Corr Reg On Vectors
No ratings yet
Practice Questions On Symmetry Corr Reg On Vectors
3 pages
R Syntax Examples 1
No ratings yet
R Syntax Examples 1
6 pages
CRM Cheat Sheet
No ratings yet
CRM Cheat Sheet
7 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
FloBoss s600+ Instruction Manual
No ratings yet
FloBoss s600+ Instruction Manual
124 pages
BAN5
No ratings yet
BAN5
2 pages
Oe Statistics Notes
No ratings yet
Oe Statistics Notes
32 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
Which Test When: 1 Exploratory Tests
No ratings yet
Which Test When: 1 Exploratory Tests
5 pages
SAN Problems
No ratings yet
SAN Problems
34 pages
Package 1. Level 2. Level 3. Level 4. Level Node-Name Image Type Description Instructions Tutorial-Links Comment Last Edit
No ratings yet
Package 1. Level 2. Level 3. Level 4. Level Node-Name Image Type Description Instructions Tutorial-Links Comment Last Edit
21 pages
QMM 1
No ratings yet
QMM 1
18 pages
2014 TV Firmware Upgrade Instruction T-NT14MDEUC
No ratings yet
2014 TV Firmware Upgrade Instruction T-NT14MDEUC
5 pages
Constant Correlation Model
No ratings yet
Constant Correlation Model
3 pages
Knowledge and Skill Guidelines For Aquarists: 2001 Marine Advanced Technology Education Center
No ratings yet
Knowledge and Skill Guidelines For Aquarists: 2001 Marine Advanced Technology Education Center
29 pages
Nikon D90 Specifications
No ratings yet
Nikon D90 Specifications
7 pages
Introduction To Programming Language & Classification of Software
No ratings yet
Introduction To Programming Language & Classification of Software
10 pages
Nmap For Packet Tracing
No ratings yet
Nmap For Packet Tracing
19 pages
CSE114 Unit2
No ratings yet
CSE114 Unit2
74 pages
Stamps/Mti Manual: Version 3.3B1
No ratings yet
Stamps/Mti Manual: Version 3.3B1
36 pages
GPT Custom Instructions Builder 3
No ratings yet
GPT Custom Instructions Builder 3
10 pages
Dijkstra's Shortest Path Algorithm Serial and Parallel Execution Performance Analysis
No ratings yet
Dijkstra's Shortest Path Algorithm Serial and Parallel Execution Performance Analysis
5 pages
PAS ISA Safety Symposium
No ratings yet
PAS ISA Safety Symposium
58 pages
PIRATE KING Resume - White
No ratings yet
PIRATE KING Resume - White
2 pages
Machine Learning Approaches For Soil Type Classification in
No ratings yet
Machine Learning Approaches For Soil Type Classification in
20 pages
Controller Based Power Theft Location Detection System: Ntroduction
No ratings yet
Controller Based Power Theft Location Detection System: Ntroduction
4 pages
AppTokenGen JCCV12.1
No ratings yet
AppTokenGen JCCV12.1
13 pages
6416 978-1-5386-7150-4/18/$31.00 ©2018 Ieee Igarss 2018
No ratings yet
6416 978-1-5386-7150-4/18/$31.00 ©2018 Ieee Igarss 2018
4 pages
Rainbow Technology: BY P.Aswanth Sai
No ratings yet
Rainbow Technology: BY P.Aswanth Sai
15 pages
(Script) The Advantages of Using The Internet
No ratings yet
(Script) The Advantages of Using The Internet
2 pages
De Thi Vao 10 Mon Anh Bac Ninh 2025 Co Dap An Va Loi Giai Chi Tiet 1749435781
No ratings yet
De Thi Vao 10 Mon Anh Bac Ninh 2025 Co Dap An Va Loi Giai Chi Tiet 1749435781
7 pages
View Message
No ratings yet
View Message
1 page
PowerShield Centurion RT 1-3K Brochure
No ratings yet
PowerShield Centurion RT 1-3K Brochure
2 pages
Non-Textual Form PDF
No ratings yet
Non-Textual Form PDF
2 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet