# R program to illustrate
# Permutation Hypothesis Test
# Load the data set.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned. `stringsAsFactors = TRUE` is requested explicitly: since
# R 4.0.0 strings are read as character by default, in which case
# `levels(d$feed)` below would return NULL instead of the diet labels.
d <- read.table(
  file = "ChickData.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
# print the dataset
print(d)
# check the column names
names(d)
# check the distinct diet labels stored in the feed column
levels(d$feed)
# how many observations in each diet?
table(d$feed)
# let's look at a boxplot of weight by diet
# (las = 1 draws the axis tick labels horizontally)
boxplot(d$weight ~ d$feed,
  las = 1,
  ylab = "weight (g)",
  xlab = "feed",
  main = "Weight by Feed"
)
# Sample MEANS of weight within each diet
with(d, mean(weight[feed == "casein"])) # mean for casein
with(d, mean(weight[feed == "meatmeal"])) # mean for meatmeal
# Test statistic 1: absolute difference between the two sample means
test.stat1 <- with(
  d,
  abs(mean(weight[feed == "casein"]) - mean(weight[feed == "meatmeal"]))
)
test.stat1
# Sample MEDIANS of weight within each diet
with(d, median(weight[feed == "casein"])) # median for casein
with(d, median(weight[feed == "meatmeal"])) # median for meatmeal
# Test statistic 2: absolute difference between the two sample medians
test.stat2 <- with(
  d,
  abs(median(weight[feed == "casein"]) - median(weight[feed == "meatmeal"]))
)
test.stat2
# Permutation Test
# fix the random seed for reproducibility of results
set.seed(1979)
# the number of observations in each permutation sample
n <- length(d$feed)
# the number of permutation samples to take
P <- 100000
# the variable we will resample from
variable <- d$weight
# initialize a matrix to store the permutation data;
# each column will hold one permutation sample
PermSamples <- matrix(0, nrow = n, ncol = P)
# Fill column i with a random reordering of the observed weights: drawing
# all n values without replacement shuffles them, so a weight is no longer
# tied to the diet label stored in the same row of d.
# seq_len(P) is used rather than 1:P so the loop is skipped when P is 0.
for (i in seq_len(P)) {
  PermSamples[, i] <- sample(variable, size = n, replace = FALSE)
}
# take a quick look at the first 5 columns of PermSamples
# (or at every column when fewer than 5 samples were drawn)
PermSamples[, seq_len(min(5, P)), drop = FALSE]
# initialize vectors to store all of the permutation test statistics
Perm.test.stat1 <- Perm.test.stat2 <- numeric(P)
# Row masks for the two diets. They are the same for every permutation, so
# they are computed once here instead of on every pass through the loop.
casein.rows <- d$feed == "casein"
meatmeal.rows <- d$feed == "meatmeal"
# For each permutation sample, treat the shuffled weights as if they belonged
# to the diet recorded in the same row of d, and recompute both statistics.
# seq_len(P) is used rather than 1:P so the loop is skipped when P is 0.
for (i in seq_len(P)) {
  casein.perm <- PermSamples[casein.rows, i]
  meatmeal.perm <- PermSamples[meatmeal.rows, i]
  # permutation version of test.stat1: absolute difference in means
  Perm.test.stat1[i] <- abs(mean(casein.perm) - mean(meatmeal.perm))
  # permutation version of test.stat2: absolute difference in medians
  Perm.test.stat2[i] <- abs(median(casein.perm) - median(meatmeal.perm))
}
# Recap: the two TEST STATS computed from the observed data
test.stat1
test.stat2
# the first 15 permutation TEST STATS for statistics 1 and 2,
# rounded to one decimal place
round(Perm.test.stat1[seq_len(15)], digits = 1)
round(Perm.test.stat2[seq_len(15)], digits = 1)
# Permutation p-value.
# A comparison gives one TRUE/FALSE answer per permutation;
# these are the answers for the first 15
Perm.test.stat1[seq_len(15)] >= test.stat1
# mean() of a logical vector counts TRUE as 1 and FALSE as 0, so it returns
# the proportion of TRUEs -- here for those first 15 only
mean(Perm.test.stat1[seq_len(15)] >= test.stat1)
# p-value for test statistic 1, using every permutation sample
mean(Perm.test.stat1 >= test.stat1)
# p-value for option 2 of the test statistic (abs diff in medians)
mean(Perm.test.stat2 >= test.stat2)