R Functions
R Functions
without having to use drugs. The researcher thinks that having acupuncture in the lower back
might reduce back pain. To investigate this, the researcher recruits 25 participants to their
study. At the beginning of the study, the researcher asks the participants to rate their back pain
on a scale of 1 to 10, with 10 indicating the greatest level of pain. After 4 weeks of twice weekly
acupuncture, the participants are asked again to indicate their level of back pain on a scale of 1
to 10, with 10 indicating the greatest level of pain. The researcher wishes to understand whether
the participants' pain levels changed after they had undergone the acupuncture, so a Wilcoxon
signed-rank test is run.
R Functions
# Show the help index listing the data sets bundled in the "datasets" package.
library(help = "datasets")
# Load the `cars` data set into the workspace.
data(cars)
# Open `cars` in the data viewer (meant for interactive sessions).
View(cars)
# Print a compact description of the structure of `cars`.
str(cars)
# To combine these two sets they must have the same number of columns, so we need to
# add a Survived column to the My_Test set. Indexing is My_Test[row, column].
# Histogram of the Age column of the training set.
hist(My_Train$Age)

# The same kind of overview with ggplot2, which gives a richer, more
# informative chart: bar counts per Sex, filled by Survived, with one panel
# per Pclass value.
# Columns are referenced by bare name inside aes() so that ggplot2 looks them
# up in `data.combined`; writing `data.combined$Sex` there is discouraged by
# ggplot2. The x-axis label now names the variable that is actually plotted.
ggplot(data.combined, aes(x = Sex, fill = Survived)) +
  geom_bar(width = 0.5) +
  facet_wrap(~Pclass) +
  ggtitle("Pclass") +
  xlab("Sex") +
  ylab("Total count") +
  labs(fill = "Survived")
# Convert Survived to a factor so it is treated as a categorical fill variable.
My_Train$Survived <- as.factor(My_Train$Survived)

# Bar counts per Sex in the training set, filled by Survived.
# Columns are referenced by bare name inside aes() (ggplot2 discourages
# `My_Train$Sex` there), and the x-axis label matches the plotted variable.
ggplot(My_Train, aes(x = Sex, fill = Survived)) +
  geom_bar(width = 0.5) +
  xlab("Sex") +
  ylab("Total count") +
  labs(fill = "Survived")
# Read the training data that has an added Title column.
TrainTitle <- read.csv("trainTitle.csv", header = TRUE)

# Title and Survived are categorical, so store them as factors.
TrainTitle$Title <- as.factor(TrainTitle$Title)
TrainTitle$Survived <- as.factor(TrainTitle$Survived)

# Bar counts per Age value, filled by Survived, with one panel for each
# Pclass/Sex combination. Columns are referenced by bare name inside aes()
# (ggplot2 discourages `TrainTitle$Age` there).
# NOTE(review): the x-axis label previously read "Title" although Age is on
# the x axis; it now says "Age". If Title was the intended variable, change
# `x = Age` to `x = Title` and restore the label.
ggplot(TrainTitle, aes(x = Age, fill = Survived)) +
  geom_bar(width = 0.5) +
  facet_wrap(~ Pclass + Sex) +
  ggtitle("Pclass") +
  xlab("Age") +
  ylab("Total count") +
  labs(fill = "Survived")
# Normal density evaluated at every element of x; the standard-normal
# parameters (mean 0, sd 1) are the defaults, spelled out here for clarity.
y <- dnorm(x, mean = 0, sd = 1)
pnorm(q): cumulative normal probability for q (area under the normal curve to the left of q)
pnorm(1.96) is approximately 0.975
# A vector containing one missing value. The surrounding parentheses make R
# print the value right after assigning it (the closing one was missing).
(x <- c(1, 2, NA, 3))
mean(x)               # returns NA: the missing value propagates
mean(x, na.rm = TRUE) # returns 2: NA is removed before averaging
# Descriptive statistics for the Age column of My_Train_NA_Omit
# (presumably the training set with NA rows removed -- confirm where it is built).
mean(My_Train_NA_Omit$Age)
sd(My_Train_NA_Omit$Age)
# range() returns the minimum and maximum as a length-2 vector.
range(My_Train_NA_Omit$Age)
# Save the data frame as a CSV file in the current working directory.
write.csv(My_Train_NA_Omit, "My_Train_NA_Omit.csv")
# Box plots of Age, one box per level of Sex.
boxplot(My_Train_NA_Omit$Age~My_Train_NA_Omit$Sex)
Anova
# One-way ANOVA (completely randomized design): model the response y by the
# single factor A, with both columns taken from mydataframe.
fit <- aov(formula = y ~ A, data = mydataframe)
y is the response (numeric values) and A is the grouping factor (categorical).
# Three samples of unequal size (11, 9 and 6 observations).
Group1 <- c(1, 3, 4, 6, 7, 8, 6, 4, 5, 3, 5)
Group2 <- c(3, 6, 3, 4, 5, 6, 7, 5, 6)
Group3 <- c(4, 5, 6, 7, 8, 9)

# Build a wide data frame with one column per group. cbind() would silently
# recycle the shorter vectors to 11 values and feed duplicated observations
# into the ANOVA, so the shorter groups are padded with NA instead
# (indexing past the end of a vector yields NA).
groups <- list(Group1 = Group1, Group2 = Group2, Group3 = Group3)
n_max <- max(lengths(groups))
combines_group <- data.frame(lapply(groups, function(g) g[seq_len(n_max)]))

# Reshape to long format (columns `values` and `ind`) and drop the padding
# rows so that only the 26 real observations remain.
S <- na.omit(stack(combines_group))

# One-way ANOVA of the values by group, followed by the ANOVA table.
ANV <- aov(values ~ ind, data = S)
summary(ANV)
Correlation
# Correlation coefficient between two numeric vectors x and y.
# cor() has no formula interface, so the vectors are passed as two arguments.
cor(x, y)
# Test whether that correlation differs from zero.
# (The formula form of cor.test() is written `~ x + y`, not `x ~ y`.)
cor.test(x, y)
# Pearson correlation matrix of the Age and Fare columns of My_Test, computed
# only on rows where both values are present (use = "complete.obs").
# The leading console prompt ">" was removed so the line parses as R code.
x <- data.frame(My_Test$Age, My_Test$Fare)
cor(x, use = "complete.obs", method = "pearson")
Multiple Regression
# Multiple linear regression template: regress y on the predictors x1, x2
# and x3, all taken from mydata.
fit <- lm(formula = y ~ x1 + x2 + x3, data = mydata)
# Print coefficients, standard errors and fit statistics.
summary(fit)