0% found this document useful (0 votes)
37 views

RG Inference Code

This document contains code for analyzing a dataset on restaurant marketing campaigns. It loads and cleans the data, checks for missing values and duplicates. It then generates summary statistics, histograms, and bar plots to visualize metrics like pageviews, calls and reservations across treatment groups and restaurant types. Statistical tests like ANOVA and normality tests are also performed to analyze relationships between variables and check distributions. Results are exported to text and image files for reporting.

Uploaded by

Brokin Hart
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
37 views

RG Inference Code

This document contains code for analyzing a dataset on restaurant marketing campaigns. It loads and cleans the data, checks for missing values and duplicates. It then generates summary statistics, histograms, and bar plots to visualize metrics like pageviews, calls and reservations across treatment groups and restaurant types. Statistical tests like ANOVA and normality tests are also performed to analyze relationships between variables and check distributions. Results are exported to text and image files for reporting.

Uploaded by

Brokin Hart
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 3

# Author: Shovan Chowdhury

# Reading the data set
# NOTE: the absolute Windows path is machine-specific; adjust it before
# running this script on another machine.
d <- read.csv(
  "D:/business analytics/eMDP_BA/EDA_session/RG case/RG.csv",
  header = TRUE
)

# attach() places the columns of `d` on the search path; the remainder of the
# script refers to them by bare name (e.g. `pageviews`, `treatment`), so it is
# kept here even though attach() is generally discouraged.
attach(d)
names(d)
## Check the type of all the columns of the data frame.
## (The original called str() on `appcab`, an object this script never creates.)
str(d)

##---------------Data Cleaning and Data Validation-------------------###

## Look for duplicate values in the business_id column, as it is the
## primary key ##

sum(duplicated(d)) ## number of fully duplicated rows in the data frame
sum(duplicated(business_id)) ## author's note: no duplicate entries found in the primary key

## Look for NA values and missing/blank values in all the columns

colSums(is.na(d)) # NA count per column (instead of dumping the full logical matrix)
sum(is.na(d))     # total NA count across the whole data frame

# Keep the complete-case rows in a separate object rather than printing them.
# `d` itself is left untouched because the columns attached earlier would not
# pick up a reassignment of `d`.
d_complete <- na.omit(d)
nrow(d) - nrow(d_complete) # number of rows that contain at least one NA

## Checking for blank values in all the columns of the data frame

# Count of "" entries per column; NA cells are ignored.
# Author's note: there are none.
vapply(d, function(x) sum(x == "", na.rm = TRUE), integer(1))

#------------exporting results in a text file----------

# Write three pageviews summaries to a text file:
#   1. treatment == 0
#   2. restaurant_type == "chain"
#   3. treatment == 2 and restaurant_type == "chain"
# capture.output() prints each visible result into `file` and restores normal
# console output itself, so the summaries are written even when the script is
# run through source() (where top-level values are not auto-printed) and the
# diversion is not left open if one of the expressions fails.
# NOTE(review): this path uses "EDA_session3" while the input CSV is read from
# "EDA_session" -- confirm the intended output folder.
capture.output(
  summary(pageviews[treatment == 0]),
  summary(pageviews[restaurant_type == "chain"]),
  summary(pageviews[treatment == 2 & restaurant_type == "chain"]),
  file = "D:/business analytics/eMDP_BA/EDA_session3/RG case/output.txt"
)

# Save histograms of pageviews for treatment codes 0, 1 and 2 side by side
# in a single JPEG file.
jpeg("D:/business analytics/eMDP_BA/EDA_session3/RG case/graph.jpg")

par(mfrow = c(1, 3)) # one row, three panels; applies to the JPEG device only
hist(pageviews[treatment == 0])
hist(pageviews[treatment == 1])
hist(pageviews[treatment == 2])

# Close only the JPEG device opened above. graphics.off() would also shut
# every other open graphics device (e.g. an interactive plot window).
invisible(dev.off())

#---------------- Bar Plots with respect to treatments-----------------

# Mean of each outcome per treatment code.
tab_1 <- tapply(pageviews, treatment, mean)
tab_2 <- tapply(calls, treatment, mean)
tab_3 <- tapply(reservations, treatment, mean)

# Bar labels and colours shared by the three plots.
# NOTE(review): the labels assume exactly three treatment codes that sort as
# control, treatment 1, treatment 2 -- confirm against the data.
trt_labels <- c("Control", "Treatment 1", "Treatment 2")
trt_colours <- c("red", "blue", "green")

#barplot(tab_1,col=c("red","blue","green"),xlab="Page Views")
# The axis and bar labels are written on a single line each: the original
# literals were split across lines, which embedded newlines in the labels.
barplot(tab_1, col = trt_colours, xlab = "Page Views", names.arg = trt_labels)
barplot(tab_2, col = trt_colours, xlab = "Calls", names.arg = trt_labels)
barplot(tab_3, col = trt_colours, xlab = "Reservations", names.arg = trt_labels)
# Bar Plots with respect to treatments and restaurant type
# Each table holds the mean outcome with one row per treatment code and one
# column per restaurant type.
tab_4 <- tapply(pageviews, list(treatment, restaurant_type), mean)
tab_5 <- tapply(calls, list(treatment, restaurant_type), mean)
tab_6 <- tapply(reservations, list(treatment, restaurant_type), mean)

# Same three colours repeated for each restaurant-type group of bars.
grouped_colours <- c("red", "blue", "green", "red", "blue", "green")

# beside = TRUE draws the treatment bars next to each other within each
# restaurant type. The xlab strings are kept on one line: the original
# literals were split across lines, which embedded newlines in the labels.
barplot(tab_4, beside = TRUE, col = grouped_colours, xlab = "Page Views")
barplot(tab_5, beside = TRUE, col = grouped_colours, xlab = "Calls")
barplot(tab_6, beside = TRUE, col = grouped_colours, xlab = "Reservations")

#-------confirmatory analysis---------------------

# ANOVA
# Treatment and restaurant type are converted to factors so that aov()
# treats them as categorical predictors.
TRT <- as.factor(treatment)
RT <- as.factor(restaurant_type)

# One-Way ANOVA Model: each outcome explained by treatment alone.

mod_1 <- aov(pageviews ~ TRT)
summary(mod_1)
#TukeyHSD(mod_1)

mod_2 <- aov(calls ~ TRT)
summary(mod_2)
#TukeyHSD(mod_2)

mod_3 <- aov(reservations ~ TRT)
summary(mod_3)
#TukeyHSD(mod_3)

# Two-Way ANOVA Model: treatment, restaurant type and their interaction
# (TRT * RT expands to TRT + RT + TRT:RT).

mod_4 <- aov(pageviews ~ TRT * RT)
summary(mod_4)
#TukeyHSD(mod_4)

mod_5 <- aov(calls ~ TRT * RT)
summary(mod_5)
#TukeyHSD(mod_5)

mod_6 <- aov(reservations ~ TRT * RT)
summary(mod_6)
#TukeyHSD(mod_6)

#-----binomial distribution----------

dbinom(4, size = 4, prob = 0.2) # P(X = 4) for X ~ Binomial(4, 0.2)

# P(X >= 2): dbinom() is vectorized, so the probabilities for 2, 3 and 4
# successes are summed directly instead of accumulating them in a loop.
s <- sum(dbinom(2:4, size = 4, prob = 0.2))

# can use CDF: P(X >= 2) = 1 - P(X <= 1)

1 - pbinom(1, size = 4, prob = 0.2)
#--------Poisson distribution---------

dpois(5, lambda = 3) # P(X = 5) for X ~ Poisson(3)

ppois(10, lambda = 3) # P(X <= 10)

#-------Normal/Gaussian distribution-----

pnorm(20, mean = 12, sd = 3.2, lower.tail = FALSE) # right tail area

pnorm(16, mean = 12, sd = 3.2, lower.tail = TRUE) # left tail area
qnorm(0.9, mean = 12, sd = 3.2) # inverse (to obtain quantile/90th percentile)

#-----Normality Check------------------

# Graphical check: normal Q-Q plot of reservations with a thick red
# reference line.
qqnorm(reservations)
qqline(reservations, col = "red", lwd = 3)

# Formal tests
# Shapiro-Wilk normality test. The author rates it the best test; the sample
# size should not exceed 5000.
shapiro.test(reservations)

# Anderson-Darling test, provided by the nortest package: once on all
# reservations, once on calls restricted to treatment 0 chain restaurants.

library(nortest)
ad.test(reservations)
ad.test(calls[treatment == 0 & restaurant_type == "chain"])

#------------mean test one sample------------

# One-sided test: is the mean of calls greater than 35?
t.test(calls, mu = 35, alternative = "greater")

# Two-sided test against the same hypothesised mean of 35.
t.test(calls, mu = 35)

#---------------two samples test----------------

# Two-sample test of calls against reservations: null difference of 0 and a
# 95% confidence interval, written out explicitly.
# NOTE(review): both vectors appear to come from the same rows of the data;
# confirm whether a paired test was intended.
t.test(calls, reservations, conf.level = 0.95, mu = 0)

You might also like