# R Code
# Data Visualization - 01
########################################################################
# NOTE(review): the original script ran rm(list = ls()) at this point,
# which wipes the workspace -- including `data` -- immediately before
# `data` is used below. It is left disabled; start from a fresh R session
# instead of clearing the workspace from inside the script.
# rm(list = ls())

# ggplot() and geom_freqpoly() come from ggplot2; attach it before use.
library(ggplot2)

# Data structure
# NOTE(review): `data` is assumed to be a data frame containing numeric
# math_score and writing_score columns, loaded before this point (the
# loading step is not part of this script) -- TODO confirm.
str(data)
########################################################################
# Quantitative data
########################################################################
# Print the raw math scores
data$math_score

# Histogram of the math scores (base graphics)
hist(data$math_score,
     main = "Histogram of Math Score",
     xlab = "Math Score", ylab = "Frequency")

# Frequency polygon of the math scores (ggplot2), 10 bins
help(ggplot)
# Refer to the column by its bare name inside aes(): ggplot2 looks it up
# in `data`; writing data$math_score inside aes() bypasses that lookup.
ggplot(data, aes(x = math_score)) +
  geom_freqpoly(bins = 10)

# Line plot
# Writing score against math score; the axis labels name the variables
# that are actually plotted.
plot(data$math_score, data$writing_score, type = "o", main = "Line plot",
     xlab = "Math Score", ylab = "Writing Score", pch = 19)
help(grid)
########################################################################
# Qualitative data
########################################################################

# Pie chart of `Group`
help(pie)
pie(Group, main = "Pie Chart")

# Bar plot, step 1: ggplot() call with data only (no layers added),
# followed by a frequency table of `Race_Group`
help("ggplot")
ggplot(data = Group)
table(Race_Group)

# Bar plot, step 2: one bar per group_label, bar height read directly
# from the `count` column (stat = "identity")
ggplot(group_count, aes(x = group_label, y = count)) +
  geom_bar(stat = "identity")
help("geom_bar")

# Pareto chart (pareto.chart() is provided by the qcc package)
# install.packages("qcc")
library(qcc)
pareto.chart(count)
table(x)

# Multiple barplot
# position_dodge() draws the bars of each `gen` level side by side
ggplot(df, aes(x = grp, y = fre, fill = gen)) +
  geom_bar(stat = "identity", position = position_dodge())
########################################################################
# Height and weight columns pulled out of `data`
h <- data$height
w <- data$weight

# Scatterplot of the `women` data set with a 10 x 10 grid drawn on top
plot(women,
     main = "women data: American women aged 30-39",
     xlab = "Height (in)", ylab = "Weight (lb)")
grid(nx = 10, ny = 10)

# Line plot of w against h: points joined by lines (type "o"),
# drawn with plotting symbol 19
plot(h, w,
     type = "o", pch = 19,
     main = "Line plot", xlab = "Height", ylab = "weight")
help(grid)
########################################################################
########################################################################
## Subsetting data
# NOTE(review): `data` is assumed to be a data frame with gender,
# math_score, reading_score and writing_score columns and at least 55
# rows / 5 columns -- TODO confirm against the loading step.

# Extract data between entry 30 and entry 55 for the first 3 variables
data_subset1 <- data[30:55, 1:3]

# Extract data between entry 30 and entry 55 for the first, second, and
# fifth variables
data_subset1_1 <- data[30:55, c(1, 2, 5)]

# Extract data of the male students who scored more than 75 for
# mathematics:
data_subset3 <- subset(data, gender == "male" & math_score > 75)

# Extract data of the male students or students who scored more than 75
# for mathematics:
data_subset3_1 <- subset(data, gender == "male" | math_score > 75)
# The "or" condition keeps a row in any of these scenarios:
# scenario 1: gender == "male"
# scenario 2: math_score > 75
# scenario 3: gender == "male" and math_score > 75

# Extract scores of the male students who scored more than 75 for
# mathematics:
data_subset4 <- subset(data, gender == "male" & math_score > 75,
                       select = c(math_score, reading_score,
                                  writing_score))

# Alternative method: filter the rows first, then pick the score columns
data_subset3 <- subset(data, gender == "male" & math_score > 75)
# data_subset4_1 <- subset(data_subset3, select = c(math_score,
#                          reading_score, writing_score))
# Select the score columns by name rather than by position (was 2:4) so
# the result matches data_subset4 regardless of the column order.
data_subset4_2 <- data_subset3[, c("math_score", "reading_score",
                                   "writing_score")]
########################################################################
# Descriptive Statistics 02
########################################################################
# Sample data. Defined at the top of the section so that every statistic
# below is computed on this numeric vector (the original defined it only
# just before the histogram, after it had already been used).
data <- c(4, 10, 5, 8, 7.5, 8, 5, 16.5, 1, 7.8, 8, 10, 11, 18, 15, 9, 14,
          23, 21, 28)

# Mean
help(mean)
mean(data)
Mean_data <- mean(data)

# Median
help(median)
median(data)
med <- median(data)
MED <- median(data)

# Mode
# We use frequency table: the mode is the value with the highest count
table(data)

# Range
max(data) # Maximum
min(data) # Minimum
# Calculating range
Range <- max(data) - min(data)

# Standard deviation
help(sd)
sd(data)

# Variance
var(data)

# Coefficient of Variation
# Coefficient of Variation = std dev/mean*100
CV <- sd(data) / mean(data) * 100
CV

# Five-number summary
# summary() reports min, quartiles, median and max, plus the mean
summary(data)

# Boxplot
help("boxplot")
boxplot(data)

# Histogram
hist(data)
help(hist)

# Density plot
density(data)
plot(density(data))

# Skewness and kurtosis
# NOTE(review): skewness() and kurtosis() are not part of base R and no
# package providing them is attached in this script; presumably a package
# such as moments or e1071 is expected to be loaded first -- TODO confirm
# which one (their kurtosis definitions differ).
help(skewness)
skewness(data)
kurtosis(data)

# Create a vector.
# The last element is NA, so summaries of x need na.rm = TRUE to return
# a non-missing result.
x <- c(12, 7, 3, 4.2, 18, 2, 54, -21, 8, -5, NA)
########################################################################
## Perform a descriptive analysis for the "iris" dataset and interpret
## your results.
# iris: https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html
# Open the documentation of the iris data set
help(iris)
########################################################################
# Poisson Distribution 04
########################################################################
# Example:
# RDA investigated that there are twelve cars crossing a bridge per
# minute on average.
# Let X be the number of cars crossing in one minute, X ~ Poisson(12).
help(dpois)
# P(X >= 3)
# ppois(q, lambda, lower.tail = FALSE) returns P(X > q). For a count
# variable P(X >= 3) = P(X > 2), so q must be 2; q = 3 would give
# P(X >= 4) instead.
P_X_GEQ_3 <- ppois(2, lambda = 12, lower.tail = FALSE) # upper tail
P_X_GEQ_3
sprintf("P(X >= 3) = %s", round(P_X_GEQ_3, digits = 4))
# Counts 0, 1, ..., 20
x <- 0:20
########################################################################
# Extra: Random generation for a Poisson distribution with parameter
# lambda(=mu).
# rpois(n, lambda)
########################################################################
# Exercise: