0% found this document useful (0 votes)
17 views4 pages

HW 1 Math 380 R Code

This document contains R code that performs various exploratory data analyses and statistical summaries on pizza delivery data. The code loads CSV data, calculates summary statistics like means, medians, and quartiles, creates plots like histograms, bar plots, box plots, and QQ plots to visualize distributions and outliers, and groups and summarizes delivery time data. Overall, the code conducts a comprehensive exploratory analysis to understand patterns in pizza delivery times and temperatures.

Uploaded by

Tyrome Madkins
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
17 views4 pages

HW 1 Math 380 R Code

This document contains R code that performs various exploratory data analyses and statistical summaries on pizza delivery data. The code loads CSV data, calculates summary statistics like means, medians, and quartiles, creates plots like histograms, bar plots, box plots, and QQ plots to visualize distributions and outliers, and groups and summarizes delivery time data. Overall, the code conducts a comprehensive exploratory analysis to understand patterns in pizza delivery times and temperatures.

Uploaded by

Tyrome Madkins
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

# 2.6
# (a) Histogram of pizza temperature with a reference line at 65 degrees

# Folder that holds the data file. An absolute path is passed to read.csv()
# instead of calling setwd(), so the session's working directory is untouched.
data_dir <- "C:/Users/tyrom/OneDrive/Documents"

# Read the CSV data
pizza_data <- read.csv(file.path(data_dir, "pizza_delivery.csv"))

# Extract the temperature data
temperature <- pizza_data$temperature

# Create a histogram on the density scale (freq = FALSE)
hist(temperature, col = "yellow", main = "Pizza Temperature", freq = FALSE)

# Add a dashed vertical line at 65 degrees. abline(v = ) spans the full plot
# region, so the line does not depend on how tall the density axis is.
abline(v = 65, lty = 2, col = "blue")

# 2.6
# (b) Histogram of pizza temperature drawn with ggplot2

# Folder that holds the data file. An absolute path is passed to read.csv()
# instead of calling setwd(), so the session's working directory is untouched.
data_dir <- "C:/Users/tyrom/OneDrive/Documents"

# Read the CSV data
pizzadelivery <- read.csv(file.path(data_dir, "pizza_delivery.csv"))

# Load the ggplot2 library
library(ggplot2)

# Create a histogram plot with bins of width 2.5. The y-axis title, range and
# tick marks are all set in the single scale_y_continuous() call.
ggplot(data = pizzadelivery, mapping = aes(x = temperature)) +
  geom_histogram(binwidth = 2.5, col = "darkgrey") +
  scale_y_continuous(
    "Frequency",
    limits = c(0, 200),
    breaks = seq(0, 200, by = 10)
  )

# 2.6
# (c) Bar plot of the number of deliveries made by each driver

# Folder that holds the data file. An absolute path is passed to read.csv()
# instead of calling setwd(), so the session's working directory is untouched.
data_dir <- "C:/Users/tyrom/OneDrive/Documents"

# Read the CSV data
pizza_data <- read.csv(file.path(data_dir, "pizza_delivery.csv"))

# Extract the driver variable
driver <- pizza_data$driver

# table() counts the rows per driver; barplot() draws one bar per count
barplot(
  table(driver),
  xlab = "Delivery Driver",
  ylab = "Number of Deliveries",
  col = "green"
)

# (d) Stacked bar chart of deliveries per driver, filled by day of the week

# Folder that holds the data file. An absolute path is passed to read.csv()
# instead of calling setwd(), so the session's working directory is untouched.
data_dir <- "C:/Users/tyrom/OneDrive/Documents"

# Read the CSV file
pizzadelivery <- read.csv(file.path(data_dir, "pizza_delivery.csv"))

# Load the ggplot2 library
library(ggplot2)

# Fill colour for each day of the week, from darkest (Monday) to lightest
# (Sunday). The names are matched against the levels of "day" below.
gray_colors <- c(
  "Monday" = "gray10",
  "Tuesday" = "gray20",
  "Wednesday" = "gray30",
  "Thursday" = "gray40",
  "Friday" = "gray50",
  "Saturday" = "gray60",
  "Sunday" = "gray70"
)

# Convert "day" to a factor whose levels run Sunday, Saturday, ..., Monday,
# i.e. the reverse of the order in which gray_colors is written.
pizzadelivery$day <- factor(pizzadelivery$day, levels = rev(names(gray_colors)))

# Create the bar chart. ggplot() + geom_bar() replaces the deprecated qplot()
# call and produces the same count-per-driver bars split by day.
ggplot(data = pizzadelivery, mapping = aes(x = driver, fill = day)) +
  geom_bar() +
  labs(x = "Driver", y = "Number of Deliveries") +
  scale_fill_manual(values = gray_colors) +
  theme_bw()

# 3.1
# Data sets
distance <- c(12.5, 29.9, 14.8, 18.7, 7.6, 16.2, 16.5, 27.4, 12.1, 17.5)
altitude <- c(342, 1245, 502, 555, 398, 670, 796, 912, 238, 466)

# (a) Calculate the arithmetic mean and median for both distance and altitude.
mean_distance <- mean(distance)
median_distance <- median(distance)
mean_altitude <- mean(altitude)
median_altitude <- median(altitude)

# Print results
cat("Distance Mean:", mean_distance, "\n")
cat("Distance Median:", median_distance, "\n")
cat("Altitude Mean:", mean_altitude, "\n")
cat("Altitude Median:", median_altitude, "\n")

# (b) Determine the first and third quartiles for both distance and altitude.
# type = 2 selects the inverse empirical distribution function with averaging
# at discontinuities.
quartiles_distance <- quantile(distance, probs = c(0.25, 0.75), type = 2)
quartiles_altitude <- quantile(altitude, probs = c(0.25, 0.75), type = 2)

# Print results
cat(
  "Distance Quartiles (Q1, Q3):", quartiles_distance[1], ",",
  quartiles_distance[2], "\n"
)
cat(
  "Altitude Quartiles (Q1, Q3):", quartiles_altitude[1], ",",
  quartiles_altitude[2], "\n"
)

# (c) Calculate the interquartile range (IQR), absolute median deviation, and
# standard deviation for both variables.
# The absolute median deviation is the mean absolute distance of the
# observations from their median. stats::mad() is deliberately not used: it
# returns the median of those distances multiplied by 1.4826 by default, which
# is a different statistic.
iqr_distance <- diff(quartiles_distance)
amd_distance <- mean(abs(distance - median(distance)))
sd_distance <- sd(distance)

iqr_altitude <- diff(quartiles_altitude)
amd_altitude <- mean(abs(altitude - median(altitude)))
sd_altitude <- sd(altitude)

# Print results
cat("Distance IQR:", iqr_distance, "\n")
cat("Altitude IQR:", iqr_altitude, "\n")
cat("Distance Absolute Median Deviation:", amd_distance, "\n")
cat("Altitude Absolute Median Deviation:", amd_altitude, "\n")
cat("Distance Standard Deviation:", sd_distance, "\n")
cat("Altitude Standard Deviation:", sd_altitude, "\n")

# (e) Draw and interpret the box plot for both distance and altitude.
boxplot(distance, main = "Box Plot - Distance", ylab = "Distance")
boxplot(altitude, main = "Box Plot - Altitude", ylab = "Altitude")

# (f) Summarize grouped data for distance and calculate the weighted arithmetic
# mean and median.
# Class limits: Short = [5, 15], Moderate = (15, 20], Long = (20, 30].
class_lower <- c(5, 15, 20)
class_upper <- c(15, 20, 30)
grouped_data <- data.frame(
  Distance_Group = c("Short", "Moderate", "Long"),
  Frequency = c(
    sum(distance >= 5 & distance <= 15),
    sum(distance > 15 & distance <= 20),
    sum(distance > 20 & distance <= 30)
  )
)

# Calculate the weighted mean for distance: class midpoints weighted by the
# class frequencies.
midpoints <- (class_lower + class_upper) / 2
total_frequency <- sum(grouped_data$Frequency)
weighted_mean_distance <- sum(midpoints * grouped_data$Frequency) / total_frequency

# Calculate the weighted median for distance (assuming equal distribution within
# each class): find the first class whose cumulative frequency reaches half of
# the observations, then interpolate linearly inside that class.
cumulative_frequency <- cumsum(grouped_data$Frequency)
median_class <- which(cumulative_frequency >= total_frequency / 2)[1]
frequency_below <- if (median_class > 1) {
  cumulative_frequency[median_class - 1]
} else {
  0
}
weighted_median_distance <- class_lower[median_class] +
  (total_frequency / 2 - frequency_below) /
    grouped_data$Frequency[median_class] *
    (class_upper[median_class] - class_lower[median_class])

# Print results
cat("Weighted Mean for Distance:", weighted_mean_distance, "\n")
cat("Weighted Median for Distance:", weighted_median_distance, "\n")

# 3.10
# Folder that holds the data file. An absolute path is passed to read.csv()
# instead of calling setwd(), so the session's working directory is untouched.
data_dir <- "C:/Users/tyrom/OneDrive/Documents"

# Read the CSV data
pizzadelivery <- read.csv(file.path(data_dir, "pizza_delivery.csv"))

# (a) Calculate summary statistics for every column of the data frame
summary(pizzadelivery)

# (b) Calculate and interpret the 99% quantile for delivery time and temperature
quantile(pizzadelivery$time, probs = 0.99)
quantile(pizzadelivery$temperature, probs = 0.99)

# (c) Function to calculate absolute mean deviation
#
# Returns the mean absolute distance of the values in `data` from their
# arithmetic mean.
#   data  : numeric vector of observations.
#   na.rm : if TRUE, missing values are dropped before the calculation. The
#           default FALSE keeps the previous behaviour, where any NA in `data`
#           makes the result NA.
absolute_mean_deviation <- function(data, na.rm = FALSE) {
  if (na.rm) {
    data <- data[!is.na(data)]
  }
  mean(abs(data - mean(data)))
}

# Apply absolute_mean_deviation() to the temperature column and show the value
abs_mean_deviation_temperature <- absolute_mean_deviation(
  pizzadelivery$temperature
)
abs_mean_deviation_temperature

# (d) Standardise the delivery time with scale(), then show the mean and the
# variance of the standardised values
scaled_delivery_time <- scale(pizzadelivery$time)
mean(scaled_delivery_time)
var(scaled_delivery_time)

# (e) One box plot each for delivery time and temperature; outline = FALSE
# leaves the extreme values out of the drawing
invisible(lapply(
  list(pizzadelivery$time, pizzadelivery$temperature),
  function(values) boxplot(values, outline = FALSE)
))

# (f) Group the delivery time into steps of 10 min and average within each step.
# The dplyr package was installed for this part: earlier attempts produced an
# "NAs by coercion" message, which seems to come from either a conversion issue
# or missing data values.

# Load the dplyr library
library(dplyr)

# Create a new variable for delivery time in steps of 10 min.
# na.rm = TRUE keeps the break sequence defined even when "time" contains
# missing values, matching the na.rm = TRUE used for the mean below.
pizzadelivery$TimeInSteps <- cut(
  pizzadelivery$time,
  breaks = seq(0, max(pizzadelivery$time, na.rm = TRUE) + 10, by = 10)
)

# Calculate the mean delivery time per 10-minute step using dplyr
result <- pizzadelivery %>%
  group_by(TimeInSteps) %>%
  summarize(MeanDeliveryTime = mean(time, na.rm = TRUE))

# Print the result
result

# (g) Normal QQ-plots for delivery time and temperature: for each variable,
# draw the sample quantiles with qqnorm() and overlay the reference line with
# qqline()
invisible(lapply(
  list(pizzadelivery$time, pizzadelivery$temperature),
  function(sample_values) {
    qqnorm(sample_values)
    qqline(sample_values)
  }
))

You might also like