# Task 1
library(ggplot2)
library(stringr)
# ---- Data overview ----
# Print a statistical summary and the structure of `data`.
# NOTE(review): `data` is not assigned anywhere in the visible script and is
# presumably loaded before this point. If it is not, the name resolves to the
# utils::data() function, so fail early with a clear message instead of
# silently summarising a function.
if (!is.data.frame(data)) {
  stop(
    "`data` must be a data frame; load the dataset before running this script.",
    call. = FALSE
  )
}
cat("Data summary:\n")
print(summary(data))
cat("\nData structure:\n")
# str() prints as a side effect and returns NULL invisibly; wrapping it in
# print() would emit a stray "NULL" line after the structure.
str(data)
# Coerce the `purchase_date` column to Date class, but only when the column
# is actually present in `data`.
if (is.element("purchase_date", names(data))) {
  data[["purchase_date"]] <- as.Date(data[["purchase_date"]])
}
# ---- Product features ----
# Derive `pack_size` and `brand_name` from `product_name` when that column is
# present in `data`.
if ("product_name" %in% names(data)) {
  # First run of digits in the product name, converted to a number. Names
  # without any digits yield NA.
  # NOTE(review): assumes the first digit run is the pack size -- confirm
  # against the actual product naming scheme.
  data$pack_size <- as.numeric(str_extract(data$product_name, "\\d+"))
  # Leading run of characters up to the first underscore or space.
  # NOTE(review): presumably the brand is the first token of the name.
  data$brand_name <- str_extract(data$product_name, "^[^_ ]+")
}
# ---- Segment metrics ----
# Summarise purchasing behaviour for every LIFESTAGE x PREMIUM_CUSTOMER
# combination and print the result, ordered by average spending (highest
# first). `metrics` is only created when every required column is present.
required_segments <- c("LIFESTAGE", "PREMIUM_CUSTOMER")
# Columns aggregated below. `pack_size` is a derived column and only exists
# when `product_name` was available to extract it from.
required_measures <- c("spending", "quantity", "pack_size")
missing_measures <- setdiff(required_measures, names(data))

if (!all(required_segments %in% names(data))) {
  cat("Warning: Required segmentation columns not found in data\n")
} else if (length(missing_measures) > 0) {
  cat(
    "Warning: Required measure columns not found in data: ",
    paste(missing_measures, collapse = ", "),
    "\n",
    sep = ""
  )
} else {
  # The dplyr verbs are namespace-qualified because only ggplot2 and stringr
  # are attached by the library() calls in this script; `%>%` is re-exported
  # by stringr.
  metrics <- data %>%
    dplyr::group_by(LIFESTAGE, PREMIUM_CUSTOMER) %>%
    dplyr::summarise(
      avg_spending = mean(spending, na.rm = TRUE),
      # Number of rows in the segment.
      purchase_frequency = dplyr::n(),
      total_quantity = sum(quantity, na.rm = TRUE),
      avg_pack_size = mean(pack_size, na.rm = TRUE),
      # Return an ungrouped result so later steps are not affected by a
      # leftover LIFESTAGE grouping.
      .groups = "drop"
    ) %>%
    dplyr::arrange(dplyr::desc(avg_spending))
  print(metrics)
}
# ---- Insights ----
# Print the interpretation prompts, one bullet per line. Each sentence is a
# single string so that no line break ends up in the middle of a bullet.
insight_notes <- c(
  "Identify customer segments with highest spending and purchase frequency.",
  paste(
    "Observe if premium customers spend more and if families with children",
    "show different patterns."
  ),
  "Look at preferred pack sizes and brand preferences across segments."
)
cat("\nInsights:\n")
cat(paste0("- ", insight_notes, "\n"), sep = "")
# ---- Export ----
# Write the segment metrics to CSV. `metrics` is only created when the
# required columns were found, so skip the export otherwise instead of
# failing with "object 'metrics' not found".
if (is.data.frame(get0("metrics", ifnotfound = NULL))) {
  write.csv(
    metrics,
    "chip_customer_segment_metrics.csv",
    row.names = FALSE
  )
} else {
  cat("Warning: No segment metrics available; CSV export skipped\n")
}