Hierar Varam
Hierar Varam
24CSEG034
2025-04-04
# Load Required Libraries
library(MASS)
library(dplyr)
##
## Attaching package: 'dplyr'
library(cluster)
library(factoextra)
library(ggplot2)
# Quick structural overview of the dataset before any analysis.
# `df` is not assigned anywhere in the visible script. If the loading step is
# missing, the name resolves to stats::df (the F-distribution density
# function) and the calls below would run on a function instead of the data,
# so stop early with a clear message.
if (!is.data.frame(df)) {
  stop(
    "`df` must be a data frame; load the dataset before this step.",
    call. = FALSE
  )
}
# Last rows and per-column summary statistics.
tail(df)
summary(df)
## op_area
## Min. :0.900
## 1st Qu.:3.500
## Median :3.900
## Mean :3.986
## 3rd Qu.:4.400
## Max. :7.000
# Column types, then the number of missing values in each column.
str(df)
print(colSums(is.na(df)))
# Check outliers
# Flag rows whose `op_area` lies outside [lower_bound, upper_bound].
# The bounds are not assigned anywhere in the visible script, so fall back to
# Tukey's fences (quartiles -/+ 1.5 * IQR) for whichever bound an earlier step
# has not already defined. Bounds defined upstream are left untouched.
# NOTE(review): confirm 1.5 * IQR is the rule the analysis intended.
op_area_quartiles <- quantile(
  df$op_area,
  probs = c(0.25, 0.75),
  na.rm = TRUE,
  names = FALSE
)
fence_width <- 1.5 * diff(op_area_quartiles)
if (!exists("lower_bound")) {
  lower_bound <- op_area_quartiles[1] - fence_width
}
if (!exists("upper_bound")) {
  upper_bound <- op_area_quartiles[2] + fence_width
}
outliers <- df %>%
  filter(op_area < lower_bound | op_area > upper_bound)
cat("Number of outliers in 'op_area':", nrow(outliers), "\n")
# Histograms
# Distribution of `op_area`. `breaks = 30` is a suggested bin count that
# hist() may adjust to pretty breakpoints.
# The title is kept on one source line so it does not contain a line break.
hist(
  df$op_area,
  breaks = 30,
  col = "blue",
  main = "Distribution of Operational Area",
  xlab = "Operational Area"
)
# Boxplots
# One boxplot per numeric column to eyeball spread and extreme values.
# The first title is kept on one source line so it does not contain a
# line break.
boxplot(
  df$op_area,
  col = "green",
  main = "Boxplot of Operational Area",
  xlab = "Operational Area"
)
# NOTE(review): center_id and city_code look like identifier columns; confirm
# that a boxplot of their numeric values is meaningful for this analysis.
boxplot(
  df$center_id,
  col = "blue",
  main = "Boxplot of Center ID",
  xlab = "Center ID"
)
boxplot(
  df$city_code,
  col = "yellow",
  main = "Boxplot of City Code",
  xlab = "City Code"
)
# Preview the first six rows of each demand segment.
# NOTE(review): both objects are created outside the visible script; confirm
# they exist before this point.
head(high_demand_centers, n = 6L)
head(low_demand_centers, n = 6L)
## center_id city_code region_code center_type op_area
## 1 11 679 56 TYPE_A 3.7
## 2 94 632 34 TYPE_C 3.6
## 3 139 693 34 TYPE_C 2.8
## 4 143 562 77 TYPE_B 3.8
## 5 101 699 85 TYPE_C 2.8
## 6 32 526 34 TYPE_A 3.8
library(MASS)
library(dplyr)
# Hierarchical Clustering
# Build the feature matrix from two columns, standardise each column so both
# contribute on the same scale, then run Ward (ward.D2) agglomerative
# clustering on the Euclidean distances between rows.
# NOTE(review): city_code looks like a nominal identifier; confirm that a
# Euclidean distance on it is intended.
cluster_data <- df %>%
  dplyr::select(op_area, city_code)
scaled_data <- scale(cluster_data, center = TRUE, scale = TRUE)
dist_matrix <- dist(x = scaled_data, method = "euclidean")
hc <- hclust(d = dist_matrix, method = "ward.D2")
# Dendrogram
# Draw the merge tree; the x-axis label and subtitle are blanked out.
plot(
  x = hc,
  main = "Dendrogram of Fulfillment Centers",
  xlab = "",
  sub = ""
)
# Assign Clusters
# Cut the tree into `k` groups and store each row's group label on `df`
# as a factor column named `Cluster`.
k <- 4
clusters <- cutree(tree = hc, k = k)
df[["Cluster"]] <- as.factor(clusters)
# Visualize Clusters
# Plot the standardised observations coloured by their assigned cluster.
fviz_cluster(
  object = list(data = scaled_data, cluster = clusters),
  main = "Hierarchical Clustering of Fulfillment Centers",
  palette = "jco",
  ggtheme = ggplot2::theme_minimal()
)
# Cluster Summary
# Per-cluster mean and median of `op_area` (ignoring missing values) together
# with the number of rows in each cluster.
cluster_summary <- summarise(
  group_by(df, Cluster),
  mean_op_area = mean(op_area, na.rm = TRUE),
  median_op_area = median(op_area, na.rm = TRUE),
  count = n()
)
print(cluster_summary)
## # A tibble: 4 × 4
## Cluster mean_op_area median_op_area count
## <fct> <dbl> <dbl> <int>
## 1 1 3.67 3.8 30
## 2 2 4.69 4.6 19
## 3 3 4 4 11
## 4 4 2.85 2.85 8