Appendix
Appendix
R Code:
library(writexl)
library(WriteXLS)
# Export the complete-case data to an Excel workbook in the working directory.
# write_xlsx() is namespace-qualified so it always resolves to writexl.
writexl::write_xlsx(complete_data, "Complete_Cases_PM25.xlsx")
# NOTE(review): Complete_Cases_PM25 is not created anywhere in the visible
# code; this line prints it and fails if the object does not exist - confirm.
Complete_Cases_PM25
# Report how many rows the complete-case data has; the trailing "\n" keeps
# later console output on its own line, matching the other cat() calls here.
cat("Number of complete cases:", nrow(complete_data), "\n")
return(data)
}
for (i in 1:num_simulations) {
# Generate missing data
simulated_data <- generate_missing_pm25(data, missing_percent)
# Store in the list
simulations[[i]] <- simulated_data
}
return(simulations)
}
# 16/12/24
# Mean Imputation
# Replace every missing PM2.5 value with the mean of the observed values.
# NOTE(review): complete_data presumably holds only complete cases, in which
# case nothing is replaced here - confirm this is the intended input.
mean_value <- mean(complete_data$`PM2.5`, na.rm = TRUE)
mean_imputed <- complete_data
mean_imputed$`PM2.5` <- replace(
  mean_imputed$`PM2.5`,
  is.na(mean_imputed$`PM2.5`),
  mean_value
)
# Show the value used as the replacement
mean_value
# Median Imputation
# Replace every missing PM2.5 value with the median of the observed values.
# NOTE(review): complete_data presumably holds only complete cases, in which
# case nothing is replaced here - confirm this is the intended input.
median_value <- median(complete_data$`PM2.5`, na.rm = TRUE)
median_imputed <- complete_data
median_imputed$`PM2.5` <- replace(
  median_imputed$`PM2.5`,
  is.na(median_imputed$`PM2.5`),
  median_value
)
# Show the value used as the replacement
median_value
# Install VIM only when it is missing, so re-running the script does not
# reinstall the package (and hit the network) every time.
if (!requireNamespace("VIM", quietly = TRUE)) {
  install.packages("VIM")
}
library(VIM)
# kNN Imputation with k=5
# Only the PM2.5 column is imputed (variable = "PM2.5").
knn_imputed <- kNN(complete_data, variable = "PM2.5", k = 5)
# View results
summary(knn_imputed$`PM2.5`)
# Install mice only when it is missing, so re-running the script does not
# reinstall the package (and hit the network) every time.
if (!requireNamespace("mice", quietly = TRUE)) {
  install.packages("mice")
}
library(mice)
# Perform Multiple Imputation
# Predictive mean matching, 5 imputed datasets, 10 iterations, fixed seed.
mice_imputed <- mice(complete_data, m = 5, method = "pmm", maxit = 10, seed = 123)
# Extract one filled-in dataset from the mice result. Without this step
# `completed_data` is never created and the summary() below fails.
# NOTE(review): the first of the 5 imputations is used - confirm which
# imputation (or a pooled result) the analysis should report.
completed_data <- mice::complete(mice_imputed, 1)
# View results
summary(completed_data$`PM2.5`)
# Check that complete_data exists and is not empty before calculating MAE.
# The opening `if` was missing, which left an unmatched `} else {` that R
# cannot parse; the guard matches the intact copy of this block in the file.
if (exists("complete_data") && nrow(complete_data) > 0) {
  # Calculate MAE between the original and the mean-imputed PM2.5 values
  MAE <- mean(abs(complete_data$`PM2.5` - mean_imputed$`PM2.5`), na.rm = TRUE)
  cat("Mean Absolute Error (MAE):", MAE, "\n")
} else {
  cat("Error: Dataset 'complete_data' is empty or not loaded properly.\n")
}
# Check that complete_data exists and is not empty before calculating MAE.
# The opening `if` was missing, which left an unmatched `} else {` that R
# cannot parse; the guard matches the intact copy of this block in the file.
if (exists("complete_data") && nrow(complete_data) > 0) {
  # Calculate MAE between the original and the mean-imputed PM2.5 values
  MAE <- mean(abs(complete_data$`PM2.5` - mean_imputed$`PM2.5`), na.rm = TRUE)
  cat("Mean Absolute Error (MAE):", MAE, "\n")
} else {
  cat("Error: Dataset 'complete_data' is empty or not loaded properly.\n")
}
# Report the MAE of the mean-imputed PM2.5 values against complete_data.
# Bail out with a message first when complete_data is absent or has no rows.
if (!exists("complete_data") || nrow(complete_data) <= 0) {
  cat("Error: Dataset 'complete_data' is empty or not loaded properly.\n")
} else {
  # Mean of the absolute differences, skipping any NA differences
  MAE <- mean(
    abs(complete_data$`PM2.5` - mean_imputed$`PM2.5`),
    na.rm = TRUE
  )
  cat("Mean Absolute Error (MAE):", MAE, "\n")
}
# Report the MAE of the mean-imputed PM2.5 values against complete_data.
# Bail out with a message first when complete_data is absent or has no rows.
if (!exists("complete_data") || nrow(complete_data) <= 0) {
  cat("Error: Dataset 'complete_data' is empty or not loaded properly.\n")
} else {
  # Mean of the absolute differences, skipping any NA differences
  MAE <- mean(
    abs(complete_data$`PM2.5` - mean_imputed$`PM2.5`),
    na.rm = TRUE
  )
  cat("Mean Absolute Error (MAE):", MAE, "\n")
}
# 17/12/24
return(simulated_data)
}
# Function for Median Imputation
#
# Replaces every NA in one column with the median of that column's
# non-missing values and returns the modified copy; the input is not changed.
#
# Arguments:
#   simulated_data  data frame containing the column to impute.
#   column          name of the column to impute; defaults to "PM2.5" so
#                   existing calls median_impute(data) behave as before.
# Returns:
#   simulated_data with the NAs in `column` replaced by the median.
# Errors:
#   stops if `column` is not a column of simulated_data, instead of silently
#   returning the data unchanged.
median_impute <- function(simulated_data, column = "PM2.5") {
  if (!column %in% names(simulated_data)) {
    stop("Column '", column, "' not found in simulated_data.", call. = FALSE)
  }
  values <- simulated_data[[column]]
  median_value <- median(values, na.rm = TRUE)
  # If every value is NA the median is NA too, so the column stays as it was.
  values[is.na(values)] <- median_value
  simulated_data[[column]] <- values
  simulated_data
}
# Function to Calculate MAE
#
# Mean absolute error between the PM2.5 column of two datasets, compared
# element by element; pairs whose difference is NA are ignored.
#
# Arguments:
#   original_data  data containing the reference PM2.5 values.
#   imputed_data   data containing the imputed PM2.5 values.
# Returns:
#   a single numeric value, the mean of |original - imputed|.
# Errors:
#   stops when the two PM2.5 columns differ in length; R would otherwise
#   recycle the shorter one and return a meaningless error value.
calculate_MAE <- function(original_data, imputed_data) {
  actual <- original_data[["PM2.5"]]
  estimate <- imputed_data[["PM2.5"]]
  if (length(actual) != length(estimate)) {
    stop(
      "original_data and imputed_data must have the same number of PM2.5 ",
      "values (", length(actual), " vs ", length(estimate), ").",
      call. = FALSE
    )
  }
  mean(abs(actual - estimate), na.rm = TRUE)
}
# Print Results
# One line per missing-data percentage with the matching entry of mae_results.
# seq_along() gives an empty sequence when missing_percents is empty, whereas
# 1:length(x) would iterate over c(1, 0) and print NA rows.
for (i in seq_along(missing_percents)) {
  cat("MAE for", missing_percents[i] * 100, "% Missing Data:", mae_results[i], "\n")
}
# Calculate RMSE
rmse <- calculate_RMSE(complete_data, imputed_data)
return(rmse)
})
# Calculate R^2
R2 <- calculate_R2(complete_data, imputed_data)
return(R2)
})
# Print Results
# One line per missing-data percentage with the matching entry of mae_results.
# seq_along() gives an empty sequence when missing_percents is empty, whereas
# 1:length(x) would iterate over c(1, 0) and print NA rows.
for (i in seq_along(missing_percents)) {
  cat("MAE for", missing_percents[i] * 100, "% Missing Data:", mae_results[i], "\n")
}
# Missing percentages
# Written as whole percentages and scaled to proportions (5%, 10%, 15%, 20%).
missing_percents <- c(5, 10, 15, 20) / 100
# Calculate RMSE
rmse <- calculate_RMSE(complete_data, imputed_data)
return(rmse)
})
# Calculate R^2
R2 <- calculate_R2(complete_data, imputed_data)
return(R2)
})
# NOT SURE
# Install DMwR2 only when it is missing, so re-running the script does not
# reinstall the package (and hit the network) every time.
if (!requireNamespace("DMwR2", quietly = TRUE)) {
  install.packages("DMwR2")
}
library(DMwR2)
# Print Results
# One line per missing-data percentage with the matching entry of mae_results,
# labelled with the k_value in use.
# seq_along() gives an empty sequence when missing_percents is empty, whereas
# 1:length(x) would iterate over c(1, 0) and print NA rows.
for (i in seq_along(missing_percents)) {
  cat("MAE for", missing_percents[i] * 100, "% Missing Data with k =", k_value, ":", mae_results[i], "\n")
}