# Lab Test 2 ----
# Recode the nominal variables (sex, smoker, region) as unordered factors
insurance_data[c("sex", "smoker", "region")] <- lapply(
  insurance_data[c("sex", "smoker", "region")],
  factor
)
# Recode children as an ordinal variable; ordered() is factor(ordered = TRUE)
insurance_data$children <- ordered(insurance_data$children)
# Q2 ----
# Fit a basic linear model of medical charges on age, sex, bmi, children,
# smoker and region
linear_model <- lm(
  charges ~ age + sex + bmi + children + smoker + region,
  data = insurance_data
)
# Compute Cook's Distance for every observation used in the fit; this vector
# must exist before it can be compared against the threshold below
cooksd <- cooks.distance(linear_model)
# Display the rows with Cook's Distance greater than the 4 / n threshold
# NOTE(review): positions in cooksd are used as row indices, which assumes
# lm() dropped no rows for missing values -- confirm
influential_obs <- which(cooksd > 4 / nrow(insurance_data))
insurance_data[influential_obs, ]
# Q3 ----
# Q4 ----
# Q6 ----
library(ggplot2)
# Store the model's predictions as a new predicted_bmi column
# NOTE(review): `model` is not defined in the visible part of this script --
# confirm it is the intended model and is fitted before this line runs
insurance_data[["predicted_bmi"]] <- predict(object = model)