# Handling Missing Data in R
# 1) Handling missing (NA) values ----

# Load the built-in airquality data set and inspect it. View() needs a
# graphical session, so it is skipped when the script runs non-interactively.
data("airquality")
if (interactive()) {
  View(airquality)
}

# Identify the missing values: logical matrix, TRUE wherever a cell is NA.
is.na(airquality)

# Count the missing values across the whole data frame.
sum(is.na(airquality))

# Eliminate missing values: keep only the rows that contain no NA.
# The result is computed once, stored in FN, and printed.
FN <- na.omit(airquality)
FN
if (interactive()) {
  View(FN)
}

# Save the complete cases as FN.csv. The original output directory is
# machine-specific, so only switch to it when it actually exists; otherwise
# the file is written to the current working directory instead of aborting.
output_dir <- "E:\\R programme\\R.Directory"
if (dir.exists(output_dir)) {
  setwd(output_dir)
} else {
  message("'", output_dir, "' not found; writing FN.csv to ", getwd())
}
write.csv(FN, "FN.csv")
# 2) How to impute missing data ----
# Common imputation packages: mice, Amelia and missForest.
if (interactive()) {
  View(airquality)
}

# Install VIM only when it is not available yet, then load it.
if (!requireNamespace("VIM", quietly = TRUE)) {
  install.packages("VIM")
}
library(VIM)

# Plot the amount and pattern of missing values per variable.
aggr(airquality)

# Number of rows that would remain if incomplete rows were simply dropped.
nrow(na.omit(airquality))

# Install mice only when it is not available yet, then load it.
if (!requireNamespace("mice", quietly = TRUE)) {
  install.packages("mice")
}
library(mice)

# Impute the missing values. mice() draws random numbers, so a fixed seed
# makes the imputed values reproducible between runs.
airquality_imp <- mice(airquality, seed = 123)

# Extract a completed data set (observed values plus imputed values).
airquality_comp <- complete(airquality_imp)
if (interactive()) {
  View(airquality_comp)
}

# Apply aggr() again to confirm that no missing values remain.
aggr(airquality_comp)
# Show the next four graphs together in a 2 x 2 grid in the plot pane.
par(mfrow = c(2, 2))

# Box plots of Ozone: original data (NAs are ignored) vs. imputed data.
boxplot(airquality$Ozone, main = "Data with NA")
boxplot(airquality_comp$Ozone, main = "Imputed data")

# Apply the t.test function to compare mean Ozone before and after
# imputation (two-sample test; missing values are dropped by t.test()).
t.test(airquality$Ozone, airquality_comp$Ozone)
# Interpretation on the basis of the p-value: if it is large (e.g. > 0.05),
# there is no evidence that imputation changed the mean of the actual data
# set.

# Density plots: original data (solid black line) overlaid with the imputed
# data (dotted red line), first for Ozone and then for Solar.R.
plot(density(airquality$Ozone, na.rm = TRUE), main = "Data with NA")
lines(density(airquality_comp$Ozone, na.rm = TRUE), col = "red", lty = 3)
plot(density(airquality$Solar.R, na.rm = TRUE), main = "Data with NA")
lines(density(airquality_comp$Solar.R, na.rm = TRUE), col = "red", lty = 3)