4 TH
4 TH
AIM:
To explore various variable and row filters in R for cleaning data, and to apply various plot
features in R to sample data sets and visualize them.
PROGRAM:
# Load the tidyverse and read the income data set into the data frame `x`.
library(tidyverse)
x <- read.csv("/content/income.csv")
# Preview the first rows of `x`. No `df` object is created in this script, so
# the bare name `df` would resolve to the stats::df function, not the data.
head(x)
OUTPUT:
# Show the per-column summary of the data frame `x`.
summary(x)
OUTPUT:
# Count the missing (NA) values in each column of `x` and print the counts.
print(colSums(is.na(x)))
OUTPUT:
# Count, per column of `x`, how many entries are equal to zero.
# na.rm = TRUE keeps a column that contains NA from reporting NA instead of
# its zero count (x == 0 yields NA wherever the value itself is NA).
num.zero <- colSums(x == 0, na.rm = TRUE)
print(num.zero)
OUTPUT:
# Histogram of the `age` column. The data frame read from the CSV is `x`;
# `df` is not defined in this script, so `df$age` would fail.
age <- x$age
hist(age)
library(ggplot2)
OUTPUT:
# Scatter plot of two numeric columns of the data frame `x`.
# NOTE(review): the original mapped `Age` and `BloodPressure` on `df`; neither
# that object nor those column names appear anywhere else in this script.
# `age` and `hours.per.week` are used here on the assumption that they are the
# intended numeric pair -- confirm against the columns of income.csv.
ggplot(data = x, mapping = aes(x = age, y = hours.per.week)) +
  geom_point()
OUTPUT:
# Tile ("heatmap") plot of age against income..50K, coloured by age on a
# blue-to-red gradient.
library(ggplot2)
# Use the data frame `x` read from the CSV; `df` is not defined in this script.
heatmap_plot <- ggplot(x, aes(x = age, y = income..50K, fill = age)) +
  geom_tile() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(title = "Heatmap of age and income 50k ") +
  theme_minimal()
print(heatmap_plot)
OUTPUT:
# Four-panel overview of `age` from the data frame `x`, on a 2-column grid.
# Install gridExtra only when it is missing rather than on every run.
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra")
}
library(gridExtra)
library(ggplot2)
# Column name (as a string) used for group/fill through the .data pronoun.
variable_name <- "age"
# Panel 1: histogram of age with 20 bins.
plot1 <- ggplot(x, aes(x = age)) +
  geom_histogram(bins = 20, fill = "red", alpha = 0.7)
# Panel 2: box plot of age.
plot2 <- ggplot(x, aes(x = age)) +
  geom_boxplot(fill = "darkgreen", alpha = 0.7)
# Panel 3: density of age, grouped and filled by `variable_name`.
plot3 <- ggplot(x, aes(x = age, group = .data[[variable_name]])) +
  geom_density(lwd = 0.1, aes(fill = .data[[variable_name]]), alpha = 0.5)
# Panel 4: box plot of income..50K against `variable_name`.
# NOTE(review): no explicit group is set for this panel, unlike panel 3 --
# confirm whether one box per value of `variable_name` was intended.
plot4 <- ggplot(x, aes(x = .data[[variable_name]], y = income..50K)) +
  geom_boxplot(aes(fill = .data[[variable_name]]))
grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
OUTPUT:
# Four-panel overview of `education.num` from the data frame `x`, on a
# 2-column grid.
# Install gridExtra only when it is missing rather than on every run.
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra")
}
library(gridExtra)
library(ggplot2)
# Column name (as a string) used for group/fill through the .data pronoun.
# NOTE(review): this is still "age" although panels 1-3 plot education.num --
# confirm that grouping by age is intended here.
variable_name <- "age"
# Panel 1: histogram of education.num with 20 bins.
plot1 <- ggplot(x, aes(x = education.num)) +
  geom_histogram(bins = 20, fill = "red", alpha = 0.7)
# Panel 2: box plot of education.num.
plot2 <- ggplot(x, aes(x = education.num)) +
  geom_boxplot(fill = "darkgreen", alpha = 0.7)
# Panel 3: density of education.num, grouped and filled by `variable_name`.
plot3 <- ggplot(x, aes(x = education.num, group = .data[[variable_name]])) +
  geom_density(lwd = 0.1, aes(fill = .data[[variable_name]]), alpha = 0.5)
# Panel 4: box plot of income..50K against `variable_name`.
plot4 <- ggplot(x, aes(x = .data[[variable_name]], y = income..50K)) +
  geom_boxplot(aes(fill = .data[[variable_name]]))
grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
OUTPUT:
# Four-panel overview of `hours.per.week` from the data frame `x`, on a
# 2-column grid.
# Install gridExtra only when it is missing rather than on every run.
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra")
}
library(gridExtra)
library(ggplot2)
# Column name (as a string) used for group/fill through the .data pronoun.
# NOTE(review): this is still "age" although panels 1-3 plot hours.per.week --
# confirm that grouping by age is intended here.
variable_name <- "age"
# Panel 1: histogram of hours.per.week with 20 bins.
plot1 <- ggplot(x, aes(x = hours.per.week)) +
  geom_histogram(bins = 20, fill = "red", alpha = 0.7)
# Panel 2: box plot of hours.per.week.
plot2 <- ggplot(x, aes(x = hours.per.week)) +
  geom_boxplot(fill = "darkgreen", alpha = 0.7)
# Panel 3: density of hours.per.week, grouped and filled by `variable_name`.
plot3 <- ggplot(x, aes(x = hours.per.week, group = .data[[variable_name]])) +
  geom_density(lwd = 0.1, aes(fill = .data[[variable_name]]), alpha = 0.5)
# Panel 4: box plot of income..50K against `variable_name`.
plot4 <- ggplot(x, aes(x = .data[[variable_name]], y = income..50K)) +
  geom_boxplot(aes(fill = .data[[variable_name]]))
grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
OUTPUT:
RESULT:
The above program was completed and successfully executed.