Experiment-9.
Study and implementation of
Data
visualization with ggplot
Radhika Sureshbhai MAtholiya
_MCA_SET28(67220200055)
2022-07-05
######Experiment No. 9##########
#Aim: Study and implementation of Data Visualization with ggplot
#Install and Load Package
#installing package
#install.packages("ggplot2")
library(ggplot2)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Histograms ----
# Histograms, density plots and box plots are used for visualizing a
# continuous variable.

# Open the data viewer only in an interactive session: View() needs a GUI
# and may fail when the script is run in batch mode without a display.
if (interactive()) {
  View(iris)
}

# Default histogram of sepal length. No bin settings are given, so ggplot2
# reports:
#   `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram()

# Set the width of the bins explicitly with `binwidth`.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 1)

# Alternatively give the number of bins wanted; the bin width is then
# adjusted automatically.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(color = "black", fill = "white", bins = 10)

# Visualize the groups of a categorical variable by mapping it to the
# outline colour of the bars.
ggplot(iris, aes(x = Sepal.Length, color = Species)) +
  geom_histogram(fill = "white", binwidth = 1)
# Density plots ----
# A density plot is another way to present the distribution of a continuous
# variable.
ggplot(iris, aes(Sepal.Length)) +
  geom_density()

# One density curve per species, distinguished by line colour.
ggplot(iris, aes(Sepal.Length, colour = Species)) +
  geom_density()
# Bar and column charts ----
# The mpg data set is used for the following graphics.

# geom_bar() counts the rows that fall into each class.
ggplot(mpg, aes(x = class)) +
  geom_bar()

# coord_flip() interchanges the x and y axes.
ggplot(mpg, aes(x = class)) +
  geom_bar() +
  coord_flip()

# Add or modify the main title and the axis labels with labs().
p <- ggplot(mpg, aes(x = class)) +
  geom_bar()
p <- p +
  labs(
    title = "Number of Cars in each type",
    x = "Type of car",
    y = "Number of cars"
  )
p

# Add data labels on top of the labelled chart. after_stat(count) refers to
# the count computed by stat = "count"; it replaces the deprecated
# `..count..` notation.
p + geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.25)
# Reordering bars ----
# With stat = "identity" the bar heights come from a column we derived
# ourselves instead of from a row count.
# The classes are sorted by descending count and the factor levels are fixed
# in that order, so the bars are drawn from most to least frequent.
mpg %>%
  count(class) %>%
  arrange(desc(n)) %>%
  mutate(class = factor(class, levels = class)) %>%
  ggplot(aes(x = class, y = n)) +
  geom_bar(stat = "identity")

# Mean of a continuous variable by a categorical variable ----
# Mean displacement per class, ordered from largest to smallest mean.
df <- mpg %>%
  group_by(class) %>%
  summarise(mean = mean(displ)) %>%
  arrange(desc(mean)) %>%
  mutate(class = factor(class, levels = class))

p <- ggplot(df, aes(x = class, y = mean)) +
  geom_bar(stat = "identity")

# Print each mean, formatted to two decimals, in bold white text; the
# positive vjust places the label inside the top of its bar.
p + geom_text(
  aes(label = sprintf("%0.2f", round(mean, digits = 2))),
  vjust = 1.6,
  color = "white",
  fontface = "bold",
  size = 4
)
# Stacked bar chart ----
# Displacement per class, with the fill colour taken from the drive type.
p <- ggplot(mpg, aes(x = class, y = displ, fill = drv))
p + geom_bar(stat = "identity")

# Same data with dodged (side-by-side) bars instead of stacked ones.
p + geom_bar(stat = "identity", position = "dodge")
# Box plots ----
# Turn the cylinder count into a factor so that each value gets its own box.
# This changes the session's copy of mtcars, which later sections also use.
mtcars$cyl <- factor(mtcars$cyl)
ggplot(mtcars, aes(cyl, disp)) +
  geom_boxplot()

# notch = TRUE draws a notched box plot. For this data ggplot2 reports, twice:
#   notch went outside hinges. Try setting notch=FALSE.
ggplot(mtcars, aes(cyl, disp)) +
  geom_boxplot(notch = TRUE)
# Scatter plots ----
# Sepal length against sepal width; species is shown by both point shape and
# point colour.
ggplot(
  iris,
  aes(Sepal.Length, Sepal.Width, shape = Species, colour = Species)
) +
  geom_point()

# Automatic cars only (am == 0), coloured by number of cylinders.
ggplot(mtcars[mtcars$am == 0, ], aes(mpg, disp, colour = factor(cyl))) +
  geom_point()

# geom_smooth() adds a smoothed trend to reveal the pattern. ggplot2 reports:
#   `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mtcars, aes(mpg, disp, colour = hp)) +
  geom_point() +
  geom_smooth()

# Overlay horsepower as a line: the line layer replaces the y aesthetic with
# hp, while the points keep disp.
ggplot(mtcars, aes(mpg, disp, colour = hp)) +
  geom_point(size = 2.5) +
  geom_line(aes(y = hp))
# Titles, subtitles and axis labels ----
# Add a title with labs().
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  labs(title = "Scatter plot")

# Alternatively, use ggtitle().
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  ggtitle(label = "Scatter plot")

# ggtitle() also takes a subtitle. The literal is kept on one line: a line
# break inside the quotes would become part of the subtitle text.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  ggtitle(label = "Scatter plot", subtitle = "mtcars data in R")

# Base plot coloured by cylinder count; reused for the label examples below.
a <- ggplot(mtcars, aes(x = mpg, y = disp, color = factor(cyl))) +
  geom_point()

# Change the legend title.
a + labs(color = "Cylinders")

# Change the legend title and both axis labels.
a +
  labs(color = "Cylinders") +
  xlab("Mileage") +
  ylab("Displacement")

# Combine it all: legend title, axis labels, title and subtitle.
a +
  labs(color = "Cylinders") +
  xlab("Mileage") +
  ylab("Displacement") +
  ggtitle(label = "Scatter plot", subtitle = "mtcars data in R")
# Themes ----
# Base scatter plot reused for the theme examples.
b <- ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  labs(title = "Scatter Plot")

# Change the size and colour of the title and the plot background colour.
b + theme(
  plot.title = element_text(color = "blue", size = 17),
  plot.background = element_rect(fill = "orange")
)

# Apply a complete built-in theme.
b + theme_minimal()

# Blank out the panel background.
b + theme(panel.background = element_blank())

# Remove the tick-label text from both axes, then from x only, then y only.
b + theme(axis.text = element_blank())
b + theme(axis.text.x = element_blank())
b + theme(axis.text.y = element_blank())

# Legend position ----
# Plot with a continuous colour legend (hp). It is stored as `c`, the name
# the following colour-scale examples use.
c <- ggplot(mtcars, aes(x = mpg, y = disp, color = hp)) +
  labs(title = "Scatter Plot") +
  geom_point()
c + theme(legend.position = "top")

# Combine everything. The title colour must be one unbroken colour name; a
# line break inside the string makes it an invalid colour.
c +
  theme(legend.position = "bottom", axis.text = element_blank()) +
  theme(
    plot.title = element_text(color = "forestgreen", size = 17),
    plot.background = element_rect(fill = "Yellow")
  )
# Colour scales in the legend ----
# Two-colour gradient from the lowest to the highest hp.
c + scale_color_gradient(low = "yellow", high = "red")

# Three colours: scale_color_gradient2() builds a diverging gradient
# low -> mid -> high around `midpoint`. Its default midpoint is 0, which lies
# below every hp value, so the low colour (red) would never be shown. Putting
# the midpoint at the centre of the observed range shades the lowest values
# red, the central values green and the highest values blue.
c + scale_color_gradient2(
  low = "red",
  mid = "green",
  high = "blue",
  midpoint = mean(range(mtcars$hp))
)

# Any number of colours can be supplied with scale_color_gradientn().
c +
  theme(legend.position = "bottom") +
  scale_color_gradientn(colours = c("red", "forest green", "white", "blue"))

# Change the breaks in the legend, and label each break with its unit.
c + scale_color_continuous(
  name = "horsepower",
  breaks = seq(50, 350, 75),
  labels = paste(seq(50, 350, 75), "hp")
)

# Changing the break points and the colour scale of the legend together ----
# Trial 1 (deliberately wrong): the second colour scale replaces the first,
# so the breaks are lost. ggplot2 reports:
#   Scale for 'colour' is already present. Adding another scale for 'colour',
#   which will replace the existing scale.
c +
  scale_color_continuous(breaks = seq(50, 350, 75)) +
  scale_color_gradient(low = "blue", high = "red")

# Trial 2 (deliberately wrong): same problem in the other order; this time
# the colours are lost. The same message is reported.
c +
  scale_color_gradient(low = "blue", high = "red") +
  scale_color_continuous(breaks = seq(50, 350, 75))

# The correct way is to define all the arguments in one scale function only.
c +
  scale_color_continuous(
    name = "horsepower",
    breaks = seq(50, 350, 75),
    low = "red",
    high = "black"
  ) +
  theme(
    panel.background = element_rect(fill = "green"),
    plot.background = element_rect(fill = "orange")
  )
# Axis limits and cut points ----
# Base plot: mileage against displacement, coloured by transmission type.
d <- ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point(aes(color = factor(am))) +
  xlab("Mileage") +
  ylab("Displacement") +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "pink")
  )

# scale_x_continuous() / scale_y_continuous() change the axis limits. Points
# outside the limits are removed with a warning, so the limits are chosen to
# contain the data: x from 10 to 35 and y from 50 to 500. (Limits of 2-4 and
# 15-30 removed all 32 rows and left the plot empty.)
d +
  scale_x_continuous(limits = c(10, 35)) +
  scale_y_continuous(limits = c(50, 500))

# `breaks` sets the cut points (tick positions) within those limits.
d +
  scale_x_continuous(limits = c(10, 35), breaks = seq(10, 35, 5)) +
  scale_y_continuous(limits = c(50, 500), breaks = seq(50, 500, 75))
# Faceting ----
# Open the data viewer only in an interactive session: View() needs a GUI
# and may fail when the script is run in batch mode without a display.
if (interactive()) {
  View(mtcars)
}

# Distinct carburettor counts; each becomes one panel below.
# Printed result: [1] 4 1 2 3 6 8
unique(mtcars$carb)

# One panel per carb value, using a formula ...
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~carb)

# ... or, alternatively, the variable name as a string.
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap("carb")

# Control the layout with nrow: the number of rows of panels.
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~carb, nrow = 3)

# Control the layout with ncol: the number of columns of panels.
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~carb, ncol = 3)

# Use the `labeller` option to control how the panel labels are printed.
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~ carb + am, labeller = "label_both")

# facet_grid() facets in two dimensions. The basic plot is stored in `z` so
# that the different grids can be added to it.
z <- ggplot(mtcars, aes(mpg, disp)) +
  geom_point()
z + facet_grid(. ~ cyl) # one column per cyl
z + facet_grid(cyl ~ .) # one row per cyl
z + facet_grid(gear ~ cyl, labeller = "label_both") # rows by gear, cols by cyl
# Adding text to the points ----
# Label every point with its transmission code (am).
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  geom_text(aes(label = am))

# library() stops with an error if ggrepel is not installed, whereas
# require() only returns FALSE and the failure would surface later at
# geom_text_repel().
library(ggrepel)

# Same labels drawn with ggrepel's geom_text_repel() instead of geom_text().
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  geom_text_repel(aes(label = am))