Graphics Using R
Graphics Using R
8 May 2017
● manuals
● guides
● cookbooks
● tutorials
● blogs
● forums
● www.stackoverflow.com
R is made up of packages
Load a package:
library(packagename)
Main graphic packages
grDevices
graphics grid
maps lattice
Vector vs. raster images
(50 x 50 pixels)
Resolution affects image quality
demo(graphics)
More examples?
example(plot)
example(hist)
example(barplot)
example(boxplot)
Graphical user interface (GUI) vs. text-based interface
First
Second
● high-level functions
● low-level functions
● interactive functions
Graphical devices
Graphic Graphic
Device
commands output
One input, several outputs
Graphic commands
windows()
screen
bitmap()
file .png
A typical session
P(x,y)
y
O x X
The graphic box model
outer margin 3
figure region
figure margin 2 figure margin 3
figure margin 4
outer margin 2
outer margin 4
plot region
figure margin 1
outer margin 1
The graphic box model: an example
The graphic box model: an example
Figure margin 3
Figure margin 4
Figure margin 2 Plot region
Figure margin 1
The graphic box model: an example
# Two independent samples of 50 standard-normal values, one per axis.
# The names x and y are kept: plot() uses them as the default axis labels.
x <- rnorm(50)
y <- rnorm(50)
# Scatter plot with the same fixed limits on both axes
plot(
  x, y,
  main = "An example graph",
  xlim = c(-3, 3),
  ylim = c(-3, 3)
)
Adding boxes
plot.new()
box(which="plot")
box(which="figure")
box(which="outer")
plot.new()
plot.window(c(0,10), c(0,2))
points(c(0,0,10,10), c(0, 2, 0, 2))
(0,2) (10,2)
(0,0) (10,0)
Exploring the margins and the box
plot.new()
plot.window(c(0,10), c(0,2))
points(c(0,0,10,10), c(0, 2, 0, 2))
box()
(0,2) (10,2)
(0,0) (10,0)
Multiple figure regions
Outer margin 3
Figure region 1 Figure region 2
Outer margin 4
Figure region 3 Figure region 4
Outer margin 1
Coordinate system in the plot region
Max y value
y (x, y)
Min y value
Min x value x Max x value
Several types of plot
plot(y, type="p") plot(y, type="l")
> set.seed(123456)
> y <- rnorm(20)
> y
[1] 0.83373317 -0.27604777 -0.35500184 0.08748742
[5] 2.25225573 0.83446013 1.31241551 2.50264541
[9] 1.16823174 -0.42616558 -0.99612975 -1.11394990
[13] -0.05573154 1.17443240 1.05321861 0.05760597
[17] -0.73504289 0.93052842 1.66821097 0.55968789
> range(y)
[1] -1.113950 2.502645
Plot step-by-step: start a new plot
plot.new()
Plot step-by-step: set up coordinate system
plot.window(c(1, 20), c(-1.2, 2.6))
Plot step-by-step: add grid
grid(col="lightgray", lty="solid")
Plot step-by-step: add points
points(y)
Plot step-by-step: add x-axis
axis(1, at=c(1, 10, 20))
Plot step-by-step: add y-axis
axis(2, at=c(-1.2, 0, 2.6))
Plot step-by-step: add x-axis title
title(xlab="X")
Plot step-by-step: add y-axis title
title(ylab="Y")
Plot step-by-step: add main title
title(main="My graph title")
Plot step-by-step: let's review all the code
# Reproducible sample of 20 standard-normal values
set.seed(123456)
y <- rnorm(20)

# Start an empty frame, then define a coordinate system that spans the data
plot.new()
plot.window(xlim = c(1, 20), ylim = c(-1.2, 2.6))

# Draw the grid first so the points end up on top of it
grid(col = "lightgray", lty = "solid")
points(y)

# Axes: side 1 is the bottom (x), side 2 is the left (y)
axis(side = 1, at = c(1, 10, 20))
axis(side = 2, at = c(-1.2, 0, 2.6))

# Axis labels and main title
title(xlab = "X")
title(ylab = "Y")
title(main = "My graph title")
Plot step-by-step: create SVG file
# Data
set.seed(123456)
y <- rnorm(20)
# Open SVG device: graphic output now goes to mygraph.svg instead of the screen
svg(file="mygraph.svg")
# Graphic commands
plot.new()
plot.window(c(1, 20), c(-1.2, 2.6))
grid(col="lightgray", lty="solid")
points(y)
axis(1, at=c(1, 10, 20))
axis(2, at=c(-1.2, 0, 2.6))
title(xlab="X")
title(ylab="Y")
title(main="My graph title")
# Close device: the file is only complete once the device is closed
dev.off()
Best practices: comment and save the script
# Data ----
set.seed(123456)
y <- rnorm(20)

# Output device: everything drawn until dev.off() is written to final.svg ----
svg(file = "final.svg")

# Empty frame and coordinate system ----
plot.new()
plot.window(xlim = c(1, 20), ylim = c(-1.2, 2.6))

# Background grid ----
grid(col = "lightgray", lty = "solid")

# Data points (drawn after the grid, so on top of it) ----
points(y)

# Axes: bottom (side 1) and left (side 2) ----
axis(side = 1, at = c(1, 10, 20))
axis(side = 2, at = c(-1.2, 0, 2.6))

# Axis labels and main title ----
title(xlab = "X")
title(ylab = "Y")
title(main = "My graph title")

# Close the device to finish the file ----
dev.off()
Overlapping points: the problem
x <- c(1, 2, 2, 3, 3, 3, 4, 4, 4, 4)
y <- c(2, 6, 6, 8, 8, 8, 10, 10, 10, 10)
plot(x=x, y=y)
4 points
3 points
2 points
1 point
Overlapping points: jitter (add noise)
x <- c(1, 2, 2, 3, 3, 3, 4, 4, 4, 4)
y <- c(2, 6, 6, 8, 8, 8, 10, 10, 10, 10)
plot(x=jitter(x), y=jitter(y), xlab="x", ylab="y")
Overlapping points: sunflower plot
1 2 3 4 5 6 7 8 9 10
Overlapping points: a sunflower plot example
x <- c(1, 2, 2, 3, 3, 3, 4, 4, 4, 4)
y <- c(2, 6, 6, 8, 8, 8, 10, 10, 10, 10)
sunflowerplot(x=x, y=y)
Overlapping points: another sunflower plot example
sunflowerplot(x=iris$Petal.Length, y=iris$Petal.Width)
Pie chart
Example:
= number of bins
= number of obs.
Histogram with relative frequencies (density)
set.seed(123456)
data <- rnorm(1000)
hist(data, freq=FALSE)
= number of bins
Histogram with not equal bins
# Unequal bins with edges at -4, 0, 1 and 3 (widths 4, 1 and 2)
hist(data, breaks=c(-4, 0, 1, 3))
$d_i = \dfrac{n_i}{n \cdot w_i}$, for $i = 1, \dots, k$, where
$k$ = number of bins
$n$ = number of obs.
$n_i$ = number of obs. in
the i-th bin
$w_i$ = width of the i-th bin
Calculate density for histogram: an example
> n <- length(data)
> n1 <- length(data[which(data > -4 & data <=0)])
> f1 <- n1 / n
> f1
[1] 0.479
> w1 <- 4
> d1 <- f1 / w1
> d1
[1] 0.11975
0.479
0.11975
Box plot (or box-and-whisker plot)
whisker
third quartile
first quartile
whisker
lowest value
within 1.5·IQR
Box plot: highlighting min and max
boxplot(airquality$Ozone, range=0)
max
3Q
2Q
1Q
min
Box plot: groups of data
d <- read.table(file="Employee_data.txt")
d <- read.table("Employee_data.txt")
salaryf <- d$salary[which(d$gender=="Female")]
boxplot(salaryf, range=0)
x <- rep(1, length(salaryf))
points(x, salaryf)
Box plot + jittered data points
d <- read.table("Employee_data.txt")
salaryf <- d$salary[which(d$gender=="Female")]
boxplot(salaryf, range=0)
x <- rep(1, length(salaryf))
x <- jitter(x, factor=8)
points(x, salaryf)
jittering
Bar plot
d <- read.table("Employee_data.txt")
jobcattable <- table(d$jobcat)
barplot(jobcattable)
Stacked bar plot
par(mfrow=c(3,2)) par(mfcol=c(3,2))
1 2 1 4
3 4 2 5
5 6 3 6
Multiple plots: projecting our first layout
par(mfcol=c(2,1))
Male
Salary
Experience
Female
Salary
Experience
1 1 height=2
2 3 height=3
width=4 width=4
An example of use of the “layout” command
# Layout matrix: figure 1 spans the whole top row,
# figures 2 and 3 share the bottom row
l <- matrix(c(1,1,2,3), nrow=2, ncol=2,
byrow=TRUE)
# Relative row heights (top 2, bottom 3); the argument name is spelled out
# in full as `heights` instead of relying on partial matching of `height`
layout(l, heights=c(2, 3))
# NOTE(review): `d`, `male` and `female` are not defined in this snippet;
# presumably `d` is the employee data and `male`/`female` are its gender
# subsets -- confirm against the earlier slides
# Figure 1 (top row): bar plot of job categories
barplot(table(d$jobcat), main="Job category")
# Figures 2 and 3 (bottom row): salary against experience, using the same
# ylim so the two panels are directly comparable
plot(x=male$prevexp, y=male$salary,
main="Male", xlab="Experience",
ylab="Salary", ylim=c(15000, 135000))
plot(x=female$prevexp, y=female$salary,
main="Female", xlab="Experience",
ylab="Salary", ylim=c(15000, 135000))
An overlapping legend
# 2 x 3 matrix filled column by column: one column per education level,
# one row per legend entry (row 1 = "female", row 2 = "male")
edudata <- matrix(
  c(0.4, 0.6, 0.3, 0.7, 0.2, 0.8),
  nrow = 2,
  ncol = 3
)
# One fill colour per row of edudata
colors <- c("gray50", "gray80")
# Stacked bars; the built-in legend is drawn inside the plot region,
# where it can end up on top of the bars
barplot(
  edudata,
  xlab = "Education",
  names.arg = c("low", "medium", "high"),
  col = colors,
  legend.text = c("female", "male")
)
Adding legend by using the “layout” command
# 2 x 3 matrix filled column by column: one column per education level.
# NOTE(review): rows are taken to be female (row 1) and male (row 2), as in
# the legend.text=c("female", "male") version of this plot -- confirm
edudata <- matrix(c(0.4, 0.6, 0.3, 0.7, 0.2, 0.8), nrow=2, ncol=3)
# Two figure regions stacked vertically: 1 = bar plot, 2 = legend
mlayout <- matrix(c(1,2), nrow=2, ncol=1)
# colors[1] fills row 1 of edudata, colors[2] fills row 2
colors <- c("gray50", "gray80")
par(mai=c(0.8, 0.6, 0.1, 0.2)) # bottom, left, top, right
# Relative heights 9:3; `heights` is spelled out instead of relying on
# partial matching of `height`
layout(mlayout, heights=c(9, 3))
barplot(edudata, xlab="Education", names.arg=c("low", "medium", "high"),
col=colors)
# Second region: an empty frame without margins that only holds the legend
plot.new()
par(mai=c(0, 0, 0, 0)) # bottom, left, top, right
plot.window(xlim=c(0,1), ylim=c(0,1))
# Entries are listed top-down like the stacked bars (male above female), so
# the fill colours are reversed to keep each label next to the colour of
# its own bars
legend(x=0.5, y=0.5, xjust=0.5, yjust=0.5, legend = c("male", "female"),
fill = rev(colors))
Multiple graphs setting the figure regions
# Simulated data: log(y) is linear in x (intercept real_a, slope real_b)
# plus standard-normal noise
n <- 50
x <- seq_len(n) - 1L
real_a <- 5
real_b <- 0.1
logy <- real_a + real_b * x + rnorm(n)
y <- exp(logy)

# Fit the log-linear model by ordinary least squares
est <- lm(log(y) ~ x)

# On the log scale the fit is a straight line, so abline() can draw it
# directly from the fitted model
plot(log(y) ~ x)
abline(est, col = "red")
Plotting fitted regression line for log-linear model
# Simulated data: log(y) is linear in x (intercept real_a, slope real_b)
# plus standard-normal noise
n <- 50
x <- seq_len(n) - 1L
real_a <- 5
real_b <- 0.1
logy <- real_a + real_b * x + rnorm(n)
y <- exp(logy)

# Fit on the log scale, then pull out intercept and slope as plain numbers
est <- lm(log(y) ~ x)
coefs <- unname(coef(est))
a <- coefs[1]
b <- coefs[2]

# Back-transform the fitted line to the original scale of y
fitted <- exp(a + b * x)

# On the original scale the fit is a curve, so it is drawn with lines()
# rather than abline()
plot(y ~ x)
lines(x = x, y = fitted, col = "red")
Thanks for your kind attention