Model Visualisation: (With Ggplot2)
Model Visualisation: (With Ggplot2)
visualisation
(with ggplot2)
Hadley Wickham
Rice University
624 ●
0.2
● ●
● ●
●
● ● ●● ●
●
● ● ● ●
● ● ●
● ● ● ●●
● ● ●●
0.1
● ● ●
● ● ● ● ● ● ● ●●
●
● ● ● ● ● ●
● ●●
●
●● ● ● ●●● ● ●● ●
● ● ●● ●
● ●●
●● ● ● ●
●●
●●● ● ●● ● ●●
● ● ●
●
● ● ● ● ●
● ●
●● ●●●●● ● ●
●●● ● ●
●● ●
● ●● ●● ● ● ● ●
● ● ●● ● ●●● ● ●●● ● ●● ● ● ● ●
Residuals
● ● ●●●● ●● ● ●●● ● ●● ● ●●
● ●
●● ●●● ●●●
● ● ●
●● ●●● ●●●● ●● ● ●●
●
● ●● ●●●● ● ● ●●●●●
●●
●●● ● ●●
●● ● ● ● ●
●●●●● ●●
● ●●
●●●●● ●
●
● ●● ●●● ● ● ●
●
0.0
●● ●● ●
●●●●
● ● ● ●●●●
●●
● ●●●●
● ●
●●● ●
● ●●● ● ● ● ●
● ●●●●● ●●● ●● ● ●● ●
● ●
●● ●● ●
●● ● ●● ●● ●●
● ● ●
●
●
● ● ●●● ●● ●●
●●●●●●●● ● ●
● ●●●●●●● ●
●● ●●● ● ●
●●●●● ●
●●● ● ●●
● ● ● ●
●● ● ● ●●●●● ●● ● ● ● ●●●●●
● ●●●
●
●
● ● ●●
● ●
● ●
●●●● ●
●●
● ● ● ● ● ● ●●
● ●●● ●
●●
● ●
● ● ● ●● ● ● ●● ●
● ●
●
● ●
●●
●
●
●●●● ●●
● ●●●
●
●●
●●
●●●●●●●●● ● ●●●●●
●
● ● ● ●● ● ● ● ● ● ●●
●● ●
● ●
● ● ●
● ● ● ● ●● ● ● ●
●● ● ●● ●
●● ●●● ● ●●● ●●● ●●● ● ●● ● ●●●● ● ●
●●
● ●
● ● ●
−0.1
● ● ● ●
● ● ● ● ● ●
● ● ● ● ●
●
● ● ● ●● ●● ● ●
● ● ● ●
●
●
●
−0.2
●
133 ●
● 574
−0.3
Fitted values
lm(log10(sales) ~ city * ns(date, 3) + factor(month))
Monday, 13 July 2009
# File src/library/stats/R/plot.lm.R show[which] <- TRUE
# Part of the R package, http://www.R-project.org r <- residuals(x)
# yh <- predict(x) # != fitted() for glm
# This program is free software; you can redistribute it and/or modify w <- weights(x)
# it under the terms of the GNU General Public License as published by if(!is.null(w)) { # drop obs with zero wt: PR#6640
# the Free Software Foundation; either version 2 of the License, or wind <- w != 0
# (at your option) any later version. r <- r[wind]
# yh <- yh[wind]
# This program is distributed in the hope that it will be useful, w <- w[wind]
# but WITHOUT ANY WARRANTY; without even the implied warranty of labels.id <- labels.id[wind]
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the }
# GNU General Public License for more details. n <- length(r)
# if (any(show[2L:6L])) {
# A copy of the GNU General Public License is available at s <- if (inherits(x, "rlm")) x$s
# http://www.r-project.org/Licenses/ else if(isGlm) sqrt(summary(x)$dispersion)
else sqrt(deviance(x)/df.residual(x))
plot.lm <- hii <- lm.influence(x, do.coef = FALSE)$hat
function (x, which = c(1L:3,5), ## was which = 1L:4, if (any(show[4L:6L])) {
caption = list("Residuals vs Fitted", "Normal Q-Q", cook <- if (isGlm) cooks.distance(x)
"Scale-Location", "Cook's distance", else cooks.distance(x, sd = s, res = r)
"Residuals vs Leverage", }
expression("Cook's dist vs Leverage " * h[ii] / (1 - h[ii]))), }
panel = if(add.smooth) panel.smooth else points, if (any(show[2L:3L])) {
sub.caption = NULL, main = "", ylab23 <- if(isGlm) "Std. deviance resid." else "Standardized residuals"
ask = prod(par("mfcol")) < length(which) && dev.interactive(), ..., r.w <- if (is.null(w)) r else sqrt(w) * r
id.n = 3, labels.id = names(residuals(x)), cex.id = 0.75, ## NB: rs is already NaN if r=0, hii=1
qqline = TRUE, cook.levels = c(0.5, 1.0), rs <- dropInf( r.w/(s * sqrt(1 - hii)), hii )
add.smooth = getOption("add.smooth"), }
label.pos = c(4,2), cex.caption = 1)
{ if (any(show[5L:6L])) { # using 'leverages'
dropInf <- function(x, h) { r.hat <- range(hii, na.rm = TRUE) # though should never have NA
if(any(isInf <- h >= 1.0)) { isConst.hat <- all(r.hat == 0) ||
warning("Not plotting observations with leverage one:\n ", diff(r.hat) < 1e-10 * mean(hii, na.rm = TRUE)
paste(which(isInf), collapse=", "), }
call.=FALSE) if (any(show[c(1L, 3L)]))
x[isInf] <- NaN l.fit <- if (isGlm) "Predicted values" else "Fitted values"
} if (is.null(id.n))
x id.n <- 0
} else {
id.n <- as.integer(id.n)
if (!inherits(x, "lm")) if(id.n < 0L || id.n > n)
stop("use only with \"lm\" objects") stop(gettextf("'id.n' must be in {1,..,%d}", n), domain = NA)
if(!is.numeric(which) || any(which < 1) || any(which > 6)) }
stop("'which' must be in 1L:6") if(id.n > 0L) { ## label the largest residuals
isGlm <- inherits(x, "glm") if(is.null(labels.id))
show <- rep(FALSE, 6) labels.id <- paste(1L:n)
Hard to understand.
Hard to extend.
Locked into set of pre-specified graphics.
Of no use to other graphics packages.
data
}
Note use of . prefix to
avoid name clasehes
Monday, 13 July 2009
plot.lm(mod,
Residuals vs Fitted
which = 1)
0.3
624 ●
0.2
● ●
● ●
●
● ● ●● ●
●
● ● ● ●
● ● ●
● ● ● ●●
● ● ●●
0.1
● ● ●
● ● ● ● ● ● ● ●●
●
● ● ● ● ● ●
● ●●
●
●● ● ● ●●● ● ●● ●
● ● ●● ●
● ●●
●● ● ● ●
●●
●●● ● ●● ● ●●
● ● ●
●
● ● ● ● ●
● ●
●● ●●●●● ● ●
●●● ● ●
●● ●
● ●● ●● ● ● ● ●
● ● ●● ● ●●● ● ●●● ● ●● ● ● ● ●
Residuals
● ● ●●●● ●● ● ●●● ● ●● ● ●●
● ●
●● ●●● ●●●
● ● ●
●● ●●● ●●●● ●● ● ●●
●
● ●● ●●●● ● ● ●●●●●
●●
●●● ● ●●
●● ● ● ● ●
●●●●● ●●
● ●●
●●●●● ●
●
● ●● ●●● ● ● ●
●
0.0
●● ●● ●
●●●●
● ● ● ●●●●
●●
● ●●●●
● ●
●●● ●
● ●●● ● ● ● ●
● ●●●●● ●●● ●● ● ●● ●
● ●
●● ●● ●
●● ● ●● ●● ●●
● ● ●
●
●
● ● ●●● ●● ●●
●●●●●●●● ● ●
● ●●●●●●● ●
●● ●●● ● ●
●●●●● ●
●●● ● ●●
● ● ● ●
●● ● ● ●●●●● ●● ● ● ● ●●●●●
● ●●●
●
●
● ● ●●
● ●
● ●
●●●● ●
●●
● ● ● ● ● ● ●●
● ●●● ●
●●
● ●
● ● ● ●● ● ● ●● ●
● ●
●
● ●
●●
●
●
●●●● ●●
● ●●●
●
●●
●●
●●●●●●●●● ● ●●●●●
●
● ● ● ●● ● ● ● ● ● ●●
●● ●
● ●
● ● ●
● ● ● ● ●● ● ● ●
●● ● ●● ●
●● ●●● ● ●●● ●●● ●●● ● ●● ● ●●●● ● ●
●●
● ●
● ● ●
−0.1
● ● ● ●
● ● ● ● ● ●
● ● ● ● ●
●
● ● ● ●● ●● ● ●
● ● ● ●
●
●
●
−0.2
●
133 ●
● 574
−0.3
Fitted values
lm(log10(sales) ~ city * ns(date, 3) + factor(month))
Monday, 13 July 2009
ggplot(mod, aes(.fitted, .resid)) +
geom_hline(yintercept = 0) +
geom_point() +
geom_smooth(se = F)
0.2 ●
●
● ●
●
●
● ● ●
● ● ●
● ● ●
● ● ●
● ● ●
● ● ● ● ●
● ●●
0.1 ● ●● ● ●
● ● ● ●● ●●
● ● ● ●●
● ● ● ● ● ● ●
● ● ● ●
●
● ●
● ●●● ● ● ● ● ●
● ●
● ● ● ●
●
●
●●●●●●●
●
●● ●
●●● ●
●● ●● ● ● ●● ●
● ●
● ● ●●●
●● ●
●
● ●
● ●●● ● ● ● ● ● ●● ● ● ● ● ●● ● ● ●
● ● ●● ● ● ● ● ●
●● ● ● ● ● ●
● ● ● ● ●●● ● ● ●● ●● ● ●●
● ●●● ●●● ●●● ●●●●
● ● ●●
● ●● ●● ●
● ●● ●● ● ● ●
●
● ● ●● ● ●● ● ●
●● ●● ● ● ● ● ● ●●● ● ●
●● ● ●●●●
●● ●●
●● ● ●
● ●
●● ● ●
●● ● ●● ●
●
● ●
● ●●
●
●● ●●●●
●● ●
●
●
● ● ●
●● ●● ● ●● ● ● ●● ● ●● ● ●● ● ● ●
0.0 ●
● ● ●● ●● ●● ● ● ●● ● ● ● ●● ●
● ● ● ●
● ● ● ●● ● ●
.resid
● ●
● ● ● ● ● ● ● ●●
●●● ● ●● ●
●● ● ●
●●
● ● ● ● ● ●●● ● ● ●●
●
● ● ● ● ●●●● ●●●● ●●●●●●● ●
●●● ●
●●●
●● ●● ●● ●
●● ● ●● ● ● ●
●● ● ●●●
● ● ●● ●●● ● ● ●● ●
●●
● ●●● ● ●● ●●● ● ● ●● ● ●
● ● ● ● ● ●● ● ● ●●● ●
● ●● ●●●
●● ● ●● ● ● ● ● ●●
● ● ● ● ●●
● ●● ●●●● ●
● ●●●
●● ●
● ● ●●
● ● ●●●●●●●
●
●●● ● ●●
● ●
● ●
● ● ● ●● ● ● ● ●●
● ● ● ● ● ● ● ● ●●●● ● ● ● ● ● ●
●
● ● ● ●● ● ● ● ●● ● ●
● ● ●
● ● ●● ● ● ●●
●
● ●
●
● ● ● ●
● ● ● ● ●
● ●
● ● ● ● ●
−0.1 ● ● ● ●
●
● ●
● ● ●● ●
● ●
● ●●
● ● ●
●
●
●
−0.2 ●
0.2 ●
●
● ●
●
●
● ● ●
● ● ●
● ● ●
● ● ●
● ● ●
● ● ● ● ●
● ●●
0.1 ● ●● ● ●
● ● ● ●● ●●
● ● ● ●●
● ● ● ● ● ● ●
● ● ● ●
●
● ●
● ●●● ● ● ● ● ●
● ●
● ● ● ●
●
●
●●●●●●●
●
●● ●
●●● ●
●● ●● ● ● ●● ●
● ●
● ● ●●●
●● ●
●
● ●
● ●●● ● ● ● ● ● ●● ● ● ● ● ●● ● ● ●
● ● ●● ● ● ● ● ●
●● ● ● ● ● ●
● ● ● ● ●●● ● ● ●● ●● ● ●●
● ●●● ●●● ●●● ●●●●
● ● ●●
● ●● ●● ●
● ●● ●● ● ● ●
●
● ● ●● ● ●● ● ●
●● ●● ● ● ● ● ● ●●● ● ●
●● ● ●●●●
●● ●●
●● ● ●
● ●
●● ● ●
●● ● ●● ●
●
● ●
● ●●
●
●● ●●●●
●● ●
●
●
● ● ●
●● ●● ● ●● ● ● ●● ● ●● ● ●● ● ● ●
0.0 ●
● ● ●● ●● ●● ● ● ●● ● ● ● ●● ●
● ● ● ●
● ● ● ●● ● ●
.resid
● ●
● ● ● ● ● ● ● ●●
●●● ● ●● ●
●● ● ●
●●
● ● ● ● ● ●●● ● ● ●●
●
● ● ● ● ●●●● ●●●● ●●●●●●● ●
●●● ●
●●●
●● ●● ●● ●
●● ● ●● ● ● ●
●● ● ●●●
● ● ●● ●●● ● ● ●● ●
●●
● ●●● ● ●● ●●● ● ● ●● ● ●
● ● ● ● ● ●● ● ● ●●● ●
● ●● ●●●
●● ● ●● ● ● ● ● ●●
● ● ● ● ●●
● ●● ●●●● ●
● ●●●
●● ●
● ● ●●
● ● ●●●●●●●
●
●●● ● ●●
● ●
● ●
● ● ● ●● ● ● ● ●●
● ● ● ● ● ● ● ● ●●●● ● ● ● ● ● ●
●
● ● ● ●● ● ● ● ●● ● ●
● ● ●
● ● ●● ● ● ●●
●
● ●
●
● ● ● ●
● ● ● ● ●
● ●
● ● ● ● ●
−0.1 ● ● ● ●
●
● ●
● ● ●● ●
● ●
● ●●
● ● ●
●
●
●
−0.2 ●
● ● ● ● ●● ● ● ● ● ● ● ● ● ● ●●●●●
●●
● ●●
● ● ● ● ● ●● ● ● ● ●● ● ●
● ● ● ● ● ●
●●● ● ● ●●
● ●
● ● ●
● ● ● ● ●●● ● ● ●● ●●● ●
● ●
●● ●
● ●● ●● ● ●
● ● ● ● ● ● ●● ● ● ● ● ●● ● ● ●●●●
● ●●●● ●
● ●●●● ● ●● ●
● ● ● ●
● ●
●
● ●
●● ● ●
● ● ● ●●● ● ● ● ● ● ● ● ● ● ●●
● ● ● ● ● ●
● ● ●●
● ● ●
●●●
●●
● ●● ●
● ●
●●● ● ● ● ● ● ● ● ●●
● ● ● ●● ● ●
● ●● ● ● ● ● ● ● ●●● ● ● ● ● ●●
● ● ● ● ● ● ● ●
● ● ● ● ●
● ● ●● ●
● ● ● ● ● ● ●
● ● ● ●● ● ●
● ● ● ● ●
−0.1 ● ●
●
● ●
● ●
● ●●
● ● ● ●●
● ●
● ● ●
●
●
●
−0.2 ●
0.1
0.0
.resid
−0.1
−0.2
0.1
0.0
.resid
−0.1
−0.2
0.2
0.1
0.0
−0.1
−0.2
48,000 / 86,000
.resid
0.2
0.1
0.0
−0.1
−0.2
29,000 / 50,000
2000 2002 2004 2006 2008 2000 2002 2004 2006 2008 2000 2002 2004 2006 2008
date
Monday, 13 July 2009
ggplot(modf, aes(date, .resid)) +
geom_line(aes(group = city))
# Which = 4
mod$row <- rownames(mod)
ggplot(mod, aes(row, .cooksd)) +
geom_bar(stat = "identity")
# Which = 5
ggplot(mod, aes(.hat, .stdresid)) +
geom_vline(size = 2, colour = "white", xintercept = 0) +
geom_hline(size = 2, colour = "white", yintercept = 0) +
geom_point() +
geom_smooth(se = FALSE)
# Which = 6
ggplot(mod, aes(.hat, .cooksd, data = mod)) +
geom_vline(colour = NA) +
geom_abline(slope = seq(0, 3, by = 0.5), colour = "white") +
geom_smooth(se = FALSE) +
geom_point()