11.visualisasi Data
11.visualisasi Data
1
R Base Graphs
Grafik-grafik Dasar R
2
Fungsi-fungsi dasar R untuk membuat beberapa tipe plot
3
Argumen-argumen untuk menyesuaikan plot
▪ pch: mengubah bentuk titik. Nilai yang diizinkan dari 1 sampai 25.
▪ cex: mengubah ukuran titik. Contoh: cex = 0.8.
▪ col: mengubah warna titik. Contoh: col = “blue”.
▪ frame: logical value. frame = FALSE removes the plot panel border frame.
▪ main, xlab, ylab. menentukan judul utama dan label sumbu x/y secara berturut-turut
▪ las: untuk teks sumbu x vertical gunakan las = 2.
4
Data mtcars
▪ mtcars: Motor Trend Car Road Tests
▪ Sumber data: Henderson and Velleman (1981), Building multiple regression models
interactively. Biometrics, 37, 391–411.
▪ Deskripsi: data diekstraksi dari majalah Motor Trend AS 1974, dan terdiri dari konsumsi bahan bakar dan
10 aspek desain dan kinerja mobil dari 32 mobil (model 1973-74).
▪ Data terdiri dari 32 pengamatan dan 11 variabel:
[1] mpg Miles/(US) gallon
[2] cyl Number of cylinders
[3] disp Displacement (cu.in.)
[4] hp Gross horsepower
[5] drat Rear axle ratio
[6] wt Weight (1000 lbs)
[7] qsec 1/4 mile time
[8] vs Engine (0 = V-shaped, 1 = straight)
[9] am Transmission (0 = automatic, 1 = manual)
[10] gear Number of forward gears
[11] carb Number of carburetors
5
Panggil data mtcars
> # panggil data mtcars
> data("mtcars")
> # struktur data mtcars
> str(mtcars)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
> # Cetak 6 baris pertama
> head(mtcars, n=6)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
6
Membuat plot dasar dengan fungsi plot()
7
Menyimpan plot dalam file pdf
8
Menyimpan plot dalam file jpeg
9
Plot pencar dengan garis regresi linear sederhana
> library(car)
> scatterplot(mpg ~ wt, data = mtcars,
smooth = FALSE, grid = FALSE, regLine=list(method=lm, lty=1, lwd=2, col="green"))
> scatterplot(mpg ~ wt, data = mtcars, xlab = "Weight",ylab="Miles per gallon",
smooth = FALSE, grid = FALSE, regLine=list(method=lm, lty=1, lwd=2, col="green"))
11
Plot pencar dengan garis regresi berdasarkan grup (“cyl”)
12
Plot pencar dengan label
pos a position specifier for the text. If specified this
overrides any adj value given. Values
of 1, 2, 3 and 4, respectively indicate positions
below, to the left of, above and to the right of the
specified (x,y) coordinates.
13
Matriks plot pencar dengan fungsi pairs()
> pairs(mtcars)
14
Matriks plot pencar untuk beberapa variabel
> pairs(mtcars[,c("mpg","disp","wt")], pch = 19)
15
Boxplot
> #Boxplot untuk satu variabel
> boxplot(mtcars$mpg,ylab="mpg")
> #Boxplot berdasarkan grup (cyl) dengan frame
dihapus
> boxplot(mpg ~ cyl, data = mtcars, frame =
FALSE,xlab="cyl",ylab="mpg")
> #Boxplot horizontal
> boxplot(mpg ~ cyl, data = mtcars, frame =
FALSE, horizontal =
TRUE,xlab="cyl",ylab="mpg")
> #Boxplot dengan bentuk notch
> boxplot(mpg ~ cyl, data = mtcars, frame =
FALSE, notch = TRUE,xlab="cyl",ylab="mpg")
16
> boxplot(mpg ~ cyl, data = mtcars, frame
= FALSE,border =
"steelblue",xlab="cyl",ylab="mpg")
17
Box plot dengan beberapa grup
> boxplot(mpg ~ am*cyl, data = mtcars,
col = c("white", "steelblue"), frame = FALSE,xlab="am.cyl",ylab="mpg")
18
Plot garis
19
Plot untuk beberapa garis
20
Stripchart dengan fungsi stripchart()
▪ Fungsi stripchart():
stripchart(x, data = NULL, method = "overplot", jitter = 0.1)
▪ Keterangan:
▪ x: the data from which the plots are to be produced. Allowed values are one or a
list of numeric vector, each corresponding to a component plot.
▪ data: a data.frame (or list) from which the variables in x should be taken.
▪ method: the method to be used to separate coincident points. Allowed values are
one of “overplot”, “jitter” or “stack”.
▪ jitter: when method = “jitter” is used, jitter gives the amount of jittering applied.
21
Stripchart horisontal
22
Stripchart vertikal
>#plot vertical dengan metode jitter
> stripchart(mpg ~ cyl, data = mtcars,
pch = 19, frame = FALSE, vertical =
TRUE,method = "jitter",
xlab="cyl",ylab="mpg")
23
Stripchart dengan beda warna dan bentuk titik
24
Plot bar dengan nilai frekuensi
> counts <- table(mtcars$gear)
> bp <- barplot(counts, main="Car Distribution",
xlab="Number of Gears", ylab ="Frequency",ylim=c(0,18))
> text(bp,counts+1, as.character(counts))
25
Pie chart dari banyaknya mobil berdasarkan cyl
> mtcars$cyl <- factor(mtcars$cyl)
> counts <- table(mtcars$cyl)
> pie(counts, labels = levels(mtcars$cyl), radius = 1)
26
Pie chart dengan frekuensi
> counts <- table(mtcars$cyl)
> labels <- c("cyl4", "cyl6", "cyl8")
> pct <- as.numeric(round(counts/sum(counts)*100))
> # menambahkan persen ke labels
> lbls <- paste(labels, pct)
> lbls <- paste(lbls,"%",sep="")
> pie(counts,labels = lbls, col=c("red","green","blue"),main="cylinder")
27
Pie chart 3D
> library(plotrix)
> counts <- table(mtcars$cyl)
> labels <- c("cyl4", "cyl6", "cyl8")
> pct <- as.numeric(round(counts/sum(counts)*100))
> lbls <- paste(labels, pct)
> lbls <- paste(lbls,"%",sep="")
> pie3D(counts,labels = lbls, col=c("red","green","blue"))
28
Pie chart 3D
> library(plotrix)
> counts <- table(mtcars$cyl)
> labels <- c("cyl4", "cyl6", "cyl8")
> pct <- as.numeric(round(counts/sum(counts)*100))
> lbls <- paste(labels, pct)
> lbls <- paste(lbls,"%",sep="")
> pie3D(counts,labels = lbls, col=c("red","green","blue"),explode=0.1)
29
Histogram
▪ Histogram dapat dibuat dengan fungsi hist(). Aturan Sturges biasanya digunakan
untuk mendefinisikan titik-titik break antara sel-sel histogram
▪ Fungsi hist():
hist(x, breaks = "Sturges")
▪ Keterangan:
▪ x: a numeric vector
▪ breaks: breakpoints between histogram cells.
30
Histogram dari mpg
> hist(mtcars$mpg,breaks="Sturges",xlab="mpg",main=NULL)
31
Histogram
menentukan warna dan banyaknya break
33
Plot densitas
> plot(dens, frame = FALSE, col = "steelblue", main = "Density plot of mpg")
> # mengisi plot densitas
> polygon(dens, col = "steelblue")
34
Dotchart
▪ Fungsi dotchart() digunakan untuk menggambar plot titik.
dotchart(x, labels = NULL, groups = NULL, gcolor = par("fg"),
color = par("fg"))
▪ Keterangan
▪ x: numeric vector or matrix
▪ labels: a vector of labels for each point.
▪ groups: a grouping variable indicating how the elements of x are grouped.
▪ gcolor: color to be used for group labels and values.
▪ color: the color(s) to be used for points and labels.
35
Dotchart mpg
37
Plot rata-rata grup
▪ Fungsi plotmeans() dalam paket gplots digunakan untuk memperoleh plot rata-rata
grup.
> plotmeans(mpg ~ cyl, data = mtcars, frame=FALSE)
38
Plot rata-rata grup
▪ Menambahkan label rata-rata
(mean.labels = TRUE)
▪ Menghilangkan koneksi garis
(connect = FALSE)
> plotmeans(mpg ~ cyl, data = mtcars, frame = FALSE, mean.labels = TRUE, connect = FALSE)
39
Paket Lattice
40
Fungsi-fungsi dalam paket lattice
41
Scatterplot dengan fungsi xyplot()
> library(lattice)
> xyplot(mpg ~ wt, data = mtcars)
42
Scatterplot
menambahkan warna berdasarkan grup
43
Scatterplot
menunjukkan titik-titik (“p”) grid (“g”) dan garis smoothing
mengubah nama sumbu x (xlab) dan sumbu y (ylab)
44
Multiple panels by group: y ~ x | group
45
3D scatter plot dengan fungsi cloud()
46
3D scatterplot dengan warna berdasarkan grup
47
Basic plot: box plot, violin plot, dot plot, strip plot
48
Plot dengan beberapa grup
> mtcars$gear <- factor(mtcars$gear)
> # box plot
> bwplot(mpg ~ cyl | gear, data = mtcars,
layout = c(3, 1),
xlab = "cyl", ylab = "mpg")
49
Plot densitas dan histogram
> # histogram
> histogram(~ mpg, data = mtcars,
breaks = "Sturges")
50
Plot densitas berdasarkan grup
> densityplot(~ mpg, groups = cyl, data = mtcars,
plot.points = FALSE, auto.key =TRUE)
51
Paket ggplot2
Box plot, violin plot, dot plot, stripchart, density plot, histogram plot,
scatter plot, bar plot, line plot, pie chart, Q-Q plot
52
ggplot2
▪ ggplot2 adalah paket R yang sangat berguna dan fleksibel untuk memproduksi
grafik-grafik yang elegan. Paket ini diimplementasikan oleh Hadley Wickham.
▪ Konsep dari ggplot2 adalah membagi plot dalam tiga bagian fundamental
berbeda: Plot = data + Aesthetics + Geometry.
▪ Komponen-komponen prinsip dari setiap plot dapat didefinisikan sebagai berikut:
▪ data adalah data frame
▪ Aesthetics digunakan untuk mengidentifikasikan variabel x dan y. Komponen ini
juga dapat untuk mengontrol warna, ukuran, dan bentuk titik-titik, panjang bar,
dll.
▪ Geometry mendefinisikan tipe grafik (histogram, box plot, line plot, density
plot, dot plot, ….)
▪ Ada dua fungsi utama dalam paket ggplot2 yaitu: fungsi qplot() dan ggplot().
▪ qplot() berarti quick plot, digunakan untuk memproduksi plot sederhana secara
mudah
▪ ggplot() lebih fleksibel dan luas penggunaannya daripada qplot(); dengan ggplot()
plot dibangun secara bertahap, lapis demi lapis.
53
Fungsi qplot()
▪ Fungsi qplot() adalah
qplot(x, y=NULL, data, geom="auto", xlim = c(NA, NA), ylim =c(NA, NA))
▪ Keterangan:
▪ x : x values
▪ y : y values (optional)
▪ data : data frame to use (optional).
▪ geom : Character vector specifying geom to use. Defaults to “point” if x and y are
specified, and “histogram” if only x is specified.
▪ xlim, ylim: x and y axis limits
▪ main: Plot title
▪ xlab, ylab: x and y axis labels
54
Plot pencar dasar
> library(ggplot2)
> # plot pencar
> qplot(x = wt,y = mpg, data = mtcars)
> # menambahkan garis
> qplot(x = wt, y = mpg, data = mtcars, geom=c("point", "line"))
55
Plot pencar dengan garis yang diperhalus (smoothed line)
> # garis yang diperhalus
> qplot(wt, mpg, data = mtcars, geom = c("point", "smooth"))
> # garis yang diperhalus berdasarkan grup
> qplot(wt, mpg, data = mtcars, color = factor(cyl),geom=c("point", "smooth"))
56
Plot pencar
mengubah warna pada plot pencar
58
Plot pencar dengan teks
60
Box plot, dot plot, & violin plot
61
Histogram plot
62
Density plot
63
ggplot2: box plot
▪ Fungsi geom_boxplot() digunakan untuk membuat suatu plot kotak.
geom_boxplot(outlier.colour="black", outlier.shape=16,
outlier.size=2, notch=FALSE)
▪ Keterangan:
▪ outlier.colour, outlier.shape, outlier.size : The color, the shape and the size for
outlying points
▪ notch : logical value. If TRUE, make a notched box plot. The notch displays a
confidence interval around the median which is normally based on the median +/-
1.58*IQR/sqrt(n). Notches are used to compare groups; if the notches of two
boxes do not overlap, this is a strong evidence that the medians differ.
64
Box plot
> library(ggplot2)
> # box plot dasar
> p <- ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_boxplot()
> p
> # merotasi box plot
> p + coord_flip()
65
Box plot
66
Box plot
mengubah garis box plot berdasarkan grup
Mengubah secara manual warna garis box plot
menggunakan fungsi:
▪ scale_color_manual() : to use custom colors
▪ scale_color_brewer() : to use color palettes
from RColorBrewer package
▪ scale_color_grey() : to use grey color palettes
67
Box plot
mengubah warna dalam box plot
68
Box plot
mengubah posisi legend
> p + theme(legend.position="top")
> p + theme(legend.position="bottom")
> p + theme(legend.position="none") # menghapus legend
69
Box plot dengan beberapa grup
70
Violin plot
> library(ggplot2)
> # Basic violin plot
> p <- ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_violin()
> p
> # rotate the violin plot
> p + coord_flip()
> # Set trim argument to FALSE
> ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_violin(trim=FALSE)
71
Violin plot
menambahkan titik rata-rata dan median
72
Violin plot
menambahkan median dan kuartil
p + geom_boxplot(width=0.1)
73
Violin plot
menambahkan rata-rata dan simpangan baku
75
Violin plot with dots
76
Violin plot colors by groups
77
Violin plot fill colors
78
Violin plot with multiple groups
79
Dot plot
# dotplot
library(ggplot2)
# Basic dot plot
p<-ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_dotplot(binaxis='y', stackdir='center')
p
# Change dotsize and stack ratio
ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_dotplot(binaxis='y', stackdir='center',
stackratio=1.5, dotsize=1.2)
# Rotate the dot plot
p + coord_flip()
80
Dot plot
Add mean and median points
81
Dot plot with box plot and violin plot
82
Dot plot colors by groups
83
# Use custom color palettes
p+scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))
# Use brewer color palettes
p+scale_fill_brewer(palette="Dark2")
# Use grey scale
p + scale_fill_grey() + theme_classic()
84
Dot plot
Change the legend position
p + theme(legend.position="top")
p + theme(legend.position="bottom")
p + theme(legend.position="none") # Remove legend
85
Dot plot with multiple groups
86
Customized dot plots
library(ggplot2)
# Basic stripchart
ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_jitter()
# Change the position # 0.2 : degree of jitter in x direction
p<-ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_jitter(position=position_jitter(0.2))
p
# Rotate the stripchart
p + coord_flip()
88
Stripchart
Change point shapes and size
89
Stripchart
Add mean and median points
90
Stripchart with box plot and violin plot
91
Stripchart
colors by groups
92
Stripchart
change the legend position
p + theme(legend.position="top")
p + theme(legend.position="bottom")
p + theme(legend.position="none")# Remove legend
93
Stripchart
Change stripchart colors and add box plots
94
Stripchart
customized
# Basic stripchart
ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_boxplot()+
geom_jitter(position=position_jitter(0.2))+
labs(title="Plot of mpg by cyl",x="cyl", y = "mpg")+
theme_classic()
# Change color/shape by groups
p <- ggplot(mtcars, aes(x=cyl, y=mpg, color=cyl, shape=cyl)) +
geom_jitter(position=position_jitter(0.2))+
labs(title="Plot of mpg by cyl",x="cyl", y = "mpg")
p + theme_classic()
95
Density plots: basic
library(ggplot2)
# Basic density
p <- ggplot(mtcars, aes(x=mpg)) +
geom_density()
p
# Add mean line
p+ geom_vline(aes(xintercept=mean(mpg)),
color="blue", linetype="dashed", size=1)
96
Density plot line types and colors
98
# Use custom color palettes
p+scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))
# Use brewer color palettes
p+scale_color_brewer(palette="Dark2")
# Use grey scale
p + scale_color_grey() + theme_classic()
99
Density plot
Change fill colors
101
Density plot
Change the legend position
p + theme(legend.position="top")
p + theme(legend.position="bottom")
p + theme(legend.position="none") # Remove legend
102
Combine histogram and density plots
p<-ggplot(mtcars, aes(x=mpg))+
geom_density()+facet_grid(cyl ~ .)
p
# Add mean lines
p+geom_vline(data=mu, aes(xintercept=grp.mean, color="red"),
linetype="dashed")
104
Customized density plots
# Basic density
ggplot(mtcars, aes(x=mpg, fill=cyl)) +
geom_density(fill="gray")+
geom_vline(aes(xintercept=mean(mpg)), color="blue",
linetype="dashed")+
labs(title="mpg density curve",x="mpg", y = "density")+
theme_classic()
# Change line colors by groups
p<- ggplot(mtcars, aes(x=mpg, color=cyl)) +
geom_density()+
geom_vline(data=mu, aes(xintercept=grp.mean, color=cyl),
linetype="dashed")+
labs(title="mpg density curve",x="mpg", y = "density")
p
105
Scatter plot: basic
library(ggplot2)
# Basic scatter plot
ggplot(mtcars, aes(x=wt, y=mpg)) + geom_point()
# Change the point size, and shape
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point(size=2, shape=23)
106
# Change the point size
mtcars$gear <- factor(mtcars$gear)
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point(aes(size=gear))
# With text
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point() +
geom_text(label=rownames(mtcars))
107
Scatter plot
Add regression lines
108
Scatter plot
Change the appearance of points and lines
109
Scatter plots with multiple groups
Change the point color/shape/size automatically
111
Bar plot
# Barplot
library(ggplot2)
# Basic barplot
p<-ggplot(data=mtcars, aes(x=cyl, y=mpg)) +
geom_bar(stat="identity")
p
112
Bar plot
change barplot fill colors by groups
113
Bar plot with multiple groups
114
Pie chart
# pie chart
library(ggplot2)
# Barplot
bp<- ggplot(mtcars, aes(x="", y=mpg, fill=cyl))+
geom_bar(width = 1, stat = "identity")
bp
pie <- bp + coord_polar("y", start=0)
pie
115
Pie chart
Change the pie chart fill colors
library(ggplot2)
# Basic histogram
ggplot(mtcars, aes(x=mpg)) + geom_histogram()
# Change the width of bins
ggplot(mtcars, aes(x=mpg)) +
geom_histogram(binwidth=1)
# Change colors
p<-ggplot(mtcars, aes(x=mpg)) +
geom_histogram(color="black", fill="white")
p
117
Histogram with density plot
ggplot(mtcars, aes(x=mpg)) +
geom_histogram(aes(y=..density..),
colour="black", fill="white")+
geom_density(alpha=.2, fill="#FF6666")
118
Histogram plot colors by groups
119
Customized histogram plots
120
Line plot
library(ggplot2)
# Basic line plot with points
ggplot(data=df, aes(x=cyl, y=grp.mean, group=1)) +
geom_line()+
geom_point()+
labs(x = "cyl", y = "mpg")
# Change the line type
ggplot(data=df, aes(x=cyl, y=grp.mean, group=1)) +
geom_line(linetype ="dashed")+
geom_point()+
labs(x = "cyl", y = "mpg")
121
Line plot with multiple groups
> library(plyr)
> df2 <- ddply(mtcars, c("cyl","am"), summarise, grp.mean=mean(mpg))
> df2 <- df2[order(df2$am), ]
> head(df2)
cyl am grp.mean
1 4 0 22.90000
3 6 0 19.12500
5 8 0 15.05000
2 4 1 28.07500
4 6 1 20.56667
6 8 1 15.40000
> ggplot(df2, aes(x=cyl, y=grp.mean, group=am)) +
geom_line(aes(linetype=am, color=am))+
geom_point(aes(color=am))+
theme(legend.position="top")+
labs(x = "cyl", y = "mpg")
122