PCA in R
PCA in R
1 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
BiocManager::install('PCAtools')
devtools::install_github('kevinblighe/PCAtools')
library(PCAtools)
2 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
library(Biobase)
library(GEOquery)
# Check that sample names match exactly, and in the same order, between the
# phenotype metadata (rows) and the expression matrix (columns).
# identical() is used instead of all(a == b): `==` recycles the shorter vector
# and propagates NA, so a length mismatch or a missing name could otherwise
# yield TRUE or NA instead of a clear FALSE.
identical(colnames(mat), rownames(metadata))
3 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
## [1] TRUE
# Bi-plot of `p` with no optional arguments (all defaults).
# NOTE(review): `p` is not created anywhere in this extract; presumably it is
# the PCA result built in a part of the vignette that is missing here.
biplot(p)
4 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Pairs plot of `p` with no optional arguments (all defaults).
# NOTE(review): `p` is defined outside this extract; confirm before running.
pairsplot(p)
5 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Loadings plot of `p`; only the label size is overridden (labSize = 3).
# NOTE(review): `p` is defined outside this extract; confirm before running.
plotloadings(p, labSize = 3)
## -- variables retained:
6 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Eigencor plot of `p` against eight metadata variables; every other
# argument is left at its default.
eigencorplot(
  p,
  metavars = c(
    "Study", "Age", "Distant.RFS", "ER",
    "GGI", "Grade", "Size", "Time.RFS"
  )
)
7 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Print the top-left 5 x 5 corner of the `rotated` element of `p`.
p[["rotated"]][1:5, 1:5]
# Print the top-left 5 x 5 corner of the `loadings` element of `p`.
p[["loadings"]][1:5, 1:5]
8 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Attach the hgu133a annotation package without printing its messages.
# library() is used rather than require(): require() only returns FALSE when
# the package is missing, which would let the mapIds() call below fail with a
# less helpful "object not found" error.
suppressMessages(library(hgu133a.db))

# Look up a SYMBOL for every row name of the loadings table, treating the row
# names as PROBEID keys of hgu133a.db.
newnames <- mapIds(
  hgu133a.db,
  keys = rownames(p$loadings),
  column = "SYMBOL",
  keytype = "PROBEID"
)
## [1] 11
## PC8
## 8
9 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
library(ggplot2)
# Scree plot of the components selected by getComponents(p, 1:20), with
# vertical lines drawn at horn$n and elbow.
# NOTE(review): `horn` and `elbow` are not defined in this extract, and the
# call ends with `+`: whatever was added to the plot is missing here, so the
# statement is incomplete as it stands.
screeplot(p,
components = getComponents(p, 1:20),
vline = c(horn$n, elbow)) +
10 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
## PC27
## 27
# Bi-plot whose point labels are the Age values from p$metadata with the
# suffix " años", coloured by 'ER', with zero lines on both axes and the
# legend on the right.
biplot(
  p,
  lab = paste0(p$metadata$Age, " años"),
  colby = "ER",
  hline = 0,
  vline = 0,
  legendPosition = "right"
)
11 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Bi-plot coloured by 'ER' with a fixed colour key and filled encircling.
biplot(
  p,
  # colour mapping and legend title
  colby = "ER",
  colkey = c("ER+" = "forestgreen", "ER-" = "purple"),
  colLegendTitle = "ER-\nstatus",
  # encircling: on, filled
  encircle = TRUE,
  encircleFill = TRUE,
  # reference lines
  hline = 0,
  vline = c(-25, 0, 25),
  # legend layout
  legendPosition = "top",
  legendLabSize = 16,
  legendIconSize = 8
)
12 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Bi-plot coloured by 'ER' with unfilled encircling: fill off, alpha 1,
# line size 5.
biplot(
  p,
  # colour mapping and legend title
  colby = "ER",
  colkey = c("ER+" = "forestgreen", "ER-" = "purple"),
  colLegendTitle = "ER-\nstatus",
  # encircling: on, outline only
  encircle = TRUE,
  encircleFill = FALSE,
  encircleAlpha = 1,
  encircleLineSize = 5,
  # reference lines
  hline = 0,
  vline = c(-25, 0, 25),
  # legend layout
  legendPosition = "top",
  legendLabSize = 16,
  legendIconSize = 8
)
13 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
14 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Bi-plot coloured by 'ER' with filled ellipses (ellipseConf = 0.95) and
# fixed axis limits.
biplot(
  p,
  # colour mapping
  colby = "ER",
  colkey = c("ER+" = "forestgreen", "ER-" = "purple"),
  # ellipse settings
  ellipse = TRUE,
  ellipseConf = 0.95,
  ellipseFill = TRUE,
  ellipseAlpha = 0.25,
  ellipseLineSize = 1,
  # axis limits and reference lines
  xlim = c(-125, 125),
  ylim = c(-50, 80),
  hline = 0,
  vline = c(-25, 0, 25),
  # legend layout
  legendPosition = "top",
  legendLabSize = 16,
  legendIconSize = 8
)
15 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Bi-plot coloured by 'ER' with filled ellipses that use their own fill key
# (yellow / pink) and a line size of 0.
biplot(
  p,
  # colour mapping for the points
  colby = "ER",
  colkey = c("ER+" = "forestgreen", "ER-" = "purple"),
  # ellipse settings, including a separate fill colour key
  ellipse = TRUE,
  ellipseConf = 0.95,
  ellipseFill = TRUE,
  ellipseAlpha = 0.25,
  ellipseLineSize = 0,
  ellipseFillKey = c("ER+" = "yellow", "ER-" = "pink"),
  # axis limits and reference lines
  xlim = c(-125, 125),
  ylim = c(-50, 80),
  hline = 0,
  vline = c(-25, 0, 25),
  # legend layout
  legendPosition = "top",
  legendLabSize = 16,
  legendIconSize = 8
)
16 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
17 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Bi-plot coloured by 'ER' and shaped by 'Grade', with three reference lines
# on each axis, no connectors, and a title, subtitle and caption.
biplot(
  p,
  # colour and shape mappings
  colby = "ER",
  colkey = c("ER+" = "forestgreen", "ER-" = "purple"),
  shape = "Grade",
  shapekey = c("Grade 1" = 15, "Grade 2" = 17, "Grade 3" = 8),
  # reference lines
  hline = c(-25, 0, 25),
  vline = c(-25, 0, 25),
  # legend layout
  legendPosition = "top",
  legendLabSize = 13,
  legendIconSize = 8,
  drawConnectors = FALSE,
  # plot text
  title = "PCA bi-plot",
  subtitle = "PC1 versus PC2",
  caption = "27 PCs ≈ 80%"
)
# Bi-plot with lab = NULL, a different colour key, per-line vertical line
# types, grid lines off, and the legend on the left.
biplot(
  p,
  lab = NULL,
  # colour and shape mappings
  colby = "ER",
  colkey = c("ER+" = "royalblue", "ER-" = "red3"),
  shape = "Grade",
  shapekey = c("Grade 1" = 15, "Grade 2" = 17, "Grade 3" = 8),
  pointSize = 5,
  # reference lines; one line type per vertical line
  hline = c(-25, 0, 25),
  vline = c(-25, 0, 25),
  vlineType = c("dotdash", "solid", "dashed"),
  # background grid switched off
  gridlines.major = FALSE,
  gridlines.minor = FALSE,
  # legend layout
  legendPosition = "left",
  legendLabSize = 14,
  legendIconSize = 8,
  drawConnectors = FALSE,
  # plot text
  title = "PCA bi-plot",
  subtitle = "PC1 versus PC2",
  caption = "27 PCs ≈ 80%"
)
# Bi-plot with showLoadings = TRUE (arrow length factor 1.5, names in size 4
# and colour 'red4'), lab = NULL, and the colour/shape keys set below.
biplot(
  p,
  # loadings overlay
  showLoadings = TRUE,
  lengthLoadingsArrowsFactor = 1.5,
  sizeLoadingsNames = 4,
  colLoadingsNames = "red4",
  # points: no labels, colour by ER, shape by Grade
  lab = NULL,
  colby = "ER",
  colkey = c("ER+" = "royalblue", "ER-" = "red3"),
  shape = "Grade",
  shapekey = c("Grade 1" = 15, "Grade 2" = 17, "Grade 3" = 8),
  pointSize = 5,
  # reference lines; one line type per vertical line
  hline = 0,
  vline = c(-25, 0, 25),
  vlineType = c("dotdash", "solid", "dashed"),
  # background grid switched off
  gridlines.major = FALSE,
  gridlines.minor = FALSE,
  # legend layout
  legendPosition = "left",
  legendLabSize = 14,
  legendIconSize = 8,
  drawConnectors = FALSE,
  # plot text
  title = "PCA bi-plot",
  subtitle = "PC1 versus PC2",
  caption = "27 PCs ≈ 80%"
)
18 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Bi-plot of PC2 (x) against PC3 (y) with lab = NULL, coloured by 'ESR1' and
# shaped by 'ER', zero lines on both axes, legend on the right.
# NOTE(review): the call ends with `+`: whatever was added to the plot is
# missing from this extract, so the statement is incomplete as it stands.
biplot(p,
x = 'PC2', y = 'PC3',
lab = NULL,
colby = 'ESR1',
shape = 'ER',
hline = 0, vline = 0,
legendPosition = 'right') +
19 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
20 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Pairs plot of components 1 to 10 with triangle = TRUE, small points
# coloured by 'Grade', axes and grid lines off, and slightly negative
# margin gaps.
pairsplot(
  p,
  components = getComponents(p, 1:10),
  # layout
  triangle = TRUE,
  trianglelabSize = 12,
  plotaxes = FALSE,
  margingaps = unit(c(-0.01, -0.01, -0.01, -0.01), "cm"),
  # reference lines and grid
  hline = 0,
  vline = 0,
  gridlines.major = FALSE,
  gridlines.minor = FALSE,
  # points
  pointSize = 0.4,
  colby = "Grade",
  title = "Pairs plot"
)
21 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
22 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Pairs plot of components 4, 33, 11 and 1 (in that order) with
# triangle = FALSE, points coloured by 'ER', and axes drawn.
pairsplot(
  p,
  components = getComponents(p, c(4, 33, 11, 1)),
  # layout
  triangle = FALSE,
  plotaxes = TRUE,
  axisLabSize = 14,
  margingaps = unit(c(0.1, 0.1, 0.1, 0.1), "cm"),
  # reference lines and grid
  hline = 0,
  vline = 0,
  gridlines.major = FALSE,
  gridlines.minor = FALSE,
  # points
  pointSize = 0.8,
  colby = "ER",
  # title
  title = "Pairs plot",
  titleLabSize = 22
)
23 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Loadings plot with rangeRetain = 0.01 (captioned 'Top 1% variables'),
# shape 24, a three-colour scale, and connectors drawn.
plotloadings(
  p,
  rangeRetain = 0.01,
  # labels and connectors
  labSize = 4,
  drawConnectors = TRUE,
  # point appearance
  shape = 24,
  col = c("limegreen", "black", "red3"),
  # plot text
  title = "Loadings plot",
  subtitle = "PC1, PC2, PC3, PC4, PC5",
  caption = "Top 1% variables"
)
## -- variables retained:
## POGZ, CDC42BPA, CXCL11, ESR1, SFRP1, EEF1A2, IGKC, GABRP, CD24, PDZK1
24 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Loadings plot for components 4, 33, 11 and 1 with rangeRetain = 0.1
# (captioned 'Top 10% variables'), absolute = FALSE, and no connectors.
plotloadings(
  p,
  components = getComponents(p, c(4, 33, 11, 1)),
  rangeRetain = 0.1,
  absolute = FALSE,
  # labels and connectors
  labSize = 4,
  drawConnectors = FALSE,
  # point appearance
  shape = 23,
  shapeSizeRange = c(1, 16),
  col = c("white", "pink"),
  # plot text
  title = "Loadings plot",
  subtitle = "Misc PCs",
  caption = "Top 10% variables"
)
## -- variables retained:
25 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
26 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Eigencor plot of components 1 to 27 against eight metadata variables, with
# plotRsquared = FALSE and a blue-black-red colour scale.
eigencorplot(
  p,
  components = getComponents(p, 1:27),
  metavars = c(
    "Study", "Age", "Distant.RFS", "ER",
    "GGI", "Grade", "Size", "Time.RFS"
  ),
  plotRsquared = FALSE,
  scale = TRUE,
  # colour scale and frame
  col = c("darkblue", "blue2", "black", "red2", "darkred"),
  colFrame = "white",
  # text drawn for each correlation value
  cexCorval = 0.7,
  colCorval = "white",
  fontCorval = 2,
  # axis labels
  posLab = "bottomleft",
  rotLabX = 45,
  # colour key
  posColKey = "top",
  cexLabColKey = 1.5,
  main = "PC1-27 clinical correlations"
)
27 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Eigencor plot of components 1 to horn$n against eight metadata variables,
# with plotRsquared = TRUE, Pearson correlation on pairwise-complete
# observations, and 'BH' multiple-testing correction.
# NOTE(review): `horn` is not defined in this extract; confirm it is
# available before running.
eigencorplot(
  p,
  # seq_len() rather than 1:horn$n: if horn$n were 0, 1:0 would expand to
  # c(1, 0) and request components that were never meant to be selected.
  components = getComponents(p, seq_len(horn$n)),
  metavars = c(
    "Study", "Age", "Distant.RFS", "ER", "GGI",
    "Grade", "Size", "Time.RFS"
  ),
  col = c("white", "cornsilk1", "gold", "forestgreen", "darkgreen"),
  cexCorval = 1.2,
  fontCorval = 2,
  posLab = "all",
  rotLabX = 45,
  scale = TRUE,
  # bquote() builds a plotmath expression so that r^2 is typeset as a
  # superscript in the title.
  main = bquote(Principal ~ component ~ Pearson ~ r^2 ~ clinical ~ correlates),
  plotRsquared = TRUE,
  corFUN = "pearson",
  corUSE = "pairwise.complete.obs",
  corMultipleTestCorrection = "BH",
  # five symbols for the five intervals delimited by the six cut points
  signifSymbols = c("****", "***", "**", "*", ""),
  signifCutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1)
)
28 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
29 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
fontCorval = 2,
posLab = 'all',
rotLabX = 45,
scale = TRUE,
main = "PC clinical correlates",
cexMain = 1.5,
plotRsquared = FALSE,
corFUN = 'pearson',
corUSE = 'pairwise.complete.obs',
signifSymbols = c('****', '***', '**', '*', ''),
signifCutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
returnPlot = FALSE)
library(cowplot)
library(ggplotify)
30 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
31 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
32 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
# Print the R version, platform, and attached/loaded package versions so the
# analysis environment is recorded alongside the results.
sessionInfo()
33 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
34 of 35 21-01-2021, 02:19 pm
PCAtools: everything Principal Component Analysis https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/d...
35 of 35 21-01-2021, 02:19 pm