0% found this document useful (0 votes)
12 views14 pages

02 Pca

Uploaded by

Linh Pham
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views14 pages

02 Pca

Uploaded by

Linh Pham
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 14

BT_PCA

Pen Sokny

2024-11-02

#Import and summary data


library(readxl)
# Read cells A1:S31 of the "data" sheet, convert the result to a plain
# data.frame and drop its first column, all in one expression.
# NOTE(review): the dropped first column is presumably a sample identifier --
# confirm against the workbook.
data <- as.data.frame(
  read_excel(
    path = "~/Downloads/Data analysis/Prod_caract.xls",
    sheet = "data",
    range = "A1:S31"
  )
)[, -1]

# Per-column overview of the remaining columns.
summary(data)

## country origin water lipid

## Length:30 Length:30 Min. :59.88 Min. :


3.96
## Class :character Class :character 1st Qu.:61.59 1st Qu.:
9.64
## Mode :character Mode :character Median :63.15
Median :11.43
## Mean :63.30
Mean :10.83
## 3rd Qu.:64.26 3rd
Qu.:12.44
## Max. :67.38
Max. :15.36
## TVBN TMA salt phenol
## Min. :14.00 Min. : 0.000 Min. :1.600 Min. :0.2100
## 1st Qu.:16.00 1st Qu.: 0.000 1st Qu.:2.500 1st Qu.:0.4225
## Median :18.00 Median : 2.000 Median :2.900 Median :0.5600
## Mean :19.82 Mean : 2.925 Mean :2.895 Mean :0.6757
## 3rd Qu.:22.00 3rd Qu.: 4.750 3rd Qu.:3.100 3rd Qu.:0.8300
## Max. :33.00 Max. :11.000 Max. :4.500 Max. :2.0100
## pH total viable count lactic flora lactobacilli
## Min. :6.030 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:6.085 1st Qu.:4.700 1st Qu.:2.555 1st Qu.:0.000
## Median :6.135 Median :6.004 Median :4.525 Median :3.057
## Mean :6.137 Mean :5.439 Mean :3.634 Mean :2.559
## 3rd Qu.:6.188 3rd Qu.:7.154 3rd Qu.:5.539 3rd Qu.:4.808
## Max. :6.260 Max. :7.823 Max. :7.146 Max. :7.290
## brochothrix yeast enterobacteriaceae L*
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :45.84
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:1.750 1st Qu.:49.21
## Median :0.000 Median :2.349 Median :3.080 Median :52.30
## Mean :1.477 Mean :1.876 Mean :3.000 Mean :51.83
## 3rd Qu.:3.078 3rd Qu.:3.244 3rd Qu.:4.503 3rd Qu.:53.61
## Max. :6.290 Max. :5.700 Max. :6.130 Max. :60.32
## a* b*
## Min. :13.59 Min. :19.39
## 1st Qu.:27.03 1st Qu.:23.85
## Median :29.67 Median :26.18
## Mean :28.59 Mean :25.91
## 3rd Qu.:30.60 3rd Qu.:27.30
## Max. :35.14 Max. :37.78

PCA for all variables


library(FactoMineR)
library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at


https://goo.gl/ve3WBa

# PCA on all quantitative variables. Columns 1-2 (country, origin) are
# categorical, so they are passed as supplementary qualitative variables.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
data.pca <- PCA(data, quali.sup = 1:2, graph = FALSE)

# Scree plot of the second column of the eigenvalue table
# (percentage of variance carried by each component).
barplot(data.pca$eig[, 2])

# Individuals map, coloured by the `origin` column.
plot.PCA(data.pca, choix = "ind", habillage = "origin")

# Variables and categories most strongly linked to each dimension.
dimdesc(data.pca)

## $Dim.1
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## lactic flora 0.9027703 9.042095e-12
## total viable count 0.8608432 1.046240e-09
## lactobacilli 0.7850679 2.794787e-07
## enterobacteriaceae 0.7762683 4.620345e-07
## TMA 0.7642260 8.875014e-07
## TVBN 0.7421875 2.669421e-06
## brochothrix 0.7317450 4.332717e-06
## yeast 0.6773733 3.931341e-05
## salt -0.6282888 2.011051e-04
##
## Link between the variable and the categorical variable (1-way
anova)
## =============================================
## R2 p.value
## country 0.4700428 0.006510941
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## country=country_B 2.034676 0.02182296
## country=country_UK 1.371667 0.03851963
##
## $Dim.2
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## a* 0.8778082 1.890486e-10
## b* 0.8241727 2.177694e-08
## L* -0.7871225 2.476948e-07
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## origin=origin_N -0.891458 0.03418821
## country=country_DK -1.861880 0.02308395
##
## $Dim.3
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## water 0.7890429 2.210011e-07
## salt 0.3728937 4.240737e-02
## lipid -0.8780952 1.832645e-10

Nhận xét:
- Trục 1 giải thích 34,18% sự biến thiên của dữ liệu.
- Trục 2 giải thích 15,7% sự biến thiên của dữ liệu.

PCA - Chem
# PCA restricted to the chemical variables: columns 3-9 (water, lipid, TVBN,
# TMA, salt, phenol, pH) stay active, while columns 10-18 (microbial counts and
# L*, a*, b* colour values) are passed as supplementary quantitative variables.
# Columns 1-2 (country, origin) are supplementary qualitative variables.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
pca.chem <- PCA(data, quali.sup = 1:2, quanti.sup = 10:18, graph = FALSE)

# Scree plot of the percentage of variance per component.
barplot(pca.chem$eig[, 2])

# Individuals map, coloured by the `origin` column.
plot.PCA(pca.chem, choix = "ind", habillage = "origin")

# Variables and categories most strongly linked to each dimension.
dimdesc(pca.chem)

## $Dim.1
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## TMA 0.9578046 1.072504e-16
## TVBN 0.9197269 6.879550e-13
## total viable count 0.7195652 7.418192e-06
## lactic flora 0.6336632 1.704971e-04
## lactobacilli 0.4846553 6.643327e-03
## brochothrix 0.4781546 7.527008e-03
## enterobacteriaceae 0.4346824 1.637700e-02
## yeast 0.3696976 4.435172e-02
## salt -0.7323851 4.208666e-06
##
## Link between the variable and the categorical variable (1-way
anova)
## =============================================
## R2 p.value
## country 0.5745406 0.0006043146
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## country=country_B 2.318667 0.0005564809
## country=country_UK 1.092905 0.0376107608
##
## $Dim.2
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## water 0.8636571 8.004173e-10
## lipid -0.8973315 1.871617e-11
##
## $Dim.3
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## pH 0.984165 1.385763e-22
## enterobacteriaceae 0.387115 3.456551e-02

Nhận xét: Các điểm dữ liệu nằm rải rác, trộn lẫn vào nhau -> không thể phân
loại nếu đưa hết các biến vào PCA -> chia các biến thành 3 nhóm (hoá học,
vi sinh, vật lí).

### PCA - Micro
# PCA restricted to the microbiological variables: columns 10-15 (total viable
# count, lactic flora, lactobacilli, brochothrix, yeast, enterobacteriaceae)
# stay active, while the chemical columns 3-9 and the colour columns 16-18 are
# passed as supplementary quantitative variables.
# Columns 1-2 (country, origin) are supplementary qualitative variables.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
pca.micro <- PCA(
  data,
  quali.sup = 1:2,
  quanti.sup = c(3:9, 16:18),
  graph = FALSE
)

# Scree plot of the percentage of variance per component.
barplot(pca.micro$eig[, 2])

# Individuals map, coloured by the `origin` column.
plot.PCA(pca.micro, choix = "ind", habillage = "origin")

# Variables and categories most strongly linked to each dimension.
dimdesc(pca.micro)

## $Dim.1
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## lactic flora 0.9221833 4.521678e-13
## lactobacilli 0.8341387 1.027065e-08
## enterobacteriaceae 0.8310142 1.306738e-08
## total viable count 0.8150255 4.170489e-08
## yeast 0.7575185 1.256023e-06
## brochothrix 0.7483912 1.980195e-06
## TMA 0.5932639 5.495767e-04
## TVBN 0.5773611 8.360344e-04
## salt -0.5270499 2.766896e-03
##
## $Dim.2
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## brochothrix 0.4374740 0.015624203
## lactobacilli -0.3891338 0.033554042
## yeast -0.5511110 0.001597768
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## country=country_UK 1.145814 0.007609671
##
## $Dim.3
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## brochothrix 0.4580063 0.01092346
## phenol 0.3675248 0.04571374
## total viable count -0.3973865 0.02966524
##
## Link between the variable and the categorical variable (1-way
anova)
## =============================================
## R2 p.value
## origin 0.2265879 0.03115647
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## country=country_I 1.3425501 0.025678628
## origin=origin_S 0.2913993 0.028215215
## origin=origin_N -0.4160643 0.008719431

PCA - Phys
# PCA restricted to the physical (colour) variables: columns 16-18 (L*, a*, b*)
# stay active, while columns 3-15 (chemical and microbiological measurements)
# are passed as supplementary quantitative variables.
# Columns 1-2 (country, origin) are supplementary qualitative variables.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
pca.phys <- PCA(data, quali.sup = 1:2, quanti.sup = 3:15, graph = FALSE)

# Scree plot of the percentage of variance per component.
barplot(pca.phys$eig[, 2])

# Individuals map, coloured by the `origin` column.
plot.PCA(pca.phys, choix = "ind", habillage = "origin")

## Warning: ggrepel: 6 unlabeled data points (too many overlaps).


Consider
## increasing max.overlaps
# Print the eigenvalue table of the colour-only PCA.
pca.phys[["eig"]]

## eigenvalue percentage of variance cumulative percentage of


variance
## comp 1 2.1990913 73.303042
73.30304
## comp 2 0.6068076 20.226920
93.52996
## comp 3 0.1941011 6.470038
100.00000

# Describe each dimension of the colour-only PCA by the variables and
# categories linked to it (correlations, R2, estimates and p-values).
dimdesc(pca.phys)

## $Dim.1
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## a* 0.9336250 5.241821e-14
## b* 0.8658556 6.465978e-10
## L* -0.7600854 1.101153e-06
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## country=country_DK -1.525172 0.03228349
##
## $Dim.2
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## L* 0.6406679 0.0001368427
## b* 0.4241641 0.0194890313
##
## Link between the variable and the categorical variable (1-way
anova)
## =============================================
## R2 p.value
## country 0.3722767 0.03717507
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## country=country_I -1.549898 0.009215841
##
## $Dim.3
##
## Link between the variable and the continuous variables (R-square)
##
======================================================================
===========
## correlation p.value
## brochothrix -0.4365214 0.015877769
## lactobacilli -0.4683110 0.009053576
##
## Link between the variable and the categorical variable (1-way
anova)
## =============================================
## R2 p.value
## country 0.4082417 0.02050408
## origin 0.2383054 0.02535371
##
## Link between variable and the categories of the categorical
variables
## ================================================================
## Estimate p.value
## origin=origin_N 0.2338344 0.01211358
## country=country_DK 0.5992204 0.04576309
## country=country_I -0.8225709 0.03094860
## origin=origin_S -0.2502612 0.01045153

You might also like