Visualisation Des Données Avec R: F. Mhamdi
Visualisation Des Données Avec R: F. Mhamdi
Visualisation Des Données Avec R: F. Mhamdi
F. MHAMDI
20-03-2023
Plan
Data Frame :
dat <- data.frame(time = factor(c("Lunch","Dinner"),
levels=c("Lunch","Dinner")),
total_bill = c(14.89, 17.23))
dat
## time total_bill
## 1 Lunch 14.89
## 2 Dinner 17.23
ggplot2 (Exemple 1)
15
10
Total bill
ggplot2 (Exemple 2)
> library(reshape2)
> data(tips)
> head(tips)
> levels(tips$day)
> tips$day=factor(tips$day,levels=levels(tips$day)[c(4,1,2,3)])
75
50
count
25
## day mtip
## 1 Thur 2.771452
## 2 Fri 2.734737
## 3 Sat 2.993103
## 4 Sun 3.255132
ggplot2 (Exemple 3)
ggplot(data=mtips, aes(x=day,y=mtip)) +
geom_bar(stat="identity",fill="red",alpha=.6)+
theme_bw()+xlab("Day")+
ylab("Average of tips")
ggplot2 (Exemple 4)
2
Average of tips
> library(plyr)
> mtips <- ddply(tips, "day", summarise, mtip =
mean(tip),stip=sd(tip))
>
mtips$day=factor(mtips$day,levels=levels(mtips$day)[c(4,1,2,3)])
> mtips$lower=mtips$mtip-2*mtips$stip
> mtips$upper=mtips$mtip+2*mtips$stip
>
mtips$day=factor(mtips$day,levels=levels(mtips$day)[c(4,1,2,3)])
ggplot2 (Solution)
> mtips
> ggplot(mtips,aes(x=day,y=mtip,group=day))+
geom_errorbar(aes(ymin=lower,ymax=upper,width=.2))+
geom_point(size=3)+theme_bw()+xlab("Day")+
ylab("Average of tips")
ggplot2 (Solution)
4
Average of tips
> library(plyr)
> mtips <- ddply(tips, c("day","sex","smoker"), summarise, mtip
= mean(tip),stip=sd(tip))
>
mtips$day=factor(mtips$day,levels=levels(mtips$day)[c(4,1,2,3)])
> mtips$lower=mtips$mtip-2*mtips$stip
> mtips$upper=mtips$mtip+2*mtips$stip
>
mtips$day=factor(mtips$day,levels=levels(mtips$day)[c(4,1,2,3)])
ggplot2 (Solution)
> mtips
0
Average of tips
smoker
Thur Fri No
Yes
Autrement :
> ggplot(tips,aes(x=sex,y=tip,col=smoker,fill=smoker))+
geom_boxplot(position=pd,width=.2,alpha=.5)+theme_bw()+
xlab("Gender")+ylab("Tips")+facet_wrap(~day)
ggplot2 (Solution)
Thur Fri
10.0
7.5
5.0
2.5
smoker
Tips
Sat Sun No
10.0 Yes
7.5
5.0
2.5
> ggplot(tips,aes(x=day,y=tip,col=time,fill=time))+
geom_boxplot(alpha=.4)+theme_bw()+xlab("Tips")+ylab(" ")+
facet_grid(sex~smoker)+
ggtitle("Tips in term of Smoker x Gender")
ggplot2 (Solution)
Tips in term of Smoker x Gender
No Yes
10.0
7.5
Female
5.0
2.5
time
Dinner
10.0 Lunch
7.5
Male
5.0
2.5
Exemple 1
library(sjPlot)
data(efc)
class(efc)
plot_frq(efc$tot_sc_e)
sjPlot : Diagramme en bâtons
500
403
(44.4%)
400
300 278
(30.6%)
200
120
(13.2%)
100
62
(6.8%)
25
(2.8%)
11
(1.2%) 5 2 2
(0.5%) (0.2%) (0.2%)
0
0 1 2 3 4 5 6 7 9
Services for elderly
sjPlot : Diagramme en bâtons
Exemple 2 :
attr(efc$e42dep,"labels")
plot_frq(efc$e42dep)
sjPlot : Diagramme en bâtons
306 304
(34.0%) (33.7%)
300
225
(25.0%)
200
100
66
(7.3%)
0
independent slightly dependent moderately dependent severely dependent
elder's dependency
sjPlot : Diagramme en bâtons
Exemple 3
plot_frq(efc$e42dep,coord.flip = T,geom.size = .4)
304 (33.7%)
severely dependent
306 (34.0%)
moderately dependent
elder's dependency
225 (25.0%)
slightly dependent
66 (7.3%)
independent
sjPlot : Diagramme en bâtons
sjp.frq(efc$e42dep,show.prc = T,show.n = F)
34.0% 33.7%
300
25.0%
200
100
7.3%
0
independent slightly dependent moderately dependent severely dependent
elder's dependency
sjPlot : Tableau de contingence
Exemple 1
xtabs(~efc$e16sex+efc$e42dep)
## efc$e42dep
## efc$e16sex 1 2 3 4
## 1 23 70 109 93
## 2 43 154 197 211
sjPlot : Tableau de contingence
sjp.xtab(x = efc$e42dep, grp = efc$e16sex)
40%
37.0%
34.9%
34.0% 33.8%
32.6%
31.5%
25.4%
24.9%
23.7%
elder's gender
male
female
20% Total
7.8%
7.1% 7.3%
0%
independent slightly moderately severely
dependent dependent dependent
elder's dependency
sjPlot : Tableau de contingence
sjp.xtab(x = efc$e42dep, grp = efc$e16sex,show.n = F,show.total
= F, type="bar")
40%
37.0%
34.9%
32.6%
31.5%
25.4%
23.7%
elder's gender
male
female
20%
7.8%
7.1%
0%
independent slightly moderately severely
dependent dependent dependent
elder's dependency
sjPlot : Tableau de contingence
Autres représentations :
▶ Changer type="line"
▶ Ajouter l’option : bar.pos = "stack"
▶ Ajouter les deux options : bar.pos="stack",margin="row"
100%
80%
60%
40%
25.4%
20% 37.0%
31.5%
7.1% 23.7%
sjPlot : Tableau de contingence
80%
65.2% 64.4%
68.8% 69.4%
60%
elder's gender
male
female
40%
20%
34.9% 35.6%
31.2% 30.6%
0%
independent slightly moderately severely
dependent dependent dependent
elder's dependency
sjPlot : Stacked bar plot
never
does caregiving have negative
sometimes
effect on your physical 45.6% 38.5% 9.5% 6.5%
(n=900)
df <- data.frame(
question1 = as.factor(sample(1:4, 500, replace = TRUE)),
question2 = as.factor(sample(1:4, 500, replace = TRUE)),
question3 = as.factor(sample(1:4, 500, replace = TRUE)),
question4 = as.factor(sample(1:4, 500, replace = TRUE)),
question5 = as.factor(sample(1:4, 500, replace = TRUE))
)
sjPlot : Solution
head(df)
24.4 25.6
Question 1 (n=500)
24.4 25.6
25.2 27.8
Question 2 (n=500)
22.6 24.4
Strongly agree
25.6 22.2 Agree
Question 3 (n=500)
25.0 27.2 Disagree
Strongly disagree
25.4 23.2
Question 4 (n=500)
24.6 26.8
20.6 27.2
Question 5 (n=500)
25.4 26.8
100% 80% 60% 40% 20% 0% 20% 40% 60% 80% 100%
sjPlot : Solution
df <- data.frame(
question1 = as.factor(sample(1:4, 500, replace = TRUE,
prob=c(0.18,0.5,0.2,0.12))),
question2 = as.factor(sample(1:4, 500, replace = TRUE,
prob=c(0.32,0.18,0.28,0.22))),
question3 = as.factor(sample(1:4, 500, replace = TRUE,
prob=c(0.6,0.3,0.1,0.1))),
question4 = as.factor(sample(1:4, 500, replace = TRUE,
prob=c(0.4,0.4,0.15,0.05))),
question5 = as.factor(sample(1:4, 500, replace = TRUE,
prob=c(0.05,0.05,0.35,0.55)))
)
sjPlot : Solution
22.2 30.0
Question 2 (n=500)
14.2 33.6
Strongly agree
11.4 27.6 Agree
Question 3 (n=500)
9.4 51.6 Disagree
Strongly disagree
15.8
3.8 40.2 40.2
Question 4 (n=500)
3.6
Question 5 (n=500)
52.8 40.0 3.6
100% 80% 60% 40% 20% 0% 20% 40% 60% 80% 100%
Corrplot : représentation de la matrice de corrélation
qsec
gear
mpg
carb
disp
drat
am
cyl
hp
wt
vs
1
mpg 1.00 −0.85 −0.85 −0.78 0.68 −0.87 0.42 0.66 0.60 0.48 −0.55
0.8
cyl −0.85 1.00 0.90 0.83 −0.70 0.78 −0.59 −0.81 −0.52 −0.49 0.53
0.6
disp −0.85 0.90 1.00 0.79 −0.71 0.89 −0.43 −0.71 −0.59 −0.56 0.39
0.4
hp −0.78 0.83 0.79 1.00 −0.45 0.66 −0.71 −0.72 −0.24 −0.13 0.75
drat 0.68 −0.70 −0.71 −0.45 1.00 −0.71 0.09 0.44 0.71 0.70 −0.09 0.2
wt −0.87 0.78 0.89 0.66 −0.71 1.00 −0.17 −0.55 −0.69 −0.58 0.43 0
qsec 0.42 −0.59 −0.43 −0.71 0.09 −0.17 1.00 0.74 −0.23 −0.21 −0.66 −0.2
vs 0.66 −0.81 −0.71 −0.72 0.44 −0.55 0.74 1.00 0.17 0.21 −0.57
−0.4
am 0.60 −0.52 −0.59 −0.24 0.71 −0.69 −0.23 0.17 1.00 0.79 0.06
−0.6
gear 0.48 −0.49 −0.56 −0.13 0.70 −0.58 −0.21 0.21 0.79 1.00 0.27
−0.8
carb −0.55 0.53 0.39 0.75 −0.09 0.43 −0.66 −0.57 0.06 0.27 1.00
−1
Corrplot : représentation de la matrice de corrélation
qsec
gear
mpg
carb
disp
drat
am
cyl
hp
wt
vs
1
mpg
0.8
cyl
0.6
disp
0.4
hp
drat 0.2
wt 0
qsec −0.2
vs
−0.4
am
−0.6
gear
−0.8
carb
−1
Cartographie (consulter TP5)