Ps Project
Ps Project
strings = “”)
# Install 'psych' only when it is not already available, so that re-running
# the script does not re-download the package each time.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)

# NOTE(review): the data set is stored in an object called `data.frame`, the
# same name as the base constructor. The name is kept because the rest of the
# script refers to it.
View(data.frame)

# Map of missing vs. observed cells, drawn before any rows are dropped.
library(Amelia)
missmap(data.frame, col = c("black", "grey"))

# Keep only the six columns used below, then drop every row containing an NA.
library(dplyr)
data.frame <- select(data.frame, Survived, Pclass, Age, Sex, SibSp, Parch)
data.frame <- na.omit(data.frame)
> str(data.frame)
'data.frame': 891 obs. of 6 variables:
$ Survived: int 0 1 1 1 0 0 0 0 1 1 ...
$ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
$ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
$ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
$ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
$ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
To convert them into categorical variables (or factors), use the factor() function.
# Recode the outcome as an unordered factor; its distinct values become the
# levels.
data.frame$Survived <- factor(data.frame$Survived)

# Pclass becomes an ordered factor. The levels are listed as 3, 2, 1, so the
# resulting order is 3 < 2 < 1. The `ordered` argument is spelled out in full
# instead of relying on partial matching of `order`.
data.frame$Pclass <- factor(
  data.frame$Pclass,
  levels = c(3, 2, 1),
  ordered = TRUE
)
Data visualization
Correlation plot
library(GGally)

# Correlation plot of the data set, with the coefficient printed in each cell.
ggcorr(
  data.frame,
  nbreaks = 6,
  label = TRUE,
  label_size = 3,
  color = "grey50"
)
Survived count
library(ggplot2)

# Bar chart of the number of rows per Survived value, with each count printed
# just above its bar. `after_stat(count)` replaces the deprecated
# `stat(count)` for referring to the computed count.
ggplot(data.frame, aes(x = Survived)) +
  geom_bar(width = 0.5, fill = "coral") +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  theme_classic()
Survival by Pclass
     predicted
        0    1
  0   113   19  | (TN) (FP)
  1    18   65  | (FN) (TP)