Introduction to R
Introduction to R
Eduardo Martínez
2022-09-30
3 Control Structures
4 Functions
5 DataFrame
6 Graphs - ggplot2
Arithmetic in R
# Operator precedence: the power operator ^ binds tighter than + and -.
3+2-10^2
## [1] -95
# Parentheses force the subtraction to be evaluated before the power.
3+(2-10)^2
## [1] 67
sqrt(16)
## [1] 4
sqrt(2)
## [1] 1.414214
# Operator precedence: the power operator ^ binds tighter than + and -.
3+2-10^2
## [1] -95
# Parentheses force the subtraction to be evaluated before the power.
3+(2-10)^2
## [1] 67
sqrt(16)
## [1] 4
sqrt(2)
## [1] 1.414214
# Both `<-` and `=` assign a value to a name at the top level.
var1 <- 3
var1
## [1] 3
var2 = 4
var2
## [1] 4
# Sum of squares; ^ is R's power operator.
var1^2+var2^2
## [1] 25
sin(3.14)
## [1] 0.001592653
acos(-1)
## [1] 3.141593
exp(1)
## [1] 2.718282
log(2) # (ln)
## [1] 0.6931472
log10(2) # log base 10
## [1] 0.30103
log(16,base = 2)
## [1] 4
log(0)
## [1] -Inf
1/0
## [1] Inf
0/0
## [1] NaN
value1 = TRUE
value1
## [1] TRUE
class(value1)
## [1] "logical"
value2 = FALSE
## [1] FALSE
value1 || value2 #or
## [1] TRUE
!value1 #not
## [1] FALSE
4 >= 2
## [1] TRUE
var1 != var2 # useful for filtering
## [1] TRUE
value1 != value2
## [1] TRUE
text1
## [1] "character"
substring(text1, 1, 4)
## [1] "Hell"
substring(text1, 6, 10)
## [1] "o"
gsub("e","i", text1)
paste(text1,text2)
## [1] 8
vec1[7]
## [1] 9
vec1[10]
## [1] NA
length(vec1)
## [1] 7
vec1[length(vec1)]
## [1] 9
vec1[2:5]
## [1] 6 7 5 3
vec1[5:10]
## [1] 3 0 9 NA NA NA
vec1[c(2,4,6)]
## [1] 6 5 0
vec1[c(2,4,6,8)]
## [1] 6 5 0 NA
vec2 = c(5,3,8,2,10,0,1)
vec1 + vec2
vec1 + vec3
vec1 + vec4
vec1 - vec2
## [1] 3 3 -1 3 -7 0 8
vec1*vec2
## [1] 40 18 56 10 30 0 9
vec1 %*% vec2
## [,1]
## [1,] 163
vec1 / vec2
vec1 + vec3
vec1 + vec4
vec1 + vec5
vec1 + 3
## [1] 11 9 10 8 6 3 12
vec1 + c(3,3,3,3,3,3,3)
## [1] 11 9 10 8 6 3 12
vec5 + c(2,3)
## [1] 4 7 8 11 12 15 16 19
vec1 + c(2,3)
vec5 * c(0,1)
## [1] 0 4 0 8 0 12 0 16
# Element-wise square of every entry of vec1.
vec1^2
## [1] 64 36 49 25 9 0 81
# Element-wise power: each entry of vec1 raised to the matching entry of vec2.
vec1^vec2
sqrt(vec1)
min(vec1)
## [1] 0
max(vec1)
## [1] 9
sum(vec1)
## [1] 38
mean(vec1)
## [1] 5.428571
var(vec1)
## [1] 9.619048
sd(vec1)
## [1] 3.101459
summary(vec1)
## [1] 0 1 2 3 5 8 10
sort(vec2, decreasing = TRUE) # from largest to smallest
## [1] 10 8 5 3 2 1 0
sum(vec8)
## [1] 3
vec1 + vec8
## [1] 9 7 7 5 3 1 9
mean(vec8)
## [1] 0.4285714
is.na(vec4)
## [1] 2
sum(!is.na(vec4)) # counting not NA
## [1] 5
vec1 > 5
## [1] 4
!(vec1 > 5)
## [1] 3
is.na(vec4)
## [1] NA NA
vec4[!is.na(vec4)] # not NA values
## [1] 3 4 3 8 2
vec1 > 5
## [1] 8 6 7 9
vec1[vec1 < 8]
## [1] 6 7 5 3 0
## [1] 5 3 0
vec4[vec1 %% 2 == 0]
## [1] 3 NA 8
vec1[vec4 %% 2 == 0]
## [1] NA 7 NA 0 9
vec5[vec1 %% 2 == 0] # be careful
## [1] 2 4 12 16
vec6[vec1 %% 2 == 0] # be careful
## [1] 50 48 40 36 34 26 22 20
vec2[5]
## [1] 10
vec2[5] = 1
vec1[4] = 6
vec4[is.na(vec4)] = 0
## [1] 3 4 3 8 2
mean(vec4[!is.na(vec4)])
## [1] 4
vec4[is.na(vec4)] = 4
mean(vec4)
## [1] 4
## [1] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE
vec6[vec6 > 40] #filter
## [1] 50 48 46 44 42
vec6[vec6 > 40] = 40 #replace
vec5
## [1] 2 4 6 8 10 12 14 16
vec5[-2]
## [1] 2 6 8 10 12 14 16
vec5 = vec5[-2]
## [1] 2 6 14 16
vec5[-c(3,5)] # 3rd and 5th
## [1] 2 6 10 14 16
vec5[c(3,5)] = c(-100,100)
vec5
## [1] 2 6 -100 10 8 8 8
vec4[vec4 != 4]
## [1] 3 3 8 2
class(vec3)
## [1] "character"
as.numeric(vec3)
vec3 = as.numeric(vec3)
## [1] 23
var1 = "3"
var2 = "4"
# var1 + var2
as.numeric(var1) + as.numeric(var2)
## [1] 7
# as.logical
# as.character
# as.complex (rarely used)
m1 = c(1,2,4)
m2 = c(3,2,5)
# The values fill the matrix column by column: m1 becomes column 1, m2 column 2.
m = matrix(c(m1,m2), nrow = 3, ncol =2) # Change nrow and ncol
m
## [,1] [,2]
## [1,] 1 3
## [2,] 2 2
## [3,] 4 5
m3 = c(1,5,6)
## [,1] [,2]
## [1,] 1 3
## [2,] 2 2
## [3,] 4 5
m5 = cbind(m,m3) # column binding; rbind also exists
m5
## m3
## [1,] 1 3 1
## [2,] 2 2 5
## [3,] 4 5 6
## m3
## 1 3 1
## 2 2 5
## 4 5 6
## m3 1 5 6
## m3
## [1,] 1 3 1
## [2,] 2 2 5
## [3,] 4 5 6
m5[1:2, ] # rows 1 to 2, all columns
## m3
## [1,] 1 3 1
## [2,] 2 2 5
m5[,1:2] # columns 1 to 2, all rows
##
## [1,] 1 3
## [2,] 2 2
## [3,] 4 5
Eduardo Martínez Introduction to R 2022-09-30 59 / 121
Using c( ) to “jump”
m5
## m3
## [1,] 1 3 1
## [2,] 2 2 5
## [3,] 4 5 6
m5[c(1,3),]
## m3
## [1,] 1 3 1
## [2,] 4 5 6
m5[,c(1,3)]
## m3
## [1,] 1 1
## [2,] 2 5
## [3,] 4 6
Eduardo Martínez Introduction to R 2022-09-30 60 / 121
Linear algebra functions
m5
## m3
## [1,] 1 3 1
## [2,] 2 2 5
## [3,] 4 5 6
diag(m5)
## [1] 1 2 6
t(m5)
det(m5)
## [1] 13
dim(m)
## [1] 3 2
m6 = t(m5)
m5 + m6
## m3
## [1,] 2 5 5
## [2,] 5 4 10
## [3,] 5 10 12
m5 + 3 ## recycling: the scalar 3 is added to every element
## m3
## [1,] 4 6 4
## [2,] 5 5 8
## [3,] 7 8 9
## m3
## [1,] 4 6 4
## [2,] 5 5 8
## [3,] 7 8 9
m5 + 3 == m5 + matrix(3, nrow=3, ncol=3)
## m3
## [1,] TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE
m5 %*% m6
## m3
## [1,] 1 6 4
## [2,] 6 4 25
## [3,] 4 25 36
exp(m)
## [,1] [,2]
## [1,] 2.718282 20.085537
## [2,] 7.389056 7.389056
## [3,] 54.598150 148.413159
sqrt(m)
## [,1] [,2]
## [1,] 1.000000 1.732051
## [2,] 1.414214 1.414214
## [3,] 2.000000 2.236068
m7 = matrix(1:12,nrow = 4, ncol = 3)
m8 = rbind(m5,m6,m7)
Control Structures
## [1] "big"
## [1] "big"
# Work on vec1 under the name `list` (note: `list` is also the name of a
# base R function).
list = vec1
# variable i as a counter
for(i in 1:7){
# Print the i-th element; the range 1:7 is hard-coded.
print(list[i])
}
## [1] 8
## [1] 6
## [1] 7
## [1] 6
## [1] 3
## [1] 0
## [1] 9
# Print the standardized value of each element:
# (element - mean of list) / standard deviation of list.
for(i in 1:7){
print((list[i] - mean(list))/sd(list))
}
## [1] 0.7830416
## [1] 0.1381838
## [1] 0.4606127
## [1] 0.1381838
## [1] -0.8291029
## [1] -1.79639
## [1] 1.105471
# Standardize every element of `list` and store the results in standardlist.
# The mean and standard deviation do not change inside the loop, so they are
# computed once up front. The result vector is preallocated to its final
# length instead of being grown from c() on every iteration, and the loop
# follows the actual length of `list` rather than a hard-coded 7.
list_mean = mean(list)
list_sd = sd(list)
standardlist = numeric(length(list))
for(i in seq_along(list)){
standardlist[i]=(list[i] - list_mean)/list_sd
}
standardlist
# Visit every position of `list` without hard-coding its length.
# seq_along() is used instead of 1:length(list): for an empty vector the
# latter yields c(1, 0) and would run the body twice.
for(i in seq_along(list)){
print(list[i])
}
## [1] 8
## [1] 6
## [1] 7
## [1] 6
## [1] 3
## [1] 0
## [1] 9
# Visit only positions 1, 3, 5 and 7 by stepping the index by 2.
for(i in seq(1,7,by=2)){
print(list[i])
}
## [1] 8
## [1] 7
## [1] 3
## [1] 9
# Label each element of `list`: 'small' when it is below 30, 'big' otherwise.
# seq_along() is used instead of 1:length(list): for an empty vector the
# latter yields c(1, 0) and would run the body twice.
for(i in seq_along(list)){
if(list[i] < 30){
print('small')
}else{
print('big')
}
}
## [1] "small"
## [1] "small"
## [1] "small"
## [1] "small"
## [1] "small"
## [1] "small"
## [1] "small"
# Iterate over the values of `list` directly: here i is an element, not an index.
for(i in list){
print(i)
}
## [1] 8
## [1] 6
## [1] 7
## [1] 6
## [1] 3
## [1] 0
## [1] 9
# Print a message for every value of `list` that is greater than 5.
for(value in list){
if(value>5){
print("not valid")
}
}
# Report which values of `list` are greater than 5.
for(value in list){
if(value>5){
# paste() already inserts a single space between its arguments, so the
# literal must not start with one (that would print two spaces).
print(paste(value,"is not valid"))
}
}
for(i in 1:length(list)){
if(list[i] > 5){
print(paste("The value at position ",i,"is not valid becau
}
}
# Same traversal written as a while loop: the counter is managed by hand.
i = 1
while(i <= length(list)){
print(list[i])
# Advance the counter; without this the condition would never become FALSE.
i=i+1
}
## [1] 8
## [1] 6
## [1] 7
## [1] 6
## [1] 3
## [1] 0
## [1] 9
Functions
}
}
isbig(15)
## [1] "small"
isbig(150)
## [1] "big"
isbig(30)
## [1] "IDK"
# Tell whether a number is even.
# n: a single number (the `if` test needs a length-one condition).
# Returns TRUE when n leaves no remainder on division by 2, FALSE otherwise.
iseven = function(n){
  if(n %% 2 != 0){
    return(FALSE)
  }
  TRUE
}
iseven(15)
## [1] FALSE
iseven(258928)
## [1] TRUE
# iseven(vec1) # Wrong
isdivisible(15,4)
## [1] FALSE
isdivisible(21616431684,4)
## [1] TRUE
isdivisible2(15,4)
## [1] "no"
# Compare a value against the mean of a vector.
# list:  vector whose mean is the reference.
# value: single value to compare with that mean.
# Returns 'above average' when value is strictly greater than mean(list),
# and 'not above average' otherwise.
overaverage = function(list,value){
  is_above = value > mean(list)
  if(is_above){
    'above average'
  }else{
    'not above average'
  }
}
overaverage(vec6,40)
## [1] 2
## [1] "above average"
## [1] 6
## [1] "above average"
## [1] -100
## [1] "not above average"
## [1] 10
## [1] "above average"
## [1] 8
## [1] "above average"
## [1] 8
## [1] "above average"
## [1] 8
Eduardo Martínez Introduction to R 2022-09-30 95 / 121
Section 5
DataFrame
favs = read.csv("favorites.txt")
class(favs)
## [1] "data.frame"
head(favs) ## first 6 rows
## flavor number
## 1 pistachio 6
## 2 mint chocolate chip 7
## 3 vanilla 5
## 4 chocolate 10
## 5 strawberry 2
## 6 neopolitan 4
names(favs)
## [1] "character"
class(favs$number)
## [1] "integer"
## [1] "factor"
favs$flavor
favs[,1]
## flavor number
## 1 pistachio 6
## 2 mint chocolate chip 7
## 3 vanilla 5
names(favs)
names(favs)
str(favs)
Graphs - ggplot2
10.0
7.5
number
5.0
2.5
0.0
unique(mtcars$carb)
## [1] 4 1 2 3 6 8
table(mtcars$carb)
##
## 1 2 3 4 6 8
## 7 10 3 10 1 1
10.0
7.5
5.0
10.0
7.5
5.0
2.5
10.0
7.5
factor(carb)
1
2
5.0 3
4
6
8
2.5
cut(mtcars$mpg, 6)
10.0
7.5
5.0
2.5
10.0
7.5
5.0
2.5
0.06
0.04
0.02