R Programming
Its advantages include vectorized operations and a large collection of packages
Packages are bundles of code that add new functions to R
CRAN: Comprehensive R Archive Network
Download packages from https://cran.r-project.org/
Or use pacman (package management)
pacman::p_load(pacman, psych)
Ctrl+Enter to run a code line
Ctrl + L to clear console (or do from Edit section)
dev.off() clears plots if any
detach("package:datasets", unload = TRUE)
After this, the datasets package is no longer available in your current R
session. Any functions or datasets it provides are no longer accessible
unless you reattach the package with library(datasets)
?plot , to get help for a command or dataset like ?mtcars
plot(iris$Sepal.Length, iris$Sepal.Width, main = "Scatter Plot of Sepal Length
vs Sepal Width", xlab = "Sepal Length", ylab = "Sepal Width", pch = 19, col =
"blue") # iris is a built-in dataset
plot(cos, 0, 2*pi) , plots cosine function from 0 to 2pi
Using barplot()
# Need a table with frequencies for each category
cylinders <- table(mtcars$cyl) # Creating frequency table
barplot(cylinders)
hist(iris$Sepal.Length)
For the iris data, this creates a histogram with sepal length on the x-axis
par(mfrow = c(3, 1))
par function: This function sets or queries graphical parameters. By using
par(mfrow = c(3, 1)), you're telling R to change the layout of the plotting
window.
mfrow parameter: This parameter is used to specify the number of rows and
columns for the plotting area.
c(3, 1): This specifies the dimensions of the plotting layout. 3 indicates the
number of rows, and 1 indicates the number of columns.
# Restore graphic parameter
par(mfrow=c(1, 1))
Elaborating hist:
hist(iris$Petal.Width [iris$Species == "setosa"],
xlim = c(0, 3),
breaks = 9,
main = "Petal Width for Setosa",
xlab = "",
col = "red")
# iris$Petal.Width[iris$Species == "setosa"] selects the Petal.Width values
# from the iris dataset where the Species is setosa
# breaks = 9 suggests about 9 bins (intervals) for the histogram. R treats a
# single number as a suggestion and may pick a different count so that the
# break points fall on round values.
Defining a scatter plot with elaboration
plot(mtcars$wt, mtcars$mpg,
pch = 19, # Solid circle
cex = 1.5, # Make 150% size
col = "#cc0000", # Red
main = "MPG as a Function of Weight of Cars",
xlab = "Weight (in 1000 pounds)",
ylab = "MPG")
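# Add a normal distribution (draw a density-scale plot of lynx first, e.g.
# hist(lynx, freq = FALSE), because add = TRUE needs an existing plot)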
curve(dnorm(x, mean = mean(lynx), sd = sd(lynx)),
col = "thistle4", # Color of curve
lwd = 2, # Line width of 2 pixels
add = TRUE) # Superimpose on previous graph
More superimpose commands
lines(density(lynx), col = "blue", lwd = 2)
rug(lynx, lwd = 2, col = "gray")
Getting info
summary(iris$Species) # get names and number of each, categorical
summary(iris$Sepal.Length) # Quantitative variable
summary(iris) # Entire data frame
# Opens help in R Viewer for a package. FALSE is spelled out because F is an
# ordinary variable that can be reassigned.
p_help(psych, web = FALSE)
Another info function:
describe(iris$Sepal.Length) # One quantitative variable
describe(iris) # Entire data frame
hist(iris$Petal.Length[iris$Species == "virginica" &
iris$Petal.Length < 5.5],
main = "Petal Length: Short Virginica")
# Leave rows or columns blank to select all
i.setosa <- iris[iris$Species == "setosa", ]
Notice that the index after the comma is left blank, which selects all columns
Assigning a number to a variable and seeing its type
# Scalar: a plain number is stored as a double
n1 <- 15
typeof(n1)
# Vector: c() combines values into a one-dimensional object
v1 <- c(1, 2, 3, 4, 5)
v1
is.vector(v1)
# Matrix: TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned
m1 <- matrix(c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE),
             nrow = 2, byrow = TRUE)
m1 # byrow = TRUE fills the matrix row by row instead of column by column
# Array: give data, then dimensions (rows, columns, tables)
a1 <- array(1:24, dim = c(4, 3, 2))
a1
# Vectors of the same length can be combined column-wise
vNumeric <- c(1, 2, 3)
vCharacter <- c("a", "b", "c")
vLogical <- c(TRUE, FALSE, TRUE)
# cbind() builds a matrix, and a matrix holds a single type, so every value
# is coerced to character here
dfa <- cbind(vNumeric, vCharacter, vLogical)
dfa # Matrix of one data type
# Build the data frame straight from the vectors. Converting the cbind()
# matrix with as.data.frame() would carry over the character coercion and
# give three character columns instead of three different types.
df <- data.frame(vNumeric, vCharacter, vLogical, stringsAsFactors = FALSE)
df # Makes a data frame with three different data types
list2 <- list(o1, o2, o3, list1)
(coerce1 <- c(1, "b", TRUE)) # Parentheses around an assignment also print it
# c() coerces all elements to the least restrictive data type, here character
(coerce7 <- as.data.frame(matrix(1:9, nrow= 3)))
is.data.frame(coerce7)
(coerce5 <- as.numeric(c("1", "2", "3")))
typeof(coerce5)
str(df1) # gives structure info about a data frame
Print those to understand factors:
x4 <- c(1:3)
df4 <- cbind.data.frame(x4, y)
df4$x4 <- factor(df4$x4,
levels = c(1, 2, 3),
labels = c("macOS", "Windows", "Linux"))
df4
typeof(df4$x4)
str(df4)
Ordered factors
x5 <- c(1:3)
df5 <- cbind.data.frame(x5, y)
(df5$x5 <- ordered(df5$x5,
levels = c(3, 1, 2),
labels = c("No", "Maybe", "Yes")))
df5
typeof(df5$x5)
str(df5)
# Creating a factor
colors <- factor(c("red", "blue", "green", "blue", "red"))
# Display the factor
print(colors)
# Check the levels of the factor
print(levels(colors))
# Summarize the factor, shows with their numbers of occurrences
summary(colors)
Alt + - for quick <-
# Specify change in values
(x4 <- seq(30, 0, by = -3))
x6 <- scan() # After running this command, go to console
# Hit return after each number
# Hit return twice to stop
# Repeats set
x8 <- rep(c(TRUE, FALSE), 5)
# Repeats items in set
x9 <- rep(c(TRUE, FALSE), each = 5)