0% found this document useful (0 votes)
4 views7 pages

R Programming Tutorial for Beginners (1)

This document is a comprehensive tutorial on R programming for beginners, covering data manipulation, graphics creation, data analysis, and advanced techniques. It includes practical examples and code snippets for creating vectors, data frames, and visualizations using both base R and the ggplot2 package. Additionally, it demonstrates statistical analysis methods such as linear regression and descriptive statistics.

Uploaded by

HARRISON SAEZ
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views7 pages

R Programming Tutorial for Beginners (1)

This document is a comprehensive tutorial on R programming for beginners, covering data manipulation, graphics creation, data analysis, and advanced techniques. It includes practical examples and code snippets for creating vectors, data frames, and visualizations using both base R and the ggplot2 package. Additionally, it demonstrates statistical analysis methods such as linear regression and descriptive statistics.

Uploaded by

HARRISON SAEZ
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

R Programming Tutorial for Beginners

###################################################
# #
# R Programming Tutorial for Beginners #
# Joachim Schork #
# #
###################################################

## Source: https://statisticsglobe.com/wp-content/uploads/2022/01/Introduction-to-the-R-Programming-Language-by-Statistics-Globe.txt

## Set the working directory used by the file export/import further below.
## NOTE(review): hard-coded Windows path; setwd() stops with an error if
## "C:/datos" cannot be entered -- confirm the path before running elsewhere.


setwd("C:/datos")

## Remove the objects left over from a previous session.
## NOTE(review): this only deletes objects listed by ls(); it does not detach
## packages or reset options, so it is not a full session reset.


rm(list = ls())

##### Data Manipulation in R #####

# Basic objects: vector, data frame and list -------------------------------

vec_1 <- c(1, 1, 5, 3, 1, 5)                     # Create numeric vector
vec_1                                            # Print vector object

data_1 <- data.frame(x1 = c(7, 2, 8, 3, 3, 7),   # Create data frame
                     x2 = c("x", "y", "x", "x", "x", "y"),
                     x3 = 11:16)
data_1                                           # Print data frame

list_1 <- list(1:5,                              # Create list of mixed objects
               vec_1,
               data_1)
list_1                                           # Print list

# Data classes ---------------------------------------------------------------

class(vec_1)                                     # Check class of vector elements

vec_2 <- c("a", "b", "a", "c")                   # Create character vector
vec_2                                            # Print character vector
class(vec_2)                                     # Check class of vector elements

vec_3 <- factor(c("gr1", "gr1", "gr2", "gr3", "gr2"))  # Create factor vector
vec_3                                            # Print factor vector
class(vec_3)                                     # Check class of vector elements

vec_4 <- as.character(vec_3)                     # Convert factor to character
vec_4                                            # Print updated vector
class(vec_4)                                     # Check class of updated vector

# Adding, removing and renaming columns and rows ----------------------------

data_2 <- data_1                                 # Create duplicate of data frame
data_2$x4 <- vec_1                               # Add new column to data frame
data_2                                           # Print updated data frame

data_3 <- data_2[, colnames(data_2) != "x2"]     # Remove column x2 from data frame
data_3                                           # Print updated data frame

data_4 <- data_3                                 # Create duplicate of data frame
colnames(data_4) <- c("col_A", "col_B", "col_C") # Change column names
data_4                                           # Print updated data frame

data_5 <- rbind(data_3, 101:103)                 # Add new row to data frame
data_5                                           # Print updated data frame

data_6 <- data_5[data_5$x1 > 3, ]                # Keep only rows with x1 > 3
data_6                                           # Print updated data frame

# Merging two data frames ----------------------------------------------------

data_7 <- data.frame(ID = 101:106,               # Create first data frame
                     x1 = letters[1:6],
                     x2 = letters[6:1])
data_7                                           # Print first data frame

data_8 <- data.frame(ID = 104:108,               # Create second data frame
                     y1 = 1:5,
                     y2 = 5:1,
                     y3 = 5)
data_8                                           # Print second data frame

# all = TRUE keeps IDs that occur in only one of the two inputs (full outer
# join); the columns missing for such rows are filled with NA.
data_9 <- merge(x = data_7,
                y = data_8,
                by = "ID",
                all = TRUE)
data_9                                           # Print merged data frame

# Replacing values -----------------------------------------------------------

vec_5 <- vec_1                                   # Create duplicate of vector
vec_5[vec_5 == 1] <- 99                          # Replace every 1 by 99
vec_5                                            # Print updated vector

data_10 <- data_1                                # Create duplicate of data frame
data_10$x2[data_10$x2 == "y"] <- "new"           # Replace "y" in column x2
data_10                                          # Print updated data frame

getwd()                                          # Show the directory currently in use

setwd("C:/datos")                                # Switch to the data directory
getwd()                                          # Show the directory after the switch

# Write data_10 to a CSV file in the working directory, omitting row names
write.csv(x = data_10, file = "data_10.csv", row.names = FALSE)

# Read the exported file back into a new data frame and display it
data_11 <- read.csv(file = "data_10.csv")
data_11

##### Creating Graphics in R #####

data(iris)                                       # Load the iris data set
head(iris)                                       # Show its first rows

# Scatterplot of sepal width against sepal length, one colour per species
plot(x = iris$Sepal.Length, y = iris$Sepal.Width, col = iris$Species)

# Kernel density estimate of sepal length
plot(density(x = iris$Sepal.Length))

# Histogram of sepal length
hist(x = iris$Sepal.Length)

# Boxplot of sepal length, one box per species
boxplot(iris$Sepal.Length ~ iris$Species)

# Install ggplot2 only when it is not available yet, so that re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

library("ggplot2")                               # Load ggplot2

# Scatterplot of sepal width against sepal length, coloured by species
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, col = Species)) +
  geom_point()

# Density of sepal length over the whole data set
ggplot(iris, aes(x = Sepal.Length)) +
  geom_density()

# One density outline per species
ggplot(iris, aes(x = Sepal.Length, col = Species)) +
  geom_density()

# Same densities with the area under each curve filled
ggplot(iris, aes(x = Sepal.Length, col = Species, fill = Species)) +
  geom_density()

# Semi-transparent fill so that overlapping densities stay visible
ggplot(iris, aes(x = Sepal.Length, col = Species, fill = Species)) +
  geom_density(alpha = 0.3)

# Histogram of sepal length
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram()

# Boxplot of sepal length per species
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot()

# Same boxplot with one fill colour per species
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot()

iris_groups <- iris                              # Create duplicate of iris data set
iris_groups$Sub <- letters[1:3]                  # Add subgroups (a, b, c recycled over rows)

# Mean sepal length by species and subgroup. The formula is passed
# positionally: the first argument of aggregate()'s formula method was renamed
# from `formula` to `x` in R 4.2.0, so leaving it unnamed works on both sides
# of that change.
iris_groups <- aggregate(Sepal.Length ~ Species + Sub,
                         data = iris_groups,
                         FUN = mean)
iris_groups                                      # Print aggregated iris data set

# Barplot of the aggregated sepal lengths per species
ggplot(iris_groups, aes(x = Species, y = Sepal.Length)) +
  geom_bar(stat = "identity")

# Stacked barplot: each bar is split by subgroup
ggplot(iris_groups, aes(x = Species, y = Sepal.Length, fill = Sub)) +
  geom_bar(stat = "identity")

# Grouped barplot: subgroups are drawn side by side
ggplot(iris_groups, aes(x = Species, y = Sepal.Length, fill = Sub)) +
  geom_bar(stat = "identity", position = "dodge")

##### Data Analysis & Descriptive Statistics in R #####


mean(vec_1) # Arithmetic mean of vec_1

median(vec_1) # Median of vec_1

min(vec_1) # Smallest value in vec_1

max(vec_1) # Largest value in vec_1

sum(vec_1) # Sum of all values in vec_1

var(vec_1) # Variance of vec_1

sd(vec_1) # Standard deviation of vec_1

summary(vec_1) # Several descriptive statistics in one call

table(vec_1) # Frequency of each distinct value in vec_1

table(data_1[ , c("x1", "x2")]) # Contingency table of columns x1 and x2

# Estimate linear regression model: Sepal.Width explained by Sepal.Length
mod_1 <- lm(formula = Sepal.Width ~ Sepal.Length,
            data = iris)

summary(mod_1)                                   # Summary statistics of model

# Scatterplot of the two sepal measures with a fitted linear regression line
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_smooth(method = "lm")

# Model with multiple predictors: Sepal.Length plus the Species factor
mod_2 <- lm(formula = Sepal.Width ~ Sepal.Length + Species,
            data = iris)

summary(mod_2)                                   # Summary statistics of model

# Scatterplot coloured by species with one linear regression line per species
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, col = Species)) +
  geom_point() +
  geom_smooth(method = "lm")

##### Advanced Techniques in R #####


vec_6 <- numeric()                               # Create empty numeric vector
vec_6                                            # Print empty numeric vector

# Add each element's position to the element itself. seq_along() is used
# instead of 1:length(vec_1) so that an empty vec_1 gives zero iterations
# rather than looping over c(1, 0).
for (i in seq_along(vec_1)) {
  vec_6[i] <- vec_1[i] + i
}

vec_1                                            # Print vec_1 for comparison

vec_6                                            # Print new vector

data_12 <- data_1                                # Create duplicate of data frame
data_12$x4 <- NA                                 # Add new column containing only NA
data_12                                          # Print new data frame

# Fill x4 row by row. The loop runs over the rows of data_12, the frame being
# modified; seq_len() gives zero iterations for a data frame without rows,
# where 1:nrow() would loop over c(1, 0).
for (i in seq_len(nrow(data_12))) {
  data_12$x4[i] <- data_12$x1[i] + i * data_12$x3[i]
}

data_12                                          # Print updated data frame

vec_7 <- character()                             # Create empty character vector
vec_7                                            # Print empty character vector

# Label every element of vec_1 as "high" (> 3) or "low" using a for loop with
# a nested if/else. seq_along() keeps the loop from running when vec_1 is
# empty, which 1:length(vec_1) would not.
for (i in seq_along(vec_1)) {
  if (vec_1[i] > 3) {
    vec_7[i] <- "high"
  } else {
    vec_7[i] <- "low"
  }
}

vec_7                                            # Print updated vector

# Vectorised labelling: "high" where vec_1 exceeds 3, "low" otherwise
vec_8 <- ifelse(vec_1 > 3, "high", "low")

vec_8                                            # Show the labelled vector

# Simple user-defined function.
# Takes a numeric value or vector x and returns x^2 + 5 * x, computed
# element by element.
fun_1 <- function(x) {
  x^2 + 5 * x
}

fun_1(x = vec_1) # Apply fun_1 to vec_1 and show the result

# More complex user-defined function with a branch on y.
# Computes x^2 + 5 * x and divides it by y when y is greater than 3,
# and by 10 * y otherwise. y is used as the condition of an if statement.
fun_2 <- function(x, y) {
  divisor <- if (y > 3) y else 10 * y
  (x^2 + 5 * x) / divisor
}

# Call fun_2 with x fixed at 5 and y running from 1 to 5, printing one
# message per iteration
for (i in seq_len(5)) {
  print(paste0("This is the result of iteration ", i, ": ",
               fun_2(x = 5, y = i)))
}

You might also like