0% found this document useful (0 votes)
41 views

R Programming

The document provides information on performing various data analysis tasks in R such as importing data, cleaning data, organizing data, visualizing data using ggplot2, and annotating plots. It includes examples of using dplyr and tidyverse packages to filter, arrange, group and summarize data. It also demonstrates creating different plot types like scatter plots, bar charts, histograms and adding facets and annotations to plots. The document is a tutorial that introduces the basics of data manipulation and visualization in R.

Uploaded by

ouahib.chafiai1
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
41 views

R Programming

The document provides information on performing various data analysis tasks in R such as importing data, cleaning data, organizing data, visualizing data using ggplot2, and annotating plots. It includes examples of using dplyr and tidyverse packages to filter, arrange, group and summarize data. It also demonstrates creating different plot types like scatter plots, bar charts, histograms and adding facets and annotations to plots. The document is a tutorial that introduces the basics of data manipulation and visualization in R.

Uploaded by

ouahib.chafiai1
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 9

R programming

 Filter
Importing data
In console:
> install.packages("dplyr")
> library(dplyr)
In script:
1.
filtered_tg <- filter(data_1, Quantité ==10)
View(filtered_tg)
2.
arrange(filtered_tg, IDCommande)
3.
arrange(filter(data_1, Quantité ==10),IDCommande)
4.
filtered_data_1 <- data_1 %>%
filter(Quantité ==10) %>%
arrange(IDCommande)
5.
# Keep only the rows where Quantité is 10, then compute the mean IDCommande
# for that quantity. `.groups = "drop"` returns an ungrouped result.
filtered_data_1 <- data_1 %>%
  filter(Quantité == 10) %>%
  group_by(Quantité) %>%
  summarise(
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    mean_IDCommande = mean(IDCommande, na.rm = TRUE),
    .groups = "drop"
  )

 Data frames
> install.packages("tidyverse")
> library(ggplot2)
> data("diamonds")
> View(diamonds)
> head(diamonds)  # head() returns just the first six rows.
> str(diamonds)
> colnames(diamonds)
> mutate(diamonds, cart_2=carat*100)  # mutate() returns the data frame with the new column added; assign the result to keep the change.

o Example
install.packages("tidyverse")
library(tidyverse)
## Step 2: Create data frame
names <- c("A", "B", "C", "D")
age <- c(10,11,12,13 )
people <- data.frame(names, age)
## Step 3: inspect the data frame
head(people)
str(people)
glimpse(people)
colnames(people)
mutate(people, age_in_20 = age + 20)
o Example
install.packages("tidyverse")
library(tidyverse)
## Step 2: Import data
bookings_df <- read_csv("hotel_bookings.csv")
## Step 3: Inspect & clean data
head(bookings_df)
str(bookings_df)
colnames(bookings_df)
new_df <- select(bookings_df, `adr`, adults)
## Step 4: Visualizing data
ggplot(data = diamonds, aes(x = carat, y = price)) +
geom_point()
ggplot(data = diamonds, aes(x = carat, y = price, color = cut)) +
geom_point()
ggplot(data = diamonds, aes(x = carat, y = price, color = cut)) +
geom_point() +
facet_wrap(~cut)

 Cleaning data
> install.packages("here")
> library(here)
> install.packages("skimr")
> library(skimr)
> install.packages("janitor")
> library("janitor")
> install.packages("dplyr")
> library(dplyr)
Import data
> glimpse(data_1)

data_1 %>%
select(Quantité)

data_1 %>%
select(-Quantité)

data_1 %>%
rename(Qte=Quantité)

 Organize data
> library(tidyverse)
> data_1 %>% arrange(bill_length_mm)
> data_1 %>% arrange(-bill_length_mm)
> data_1 %>% filter(Quantité == "10")
 Data cleaning
Step 1: Load packages
In order to start cleaning your data, you will need to install the required packages. If
you have already installed and loaded "tidyverse", "skimr", and "janitor", you can skip this step.

install.packages("tidyverse")
install.packages("skimr")
install.packages("janitor")

Once a package is installed, you can load it by running the “library()” function with the
package name inside the parentheses:

library(tidyverse)
library(skimr)
library(janitor)

## Step 2: Import data


bookings_df <- read_csv("hotel_bookings.csv")

## Step 3: Getting to know your data


head(bookings_df)
str(bookings_df)
glimpse(bookings_df)
colnames(bookings_df)
skim_without_charts(bookings_df)

o Example

# Build a 10-row employee data frame from three parallel vectors.

# One id per employee. The earlier c(1, 10) held only two values, which
# data.frame() silently recycles as 1, 10, 1, 10, ... instead of 1 through 10.
id <- 1:10

name <- c(
  "John Mendes", "Rob Stewart", "Rachel Abrahamson", "Christy Hickman",
  "Johnson Harper", "Candace Miller", "Carlson Landy", "Pansy Jordan",
  "Darius Berry", "Claudia Garcia"
)

job_title <- c(
  "Professional", "Programmer", "Management", "Clerical", "Developer",
  "Programmer", "Management", "Clerical", "Developer", "Programmer"
)

employee <- data.frame(id, name, job_title)

print(employee)

# Split the full name into first and last name at the space. The result is
# stored because separate() does not modify `employee`, and unite() below
# needs the two new columns.
employee_split <- separate(
  employee, name,
  into = c("first_name", "last_name"),
  sep = " "
)
print(employee_split)

# Recombine the two columns into a single "name" column.
unite(employee_split, "name", first_name, last_name, sep = " ")


o Example
install.packages("Tmisc")
library(Tmisc)
data(quartet)
View(quartet)
# For each value of `set`, compute the mean and standard deviation of x and y
# and the correlation between x and y. The summary columns are left unnamed,
# so they are labelled with the expressions themselves.
quartet %>%
group_by (set) %>%
summarize(mean(x),sd(x),mean(y),sd(y),cor(x,y))
# Draw a fitted straight line (no confidence band) in one panel per `set`.
# facet_wrap() takes a formula, hence `~set`.
ggplot(quartet, aes(x, y)) +
  geom_smooth(method = lm, se = FALSE) +
  facet_wrap(~set)
install.packages("datasauRus")
library(datasauRus)
# Scatter plot of every dataset in its own panel, three panels per row, with
# no axes or legend. Each `+` must end a line: a line that starts with `+` is
# parsed as a new expression and fails.
ggplot(datasaurus_dozen, aes(x = x, y = y, colour = dataset)) +
  geom_point() +
  theme_void() +
  theme(legend.position = "none") +
  facet_wrap(~dataset, ncol = 3)

o Example
# The package is named "SimDesign"; the misspelled "SimDesing" cannot be
# installed or loaded.
install.packages("SimDesign")
library(SimDesign)

# Compare actual and predicted temperatures with SimDesign's bias().
actual_temp <- c(68.3, 70, 72.4, 71, 67, 70)
predicted_temp <- c(67.9, 69, 71.5, 70, 67, 69)
bias(actual_temp, predicted_temp)

> actual_sales <- c(150, 203, 137, 247, 116, 287)


> predicted_sales <- c(200, 300, 150, 250, 150, 300)
> bias(actual_sales,predicted_sales)

https://www.rdocumentation.org/packages/SimDesign/versions/2.2/topics/bias

o Example
## Step 1: Load packages
install.packages("tidyverse")
install.packages("skimr")
install.packages("janitor")

library(tidyverse)
library(skimr)
library(janitor)

## Step 2: Import data


bookings_df <- read_csv("hotel_bookings.csv")

## Step 3: Getting to know your data


head(bookings_df)
str(bookings_df)
glimpse(bookings_df)
colnames(bookings_df)
skim_without_charts(bookings_df)
 Visualization basics in R and tidyverse
Some of the most popular visualization packages in R include ggplot2, Plotly, Lattice, RGL,
Dygraphs, Leaflet, Highcharter, Patchwork, gganimate, and ggridges.
In ggplot2, you can use points to create a scatter plot, bars to create a bar chart, or lines
to create a line diagram.
o Example
install.packages("ggplot2")
install.packages("palmerpenguins")
library(ggplot2)
library(palmerpenguins)
data(penguins)
View(penguins)
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g))

 Getting started with ggplot()


library("ggplot2")
library("palmerpenguins")

o geom_point
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g))

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, color = species))

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, shape = species))

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, shape = species, color = species))

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, shape = species, color = species, size = species))

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, alpha = species))

o geom_smooth
ggplot(data = penguins) + geom_smooth(mapping = aes(x = flipper_length_mm, y =
body_mass_g))
ggplot(data = penguins) + geom_smooth(mapping = aes(x = flipper_length_mm, y =
body_mass_g)) + geom_point(mapping = aes(x = flipper_length_mm, y = body_mass_g))
ggplot(data = penguins) + geom_smooth(mapping = aes(x = flipper_length_mm, y =
body_mass_g, linetype = species))

o geom_bar
ggplot(data = penguins) + geom_bar(mapping = aes(x =flipper_length_mm))
o facet_wrap
library("ggplot2")
library("palmerpenguins")

ggplot(data=penguins,aes(x=flipper_length_mm,y=body_mass_g)) +
geom_point(aes(color=species)) +
facet_wrap(~species)

o facet_grid
# facet_grid() only splits the plot into panels (rows by sex, columns by
# species); a geom layer is needed for anything to be drawn inside them.
ggplot(
  data = penguins,
  aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  geom_point() +
  facet_grid(sex ~ species)

o Example
## Step 1: Import your data
hotel_bookings <- read.csv("hotel_bookings.csv")
## Step 2: Look at a sample of your data
head(hotel_bookings)
colnames(hotel_bookings)
## Step 3: Install and load the 'ggplot2' package
install.packages('ggplot2')
library(ggplot2)
## Step 4: Begin creating a plot
ggplot(data = hotel_bookings) +
geom_point(mapping = aes(x = lead_time, y = children))

o Example
## Step 1: Import your data
hotel_bookings <- read.csv("hotel_bookings.csv")
## Step 2: Refresh Your Memory
head(hotel_bookings)
colnames(hotel_bookings)
## Step 3: Install and load the 'ggplot2' package (optional)
install.packages('ggplot2')
library(ggplot2)
## Step 4: Making a Bar Chart
ggplot(data = hotel_bookings) +
geom_bar(mapping = aes(x = distribution_channel))
## Step 5: Diving deeper into bar charts
# Bar chart of bookings per distribution channel, with each bar split by fill.
# NOTE(review): the original aes() ended in a dangling comma, so a second
# aesthetic appears to have been lost. `fill = deposit_type` is assumed here
# because deposit_type is the column used for faceting in the next step;
# confirm the intended column.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = distribution_channel, fill = deposit_type))
## Step 6: Facets galore
ggplot(data = hotel_bookings) +
geom_bar(mapping = aes(x = distribution_channel)) +
facet_wrap(~deposit_type) +
theme(axis.text.x = element_text(angle = 45))
ggplot(data = hotel_bookings) +
geom_bar(mapping = aes(x = distribution_channel)) +
facet_wrap(~market_segment) +
theme(axis.text.x = element_text(angle = 45))
ggplot(data = hotel_bookings) +
geom_bar(mapping = aes(x = distribution_channel)) +
facet_grid(~deposit_type) +
theme(axis.text.x = element_text(angle = 45))
ggplot(data = hotel_bookings) +
geom_bar(mapping = aes(x = distribution_channel)) +
facet_wrap(~deposit_type~market_segment) +
theme(axis.text.x = element_text(angle = 45))

o Example
## Step 1: Import your data
hotel_bookings <- read.csv("hotel_bookings.csv")
## Step 2: Refresh Your Memory
head(hotel_bookings)
colnames(hotel_bookings)
## Step 3: Install and load the 'ggplot2' package (optional)
install.packages('ggplot2')
library(ggplot2)
## Step 4: Making many different charts
ggplot(data = hotel_bookings) +
geom_point(mapping = aes(x = lead_time, y = children))
ggplot(data = hotel_bookings) +
geom_bar(mapping = aes(x = hotel, fill = market_segment))
## Step 5: Filtering
install.packages('tidyverse')
library(tidyverse)
# Keep only city-hotel bookings that came through the "Online TA" market
# segment. The comparison values match the piped version of this filter
# (onlineta_city_hotels_v2); comparing against "" would select none of those
# rows. Inside filter() columns are referenced by bare name, so the
# `hotel_bookings$` prefix is unnecessary.
onlineta_city_hotels <- filter(
  hotel_bookings,
  hotel == "City Hotel" & market_segment == "Online TA"
)
View(onlineta_city_hotels)
onlineta_city_hotels_v2 <- hotel_bookings %>%
filter(hotel=="City Hotel") %>%
filter(market_segment=="Online TA")
View(onlineta_city_hotels_v2)

 Annotation layer
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, color = species)) +
labs(title = " qqq")

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, color = species)) +
labs(title = " qqq", subtitle = "abc", caption = "www") +
annotate("text", x=220, y=3500, label= "ttt", color ="red")

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, color = species)) +
labs(title = " qqq", subtitle = "abc", caption = "www") +
annotate("text", x=220, y=3500, label= "ttt", color ="red")
functions:
 Importing data
hotel_bookings <- read.csv("hotel_bookings.csv")
 Head function
> head(diamonds)  # head() returns just the first six rows.

 ggplot()
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g))
o color
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, color = species))

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g), color = "purple")
o shape
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, shape = species))
o shape and color
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, shape = species, color = species))
o size
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, shape = species, color = species, size = species))
o alpha
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, alpha = species))
o geom_smooth
ggplot(data = penguins) + geom_smooth(mapping = aes(x = flipper_length_mm, y =
body_mass_g))
ggplot(data = penguins) + geom_smooth(mapping = aes(x = flipper_length_mm, y =
body_mass_g)) + geom_point(mapping = aes(x = flipper_length_mm, y = body_mass_g))
ggplot(data = penguins) + geom_smooth(mapping = aes(x = flipper_length_mm, y =
body_mass_g, linetype = species))
o geom_bar
ggplot(data = penguins) + geom_bar(mapping = aes(x =flipper_length_mm))
o facet_wrap
ggplot(data=penguins,aes(x=flipper_length_mm,y=body_mass_g)) +
geom_point(aes(color=species)) +
facet_wrap(~species)
o facet_grid
# facet_grid() only splits the plot into panels (rows by sex, columns by
# species); a geom layer is needed for anything to be drawn inside them.
ggplot(
  data = penguins,
  aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  geom_point() +
  facet_grid(sex ~ species)

 Annotation layer
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, color = species)) +
labs(title = " qqq")
ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =
body_mass_g, color = species)) +
labs(title = " qqq", subtitle = "abc", caption = "www") +
annotate("text", x=220, y=3500, label= "ttt", color ="red")

ggplot(data = penguins) + geom_point(mapping = aes(x = flipper_length_mm, y =


body_mass_g, color = species)) +
labs(title = " qqq", subtitle = "abc", caption = "www") +
annotate("text", x=220, y=3500, label= "ttt", color ="red")

You might also like