R Programming
R Programming
Filter
Importing data
In console:
> install.packages("dplyr")
> library(dplyr)
In script:
1.
# Keep only the rows of data_1 whose Quantité equals 10, then open the
# result in the data viewer.
filtered_tg <- filter(
  data_1,
  Quantité == 10
)
View(filtered_tg)
2.
# Sort the filtered rows by IDCommande. The sorted copy is only printed;
# filtered_tg itself is not reassigned.
arrange(
  filtered_tg,
  IDCommande
)
3.
# Nested form: the inner filter() runs first and its result is handed to
# arrange(), so no intermediate variable is needed.
arrange(
  filter(data_1, Quantité == 10),
  IDCommande
)
4.
# Pipe form of the same filter-then-sort, stored in filtered_data_1.
filtered_data_1 <-
  data_1 %>%
  filter(Quantité == 10) %>%
  arrange(IDCommande)
5.
# Mean IDCommande of the rows where Quantité is 10, grouped by Quantité.
# The filter leaves a single Quantité value, so the summary has at most one
# row. Note that this overwrites any earlier filtered_data_1.
filtered_data_1 <- data_1 %>%
  filter(Quantité == 10) %>%
  group_by(Quantité) %>%
  summarise(
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    mean_IDCommande = mean(IDCommande, na.rm = TRUE),
    .groups = "drop"
  )
Data frames
> install.packages("tidyverse")
> library(ggplot2)
> data("diamonds")
> View(diamonds)
> head(diamonds)  # head() shows just the first six rows.
> str(diamonds)
> colnames(diamonds)
> mutate(diamonds, cart_2=carat*100)  # mutate() adds or changes columns of the data frame;
#   the result is printed here, not stored back into diamonds.
o Example
## Step 1: Install and load the tidyverse
install.packages("tidyverse")
library(tidyverse)

## Step 2: Create data frame
# Two parallel vectors with one entry per person, combined column-wise.
names <- c("A", "B", "C", "D")
age <- c(10, 11, 12, 13)
people <- data.frame(names, age)

## Step 3: inspect the data frame
head(people)      # first rows
str(people)       # structure of the data frame
glimpse(people)   # compact overview of the columns
colnames(people)  # column names

# Add a column holding each age plus 20; the result is printed, not stored.
people %>% mutate(age_in_20 = age + 20)
o Example
## Step 1: Install and load the tidyverse
install.packages("tidyverse")
library(tidyverse)

## Step 2: Import data
bookings_df <- read_csv("hotel_bookings.csv")

## Step 3: Inspect & clean data
head(bookings_df)
str(bookings_df)
colnames(bookings_df)
# Keep only the adr and adults columns.
new_df <- bookings_df %>% select(adr, adults)

## Step 4: Visualizing data
# Scatter plot of price against carat.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()

# Same plot with the points coloured by cut.
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point()

# Coloured by cut and split into one panel per cut.
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  facet_wrap(~cut)
Cleaning data
> install.packages("here")
> library(here)
> install.packages("skimr")
> library(skimr)
> install.packages("janitor")
> library("janitor")
> install.packages("dplyr")
> library(dplyr)
Import data
> glimpse(data_1)
# Keep only the Quantité column.
select(data_1, Quantité)

# Keep every column except Quantité.
select(data_1, -Quantité)

# Rename Quantité to Qte.
rename(data_1, Qte = Quantité)
Organize data
> library(tidyverse)
> data_1 %>% arrange(bill_length_mm)        # ascending order
> data_1 %>% arrange(desc(bill_length_mm))  # descending order; desc() also works for non-numeric columns
> data_1 %>% filter(Quantité == 10)         # numeric 10, as in the other Quantité filters
# NOTE(review): bill_length_mm is not used with data_1 anywhere else in these notes — confirm the intended data set.
Data cleaning
Step 1: Load packages
In order to start cleaning your data, you will need to install the required packages. If
you have already installed and loaded "tidyverse", "skimr", and "janitor", you can skip this step.
install.packages("tidyverse")
install.packages("skimr")
install.packages("janitor")
Once a package is installed, you can load it by running the “library()” function with the
package name inside the parentheses:
library(tidyverse)
library(skimr)
library(janitor)
o Example
# Build a small employee data frame: ten ids paired with ten names.
id <- 1:10  # was c(1, 10), which yields only the two values 1 and 10
name <- c(
  "John Mendes", "Rob Stewart", "Rachel Abrahamson", "Christy Hickman",
  "Johnson Harper", "Candace Miller", "Carlson Landy", "Pansy Jordan",
  "Darius Berry", "Claudia Garcia"
)
# The data frame has to be created before it can be printed.
employee <- data.frame(id, name)
print(employee)
o Example
# The package is named "SimDesign"; the misspelt "SimDesing" can be neither
# installed nor loaded.
install.packages("SimDesign")
library(SimDesign)

actual_temp <- c(68.3, 70, 72.4, 71, 67, 70)
predicted_temp <- c(67.9, 69, 71.5, 70, 67, 69)

# Compare the two temperature vectors with SimDesign's bias().
bias(actual_temp, predicted_temp)
https://www.rdocumentation.org/packages/SimDesign/versions/2.2/topics/bias
o Example
## Step 1: Load packages
install.packages("tidyverse")
install.packages("skimr")
install.packages("janitor")
library(tidyverse)
library(skimr)
library(janitor)
o geom_point
# Scatter plot: flipper length on x, body mass on y.
ggplot(data = penguins) +
  geom_point(
    mapping = aes(x = flipper_length_mm, y = body_mass_g)
  )
o geom_smooth
# Smoothed trend line only.
ggplot(data = penguins) +
  geom_smooth(
    mapping = aes(x = flipper_length_mm, y = body_mass_g)
  )

# Trend line with the raw points drawn on top of it.
ggplot(data = penguins) +
  geom_smooth(
    mapping = aes(x = flipper_length_mm, y = body_mass_g)
  ) +
  geom_point(
    mapping = aes(x = flipper_length_mm, y = body_mass_g)
  )

# One trend line per species, told apart by line type.
ggplot(data = penguins) +
  geom_smooth(
    mapping = aes(
      x = flipper_length_mm,
      y = body_mass_g,
      linetype = species
    )
  )
o geom_bar
# Bar chart counting the rows for each flipper_length_mm value.
ggplot(data = penguins) +
  geom_bar(mapping = aes(x = flipper_length_mm))
o facet_wrap
library("ggplot2")
library("palmerpenguins")
# Scatter plot coloured by species, split into one panel per species.
ggplot(
  data = penguins,
  mapping = aes(x = flipper_length_mm, y = body_mass_g)
) +
  geom_point(mapping = aes(color = species)) +
  facet_wrap(~species)
o facet_grid
# Grid of panels: one row per sex, one column per species.
ggplot(
  data = penguins,
  mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  # A geom layer is required; without one the panels are drawn empty.
  geom_point() +
  facet_grid(sex ~ species)
o Example
## Step 1: Import your data
hotel_bookings <- read.csv("hotel_bookings.csv")

## Step 2: Look at a sample of your data
head(hotel_bookings)
colnames(hotel_bookings)

## Step 3: Install and load the 'ggplot2' package
install.packages("ggplot2")
library(ggplot2)

## Step 4: Begin creating a plot
# Scatter plot of children against lead_time.
ggplot(data = hotel_bookings) +
  geom_point(
    mapping = aes(x = lead_time, y = children)
  )
o Example
## Step 1: Import your data
hotel_bookings <- read.csv("hotel_bookings.csv")

## Step 2: Refresh Your Memory
head(hotel_bookings)
colnames(hotel_bookings)

## Step 3: Install and load the 'ggplot2' package (optional)
install.packages("ggplot2")
library(ggplot2)

## Step 4: Making a Bar Chart
# Number of bookings per distribution channel.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = distribution_channel))

## Step 5: Diving deeper into bar charts
# The original left a dangling comma where a second aesthetic belonged.
# NOTE(review): fill = deposit_type is assumed because Step 6 facets by the
# same column — confirm this is the intended aesthetic.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = distribution_channel, fill = deposit_type))

## Step 6: Facets galore
# One panel per deposit type; x labels are rotated 45 degrees.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = distribution_channel)) +
  facet_wrap(~deposit_type) +
  theme(axis.text.x = element_text(angle = 45))

# One panel per market segment.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = distribution_channel)) +
  facet_wrap(~market_segment) +
  theme(axis.text.x = element_text(angle = 45))

# facet_grid() with a one-sided formula: one column of panels per deposit type.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = distribution_channel)) +
  facet_grid(~deposit_type) +
  theme(axis.text.x = element_text(angle = 45))

# Panels for the combinations of deposit type and market segment.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = distribution_channel)) +
  facet_wrap(~deposit_type~market_segment) +
  theme(axis.text.x = element_text(angle = 45))
o Example
## Step 1: Import your data
hotel_bookings <- read.csv("hotel_bookings.csv")

## Step 2: Refresh Your Memory
head(hotel_bookings)
colnames(hotel_bookings)

## Step 3: Install and load the 'ggplot2' package (optional)
install.packages("ggplot2")
library(ggplot2)

## Step 4: Making many different charts
# Scatter plot of children against lead_time.
ggplot(data = hotel_bookings) +
  geom_point(mapping = aes(x = lead_time, y = children))

# Bookings per hotel, bars filled by market segment.
ggplot(data = hotel_bookings) +
  geom_bar(mapping = aes(x = hotel, fill = market_segment))

## Step 5: Filtering
install.packages("tidyverse")
library(tidyverse)

# City Hotel bookings from the Online TA market segment. The original compared
# both columns against empty strings; the values used here are the ones the
# piped version below filters on.
onlineta_city_hotels <- filter(
  hotel_bookings,
  hotel == "City Hotel" & market_segment == "Online TA"
)
View(onlineta_city_hotels)

# The same selection written as two chained filter() steps.
onlineta_city_hotels_v2 <- hotel_bookings %>%
  filter(hotel == "City Hotel") %>%
  filter(market_segment == "Online TA")
View(onlineta_city_hotels_v2)
Annotation layer
# Scatter plot coloured by species, with a title added through labs().
ggplot(data = penguins) +
  geom_point(
    mapping = aes(
      x = flipper_length_mm,
      y = body_mass_g,
      color = species
    )
  ) +
  labs(title = " qqq")
ggplot()
# Basic scatter plot: flipper length on x, body mass on y.
ggplot(data = penguins) +
  geom_point(
    mapping = aes(x = flipper_length_mm, y = body_mass_g)
  )
o color
# Same scatter plot with the points coloured by species.
ggplot(data = penguins) +
  geom_point(
    mapping = aes(
      x = flipper_length_mm,
      y = body_mass_g,
      color = species
    )
  )
Annotation layer
# Title only.
ggplot(data = penguins) +
  geom_point(
    mapping = aes(
      x = flipper_length_mm,
      y = body_mass_g,
      color = species
    )
  ) +
  labs(title = " qqq")

# Title, subtitle and caption, plus a red text label placed at x = 220,
# y = 3500 in data coordinates.
ggplot(data = penguins) +
  geom_point(
    mapping = aes(
      x = flipper_length_mm,
      y = body_mass_g,
      color = species
    )
  ) +
  labs(title = " qqq", subtitle = "abc", caption = "www") +
  annotate("text", x = 220, y = 3500, label = "ttt", color = "red")