Basic R Programming
Basic R Programming
# A bare numeric literal is stored as a double; the L suffix makes an integer.
x <- 100
typeof(x) # "double"
y <- 100L
typeof(y) # "integer"
a <- TRUE
typeof(a) # "logical"

# Rebind both names to doubles, then compare them.
x <- 100
y <- 200
x == y # FALSE
y > x  # TRUE
# Assigning variables: profit is the selling price minus the cost.
selling <- 500
cost <- 250
profit <- selling - cost
profit # 250
3. Print Formatting
# Explicit printing of a single value.
x <- 10
print(x)

# Typing a name at top level auto-prints it; the colon operator
# builds an integer sequence.
x <- 10:30
x
class(x) # "integer"
4. Vectors
- a sequence of data elements of the same basic type
# Numeric (double) vector built with c().
v1 <- c(1, 2, 3, 4, 5)
print(v1)
class(v1)  # "numeric"
typeof(v1) # "double"

# Integer vector: the colon operator yields integers.
num <- 1:10

# Numeric vector: combining with 10.5 coerces every element to double.
num <- c(1:10, 10.5)
num

# Character vector: the first ten lowercase letters.
ltrs <- letters[1:10]
ltrs

# Factor vector built from the character vector.
fac <- as.factor(ltrs)
class(fac) # "factor"
5. List
- a generic vector that can contain objects of different types
# Named list holding a numeric, a character and a logical vector.
# It is stored as `test` rather than `list` so the base constructor list()
# is not masked, and so the `test[[1]]` lookup refers to an existing object.
test <- list(
  x = c(10, 20, 30),
  y = c("a", "b", "c"),
  z = c(TRUE, FALSE)
)
test
test[[1]] # first element: the numeric vector x
# Two equivalent ways to extract the element named "product" from a list.
# NOTE(review): `product.category` is not created anywhere in the visible
# script; it must be defined before these lines can run -- TODO confirm source.
product.category[["product"]]
product.category$product
6. Matrix
# matrix() fills column by column unless byrow = TRUE.
matrix(1:6, nrow = 2)
# 1 3 5
# 2 4 6
matrix(1:6, ncol = 2)
# 1 4
# 2 5
# 3 6
matrix(1:6, nrow = 2, byrow = TRUE)
# 1 2 3
# 4 5 6
# Data shorter than the matrix is recycled (1 2 3 1 2 3), column by column.
matrix(1:3, nrow = 2, ncol = 3)
# 1 3 2
# 2 1 3
# cbind() joins vectors as columns, rbind() joins them as rows.
cbind(1:3, 1:3)
# 1 1
# 2 2
# 3 3
rbind(1:3, 1:3)
# 1 2 3
# 1 2 3
n <- matrix(1:6, byrow = TRUE, nrow = 2)
n
# 1 2 3
# 4 5 6
rbind(n, 7:9)
# 1 2 3
# 4 5 6
# 7 8 9
n # rbind() returned a new matrix; n itself is still 2 x 3
cbind(n, c(10, 11))
# 1 2 3 10
# 4 5 6 11
# Rebuild n row by row, then label its rows and columns.
n <- matrix(1:6, nrow = 2, byrow = TRUE)
rownames(n) <- c("row1", "row2")
n
colnames(n) <- c("col1", "col2", "col3")
n

# Column-bind a 4 x 2 integer matrix with a 4 x 3 character matrix.
# The six letters are recycled to fill the twelve cells of l.
x <- matrix(1:8, ncol = 2)
l <- matrix(LETTERS[1:6], nrow = 4, ncol = 3)
l
cbind(x, l)
7. Data frame
- used to store data in the form of a table
# Column vectors: one entry per person.
name <- c("John", "Peter", "Patrick", "Julie", "Bob")
age <- c(28, 30, 31, 38, 35)
children <- c(FALSE, TRUE, TRUE, FALSE, TRUE)

# Columns take the variable names by default ...
df <- data.frame(name, age, children)
# ... or explicit names; this version (capitalised columns) is used below.
df <- data.frame(Name = name, Age = age, Children = children)
df

# Indexing. Column names are case-sensitive, so the column is "Age".
df[3, 2]     # 31 (row 3, column 2)
df[3, "Age"] # 31 (same cell, addressed by name)
df[3, ]      # the whole third row
df[["Age"]]  # the Age column as a vector
df[c(3, 5), c("Age", "Children")]
df[2]        # second column, kept as a one-column data frame
Modify data
# Append a new column; one height value per existing row.
height <- c(163, 177, 163, 162, 157)
df[["height"]] <- height
df
Sorting (Ascending)
# The column is named "Age"; `$` lookup is case-sensitive, so df$age is NULL.
sort(df$Age)           # the sorted ages only
ranks <- order(df$Age) # row positions that put Age in ascending order
df[ranks, ]            # the whole data frame, reordered by Age
Sorting (Descending)
# order() takes `decreasing`, not `descending`; the column is named "Age".
df[order(df$Age, decreasing = TRUE), ]
Vectors
# Create vectors of three different basic types.
vec1 <- c(10, 20, 30)
vec2 <- c("a", "b", "c")
vec3 <- c(TRUE, FALSE, TRUE)
vec1; vec2; vec3

# Reverse-order indexing.
# NOTE(review): `order_detail` is not defined in the visible script, and `l`
# was last assigned a character matrix earlier; these lines presumably expect
# `l <- length(order_detail)` -- TODO confirm.
v1 <- order_detail[(l - 1):1]
v3 <- order_detail[l:1]

# Remove NA values before summing (TRUE is spelled out; T can be reassigned).
sum(order_detail, na.rm = TRUE)
Matrix
# Positional arguments of matrix() are data, nrow, ncol.
mat1 <- matrix(0, 3, 3) # 3 rows, 3 columns, all zeros
mat1
mat2 <- matrix(1:9, 3, 3) # filled column by column (the default)
mat2
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
mat3 <- matrix(1:9, nrow = 3, byrow = TRUE) # filled row by row
mat3
# Naming a matrix: label the columns with weekdays and the rows with stocks.
# NOTE(review): `stock.matrix` is not created in the visible script; it is
# presumably a 2 x 5 matrix defined elsewhere -- TODO confirm.
days <- c("Mon", "Tue", "Wed", "Thu", "Fri")
st.names <- c("Stock1", "Stock2")
colnames(stock.matrix) <- days
rownames(stock.matrix) <- st.names
stock.matrix
# Extraction of columns (and rows, selected by row name).
# NOTE(review): `student` is not created in the visible script -- TODO confirm.
student[, 1:2]               # columns 1 to 2
student[, c(1, 3)]           # columns 1 and 3
student["John", 2:3]         # row "John", columns 2 to 3
student[c("John", "Sam"), 4] # rows "John" and "Sam", column 4

# Overview of the built-in iris data set.
summary(iris)
str(iris)
List
# A list mixing a vector, a matrix and a nested list.
list2 <- list(
  vec = seq_len(10),
  mat = matrix(1:9, nrow = 3, ncol = 3),
  lis = list(a = 10, b = 20)
)
list2
is.recursive(list2) # TRUE

# Indexing of a list
list2[2]      # sub-list holding the second element
list2$mat     # the matrix itself
list2["vec"]  # sub-list holding the element named vec
list2[[3]][2] # third element, second value
list2$lis[2]  # same selection, by name
# Basic string helpers.
# NOTE(review): `c1` is not defined in the visible script -- TODO confirm.
toupper(c1)                          # convert to uppercase
substr(c1, 1, 2)                     # extract characters 1 to 2
newname <- sub("Rob", "Cena", name)  # replace the first "Rob" in each element
8. Flow Control
# if-else: report whether x is stored as an integer.
x <- 30L
if (!is.integer(x)) {
  print("x is not an integer")
} else {
  print("x is an integer")
}
# while loop: print the greeting once for each count value from 2 to 6.
v <- "Hello World"
count <- 2
while (count < 7) {
  print(v)
  count <- count + 1
}
# for loop: iterate over the elements directly ...
fruit <- c("Apple", "Orange", "Banana")
for (i in fruit) {
  print(i)
}

# ... or over their positions. seq_along() is used instead of
# 1:length(fruit), which would yield c(1, 0) for an empty vector.
for (i in seq_along(fruit)) {
  print(fruit[i])
}
# Print the square and the square root of every integer from 1 to 25.
for (i in seq_len(25)) {
  sq <- i * i
  sqroot <- sqrt(i)
  message("i=", i, "sq=", sq, "sqroot=", sqroot)
}
9. Functions
Built-in functions
# grepl() returns TRUE/FALSE for whether the pattern occurs.
text <- "R is fun to learn"
grepl("fun", text) # TRUE

# grep() returns the positions of the matching elements.
v <- c("a", "b", "c", "d")
grep("b", v) # 2
Function
# Print the squares of 1, 2, ..., a, one per line.
#
# a: how many squares to print. With a = 0 nothing is printed;
#    seq_len() is used because 1:a would iterate over c(1, 0) in that case.
# Returns NULL invisibly (the value of a for loop).
squares <- function(a) {
  for (i in seq_len(a)) {
    print(i^2)
  }
}
squares(4)
# Simple function: takes no arguments and prints a fixed greeting.
# print() hands back its argument invisibly, so that is the return value.
hello_world <- function() {
  greeting <- "Hello world in R"
  print(greeting)
}
hello_world()
# Generate 100 random values from a normal distribution with mean 0 and
# standard deviation 1 (the arguments of rnorm are n, mean, sd), then show
# their sample mean and a histogram.
normalDist <- rnorm(100,0,1)
mean(normalDist)
hist(normalDist)
#or
# NOTE(review): expoValue() is not defined in the visible script; presumably
# a user-written helper that raises the values to `power` and plots them,
# forwarding extra arguments such as `breaks` -- TODO confirm.
expoValue(normalDist,power=4)
expoValue(normalDist,power=2,breaks=50)
# Example 2: report the age group and voting status for an age.
#
# age: a single number. 18 or above gives "Adult" / "Yes"; anything lower
#      leaves both labels at "Not Defined".
# Both lines are emitted with message(); the function returns NULL invisibly.
status <- function(age) {
  if (age >= 18) {
    age_group <- "Adult"
    can_vote <- "Yes"
  } else {
    age_group <- "Not Defined"
    can_vote <- "Not Defined"
  }
  message("Your age group is ", age_group)
  message("Voting status is ", can_vote)
}
status(20)
# Function example: starting at n, report n squared and step n up by one
# until the square reaches 100 or more.
#
# n: starting number.
# Returns the first value of n whose square is at least 100.
sqr <- function(n) {
  while (TRUE) {
    square <- n * n
    message("The square is ", square)
    if (square >= 100) {
      break
    }
    n <- n + 1
  }
  n
}
sqr(6)
Switch case
# Look up the HRA amount for a city code (matched case-insensitively).
#
# city: a single character string such as "BLR", "MUM" or "CHN".
# Returns the amount for a known code, or 5000 for any other code.
HRA <- function(city) {
  code <- toupper(city)
  # An empty switch arm falls through to the next one, so BLR shares
  # the value listed for CHN.
  switch(code,
    BLR = ,
    CHN = 7500,
    MUM = 1000,
    5000
  )
}
HRA("VOK")
Repeat
# repeat runs until break: the greeting is shown for time = 15, 16, ..., 20.
time <- 15
repeat {
  message("Hello,welcome to R")
  if (time >= 20) {
    break
  }
  time <- time + 1
}
Built-in functions
# Built-in functions
seq(1, 10, by = 2)         # 1 3 5 7 9

v <- c(11, 4, 5, 7, 3, 10, 2)
sort(v)                    # ascending
sort(v, decreasing = TRUE) # descending

v2 <- c(1, 2, 3, 4, 5)
rev(v2)                    # reversed copy
append(v, v2)              # v followed by v2
Factor
# Ordered factor: the levels argument fixes the order S < M < L.
dresssize <- c("M", "L", "S", "S", "L", "M", "L", "M")
size_levels <- c("S", "M", "L")
dresssize_factor <- factor(dresssize, levels = size_levels, ordered = TRUE)
dresssize_factor
# Install and load dplyr and the nycflights13 data package.
install.packages("dplyr")
library(dplyr)
install.packages("nycflights13")
library(nycflights13)

# Open the flights table in the data viewer and show its first rows.
# View() from utils is used: only dplyr is attached at this point and it
# does not export a lowercase view().
View(flights)
head(flights)
# filter(): keep the rows that satisfy every condition.
f1 <- filter(flights, month == 7)
# View() from utils; a lowercase view() is not available with only dplyr attached.
View(f1)
View(filter(flights, month == 9, day == 2, origin == "LGA"))

# slice(): pick rows by position.
slice(flights, 1:5)
slice(flights, 5:10)
# group_by(): split mtcars into groups, then summarise each group.
head(mtcars)
by_gear <- mtcars %>% group_by(gear)
by_gear
a <- summarise(by_gear, gear1 = sum(gear), gear2 = mean(gear))
a
# The cylinder column of mtcars is `cyl` (letter l), not `cy1`.
summarise(group_by(mtcars, cyl), mean(gear, na.rm = TRUE))
b <- by_gear %>% summarise(gear1 = sum(gear), gear2 = mean(gear))
b
# View() from utils; a lowercase view() is not available with only dplyr attached.
View(by_gear)

# Example 2: mean gear and mean horsepower per cylinder count.
by_cy1 <- mtcars %>% group_by(cyl)
by_cy1 %>% summarise(
  gear = mean(gear), # arguments must be separated by commas
  hp = mean(hp)
)
head(by_cy1)
# Random sampling of rows. The data set is `flights` (plural).
sample_n(flights, 15)     # 15 random rows
sample_frac(flights, 0.4) # a random 40% of all rows
# Multiple assignment: build a result step by step through intermediates.
# NOTE(review): mpg, hp and cyl are mtcars columns, so df is presumably meant
# to be mtcars here rather than the earlier name/age table -- TODO confirm.
df <- mtcars
# filter() needs a comparison; `mpg = 20` is a named argument and is rejected.
# `>` is used because no mtcars row has mpg exactly 20, which would leave
# sample_n() with nothing to draw five rows from.
a <- filter(df, mpg > 20)
b <- sample_n(a, size = 5)
result <- arrange(b, desc(mpg))
result

# select(): keep only the listed columns (filter() keeps rows instead).
df
df_mpg_hp_cy1 <- df %>% select(mpg, hp, cyl) # the column is `cyl`, not `cy1`
head(df_mpg_hp_cy1)
- tidyr package helps to create tidy data.
# Install and attach tidyr.
install.packages("tidyr")
library(tidyr)
n <- 10
# Base graphics: scatter plot of calories against sugars (UScereal from MASS).
library(MASS)
plot(UScereal$sugars, UScereal$calories)
title("plot(UScereal$sugars,UScereal$calories)")

# Keep the two columns under short names for the grid version.
x <- UScereal$sugars
y <- UScereal$calories

# Grid graphics: build the same scatter plot from low-level pieces.
library(grid)
pushViewport(plotViewport())     # plotting region with margins
pushViewport(dataViewport(x, y)) # scales taken from the data
grid.rect()
grid.xaxis()
grid.yaxis()
grid.points(x, y)
grid.text("UScereal$calories", x = unit(-3, "lines"), rot = 90)
grid.text("UScereal$sugars", y = unit(-3, "lines"), rot = 0)
popViewport(2)                   # leave both viewports pushed above
- ggplot2 is a data visualization package for creating graphs in R by decomposing complex graphs
into logical subunits
- ggplot2 uses geoms (geometric objects) to form the basis of different types of graphs
# Example 1: bar charts of vehicle class from the mpg data set.
library(ggplot2)
head(mpg, n = 10)
str(mpg)
install.packages("tidyverse")
library(tidyverse)

# Simple bar chart; the layer function is geom_bar().
ggplot(mpg) + geom_bar(aes(x = class))

# Dodged bar chart: one bar per cylinder count within each class.
# factor() turns the numeric `cyl` column into a discrete fill.
ggplot(mpg) +
  geom_bar(
    aes(x = class, fill = factor(cyl)),
    position = position_dodge(preserve = "single")
  )
# Line graph: growth of the first tree in the built-in Orange data set.
library(tidyverse)

# Filter the data we need.
Tree_1 <- filter(Orange, Tree == 1)

# Graph the data. Names are case-sensitive, so the object is Tree_1.
ggplot(Tree_1) +
  geom_line(aes(x = age, y = circumference))
# Pie chart
# Create data for the graph.
x <- c(33, 45, 70, 110)
labels <- c("Soap", "Detergent", "Oil", "Shampoo")

# Plot the chart: plain, then with a title and one colour per slice.
pie(x, labels)
pie(x, labels, main = "City pie chart", col = rainbow(length(x)))

# Label the slices with their percentage share and name them in a legend.
piepercent <- round(100 * x / sum(x), 1)
pie(x, labels = piepercent, main = "City pie chart", col = rainbow(length(x)))
# The legend reuses `labels` so each name lines up with its slice colour.
legend("topright", labels, cex = 0.8, fill = rainbow(length(x)))
# Load and inspect the built-in airquality data set (the name has no space).
data("airquality")
View(airquality)

# Scatter plots. Column names are case-sensitive: Ozone and Wind.
plot(airquality$Ozone, airquality$Wind)
plot(airquality$Ozone, airquality$Wind, col = "red")
plot(airquality$Ozone, airquality$Wind, type = "h", col = "blue")
plot(airquality) # scatter-plot matrix of every pair of columns

# Histogram
hist(airquality$Solar.R)
hist(airquality$Solar.R, main = "Solar Radiation values in air", xlab = "Solar rad.")

# Box plot
boxplot(airquality$Solar.R)

# Multiple box plots: one per column for the first four columns.
# The function is boxplot(); R indices start at 1, so 1:4 is used.
boxplot(airquality[, 1:4], main = "Multiple Box plots")
# Scatter plot of fuel economy against weight.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) + geom_point()

# Scatter plots by factors
view(mtcars)
# The argument is `data`, not `sata`.
# NOTE(review): col = 'red' inside aes() maps the constant string as a
# one-level variable instead of painting the points red; a factor column
# may have been intended here -- TODO confirm.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = 'red')) + geom_point()

# Scatter plots by size. geom_point() must be called, and the weight
# column is `wt`.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = qsec)) + geom_point()
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = 'red', size = qsec)) + geom_point()
# Bar plots. The data set is mpg from the ggplot2 package.
ggplot(data = ggplot2::mpg, aes(class)) + geom_bar()

# Stacked bar chart: bars split by drive type.
ggplot(data = ggplot2::mpg, aes(class)) + geom_bar(aes(fill = drv))

# Using dodge: drive types side by side instead of stacked.
ggplot(data = ggplot2::mpg, aes(class)) +
  geom_bar(aes(fill = drv), position = "dodge")

# Scatter plots of highway mileage against engine displacement,
# plain and then coloured by vehicle class.
ggplot(data = ggplot2::mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))
ggplot(data = ggplot2::mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))
10. Time Series Analysis