# Display the number of rows and columns
dim(mydata)

# Number of observations (rows)
nrow(mydata)
# Note: length() on a data frame counts its columns, not its observations
length(mydata)

# Display mydata's column headers and first 6 rows
head(mydata)

# Display mydata's column headers and first 10 rows (two equivalent forms)
head(mydata, n = 10)
head(mydata, 10)

# Display mydata's column headers and last 6 rows
tail(mydata)

# Display mydata's structure
str(mydata)

# Display mydata's column names
colnames(mydata)

# Display mydata's row names
rownames(mydata)
# Display all values in the first column
mydata[, 1]
# Extract basic statistics
summary(mydata)

# Extract a more elaborate statistical summary with the psych package.
# Install psych only when it is missing, so re-running these lines does not
# download the package again.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
describe(mydata)
# Display all rows but just columns 2 through 4
mydata[, 2:4]

# Display all rows and all columns except the third one
mydata[, -3]

# Display all rows but just columns 2 and 4
mydata[, c(2, 4)]

# Display the rows of mydata where age is above 20.
# Note: rows whose age is NA come back as all-NA rows; wrap the condition in
# which() if they should be dropped instead.
mydata[mydata$age > 20, ]

# Display columns 3 and 7 for the rows where age is above 20
mydata[mydata$age > 20, c(3, 7)]

# Display columns mpg and hp for the rows where age is above 20
mydata[mydata$age > 20, c("mpg", "hp")]
# Display the sequence from 1 to 15 in steps of 2 (two equivalent forms)
seq(from = 1, to = 15, by = 2)
seq(1, 15, 2)

# Repeat 1 ten times (two equivalent forms)
rep(1, times = 10)
rep(1, 10)

# Repeat the pair "m", "f" five times: "m" "f" "m" "f" ...
rep(c("m", "f"), times = 5)
# Display mydata without variables 1 to 20. This returns a copy: mydata itself
# keeps those variables unless the result is assigned back to it.
mydata[, -(1:20)]
Recode Variables
# Recode using the ifelse() function
ifelse(mydata$age <= 15, "small", "large")

# Combine several comparisons with logical operators
ifelse(mydata$age <= 15 & mydata$gender == "female", "small", "large")

# Recode into several groups (e.g. age into age groups) with cut().
# In breaks, 1 is the lower limit and 100 is the upper limit.
# right = FALSE closes each interval on the left, so the groups are
# [1, 15), [15, 30) and [30, 100] and agree with their labels
# (an age of exactly 15 falls in "15-29", an age of 1 in "Below 15").
mydata$AgeGroup <- cut(
  mydata$age,
  breaks = c(1, 15, 30, 100),
  labels = c("Below 15", "15-29", "30 or Above"),
  right = FALSE,
  include.lowest = TRUE
)
Subsetting
# Mean age of the female respondents.
# Note: = (like <-) assigns an object, whereas == tests for equality.
mean(mydata$age[mydata$gender == "female"])

# Keep only the rows for males older than 15
maleOver15 <- mydata[mydata$gender == "male" & mydata$age > 15, ]
Getting Data into R
# Comma-separated file; the first row is read as column names
mydata <- read.csv("filename.txt")
# Comma-separated file without a header row
mydata <- read.csv("filename.txt", header = FALSE)
# Comma-separated file picked interactively through a file dialog
mydata <- read.csv(file.choose(), header = TRUE)

# Tab-delimited file with a header row
mydata <- read.table("filename.txt", sep = "\t", header = TRUE)
# Same readers, with the file picked interactively
mydata <- read.table(file.choose(), header = TRUE, sep = ",")
mydata <- read.table(file.choose(), header = TRUE, sep = "\t")
# read.delim() is read.table() preset for tab-delimited files
mydata <- read.delim(file.choose(), header = TRUE)
SPSS & Excel: import examples still to be added (TODO)
Export R Data
# Export to a tab-delimited text file
write.table(mydata, "dat1.txt", sep = "\t")

# Export to a csv file
write.csv(mydata, "mydata.csv")

# Export to an Excel file
library(xlsx)
write.xlsx(mydata, "dat.xlsx")

# Export to SPSS: the data go to data1.txt and the SPSS syntax that reads
# them goes to mydata.sps
library(foreign)
write.foreign(mydata, "data1.txt", "mydata.sps", package = "SPSS")
Labelling Variable (Relabel)
Base R has only limited support for variable labels; descriptive labels can be
attached with the Hmisc package. The example below renames the variables themselves.
# Rename variables no. 2, 3 and 4; c(2, 3, 4) gives their column positions
names(mydata)[c(2, 3, 4)] <- c("Eyes", "Span.mm", "Gender")
Labelling Values
# Variable v1 is coded 1, 2 and 3.
# Attach the value labels 1 = red, 2 = blue, 3 = green by turning v1 into a
# factor; levels lists the stored codes and labels gives their names in the
# same order.
mydata$v1 <- factor(
  mydata$v1,
  levels = c(1, 2, 3),
  labels = c("red", "blue", "green")
)
Remove All Objects
# Clear every object that ls() lists from the current workspace
remove(list = ls())
# Exit the R session
quit()
R Operators
+ Add, 2 + 3 = 5
- Subtract, 5 - 2 = 3
* Multiply, 2 * 3 = 6
/ Divide, 6 / 2 = 3
^ Exponent, 2 ^ 3 = 8
%% Modulus operator, 9%%2 = 1
%/% Integer division, 9 %/% 2 = 4
< Less than
> Greater than
== Equal to
<= Less than or equal to
>= Greater than or equal to
!= Not equal to
! Not
| OR
& And
knitr (or Export to word) package
Graphics using ggplot2
Bar Chart
3-d bar chart
Pie Chart
3-d Pie Chart
Histogram
Error Bars
High-Low
Scatter Plot
Line graph
Population Pyramid
Box Plot
ROC curve
Statistics
# Extract basic statistics
summary(mydata)

# Extract a more elaborate statistical summary with the psych package.
# Install psych only when it is missing, so re-running these lines does not
# download the package again.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
describe(mydata)
Frequency Tables
Crosstabs
Multiple Response Table
Hypothesis Test
chi-square test
library(MASS)
Reliability (Cronbach's alpha) test
t-test:
a) one-sample
b) paired
c) independent sample
One Way ANOVA
Two Way ANOVA
Repeated Measures ANOVA
MANOVA
Correlation
Regression: Logistic, linear, ...
Note
Deviant observations: data points that look unusual but are not actually outliers