Module III
Module III
Data Manipulation
# NOTE(review): `data` is not defined in the visible code - confirm which
# object this line is meant to print.
print(data)
# Printing data
student_result
# duplicated() returns a logical vector that is TRUE for every element (every
# row, if student_result is a data frame) that repeats an earlier one.
duplicated(student_result)
# Summing that logical vector counts how many duplicates were found.
sum(duplicated(student_result))
# Print the data as-is, including any repeated rows.
student
# Print the data without duplicates: unique() drops every row that repeats an
# earlier row and keeps the first occurrence.
Prepared by : Dr. Srinivasa Rao Pokuri, Faculty SCOPE, VIT AP
unique(student)
Identify and Remove Duplicate
Data in R
# Create a sample data frame of students and their marks in each subject.
# "Geeta" and "Paul" each appear twice with identical marks, so the data
# contains two duplicated rows.
student <- data.frame(
  name = c("Ram", "Geeta", "John", "Paul", "Cassie", "Geeta", "Paul"),
  maths = c(7, 8, 8, 9, 10, 8, 9),
  science = c(5, 7, 6, 8, 9, 7, 8),
  history = c(7, 7, 7, 7, 7, 7, 7)
)
# Printing data (R is case-sensitive: the object is `student`, not `Student`)
student
# Printing data
student
# Show the data, then keep one row per distinct value of `maths`.
# .keep_all = TRUE retains the other columns of the rows that are kept.
student_result
dplyr::distinct(
  .data = student_result,
  maths,
  .keep_all = TRUE
)
Df1-> df2->
2. Left join (all.x = TRUE):
# Left join with base R: all.x = TRUE keeps every row of df1; rows with no
# matching ID in df2 get NA in the columns that come from df2.
# The key must be written with plain ASCII double quotes ("ID").
left_join <- merge(df1, df2, by = "ID", all.x = TRUE)
print(left_join)
Df1-> df2->
3. Right join (all.y = TRUE):
# Right join with base R: all.y = TRUE keeps every row of df2; rows with no
# matching ID in df1 get NA in the columns that come from df1.
# The key must be written with plain ASCII double quotes ("ID").
join <- merge(df1, df2, by = "ID", all.y = TRUE)
print(join)
Df1-> df2->
• Left join:
# Left join with dplyr: every row of the left table (x = df1) is kept and
# matched to df2 on the shared "ID" column.
left_join <- dplyr::left_join(x = df1, y = df2, by = "ID")
print(left_join)