Unit-3

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 12

Unit-3

Data Reshaping

Joining multiple vectors to form a data frame and combining two data frames

# Two parallel character vectors: a series title and its main character.
name <- c("Naruto Shipudden", "Dragon Ball", "One Piece")
mc <- c("Naruto", "Goku", "Luffy")

# Bind the vectors column-wise into a data frame.
anime <- data.frame(name = name, mc = mc)

# A second data frame with the same two columns, built inline.
anime2 <- data.frame(
  name = c("Attack on Titan", "Death Note", "Bleach"),
  mc = c("Eren", "Light", "Ichigo")
)

# Stack the rows of the second data frame under the first.
combined_anime <- rbind(anime, anime2)
combined_anime

Merging two data frames based on a common column

# First table: ids 1 to 5, each with a name.
df1 <- data.frame(
  id = 1:5,
  name = c("Yawar", "Zayed", "Hanan", "Ziya", "Paras")
)

# Second table: the same ids in a different order, each with an address.
df2 <- data.frame(
  id = c(2, 5, 4, 1, 3),
  address = c("Saraibal", "Tral", "Hyderpora", "Khanyar", "Rajbagh")
)

# Join the two tables on their shared 'id' column.
merged_df <- merge(x = df1, y = df2, by = "id")

# Show the joined result.
print(merged_df)

Melting
# NOTE(review): both packages provide a melt() function; since "reshape" is
# attached last, its melt() is presumably the one called below - confirm
# whether loading both packages is intended.
library("reshape2")
library("reshape")

# A small wide-format table: two measurements (x, y) per (n, time) pair.
n <- c(1, 1, 2, 2)
time <- c(1, 2, 1, 2)
x <- c(6, 3, 2, 5)
y <- c(1, 4, 6, 9)

df <- data.frame(n, time, x, y)

# "\n" (not "\\n") so that cat() ends each banner with a real newline
# instead of printing a literal backslash followed by "n".
cat("===Original data===\n")
df

cat("=== Organized by n and time ===\n")

# Melt to long format: n and time are kept as identifier columns and the
# remaining columns (x, y) are stacked into variable/value pairs.
# The argument name is spelled out in full rather than relying on partial
# matching of `id`.
molten_data <- melt(df, id.vars = c("n", "time"))
molten_data

CSV Files
# Sample data: one row per series.
anime <- data.frame(
  Name = c("Naruto", "Bleach", "One Piece", "Dragon Ball"),
  Year = c(2002, 2004, 1999, 1986),
  Episodes = c(220, 366, 900, 153),
  Main_Character = c("Naruto", "Ichigo", "Luffy", "Goku"),
  villains = c("Orochimaru", "Aizen", "Blackbeard", "Freeza")
)

# Save the data frame to anime.csv without a row-name column.
write.csv(x = anime, file = "anime.csv", row.names = FALSE)

# Load the file back, replacing the in-memory copy with what was read.
anime <- read.csv(file = "anime.csv")
anime

xlsx / xls Files

# Attach the xlsx package (provides write.xlsx() and read.xlsx()).
library(xlsx)

# A single-column data frame of creators.
creators <- data.frame(
  Creator = c("Kishimoto", "Araki", "Oda", "Toriyama")
)

# Add the data frame to anime.xlsx as a sheet named "Sheet2".
# append = TRUE adds to the existing workbook instead of replacing it.
write.xlsx(
  x = creators,
  file = "anime.xlsx",
  sheetName = "Sheet2",
  row.names = FALSE,
  append = TRUE
)

# Read one sheet of anime.xlsx back into a data frame.
# NOTE(review): this reads the sheet at index 3, while the write above
# targets a sheet named "Sheet2" - confirm the workbook layout so that the
# intended sheet is the one being read.
anime <- read.xlsx(file = "anime.xlsx", sheetIndex = 3)
anime

Binary Files

Writing data into a binary file


# Read the source table from anime.csv.
anime_csv <- read.csv(file = "anime.csv")

# Open anime.dat as a binary connection for writing ("wb").
write_filename <- file(description = "anime.dat", open = "wb")

# First record: the column names.
writeBin(object = colnames(anime_csv), con = write_filename)

# Second record: all cell values, laid out column after column.
# c() combines the five columns into one vector before it is written.
all_values <- c(
  anime_csv$Name,
  anime_csv$Year,
  anime_csv$Episodes,
  anime_csv$Main_Character,
  anime_csv$villains
)
writeBin(object = all_values, con = write_filename)

# Release the connection so other programs can use the file.
close(write_filename)

Reading data from a binary file

## Open anime.dat as a binary connection for reading ("rb" mode)
read_filename <- file("anime.dat", "rb")

## Read the column names (n = 5, because we have 5 columns)
column_names <- readBin(read_filename, character(), n = 5)

## Read the column values (n = 20: the values that follow the column names)
column_values <- readBin(read_filename, character(), n = 20)

## Close the connection once everything has been read, so the file handle
## is not left open for the rest of the session
close(read_filename)

## Elements 1 to 4 of the values (these are vector elements, not bytes)
anime_names <- column_values[1:4]

## Elements 5 to 8 of the values
anime_years <- column_values[5:8]

## Combine the two vectors column-wise into a data frame and label the
## columns with the first two column names read from the file
final_data <- as.data.frame(cbind(anime_names, anime_years))
colnames(final_data) <- column_names[1:2]

final_data
class(final_data)

Xml Files

Read from an xml file


library(XML)
library(methods)

# Parse anime.xml into an XML document object.
anime_xml <- xmlParse(file = "anime.xml")

# Top-level (root) node of the parsed document.
root_node <- xmlRoot(anime_xml)

# Number of child nodes directly under the root.
root_size <- xmlSize(root_node)

# Walk down the tree one level at a time, starting at the root's 4th child:
# the 4th child itself ...
root_node[[4]]
# ... then that node's 1st child ...
root_node[[4]][[1]]
# ... then the 1st child of that one.
root_node[[4]][[1]][[1]]

# Convert the parsed document into a data frame.
anime_dataframe <- xmlToDataFrame(anime_xml)

JSON Files

Reading from a json file

# Attach rjson, which provides fromJSON().
library(rjson)

# Parse the contents of anime.json into an R object.
anime_json <- fromJSON(file = "anime.json")

# Coerce the parsed object to a data frame and display it.
anime_dataframe <- as.data.frame(x = anime_json)
anime_dataframe

Databases
## Load the package
library("RMySQL")

## Create a db connection
## NOTE(review): the user name and password are hard-coded here; consider
## reading them from environment variables or a config file kept out of
## version control.
mysql_con <- dbConnect(
  MySQL(),
  user = "yuhao",
  password = "huoyuhao",
  dbname = "laragigs",
  host = "localhost",
  local.infile = TRUE
)

## List the tables in the db
dbListTables(mysql_con)

## Insert data into table
## dbExecute() runs the statement and releases its result in one call.
## The original dbSendQuery() call left a result set open that was never
## cleared before disconnecting.
dbExecute(
  mysql_con,
  "insert into friends(id, name, age)
values(5, 'Ziya', 25)"
)

####### CREATING A TABLE FROM DATA.FRAME ########

# Write the `anime` data.frame to a MySQL table named "anime",
# replacing the table if it already exists (overwrite = TRUE)
dbWriteTable(
  conn = mysql_con,
  name = "anime",
  value = anime,
  row.names = FALSE,
  overwrite = TRUE
)

# Close the connection to the database
dbDisconnect(mysql_con)

R Charts

Pie chart

The basic syntax for creating a pie chart in R is −

pie(x, labels, radius, main, col, clockwise)

Following is the description of the parameters used −


x is a vector containing the numeric values used in the pie chart.
labels is used to give description to the slices.
radius indicates the radius of the circle of the pie chart (a value between −1 and +1).
main indicates the title of the chart.
col indicates the color palette.
clockwise is a logical value indicating if the slices are drawn clockwise or anti clockwise.

# Share of each gas, its display name, and the slice colour.
values <- c(78, 21, 1)
labels <- c("Nitrogen", "Oxygen", "Other gases")
colors <- c("green", "red", "cyan")

# Each value as a percentage of the total, rounded to one decimal place.
piepercent <- round(100 * values / sum(values), digits = 1)

# Draw the pie with the percentages as slice labels.
pie(
  x = values,
  labels = piepercent,
  col = colors,
  main = "Atmospheric composition"
)

# Map each colour back to its gas name in a small legend.
legend(x = "bottomleft", legend = labels, fill = colors, cex = 0.5)

Bar chart

The basic syntax to create a bar-chart in R is −

barplot(H,names.arg,main,xlab,ylab,col)

Following is the description of the parameters used −

H is a vector or matrix containing numeric values used in bar chart.


xlab is the label for x axis.
ylab is the label for y axis.
main is the title of the bar chart.
names.arg is a vector of names appearing under each bar.
col is used to give colors to the bars in the graph.

# Number of students per sport, with the matching sport names.
values <- c(10, 16, 18)
labels <- c("Football", "Cricket", "Basketball")
colors <- c("green", "red", "cyan")

# One bar per sport, labelled underneath with the sport name.
# The bar colours are given inline here rather than taken from `colors`.
barplot(
  height = values,
  names.arg = labels,
  col = c("red", "green", "blue"),
  main = "Popularity of sports",
  xlab = "Sports",
  ylab = "No. of students"
)

Box plot

The basic syntax to create a boxplot in R is −

boxplot(x, data, notch, varwidth, names, main)

Following is the description of the parameters used −

x is a vector or a formula.
data is the data frame.
notch is a logical value. Set as TRUE to draw a notch.
varwidth is a logical value. Set to TRUE to draw each box with a width proportional to the square root of its group's sample size.
names are the group labels which will be printed under each boxplot.
main is used to give a title to the graph

# x runs from 1 to 5 and y holds the square of each x.
dataset <- data.frame(
  x = c(1, 2, 3, 4, 5),
  y = c(1, 4, 9, 16, 25)
)

# One box per x value, showing the corresponding y.
# The variables are resolved through `data`, so the formula can name the
# columns directly instead of spelling out dataset$... on each side.
boxplot(
  formula = y ~ x^2,
  data = dataset,
  names = c("1 sqr", "2 sqr", "3 sqr", "4 sqr", "5 sqr"),
  main = "Boxplot of x^2",
  xlab = "x",
  ylab = "x^2"
)

Histogram

The basic syntax for creating a histogram using R is −

hist(v,main,xlab,xlim,ylim,breaks,col,border)

Following is the description of the parameters used −

v is a vector containing numeric values used in histogram.


main indicates title of the chart.
col is used to set color of the bars.
border is used to set border color of each bar.
xlab is used to give description of x-axis.
xlim is used to specify the range of values on the x-axis.
ylim is used to specify the range of values on the y-axis.
breaks controls how the data are binned: a single number suggests how many cells to use, and a vector gives the exact breakpoints between cells.

# Sample data: the weights of 15 students.
weights <- c(69, 69, 66, 68, 68, 67, 75, 48, 43, 43, 49, 51, 53, 58, 63)

# Histogram of the weights, with the y axis fixed to the range 0 to 8.
hist(
  x = weights,
  breaks = 3,
  main = "Weight of students in class",
  xlab = "Weight of students",
  ylab = "Number of students",
  ylim = c(0, 8)
)

Line Graphs

The basic syntax to create a line chart in R is −

plot(v,main,xlab,ylab,col,type)

Following is the description of the parameters used −

v is a vector containing the numeric values.


type takes the value "p" to draw only the points, "l" to draw only the lines and "o" to draw both
points and lines.
xlab is the label for x axis.
ylab is the label for y axis.
main is the Title of the chart.
col is used to give colors to both the points and lines.

# Two data series to draw on the same chart.
v <- c(7, 12, 28, 3, 41)
t <- c(18, 32, 18, 20, 31)

# Draw the first series in red, with both points and connecting lines.
plot(
  v,
  type = "o",
  col = "red",
  main = "Rain fall chart",
  xlab = "Month",
  ylab = "Rain fall (mm)"
)

# Overlay the second series in blue on the existing plot.
lines(t, type = "o", col = "blue")


Scatter plot

The basic syntax for creating scatterplot in R is −

plot(x, y, main, xlab, ylab, xlim, ylim, axes)

Following is the description of the parameters used −

x is the data set whose values are the horizontal coordinates.


y is the data set whose values are the vertical coordinates.
main is the title of the graph.
xlab is the label in the horizontal axis.
ylab is the label in the vertical axis.
xlim is the limits of the values of x used for plotting.
ylim is the limits of the values of y used for plotting.
axes indicates whether both axes should be drawn on the plot.
# Paired observations: one weight and one height per student.
weights <- c(53, 56, 69, 69, 75)
heights <- c(175, 182, 181, 190, 180)

# Scatter plot of height against weight with fixed axis ranges.
plot(
  x = weights,
  y = heights,
  main = "Weight and Height of students",
  xlab = "Weight",
  ylab = "Height",
  xlim = c(50, 80),
  ylim = c(170, 195)
)

You might also like