PGDM 2019-21
Assignment
SUBJECT:
BDA
Submitted To: Dr. Saurabh Mittal
Submitted By: Aman Siddiqui
GM19019
Section C
1. Import a dataset from CSV format into the R environment
> getwd()
[1] "C:/Users/hp/Documents"
> setwd("C:/Users/hp/Desktop/term V")
> assign <- read.csv("COVID19data.csv" , header = TRUE, sep = ",")
2. Extract data from an XML file and use it for analysis
> install.packages("XML")
> library("XML")
> library("methods")
> result <- xmlParse(file = "myfile.xml")
> print(result)
<?xml version="1.0"?>
<RECORDS>
<EMPLOYEE>
<ID>1</ID>
<NAME>Rick</NAME>
<SALARY>623.3</SALARY>
<STARTDATE>1/1/2012</STARTDATE>
<DEPT>IT</DEPT>
</EMPLOYEE>
<EMPLOYEE>
<ID>2</ID>
<NAME>Dan</NAME>
<SALARY>515.2</SALARY>
<STARTDATE>9/23/2013</STARTDATE>
<DEPT>Operations</DEPT>
</EMPLOYEE>
<EMPLOYEE>
<ID>3</ID>
<NAME>Michelle</NAME>
<SALARY>611</SALARY>
<STARTDATE>11/15/2014</STARTDATE>
<DEPT>IT</DEPT>
</EMPLOYEE>
<EMPLOYEE>
<ID>4</ID>
<NAME>Ryan</NAME>
<SALARY>729</SALARY>
<STARTDATE>5/11/2014</STARTDATE>
<DEPT>HR</DEPT>
</EMPLOYEE>
<EMPLOYEE>
<ID>5</ID>
<NAME>Gary</NAME>
<SALARY>843.25</SALARY>
<STARTDATE>3/27/2015</STARTDATE>
<DEPT>Finance</DEPT>
</EMPLOYEE>
<EMPLOYEE>
<ID>6</ID>
<NAME>Nina</NAME>
<SALARY>578</SALARY>
<STARTDATE>5/21/2013</STARTDATE>
<DEPT>IT</DEPT>
</EMPLOYEE>
<EMPLOYEE>
<ID>7</ID>
<NAME>Simon</NAME>
<SALARY>632.8</SALARY>
<STARTDATE>7/30/2013</STARTDATE>
<DEPT>Operations</DEPT>
</EMPLOYEE>
<EMPLOYEE>
<ID>8</ID>
<NAME>Guru</NAME>
<SALARY>722.5</SALARY>
<STARTDATE>6/17/2014</STARTDATE>
<DEPT>Finance</DEPT>
</EMPLOYEE>
</RECORDS>
> result <- xmlParse(file = "myfile.xml")
> rootnode <- xmlRoot(result)
> rootsize <- xmlSize(rootnode)
> print(rootsize)
[1] 8
> rootnode <- xmlRoot(result)
> print(rootnode[1])
$EMPLOYEE
<EMPLOYEE>
<ID>1</ID>
<NAME>Rick</NAME>
<SALARY>623.3</SALARY>
<STARTDATE>1/1/2012</STARTDATE>
<DEPT>IT</DEPT>
</EMPLOYEE>
attr(,"class")
[1] "XMLInternalNodeList" "XMLNodeList"
> print(rootnode[[1]][[1]])
<ID>1</ID>
> print(rootnode[[1]][[5]])
<DEPT>IT</DEPT>
> print(rootnode[[3]][[2]])
<NAME>Michelle</NAME>
3. Perform linear regression on the data.
> x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
> y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
>
> relation <- lm(y~x)
>
> print(relation)
Call:
lm(formula = y ~ x)
Coefficients:
(Intercept) x
-38.4551 0.6746
>
> print(summary(relation))
Call:
lm(formula = y ~ x)
Residuals:
Min 1Q Median 3Q Max
-6.3002 -1.6629 0.0412 1.8944 3.9775
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -38.45509 8.04901 -4.778 0.00139 **
x 0.67461 0.05191 12.997 1.16e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.253 on 8 degrees of freedom
Multiple R-squared: 0.9548, Adjusted R-squared: 0.9491
F-statistic: 168.9 on 1 and 8 DF, p-value: 1.164e-06
4. Prepare pie chart in R programming
> x <- c(21, 62, 10, 53)
> labels <- c("Dhanbad", "Bokaro", "Ranchi", "Jamshedpur")
>
> # Give the chart file a name.
> png(file = "city.png")
>
> # Plot the chart.
> pie(x,labels)
>
> # Save the file.
> dev.off()
null device
          1
5. Prepare Histogram in R programming.
> # Create the histogram.
> hist(v,xlab = "Weight",col = "grey",border = "black")
>
> # Save the file.
> dev.off()
null device
          1
[Figure: histogram titled "Histogram of v"; x-axis "Weight", y-axis "Frequency"]
Variables
var1=pen
Error: object 'pen' not found
>var1 = "pen"
>var1 = "phone"
>var1 = "kit"
>ptint var1
Error: unexpected symbol in "ptint var1"
>var1
[1] "kit"
Data Types
var1 = "12.34"
>var1
[1] "12.34"
>class(var1)
[1] "character"
>var1= 12.34
>var1
[1] 12.34
>class(var1)
[1] "numeric"
>var1 ="TRUE"
>var1
[1] "TRUE"
>class(var1)
[1] "character"
>var1=TRUE
>var1
[1] TRUE
>class(var1)
[1] "logical"
Operators
A=8
>a
Error: object 'a' not found
>A
[1] 8
>b <- "hello world"
>b
[1] "hello world"
>num1=10
>num2=20
>num1
[1] 10
>num2
[1] 20
>num1+num2
[1] 30
>num1-num2
[1] -10
>num1*num2
[1] 200
>num1/num2
[1] 0.5
>num1=10
>num2=20
>num1 >num2
[1] FALSE
>num1 <num2
[1] TRUE
>num1 != num2
[1] TRUE
>num1=num2
>log1=TRUE
>log2=FALSE
>log1log2
Error: object 'log1log2' not found
>log1
[1] TRUE
>log2
[1] FALSE
>log1&log1
[1] TRUE
>log1&log2
[1] FALSE
>log2&log1
[1] FALSE
>log2&log2
[1] FALSE
>log1|log2
[1] TRUE
>log2|log2
[1] FALSE
Vector
vec1 <- c(1,2,3)
>class(vec1)
[1] "numeric"
>vec1 <- c(a,b,c)
Error: object 'a' not found
>vec1 <- c("a","b","c")
>vec2 <-c(1,2,3)
>vec1
[1] "a" "b" "c"
>vec2
[1] 1 2 3
>mix1 <-c(1,"true", TRUE)
>mix1
[1] "1" "true" "TRUE"
>mixbag1 <-(1,FALSE,2,TRUE)
Error: unexpected ',' in "mixbag1 <-(1,"
>mixbag1 <-c (1,FALSE,2,TRUE)
>mixbag1
[1] 1 0 2 1
>class(mixbag1)
[1] "numeric"
>mixbag1[2]
[1] 0
>mixbag1[3]
[1] 2
>mixbag1[2;4]
Error: unexpected ';' in "mixbag1[2;"
>mixbag1[2:4]
[1] 0 2 1
>mixbag1<-c([2,4])
Error: unexpected '[' in "mixbag1<-c(["
>mixbag1[c(2,4)]
[1] 0 1
List
L1 <- list(1,"a",TRUE)
> class(L1)
[1] "list"
>L1
[[1]]
[1] 1
[[2]]
[1] "a"
[[3]]
[1] TRUE
> class(L1{1})
Error: unexpected '{' in "class(L1{"
> class(L1[[1]])
[1] "numeric"
> class(L1[[a]])
Error: object 'a' not found
> class(L1[["a"]])
[1] "NULL"
> class(L1[[2]])
[1] "character"
> class(L1[[3]])
[1] "logical"
> L2 <- list (c(1,2,3),c("a","b","c"),c(TRUE,FALSE,TRUE))
>L2
[[1]]
[1] 1 2 3
[[2]]
[1] "a" "b" "c"
[[3]]
[1] TRUE FALSE TRUE
> L2[[2[3]]]
NULL
> L2[[2]][3]
[1] "c"
> L2[[3]][2]
[1] FALSE
>
Matrix
m1<- matrix(c(1,2,3,4,5,6))
> m1
[,1]
[1,] 1
[2,] 2
[3,] 3
[4,] 4
[5,] 5
[6,] 6
> m1<- matrix(c(1,2,3,4,5,6),ncol=2,nrow = 3)
> m1
[,1] [,2]
[1,] 1 4
[2,] 2 5
[3,] 3 6
> m1<- matrix(c(1,2,3,4,5,6),ncol=2,nrow = 3,byrow = TRUE)
> m1
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
> m1<- matrix(c("Nancy","simran","arshi","puja","narayani","shristi"),ncol=2,nrow =
3,byrow = TRUE)
> m1
[,1] [,2]
[1,] "Nancy" "simran"
[2,] "arshi" "puja"
[3,] "narayani" "shristi"
> m1[3,2]
[1] "shristi"
> m1[1,2]
[1] "simran"
Array
vec1 <-c(1,2,3,4,5,6)
> vec2<-c(7,8,9,10,11,12)
> vec1
[1] 1 2 3 4 5 6
> vec2
[1] 7 8 9 10 11 12
> a1<- array(c(vec1,vec2)dim = c(2,3,2))
Error: unexpected symbol in "a1<- array(c(vec1,vec2)dim"
> a1<- array(c(vec1,vec2),dim = c(2,3,2))
> a1
,,1
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
,,2
[,1] [,2] [,3]
[1,] 7 9 11
[2,] 8 10 12
> a1[2,1,3]
Error in a1[2, 1, 3] : subscript out of bounds
> a1[1,3,2]
[1] 11
> a1[2,2,1]
[1] 4
>
Factors & Data frame
factor(c("blue","yellow","green"))
[1] blue yellow green
Levels: blue green yellow
> factor(c("blue","yellow","green","green"))
[1] blue yellow green green
Levels: blue green yellow
> data.frame(fruit_name=c("apple","banana","orange"),fruit_cost=c(100,12,50))
fruit_name fruit_cost
1 apple 100
2 banana 12
3 orange 50
> fruit$fruit_name
Error: object 'fruit' not found
> data.frame(fruit_name=c("apple","banana","orange"),fruit_cost=c(100,12,50)->
fruit_cost)
fruit_name fruit_cost
1 apple 100
2 banana 12
3 orange 50
> fruit$fruit_name
Error: object 'fruit' not found
> data.frame(fruit_name=c("apple","banana","orange"),fruit_cost=c(100,12,50)->fruit)
fruit_name fruit_cost
1 apple 100
2 banana 12
3 orange 50
> fruit$fruit_name
Error in fruit$fruit_name : $ operator is invalid for atomic vectors
> data.frame(fruit_name=c("apple","banana","orange"),fruit_cost=c(100,12,50))->fruit
> fruit$fruit_name
[1] "apple" "banana" "orange"
> fruit$fruit_cost
[1] 100 12 50
Inbuilt function
info<-
data.frame(name=c("nancy","narayani","shristi","sriansh","vaibhav","gaurav"),id=c(101,102,103,104,105,106),salary=c(30000,35000,25000,40000,38000,40000))
> info
name id salary
1 nancy 101 30000
2 narayani 102 35000
3 shristi 103 25000
4 sriansh 104 40000
5 vaibhav 105 38000
6 gaurav 106 40000
> View(info)
> str(info)
'data.frame': 6 obs. of 3 variables:
$ name : chr "nancy" "narayani" "shristi" "sriansh" ...
$ id : num 101 102 103 104 105 106
$ salary: num 30000 35000 25000 40000 38000 40000
> head(info)
name id salary
1 nancy 101 30000
2 narayani 102 35000
3 shristi 103 25000
4 sriansh 104 40000
5 vaibhav 105 38000
6 gaurav 106 40000
> head(info,n=4)
name id salary
1 nancy 101 30000
2 narayani 102 35000
3 shristi 103 25000
4 sriansh 104 40000
> tail(info,n=2)
name id salary
5 vaibhav 105 38000
6 gaurav 106 40000
> table(info)
, , salary = 25000
id
name 101 102 103 104 105 106
gaurav 0 0 0 0 0 0
nancy 0 0 0 0 0 0
narayani 0 0 0 0 0 0
shristi 0 0 1 0 0 0
sriansh 0 0 0 0 0 0
vaibhav 0 0 0 0 0 0
, , salary = 30000
id
name 101 102 103 104 105 106
gaurav 0 0 0 0 0 0
nancy 1 0 0 0 0 0
narayani 0 0 0 0 0 0
shristi 0 0 0 0 0 0
sriansh 0 0 0 0 0 0
vaibhav 0 0 0 0 0 0
, , salary = 35000
id
name 101 102 103 104 105 106
gaurav 0 0 0 0 0 0
nancy 0 0 0 0 0 0
narayani 0 1 0 0 0 0
shristi 0 0 0 0 0 0
sriansh 0 0 0 0 0 0
vaibhav 0 0 0 0 0 0
, , salary = 38000
id
name 101 102 103 104 105 106
gaurav 0 0 0 0 0 0
nancy 0 0 0 0 0 0
narayani 0 0 0 0 0 0
shristi 0 0 0 0 0 0
sriansh 0 0 0 0 0 0
vaibhav 0 0 0 0 1 0
, , salary = 40000
id
name 101 102 103 104 105 106
gaurav 0 0 0 0 0 1
nancy 0 0 0 0 0 0
narayani 0 0 0 0 0 0
shristi 0 0 0 0 0 0
sriansh 0 0 0 1 0 0
vaibhav 0 0 0 0 0 0
> table(names())
Error in names() : 0 arguments passed to 'names' which requires 1
> table(info$name)
gaurav nancy narayani shristi sriansh vaibhav
1 1 1 1 1 1
> min(info$salary)
[1] 25000
> max(info$salary)
[1] 40000
> mean(info$salary)
[1] 34666.67
> range(info$salary)
[1] 25000 40000
> if(info$id[102]>100){}
Error in if (info$id[102] > 100) { :
missing value where TRUE/FALSE needed
> if(info$id[102]>100){print(narayani)}
Error in if (info$id[102] > 100) { :
missing value where TRUE/FALSE needed
> if(info$id[2]>100){print(narayani)}
Error in print(narayani) : object 'narayani' not found
> if(info$id[2]>100){print(narayani)}
Error in print(narayani) : object 'narayani' not found
> if(info$id[102]>100){
+ print("narayani")
+}
Error in if (info$id[102] > 100) { :
missing value where TRUE/FALSE needed
> if(info$id[102]>100){
+ if(info$id[102]>100){
+}
+
+ print("narayani")
+}
Error in if (info$id[102] > 100) { :
missing value where TRUE/FALSE needed
> if(info$id[2]>100){
+ print("narayani")
+}
[1] "narayani"
> if(info$id[2]<100){
+ print("narayani")
+}
> if(info$id[2]<100){
+ print("narayani")
+ }else{}
NULL
> if(info$id[2]<100){
+ print("narayani")
+ }else{
+ print("not narayani")
+}
[1] "not narayani"
> vec1<- c(1,2,3,4,5,6,7,8,9)
> vec1
[1] 1 2 3 4 5 6 7 8 9
> for(i in vec1){
+ print(i+5)
+}
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
i=1
> while (i<9) {
+ print(i+5)
+ i=i+1
+}
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
>
# Give the chart file a name
> png(file = "barchart.png")
>
> # Plot the bar chart
> barplot(H)
>
> # Save the file
> dev.off()
null device
          1
>
> # Create the data for the chart
> H <- c(7,12,28,3,41)
> M <- c("Mar","Apr","May","Jun","Jul")
>
> # Give the chart file a name
> png(file = "barchart_months_revenue.png")
> # Plot the bar chart
> barplot(H,names.arg=M,xlab="Month",ylab="Revenue",col="blue",
+ main="Revenue chart",border="red")
>
> # Save the file
> dev.off()
null device
          1
>
> # Create the input vectors.
> colors = c("green","orange","brown")
> months <- c("Mar","Apr","May","Jun","Jul")
> regions <- c("East","West","North")
>
> # Create the matrix of the values.
> Values <- matrix(c(2,9,3,11,9,4,8,7,3,12,5,2,8,10,11), nrow = 3, ncol = 5,
byrow = TRUE)
>
> # Give the chart file a name
> png(file = "barchart_stacked.png")
>
> barplot(Values, main = "total revenue", names.arg = months, xlab
= "month", ylab = "revenue", col = colors)
>
> # Add the legend to the chart
> legend("topleft", regions, cex = 1.3, fill = colors)
>
> # Save the file
> dev.off()
null device
          1
[Screenshot: RStudio]
File Edit Code View Plots Session Build Debug Profile Tools Help
> # Create the data for the chart
> H <- c(7,12,28,3,41)
> M <- c("Mar","Apr","May","Jun","Jul")
> # Give the chart file a name
> png(file = "barchart_months_revenue.png")
> # Plot the bar chart
> barplot(H,names.arg=M,xlab="Month",ylab="Revenue",col="blue",
+ main="Revenue chart",border="red")
> # Save the file
> dev.off()
null device
          1
> # Create the input vectors.
> colors = c("green","orange","brown")
> months <- c("Mar","Apr","May","Jun","Jul")
> regions <- c("East","West","North")
> # Create the matrix of the values.
> Values <- matrix(c(2,9,3,11,9,4,8,7,3,12,5,2,8,10,11), nrow = 3, ncol = 5, byrow = TRUE)
> # Give the chart file a name
> png(file = "barchart_stacked.png")
> barplot(Values, main = "total revenue", names.arg = months, xlab = "month", ylab = "revenue", col = colors)
> # Add the legend to the chart
> legend("topleft", regions, cex = 1.3, fill = colors)
> # Save the file
Edit with WPS Office