# Multivariate Assignment
# KMA/G/OO9/20
# Generating data ----
# Simulates 10 observations of one response (Y) and three predictors
# (X1, X2, X3) and collects the rounded values in the data frame `data`.
#
# The seed is fixed so that every run draws the same sample; without it the
# numbers quoted in the interpretation comments cannot be reproduced. The
# transcript in the OUTPUT section comes from an earlier run, so its values
# differ from a seeded re-run.
set.seed(2020)

# Response: 10 draws from N(mean = 180, sd = 10), rounded to 1 decimal place.
y <- rnorm(10, mean = 180, sd = 10)
y
Y <- round(y, 1)
Y

# Predictor 1: 10 draws from Uniform(20, 40), rounded to whole numbers.
x1 <- runif(10, min = 20, max = 40)
x1
X1 <- round(x1, 0)
X1

# Predictor 2: 10 draws from Uniform(0, 2), rounded to 1 decimal place.
x2 <- runif(10, min = 0, max = 2)
x2
X2 <- round(x2, 1)
X2

# Predictor 3: 10 draws from N(mean = 7, sd = 1), rounded to whole numbers.
x3 <- rnorm(10, mean = 7, sd = 1)
x3
X3 <- round(x3, 0)
X3

# One row per observation, one column per (rounded) variable.
data <- data.frame(Y, X1, X2, X3)
data
# Descriptive statistics and plots ----
# For each of Y, X1, X2 and X3 in `data`: the summary() table (min, quartiles,
# median, mean, max), the sample standard deviation, the sample variance, a
# histogram and a boxplot. One loop replaces four copy-pasted sections.
for (variable in c("Y", "X1", "X2", "X3")) {
  values <- data[[variable]]

  # Label the console output; inside a loop nothing is auto-printed, so every
  # result needs an explicit print().
  cat("\nVariable:", variable, "\n")
  print(summary(values))
  # standard deviation
  print(sd(values))
  # variance
  print(var(values))
  # histogram
  hist(values, main = paste("Histogram of", variable), xlab = variable)
  # boxplot
  boxplot(values, main = paste("Boxplot of", variable))
}
# Covariance and correlation ----
# Numeric matrix view of the data: rows are observations, columns are the
# variables Y, X1, X2, X3.
data_matrix <- as.matrix(data)
data_matrix

# Sample covariance matrix; cov() already uses the 1 / (n - 1) divisor.
covariance_matrix <- cov(data)
covariance_matrix

# Number of observations.
n <- nrow(data_matrix)
n

# Sample covariance computed from its definition, S = Xc' Xc / (n - 1), where
# Xc is the column-centred data matrix. The 1 / (n - 1) factor belongs on the
# cross-product of the centred data; applying it to the output of cov() would
# divide by n - 1 twice. sigma therefore agrees with covariance_matrix.
centred <- scale(data_matrix, center = TRUE, scale = FALSE)
sigma <- (1 / (n - 1)) * crossprod(centred)
sigma

# Pearson correlation matrix of the four variables.
correlation_matrix <- cor(data_matrix)
correlation_matrix

# Colour-coded display of the correlation matrix.
library(corrplot)
corrplot(correlation_matrix, method = "color")
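# Interpretation (r values from the run recorded in the OUTPUT section):
# correlation between Y and X1 is weakly positive (r = 0.198)
# correlation between Y and X2 is weakly negative (r = -0.160)
# correlation between Y and X3 is moderately negative (r = -0.417)
# correlation between X1 and X2 is very weakly positive (r = 0.063)
# correlation between X1 and X3 is weakly positive (r = 0.282)
# correlation between X2 and X3 is moderately strong and positive (r = 0.596)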
# Model ----
# Multiple linear regression of Y on X1, X2 and X3, fitted on `data`.
model <- lm(Y ~ X1 + X2 + X3, data = data)
model

# Summary of the model: coefficient table, residuals and overall F statistic.
model_summary <- summary(model)
model_summary

# Overall F-test. H0: the model is not significant; H1: the model is
# significant. The p-value is the upper tail of the F distribution at the
# observed statistic, computed from the fit instead of being copied by hand.
f_stat <- model_summary$fstatistic
overall_p_value <- pf(
  f_stat[["value"]], f_stat[["numdf"]], f_stat[["dendf"]],
  lower.tail = FALSE
)
overall_p_value

# Results of the run recorded in the OUTPUT section:
# Y = 205.0161 + 0.9234 X1 + 4.0115 X2 - 8.2133 X3
# The intercept is significant at the 0.01 level (p = 0.00216).
# To test H0: the model is not significant vs H1: the model is significant,
# the p-value is 0.5009 > 0.05,
# therefore we fail to reject H0; hence the model is not significant.
OUTPUT (console transcript from one run of the script above)
> #generating data
> y<-rnorm(10,180,10)
> y
[1] 177.2675 175.3130 187.0417 168.0264 188.6637 188.6415 168.0138 186.3949
[9] 204.3023 174.4278
> Y=round(y,1)
> Y
[1] 177.3 175.3 187.0 168.0 188.7 188.6 168.0 186.4 204.3 174.4
> x1<-runif(10,min = 20,max = 40)
> x1
[1] 36.01836 34.55525 24.34096 34.88477 37.33307 30.58812 31.97277 38.75265
[9] 39.01448 33.21065
> X1=round(x1,0)
> X1
[1] 36 35 24 35 37 31 32 39 39 33
> x2<-runif(10,min=0,max=2)
> x2
[1] 0.1445732 1.6526632 0.9590882 0.7824576 0.5982456 0.4886165 0.8436203 1.8755404
[9] 0.5289702 1.1371375
> X2=round(x2,1)
> X2
[1] 0.1 1.7 1.0 0.8 0.6 0.5 0.8 1.9 0.5 1.1
> x3<-rnorm(10,7,1)
> x3
[1] 6.166156 7.578722 5.912419 8.484031 5.813793 7.101079 7.532989 7.586735
[9] 6.698253 7.079502
> X3=round(x3,0)
> X3
[1] 6 8 6 8 6 7 8 8 7 7
> data<-data.frame(Y,X1,X2,X3)
> data
Y X1 X2 X3
1 177.3 36 0.1 6
2 175.3 35 1.7 8
3 187.0 24 1.0 6
4 168.0 35 0.8 8
5 188.7 37 0.6 6
6 188.6 31 0.5 7
7 168.0 32 0.8 8
8 186.4 39 1.9 8
9 204.3 39 0.5 7
10 174.4 33 1.1 7
> summary(data$Y)
Min. 1st Qu. Median Mean 3rd Qu. Max.
168.0 174.6 181.8 181.8 188.2 204.3
> # standard deviation
> sd(data$Y)
[1] 11.28637
> #variance
> var(data$Y)
[1] 127.3822
> #histogram
> hist(data$Y,main = "Histogram of Y")
> #boxplot
> boxplot(data$Y,main="Boxplot of Y")
[Figure: Boxplot of Y — axis ticks at 170, 180, 190, 200]
[Figure: Histogram of Y — x-axis data$Y; y-axis Frequency, ticks 0 to 4]
> summary(data$X1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
24.00 32.25 35.00 34.10 36.75 39.00
> # standard deviation
> sd(data$X1)
[1] 4.45845
> #variance
> var(data$X1)
[1] 19.87778
> #histogram
> hist(data$X1,main = "Histogram of X1")
> #boxplot
> boxplot(data$X1,main="Boxplot of X1")
[Figure: Histogram of X1 — x-axis data$X1, ticks 20 to 40; y-axis Frequency, ticks 0 to 5]
[Figure: Boxplot of X1 — axis ticks at 25, 30, 35]
> summary(data$X2)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.100 0.525 0.800 0.900 1.075 1.900
> # standard deviation
> sd(data$X2)
[1] 0.5537749
> #variance
> var(data$X2)
[1] 0.3066667
> #histogram
> hist(data$X2,main = "Histogram of X2")
> #boxplot
> boxplot(data$X2,main="Boxplot of X2")
[Figure: Histogram of X2 — x-axis data$X2; y-axis Frequency, ticks 0 to 4]
[Figure: Boxplot of X2 — axis ticks at 0.5, 1.0, 1.5]
> summary(data$X3)
Min. 1st Qu. Median Mean 3rd Qu. Max.
6.00 6.25 7.00 7.10 8.00 8.00
> # standard deviation
> sd(data$X3)
[1] 0.875595
> #variance
> var(data$X3)
[1] 0.7666667
> #histogram
> hist(data$X3,main = "Histogram of X3")
> #boxplot
> boxplot(data$X3,main="Boxplot of X3")
[Figure: Boxplot of X3 — axis ticks at 6.0, 6.5, 7.0, 7.5, 8.0]
[Figure: Histogram of X3 — x-axis data$X3; y-axis Frequency, ticks 0 to 4]
> data_matrix<-data
> data_matrix
Y X1 X2 X3
1 177.3 36 0.1 6
2 175.3 35 1.7 8
3 187.0 24 1.0 6
4 168.0 35 0.8 8
5 188.7 37 0.6 6
6 188.6 31 0.5 7
7 168.0 32 0.8 8
8 186.4 39 1.9 8
9 204.3 39 0.5 7
10 174.4 33 1.1 7
> covariance_matrix<-cov(data)
> covariance_matrix
Y X1 X2 X3
Y 127.3822222 9.9444444 -0.9988889 -4.1222222
X1 9.9444444 19.8777778 0.1555556 1.1000000
X2 -0.9988889 0.1555556 0.3066667 0.2888889
X3 -4.1222222 1.1000000 0.2888889 0.7666667
> n=nrow(data_matrix)
> n
[1] 10
> sigma=(1/(n-1))*covariance_matrix
> sigma
Y X1 X2 X3
Y 14.1535802 1.10493827 -0.11098765 -0.45802469
X1 1.1049383 2.20864198 0.01728395 0.12222222
X2 -0.1109877 0.01728395 0.03407407 0.03209877
X3 -0.4580247 0.12222222 0.03209877 0.08518519
> correlation_matrix<-cor(data_matrix)
> correlation_matrix
Y X1 X2 X3
Y 1.0000000 0.19762514 -0.15981940 -0.4171322
X1 0.1976251 1.00000000 0.06300403 0.2817770
X2 -0.1598194 0.06300403 1.00000000 0.5957914
X3 -0.4171322 0.28177696 0.59579144 1.0000000
> library(corrplot)
> corrplot(correlation_matrix,method = "color")
> #correlation between Y and X1 is weakly positive
> #correlation between Y and X2 is weakly negative
> #correlation between Y and X3 is weakly negative
> #correlation between X1 and X2 is weakly positive
> #correlation between X1 and X3 is weakly positive
> #correlation between X2 and X3 is positive strong
[Figure: corrplot colour map of correlation_matrix — rows and columns Y, X1, X2, X3; colour legend running from 1 down to -1 in steps of 0.2]
> #model
> model<-lm(Y~X1+X2+X3,data = data.frame(Y,X1,X2,X3))
> model
Call:
lm(formula = Y ~ X1 + X2 + X3, data = data.frame(Y, X1, X2, X3))
Coefficients:
(Intercept) X1 X2 X3
205.0161 0.9234 4.0115 -8.2133
> #summary of the model
> summary(model)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = data.frame(Y, X1, X2, X3))
Residuals:
Min 1Q Median 3Q Max
-12.080 -6.145 -3.379 4.682 18.759
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 205.0161 39.9775 5.128 0.00216 **
X1 0.9234 0.9055 1.020 0.34716
X2 4.0115 8.7089 0.461 0.66129
X3 -8.2133 5.7292 -1.434 0.20168
---
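Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
[The rest of the summary(model) output (residual standard error, R-squared, and the F-statistic line carrying the p-value 0.5009 quoted below) is missing from this transcript.]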
> # Y=205.0161+0.9234X1+4.0115X2-8.2133X3
> # The intercept is significant at 0.01
> # To test H0: the model is not significant vs H1:the model is significant, 0.5009 is the p-value>0.05
> # therefore we fail to reject H0 hence the model is not significant