0% found this document useful (0 votes)
8 views

Assignment_01

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views

Assignment_01

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

Assignment_01

Siti Noor Hazirah

2024-11-18

Question 1 (6.18 - page 252)

a. Prepare a stem-and-leaf plot for each predictor variable. What information do these plots provide?

# Load necessary libraries


library(ggplot2)
library(GGally)

## Registered S3 method overwritten by 'GGally':


## method from
## +.gg ggplot2

# Load the data


# Pick the data file through the interactive file chooser, then read it as a
# whitespace-delimited table with no header row (columns are auto-named).
file_path <- file.choose()
data <- read.table(
  file = file_path,
  header = FALSE,
  sep = "",
  dec = "."
)
head(data)

## V1 V2 V3 V4 V5
## 1 13.5 1 5.02 0.14 123000
## 2 12.0 14 8.19 0.27 104079
## 3 10.5 16 3.00 0.00 39998
## 4 15.0 4 10.70 0.05 57112
## 5 14.0 11 8.97 0.07 60000
## 6 10.5 15 9.45 0.24 101385

# Part (a): Stem-and-leaf plots for each predictor variable


# Print a heading ahead of the per-column stem-and-leaf displays.
cat("Stem-and-leaf plots for each variable:\n")

## Stem-and-leaf plots for each variable:

# Print one stem-and-leaf display per column of `data`, each under its own
# labelled heading.
# NOTE(review): this walks every column, including V1, which the regression
# further down uses as the response -- confirm whether only the predictors
# are wanted here.
for (var_name in names(data)) {
  cat("\nStem-and-leaf plot for", var_name, ":\n")
  stem(data[[var_name]])
}

##
## Stem-and-leaf plot for V1 :
##

1
## The decimal point is at the |
##
## 10 | 55
## 11 | 5
## 12 | 005
## 13 | 00055888
## 14 | 0000000033555555568
## 15 | 000000000333355555669
## 16 | 00003555555558888
## 17 | 000258
## 18 | 08
## 19 | 33
##
##
## Stem-and-leaf plot for V2 :
##
## The decimal point is at the |
##
## 0 | 0000000000000000
## 2 | 00000000000000000000000
## 4 | 00000
## 6 | 0
## 8 | 0
## 10 | 00
## 12 | 00000
## 14 | 0000000000000
## 16 | 0000000000
## 18 | 000
## 20 | 00
##
##
## Stem-and-leaf plot for V3 :
##
## The decimal point is at the |
##
## 2 | 0
## 4 | 080003358
## 6 | 012613
## 8 | 00001223456001555689
## 10 | 013344566677778123344666668
## 12 | 00011115777889002
## 14 | 6
##
##
## Stem-and-leaf plot for V4 :
##
## The decimal point is 1 digit(s) to the left of the |
##
## 0 | 0000000000000000000000000000002333333333334444445555556678889
## 1 | 023444469
## 2 | 1223477
## 3 | 3
## 4 |
## 5 | 7

2
## 6 | 0
## 7 | 3
##
##
## Stem-and-leaf plot for V5 :
##
## The decimal point is 5 digit(s) to the right of the |
##
## 0 | 333333444444
## 0 | 555666667778899
## 1 | 000001111222333334
## 1 | 578889
## 2 | 011122334444
## 2 | 555788899
## 3 | 002
## 3 | 567
## 4 | 23
## 4 | 8

# Part (b): Scatter plot matrix and correlation matrix


# Scatter plot matrix
# Draw the pairwise plot matrix for all columns of `data` in one figure
# (ggpairs() comes from the GGally package).
ggpairs(data, title = "Scatter Plot Matrix")

Scatter Plot Matrix


V1 V2 V3 V4 V5
0.25
0.20
0.15 Corr: Corr: Corr: Corr:

V1
0.10
0.05 −0.250* 0.414*** 0.067 0.535***
0.00
20
15 Corr: Corr: Corr:

V2
10
5 0.389*** −0.253* 0.289**
0
15.0
12.5
10.0 Corr: Corr:
V3
7.5 −0.380*** 0.441***
5.0
2.5
0.6
Corr:
V4

0.4
0.2 0.081
0.0
5e+05
4e+05
3e+05
V5

2e+05
1e+05
12.5 15.0 17.5 0 5 10 15 202.5 5.0 7.510.012.515.0
0.0 0.2 0.4 0.6 1e+05
2e+05
3e+05
4e+05
5e+05

# Correlation matrix
# Pairwise correlations between all columns of `data`; the matrix is kept in
# `cor_matrix` so it can be printed after the heading.
cor_matrix <- stats::cor(data)
print("Correlation Matrix:")

3
## [1] "Correlation Matrix:"

# Show the full correlation table computed above in `cor_matrix`.
print(cor_matrix)

## V1 V2 V3 V4 V5
## V1 1.00000000 -0.2502846 0.4137872 0.06652647 0.53526237
## V2 -0.25028456 1.0000000 0.3888264 -0.25266347 0.28858350
## V3 0.41378716 0.3888264 1.0000000 -0.37976174 0.44069713
## V4 0.06652647 -0.2526635 -0.3797617 1.00000000 0.08061073
## V5 0.53526237 0.2885835 0.4406971 0.08061073 1.00000000

# Part (c): Fit regression model with four predictor variables


# Fit a linear model with V1 as the response and V2, V3, V4 and V5 as the
# four predictors, then print the coefficient table and fit statistics.
# Assumes the first column (V1) is the dependent variable -- confirm against
# the data set description.
model <- lm(V1 ~ V2 + V3 + V4 + V5, data = data)
summary(model)

##
## Call:
## lm(formula = V1 ~ V2 + V3 + V4 + V5, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1872 -0.5911 -0.0910 0.5579 2.9441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.220e+01 5.780e-01 21.110 < 2e-16 ***
## V2 -1.420e-01 2.134e-02 -6.655 3.89e-09 ***
## V3 2.820e-01 6.317e-02 4.464 2.75e-05 ***
## V4 6.193e-01 1.087e+00 0.570 0.57
## V5 7.924e-06 1.385e-06 5.722 1.98e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.137 on 76 degrees of freedom
## Multiple R-squared: 0.5847, Adjusted R-squared: 0.5629
## F-statistic: 26.76 on 4 and 76 DF, p-value: 7.272e-14

# Display the regression equation


# Print a heading ahead of the fitted regression equation.
cat("Regression Equation:\n")

## Regression Equation:

# Extract the fitted coefficients; they are looked up by name below so the
# equation does not depend on their position in the vector.
coeffs <- coef(model)

# `%+.3f` prints each slope with an explicit sign, so a negative slope no
# longer renders as "+ -0.142". The V5 slope is on the order of 1e-06, which
# a fixed three-decimal format rounds to 0.000, so it is printed in
# scientific notation instead.
cat(sprintf(
  "V1 = %.3f %+.3f*V2 %+.3f*V3 %+.3f*V4 %+.3e*V5\n",
  coeffs[["(Intercept)"]], coeffs[["V2"]], coeffs[["V3"]],
  coeffs[["V4"]], coeffs[["V5"]]
))

## V1 = 12.201 -0.142*V2 +0.282*V3 +0.619*V4 +7.924e-06*V5

4
# Part (d): Obtain residuals
# Pull the residuals out of the fitted model and print a heading for the
# listing that follows. The variable is named `residuals` because later
# statements refer to it by that name.
residuals <- stats::residuals(model)
cat("Residuals:\n")

## Residuals:

# List every residual, labelled by observation number.
print(residuals)

## 1 2 3 4 5 6
## -1.035672440 -1.513806414 -0.591053402 -0.133568082 0.313283765 -3.187185224
## 7 8 9 10 11 12
## -0.538356749 0.236302386 1.989220372 0.105829603 0.023124830 -0.337070751
## 13 14 15 16 17 18
## 0.717869468 -0.392411015 -0.201019573 -0.814937024 0.101690072 -1.759131637
## 19 20 21 22 23 24
## -1.210114916 -0.634341765 -0.366004170 0.288596123 -0.093200248 0.233884284
## 25 26 27 28 29 30
## -0.853339941 -2.123934469 0.466014057 -0.573974675 -1.068826727 -0.197717691
## 31 32 33 34 35 36
## -1.121737177 -0.173906919 -1.030125636 -0.090953654 0.215053952 0.784804746
## 37 38 39 40 41 42
## 1.083920373 -2.132451269 -0.185470952 -1.120385453 -0.012771680 2.500938643
## 43 44 45 46 47 48
## -1.582833452 0.929599530 0.394236721 0.117200255 0.815339787 1.605896564
## 49 50 51 52 53 54
## 0.557941960 0.494737472 0.207611404 -0.032045798 1.155796537 0.234272601
## 55 56 57 58 59 60
## -1.073489739 1.059646672 -0.261711555 1.031651273 -0.345957207 0.203372872
## 61 62 63 64 65 66
## 0.917961126 2.944144932 2.459696482 1.859088749 1.451807658 -0.483857748
## 67 68 69 70 71 72
## -0.756250356 2.011402309 0.078550427 0.009892809 1.766898426 -0.463930876
## 73 74 75 76 77 78
## -0.510410866 -0.106354746 1.209427169 -0.261085606 -0.627547725 0.910085787
## 79 80 81
## -0.550846871 -2.030180944 -0.906819056

# Plot residuals to assess the model fit


# Residuals-versus-fitted scatter with a dashed reference line at zero.
# Every layer must be joined to the ggplot() call with a trailing `+`;
# without it the base plot is drawn empty and the layers are evaluated as a
# separate, detached expression.
# The residuals are taken from the model directly so the plot does not depend
# on a global variable that shares its name with stats::residuals().
ggplot(
  data = data.frame(Fitted = fitted(model), Residuals = resid(model)),
  aes(x = Fitted, y = Residuals)
) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Residual Plot", x = "Fitted Values", y = "Residuals") +
  theme_minimal()

5
Residual Plot

2
Residuals

−2

12 14 16 18
Fitted Values

You might also like