0% found this document useful (0 votes)
3 views

Assignment_01

Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views

Assignment_01

Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

Assignment_01

Siti Noor Hazirah

2024-11-18

Question 1 (6.18 - page 252)

a. Prepare a stem-and-leaf plot for each predictor variable. What information do these plots provide?

# Load necessary libraries


library(ggplot2)
library(GGally)

## Registered S3 method overwritten by 'GGally':


## method from
## +.gg ggplot2

# Load the data


# Pick the data file interactively: file.choose() opens a file-selection
# dialog and returns the chosen path, so there is no hard-coded path to edit.
file_path <- file.choose()
# The file is read as whitespace-delimited (sep = "") with no header row,
# so the columns receive the default names V1, V2, ...
data <- read.table(file_path, header = FALSE, sep = "", dec = ".")
# Show the first rows as a quick check that the file was parsed as expected.
head(data)

## V1 V2 V3 V4 V5
## 1 13.5 1 5.02 0.14 123000
## 2 12.0 14 8.19 0.27 104079
## 3 10.5 16 3.00 0.00 39998
## 4 15.0 4 10.70 0.05 57112
## 5 14.0 11 8.97 0.07 60000
## 6 10.5 15 9.45 0.24 101385

# Part (a): Stem-and-leaf plots for each predictor variable


cat("Stem-and-leaf plots for each variable:\n")

## Stem-and-leaf plots for each variable:

# Print one labelled stem-and-leaf display for every column of `data`.
column_names <- names(data)
for (column_name in column_names) {
  # paste() joins with single spaces, matching cat()'s default separator.
  heading <- paste("\nStem-and-leaf plot for", column_name, ":\n")
  cat(heading)
  stem(data[[column_name]])
}

##
## Stem-and-leaf plot for V1 :
##

1
## The decimal point is at the |
##
## 10 | 55
## 11 | 5
## 12 | 005
## 13 | 00055888
## 14 | 0000000033555555568
## 15 | 000000000333355555669
## 16 | 00003555555558888
## 17 | 000258
## 18 | 08
## 19 | 33
##
##
## Stem-and-leaf plot for V2 :
##
## The decimal point is at the |
##
## 0 | 0000000000000000
## 2 | 00000000000000000000000
## 4 | 00000
## 6 | 0
## 8 | 0
## 10 | 00
## 12 | 00000
## 14 | 0000000000000
## 16 | 0000000000
## 18 | 000
## 20 | 00
##
##
## Stem-and-leaf plot for V3 :
##
## The decimal point is at the |
##
## 2 | 0
## 4 | 080003358
## 6 | 012613
## 8 | 00001223456001555689
## 10 | 013344566677778123344666668
## 12 | 00011115777889002
## 14 | 6
##
##
## Stem-and-leaf plot for V4 :
##
## The decimal point is 1 digit(s) to the left of the |
##
## 0 | 0000000000000000000000000000002333333333334444445555556678889
## 1 | 023444469
## 2 | 1223477
## 3 | 3
## 4 |
## 5 | 7

2
## 6 | 0
## 7 | 3
##
##
## Stem-and-leaf plot for V5 :
##
## The decimal point is 5 digit(s) to the right of the |
##
## 0 | 333333444444
## 0 | 555666667778899
## 1 | 000001111222333334
## 1 | 578889
## 2 | 011122334444
## 2 | 555788899
## 3 | 002
## 3 | 567
## 4 | 23
## 4 | 8

# Part (b): Scatter plot matrix and correlation matrix


# Scatter plot matrix
ggpairs(data, title = "Scatter Plot Matrix")

Scatter Plot Matrix


V1 V2 V3 V4 V5
0.25
0.20
0.15 Corr: Corr: Corr: Corr:

V1
0.10
0.05 −0.250* 0.414*** 0.067 0.535***
0.00
20
15 Corr: Corr: Corr:

V2
10
5 0.389*** −0.253* 0.289**
0
15.0
12.5
10.0 Corr: Corr:
V3
7.5 −0.380*** 0.441***
5.0
2.5
0.6
Corr:
V4

0.4
0.2 0.081
0.0
5e+05
4e+05
3e+05
V5

2e+05
1e+05
12.5 15.0 17.5 0 5 10 15 202.5 5.0 7.510.012.515.0
0.0 0.2 0.4 0.6 1e+05
2e+05
3e+05
4e+05
5e+05

# Correlation matrix
cor_matrix <- cor(data)
print("Correlation Matrix:")

3
## [1] "Correlation Matrix:"

print(cor_matrix)

## V1 V2 V3 V4 V5
## V1 1.00000000 -0.2502846 0.4137872 0.06652647 0.53526237
## V2 -0.25028456 1.0000000 0.3888264 -0.25266347 0.28858350
## V3 0.41378716 0.3888264 1.0000000 -0.37976174 0.44069713
## V4 0.06652647 -0.2526635 -0.3797617 1.00000000 0.08061073
## V5 0.53526237 0.2885835 0.4406971 0.08061073 1.00000000

# Part (c): Fit regression model with four predictor variables


# V1 is treated as the response and V2-V5 as the four predictors.
# NOTE(review): this relies on the response being the first column of the
# data file -- confirm against the data set description.
model <- lm(V1 ~ V2 + V3 + V4 + V5, data = data)
# Coefficient table, residual summary, R-squared and overall F-test.
summary(model)

##
## Call:
## lm(formula = V1 ~ V2 + V3 + V4 + V5, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1872 -0.5911 -0.0910 0.5579 2.9441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.220e+01 5.780e-01 21.110 < 2e-16 ***
## V2 -1.420e-01 2.134e-02 -6.655 3.89e-09 ***
## V3 2.820e-01 6.317e-02 4.464 2.75e-05 ***
## V4 6.193e-01 1.087e+00 0.570 0.57
## V5 7.924e-06 1.385e-06 5.722 1.98e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.137 on 76 degrees of freedom
## Multiple R-squared: 0.5847, Adjusted R-squared: 0.5629
## F-statistic: 26.76 on 4 and 76 DF, p-value: 7.272e-14

# Display the regression equation


cat("Regression Equation:\n")

## Regression Equation:

# Print the fitted regression equation from the model coefficients.
#
# Coefficients are formatted with 4 significant digits ("%.4g") rather than a
# fixed 3 decimals: the V5 slope is of order 1e-06 and "%.3f" rendered it as
# 0.000, hiding a highly significant effect. Each slope's sign is emitted
# explicitly so negative terms read "- 0.142*V2" instead of "+ -0.142*V2".
coeffs <- coef(model)
slopes <- coeffs[-1]
slope_terms <- sprintf(
  "%s %.4g*%s",
  ifelse(slopes < 0, "-", "+"),
  abs(slopes),
  names(slopes)
)
cat(sprintf("V1 = %.4g %s\n", coeffs[1], paste(slope_terms, collapse = " ")))

## V1 = 12.2 - 0.142*V2 + 0.282*V3 + 0.6193*V4 + 7.924e-06*V5

4
# Part (d): Obtain residuals
# NOTE: the variable name `residuals` shadows stats::residuals() for plain
# variable lookups (function calls such as residuals(model) still resolve to
# the function). It is kept because the residual plot below reads it.
residuals <- resid(model)
cat("Residuals:\n")

## Residuals:

print(residuals)

## 1 2 3 4 5 6
## -1.035672440 -1.513806414 -0.591053402 -0.133568082 0.313283765 -3.187185224
## 7 8 9 10 11 12
## -0.538356749 0.236302386 1.989220372 0.105829603 0.023124830 -0.337070751
## 13 14 15 16 17 18
## 0.717869468 -0.392411015 -0.201019573 -0.814937024 0.101690072 -1.759131637
## 19 20 21 22 23 24
## -1.210114916 -0.634341765 -0.366004170 0.288596123 -0.093200248 0.233884284
## 25 26 27 28 29 30
## -0.853339941 -2.123934469 0.466014057 -0.573974675 -1.068826727 -0.197717691
## 31 32 33 34 35 36
## -1.121737177 -0.173906919 -1.030125636 -0.090953654 0.215053952 0.784804746
## 37 38 39 40 41 42
## 1.083920373 -2.132451269 -0.185470952 -1.120385453 -0.012771680 2.500938643
## 43 44 45 46 47 48
## -1.582833452 0.929599530 0.394236721 0.117200255 0.815339787 1.605896564
## 49 50 51 52 53 54
## 0.557941960 0.494737472 0.207611404 -0.032045798 1.155796537 0.234272601
## 55 56 57 58 59 60
## -1.073489739 1.059646672 -0.261711555 1.031651273 -0.345957207 0.203372872
## 61 62 63 64 65 66
## 0.917961126 2.944144932 2.459696482 1.859088749 1.451807658 -0.483857748
## 67 68 69 70 71 72
## -0.756250356 2.011402309 0.078550427 0.009892809 1.766898426 -0.463930876
## 73 74 75 76 77 78
## -0.510410866 -0.106354746 1.209427169 -0.261085606 -0.627547725 0.910085787
## 79 80 81
## -0.550846871 -2.030180944 -0.906819056

# Plot residuals to assess the model fit


# Residuals-versus-fitted scatter plot with a dashed reference line at zero.
# The `+` after the ggplot() call is required: without it the layers below
# are never added to the plot and form a separate, invalid expression.
ggplot(
  data = data.frame(Fitted = fitted(model), Residuals = residuals),
  aes(x = Fitted, y = Residuals)
) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Residual Plot", x = "Fitted Values", y = "Residuals") +
  theme_minimal()

5
Residual Plot

2
Residuals

−2

12 14 16 18
Fitted Values

You might also like