STA1040 Assignment
STA1040 Assignment
2023-10-05
library(readxl)
# Load the sales workbook into Data. The location is an absolute path on the
# author's machine, so it must be adjusted before running elsewhere.
Data <- read_excel(
  path = "C:/Users/mmrabu/Downloads/Sales Data.xlsx"
)
# Display the imported table.
Data
## # A tibble: 35 x 16
## manufact model year sales resale type price engine_s horsepow wheelbas
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Acura TL 2013 16.9 16.4 0 21.5 1.8 140 101.
## 2 Acura TL 2015 39.4 NA 0 28.4 3.2 225 108.
## 3 Acura RL 2013 14.1 18.2 0 NA 3.2 NA 107.
## 4 Acura RL 2015 8.59 29.7 0 42 3.5 210 NA
## 5 Audi A4 2013 20.4 22.3 0 24.0 1.8 150 103.
## 6 Audi A4 2015 18.8 23.6 0 34.0 2.8 200 109.
## 7 Audi A8 2013 1.38 39 0 62 4.2 310 113
## 8 BMW 328i 2013 19.7 NA 0 27.0 NA 170 107.
## 9 BMW 328i 2015 NA 28.7 0 33.4 2.8 193 107.
## 10 BMW 528i 2015 17.5 36.1 0 38.9 2.8 193 NA
## # i 25 more rows
## # i 6 more variables: width <dbl>, length <dbl>, curb_wgt <dbl>,
## # fuel_cap <dbl>, mpg <dbl>, Grp_Horse <dbl>
#install.packages('dplyr')
#install.packages('tidyr')
library(tidyr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
# Pull the six columns of interest out of Data as standalone vectors.
Manufacturer <- Data$manufact
Model <- Data$model
Year <- Data$year
Sales <- Data$sales
Price <- Data$price
Horsepower <- Data$horsepow

# Assemble the vectors into one data frame; each column keeps the name of the
# vector it came from.
long <- data.frame(
  Manufacturer = Manufacturer,
  Model = Model,
  Year = Year,
  Sales = Sales,
  Price = Price,
  Horsepower = Horsepower
)
# Display the assembled data frame.
long
1
## Manufacturer Model Year Sales Price Horsepower
## 1 Acura TL 2013 16.919 21.500 140
## 2 Acura TL 2015 39.384 28.400 225
## 3 Acura RL 2013 14.114 NA NA
## 4 Acura RL 2015 8.588 42.000 210
## 5 Audi A4 2013 20.397 23.990 150
## 6 Audi A4 2015 18.780 33.950 200
## 7 Audi A8 2013 1.380 62.000 310
## 8 BMW 328i 2013 19.747 26.990 170
## 9 BMW 328i 2015 NA 33.400 193
## 10 BMW 528i 2015 17.527 38.900 193
## 11 Buick Century 2013 91.561 21.975 NA
## 12 Buick Century 2015 39.350 25.300 240
## 13 Buick Park Avenue 2013 27.851 31.965 205
## 14 Buick Park Avenue 2015 83.257 27.885 NA
## 15 Cadillac Seville 2013 11.185 31.010 200
## 16 Cadillac Eldorado 2013 NA 39.895 275
## 17 Cadillac Seville 2015 15.943 NA 275
## 18 Cadillac Eldorado 2015 6.536 39.665 275
## 19 Audi A8 2013 14.785 46.225 255
## 20 Chevrolet Metro 2013 21.855 9.235 55
## 21 Chevrolet Metro 2015 145.519 NA 115
## 22 Chevrolet Prizm 2013 32.299 13.960 120
## 23 Chevrolet Prizm 2015 NA 16.535 170
## 24 Chevrolet Lumina 2013 24.629 18.890 NA
## 25 Chevrolet Lumina 2015 42.593 19.390 180
## 26 Chevrolet Impala 2013 107.995 NA 180
## 27 Chevrolet Impala 2015 26.402 24.340 200
## 28 Chrysler Corvette 2013 17.947 45.705 345
## 29 Chrysler Corvette 2015 32.306 16.480 132
## 30 Chrysler Sebring Coupe 2013 7.854 19.840 163
## 31 Chrysler Sebring Coupe 2015 32.775 24.495 168
## 32 Chrysler Concorde 2013 NA 22.245 NA
## 33 Chrysler Concorde 2015 13.462 28.340 253
## 34 Chrysler 300M 2013 30.696 29.185 253
## 35 Chrysler 300M 2015 53.480 NA NA
## # A tibble: 35 x 6
## Manufacturer Model Sales Horsepower ‘2013‘ ‘2015‘
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Acura TL 16.9 140 21.5 NA
## 2 Acura TL 39.4 225 NA 28.4
## 3 Acura RL 14.1 NA NA NA
## 4 Acura RL 8.59 210 NA 42
## 5 Audi A4 20.4 150 24.0 NA
## 6 Audi A4 18.8 200 NA 34.0
## 7 Audi A8 1.38 310 62 NA
## 8 BMW 328i 19.7 170 27.0 NA
## 9 BMW 328i NA 193 NA 33.4
## 10 BMW 528i 17.5 193 NA 38.9
2
## # i 25 more rows
## # A tibble: 70 x 6
## Manufacturer Model Sales Horsepower Year Price
## <chr> <chr> <dbl> <dbl> <fct> <dbl>
## 1 Acura TL 16.9 140 2013 21.5
## 2 Acura TL 39.4 225 2013 NA
## 3 Acura RL 14.1 NA 2013 NA
## 4 Acura RL 8.59 210 2013 NA
## 5 Audi A4 20.4 150 2013 24.0
## 6 Audi A4 18.8 200 2013 NA
## 7 Audi A8 1.38 310 2013 62
## 8 BMW 328i 19.7 170 2013 27.0
## 9 BMW 328i NA 193 2013 NA
## 10 BMW 528i 17.5 193 2013 NA
## # i 60 more rows
# List Data's column names before combining columns.
colnames(Data)

# Paste wheelbas, width and length together into a single text column called
# Dimensions, separating the values with " in, ". unite() drops the three
# source columns by default (remove = TRUE) and pastes missing values as "NA"
# (na.rm = FALSE).
Data2 <- unite(
  Data,
  col = "Dimensions",
  all_of(c("wheelbas", "width", "length")),
  sep = " in, "
)
# Display the result.
Data2
## # A tibble: 35 x 14
## manufact model year sales resale type price engine_s horsepow Dimensions
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Acura TL 2013 16.9 16.4 0 21.5 1.8 140 101.2 in, 67~
## 2 Acura TL 2015 39.4 NA 0 28.4 3.2 225 108.1 in, 70~
## 3 Acura RL 2013 14.1 18.2 0 NA 3.2 NA 106.9 in, 70~
## 4 Acura RL 2015 8.59 29.7 0 42 3.5 210 NA in, 71.4 ~
## 5 Audi A4 2013 20.4 22.3 0 24.0 1.8 150 102.6 in, 68~
## 6 Audi A4 2015 18.8 23.6 0 34.0 2.8 200 108.7 in, 76~
## 7 Audi A8 2013 1.38 39 0 62 4.2 310 113 in, 74 i~
## 8 BMW 328i 2013 19.7 NA 0 27.0 NA 170 107.3 in, 68~
## 9 BMW 328i 2015 NA 28.7 0 33.4 2.8 193 107.3 in, 68~
## 10 BMW 528i 2015 17.5 36.1 0 38.9 2.8 193 NA in, 70.9 ~
## # i 25 more rows
## # i 4 more variables: curb_wgt <dbl>, fuel_cap <dbl>, mpg <dbl>,
## # Grp_Horse <dbl>
3
# Show the column names of Data2 (the table produced by the unite() step).
colnames(Data2)
Demonstrate how you can fill in missing Data using any appropriate techniques
## # A tibble: 35 x 16
## manufact model year sales resale type price engine_s horsepow wheelbas
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Acura TL 2013 16.9 16.4 0 21.5 1.8 140 101.
## 2 Acura TL 2015 39.4 0 0 28.4 3.2 225 108.
## 3 Acura RL 2013 14.1 18.2 0 0 3.2 0 107.
## 4 Acura RL 2015 8.59 29.7 0 42 3.5 210 0
## 5 Audi A4 2013 20.4 22.3 0 24.0 1.8 150 103.
## 6 Audi A4 2015 18.8 23.6 0 34.0 2.8 200 109.
## 7 Audi A8 2013 1.38 39 0 62 4.2 310 113
## 8 BMW 328i 2013 19.7 0 0 27.0 0 170 107.
## 9 BMW 328i 2015 0 28.7 0 33.4 2.8 193 107.
## 10 BMW 528i 2015 17.5 36.1 0 38.9 2.8 193 0
## # i 25 more rows
## # i 6 more variables: width <dbl>, length <dbl>, curb_wgt <dbl>,
## # fuel_cap <dbl>, mpg <dbl>, Grp_Horse <dbl>
Generate and add additional variables into your data using the existing variables
# Print Data3. The statement that creates it is not shown in this document;
# the printed result is a 35 x 17 tibble whose last listed column is Range.
Data3
## # A tibble: 35 x 17
## manufact model year sales resale type price engine_s horsepow wheelbas
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Acura TL 2013 16.9 16.4 0 21.5 1.8 140 101.
## 2 Acura TL 2015 39.4 0 0 28.4 3.2 225 108.
4
## 3 Acura RL 2013 14.1 18.2 0 0 3.2 0 107.
## 4 Acura RL 2015 8.59 29.7 0 42 3.5 210 0
## 5 Audi A4 2013 20.4 22.3 0 24.0 1.8 150 103.
## 6 Audi A4 2015 18.8 23.6 0 34.0 2.8 200 109.
## 7 Audi A8 2013 1.38 39 0 62 4.2 310 113
## 8 BMW 328i 2013 19.7 0 0 27.0 0 170 107.
## 9 BMW 328i 2015 0 28.7 0 33.4 2.8 193 107.
## 10 BMW 528i 2015 17.5 36.1 0 38.9 2.8 193 0
## # i 25 more rows
## # i 7 more variables: width <dbl>, length <dbl>, curb_wgt <dbl>,
## # fuel_cap <dbl>, mpg <dbl>, Grp_Horse <dbl>, Range <dbl>
# Print Data3 again. The printed result now has 18 columns, with Pwer_to_Wght
# listed after Range; the statement that adds it is not shown in this document.
Data3
## # A tibble: 35 x 18
## manufact model year sales resale type price engine_s horsepow wheelbas
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Acura TL 2013 16.9 16.4 0 21.5 1.8 140 101.
## 2 Acura TL 2015 39.4 0 0 28.4 3.2 225 108.
## 3 Acura RL 2013 14.1 18.2 0 0 3.2 0 107.
## 4 Acura RL 2015 8.59 29.7 0 42 3.5 210 0
## 5 Audi A4 2013 20.4 22.3 0 24.0 1.8 150 103.
## 6 Audi A4 2015 18.8 23.6 0 34.0 2.8 200 109.
## 7 Audi A8 2013 1.38 39 0 62 4.2 310 113
## 8 BMW 328i 2013 19.7 0 0 27.0 0 170 107.
## 9 BMW 328i 2015 0 28.7 0 33.4 2.8 193 107.
## 10 BMW 528i 2015 17.5 36.1 0 38.9 2.8 193 0
## # i 25 more rows
## # i 8 more variables: width <dbl>, length <dbl>, curb_wgt <dbl>,
## # fuel_cap <dbl>, mpg <dbl>, Grp_Horse <dbl>, Range <dbl>, Pwer_to_Wght <dbl>
Generate and add additional datasets to your data using the additional variables
## # A tibble: 35 x 17
## Make year sales resale type price engine_s horsepow wheelbas width length
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5
## 1 Acura~ 2013 16.9 16.4 0 21.5 1.8 140 101. 67.3 172.
## 2 Acura~ 2015 39.4 0 0 28.4 3.2 225 108. 70.3 0
## 3 Acura~ 2013 14.1 18.2 0 0 3.2 0 107. 70.6 192
## 4 Acura~ 2015 8.59 29.7 0 42 3.5 210 0 71.4 197.
## 5 Audi ~ 2013 20.4 22.3 0 24.0 1.8 150 103. 68.2 178
## 6 Audi ~ 2015 18.8 23.6 0 34.0 2.8 200 109. 76.1 192
## 7 Audi ~ 2013 1.38 39 0 62 4.2 310 113 74 198.
## 8 BMW 3~ 2013 19.7 0 0 27.0 0 170 107. 68.4 176
## 9 BMW 3~ 2015 0 28.7 0 33.4 2.8 193 107. 68.5 176
## 10 BMW 5~ 2015 17.5 36.1 0 38.9 2.8 193 0 70.9 188
## # i 25 more rows
## # i 6 more variables: curb_wgt <dbl>, fuel_cap <dbl>, mpg <dbl>,
## # Grp_Horse <dbl>, Range <dbl>, Pwer_to_Wght <dbl>
# Show the column names of Data3.
colnames(Data3)
6
## 28 420.2 0.10747664 0.10876963
## 29 432.0 0.04534524 0.03814815
## 30 381.6 Inf 0.05199161
## 31 384.0 0.05042017 0.06378906
## 32 442.0 0.00000000 0.05032805
## 33 391.0 0.07098765 0.07248082
## 34 391.0 0.07092795 0.07464194
## 35 0.0 NaN NaN
7
## 32 Chrysler Concorde 2013 0.000 13.725 0 22.245 2.7 0
## 33 Chrysler Concorde 2015 13.462 17.325 0 28.340 0.0 253
## 34 Chrysler 300M 2013 30.696 0.000 0 29.185 3.5 253
## 35 Chrysler 300M 2015 53.480 19.540 1 0.000 0.0 0
## wheelbas width length curb_wgt fuel_cap mpg Grp_Horse Range Pwer_to_Wght
## 1 101.2 67.3 172.4 2.639 13.2 28.0 2 369.6 0.05305040
## 2 108.1 70.3 0.0 3.517 17.2 25.0 4 430.0 0.06397498
## 3 106.9 70.6 192.0 3.470 17.2 0.0 0 0.0 0.00000000
## 4 0.0 71.4 196.6 3.850 18.0 22.0 4 396.0 0.05454545
## 5 102.6 68.2 178.0 2.998 16.4 27.0 2 442.8 0.05003336
## 6 108.7 76.1 192.0 3.561 18.5 22.0 4 407.0 0.05616400
## 7 113.0 74.0 198.2 3.902 23.7 0.0 6 0.0 0.07944644
## 8 107.3 68.4 176.0 3.179 0.0 26.1 3 0.0 0.05347594
## 9 107.3 68.5 176.0 0.000 16.6 24.0 3 398.4 Inf
## 10 0.0 70.9 188.0 3.472 18.5 24.8 0 458.8 0.05558756
## 11 109.0 72.7 194.6 3.368 17.5 25.0 3 437.5 0.00000000
## 12 109.0 72.7 196.2 3.543 17.5 0.0 4 0.0 0.06773920
## 13 113.8 74.7 206.8 3.778 18.5 24.0 4 444.0 0.05426151
## 14 112.2 0.0 0.0 3.591 17.5 25.0 0 437.5 0.00000000
## 15 107.4 70.3 194.8 3.770 18.0 22.0 4 396.0 0.05305040
## 16 0.0 74.5 207.2 3.978 18.5 0.0 5 0.0 0.06913022
## 17 112.2 75.0 201.0 0.000 18.5 22.0 0 407.0 Inf
## 18 108.0 75.5 200.6 3.843 19.0 22.0 5 418.0 0.07155868
## 19 0.0 77.0 201.2 5.572 30.0 15.0 5 450.0 0.04576454
## 20 93.1 62.6 149.4 1.895 10.3 45.0 1 463.5 0.02902375
## 21 104.1 67.9 180.9 2.676 14.3 27.0 2 386.1 0.04297459
## 22 97.1 0.0 0.0 2.398 13.2 0.0 2 0.0 0.05004170
## 23 107.0 69.4 190.4 3.051 15.0 25.0 3 375.0 0.05571944
## 24 107.5 72.5 200.9 3.330 16.6 25.0 3 415.0 0.00000000
## 25 110.5 72.7 197.9 3.340 17.0 27.0 3 459.0 0.05389222
## 26 110.5 73.0 200.0 3.389 17.0 27.0 3 459.0 0.05311301
## 27 0.0 74.1 193.2 3.500 16.8 0.0 4 0.0 0.05714286
## 28 104.5 73.6 179.7 3.210 19.1 22.0 0 420.2 0.10747664
## 29 108.0 71.0 186.0 2.911 16.0 27.0 2 432.0 0.04534524
## 30 103.7 0.0 190.9 0.000 15.9 24.0 0 381.6 Inf
## 31 106.0 69.2 193.0 3.332 16.0 24.0 3 384.0 0.05042017
## 32 113.0 74.4 209.1 3.452 17.0 26.0 4 442.0 0.00000000
## 33 113.0 74.4 207.7 3.564 17.0 23.0 5 391.0 0.07098765
## 34 113.0 74.4 197.8 3.567 17.0 23.0 5 391.0 0.07092795
## 35 0.0 0.0 0.0 0.000 0.0 0.0 0 0.0 NaN
## Cost_per_mile Value_score
## 1 0.05817100 123.2371
## 2 0.06604651 143.3767
## 3 NaN NaN
## 4 0.10606061 132.0535
## 5 0.05417796 147.6347
## 6 0.08341523 135.7132
## 7 Inf Inf
## 8 Inf Inf
## 9 0.08383534 Inf
## 10 0.08478640 152.9801
## 11 0.05022857 145.8501
## 12 Inf Inf
## 13 0.07199324 148.0421
8
## 14 0.06373714 145.8546
## 15 0.07830808 132.0438
## 16 Inf Inf
## 17 0.00000000 Inf
## 18 0.09489234 139.3888
## 19 0.10272222 150.0495
## 20 0.01992449 154.5163
## 21 0.00000000 128.7143
## 22 Inf Inf
## 23 0.04409333 125.0333
## 24 0.04551807 138.3485
## 25 0.04224401 153.0320
## 26 0.00000000 153.0177
## 27 Inf Inf
## 28 0.10876963 140.1387
## 29 0.03814815 144.0278
## 30 0.05199161 Inf
## 31 0.06378906 128.0381
## 32 0.05032805 147.3501
## 33 0.07248082 130.3812
## 34 0.07464194 130.3819
## 35 NaN NaN
#tinytex::install_tinytex(force = TRUE)