Flight Price Prediction Capstone Project Submission 2
Flight Price Prediction Capstone Project Submission 2
Flight Price Prediction Capstone Project Submission 2
28/03/2020
Objective:
Flight ticket prices can be hard to predict: the price we see for a flight today may be
different when we check the same flight tomorrow. Travelers often say that flight ticket
prices are unpredictable. Here you are provided with the prices of flight tickets for
various airlines, between various cities, for journeys between March and June of 2019.
FEATURES:
1. Airline: The name of the airline.
2. Date_of_Journey: The date of the journey
3. Source: The source from which the service begins.
4. Destination: The destination where the service ends.
5. Route: The route taken by the flight to reach the destination.
6. Dep_Time: The time when the journey starts from the source.
7. Arrival_Time: Time of arrival at the destination.
8. Duration: Total duration of the flight.
9. Total_Stops: Total stops between the source and destination.
10. Additional_Info: Additional information about the flight
11. Price: The price of the ticket
str(train)
library(ggplot2)
library(purrr)
library(tidyr)
library(corrgram)
library(caret)
library(lubridate)
library(tidyverse)
library(rpart)
library(fastDummies)
colnames(train)
## [1] 2
# Count the missing values in every column of `train`.
# apply() coerces the data frame to a matrix before iterating over columns;
# the single column of `mv` gets its name automatically from this expression.
mv=data.frame(apply(train, 2, function(x){sum(is.na(x))}))
mv
## apply.train..2..function.x...
## Airline 0
## Date_of_Journey 0
## Source 0
## Destination 0
## Route 1
## Dep_Time 0
## Arrival_Time 0
## Duration 0
## Total_Stops 1
## Additional_Info 0
## Price 0
## [1] 10682 11
sum(is.na(train))
## [1] 0
#Replacing same dates of different formats (1/03/2019 and 01/03/2019) to same format:
# Normalise the day-of-month so that "01/03/2019" and "1/03/2019" are the same
# value. Stripping a single leading zero covers the four dates that were
# previously replaced one by one, and any other zero-padded day as well.
# Only the start of the string is touched, so "21/03/2019" etc. are unchanged.
train$Date_of_Journey <- sub("^0", "", train$Date_of_Journey)
unique(train$Date_of_Journey)
#Changing / to - :
str(train$Date_of_Journey)
unique(train$Date_of_Journey)
#Variable 3: Source:
unique(train$Source)
#Variable 4: Destination:
unique(train$Destination)
unique(train$Destination)
#Variable 5: Route:
unique(train$Route)
## [1] "BLR <U+2192> DEL" "CCU <U+2192> IXR <U+2192> BBI <
U+2192> BLR"
___ Continued
#Route is not an important variable, since the Total_Stops variable carries the same information.
#Variable 6: Dep_Time:
unique(train$Dep_Time)
## [1] "22:20" "05:50" "09:25" "18:05" "16:50" "09:00" "18:55" "08:00" "08:
55"
Continued**
str(train)
#Created a new variable named departure by uniting Date_of_Journey & Dep_Time.
#The departure variable has been changed to datetime format.
#We can extract the day, month and hour separately into new columns.
#Variable 7: Arrival_Time:
unique(train$Arrival_Time)
#Dep_Time & Arrival_Time are explained by the Duration variable, so we can leave this
#variable out.
#Variable 8 :Duration:
unique(train$Duration)
## [1] "13h" "15h 10m" "20h" "23h 25m" "1h 30m" "27h 40m" "16h 25m
"
Continued
str(train$Duration)
## chr [1:10659] "13h" "15h 10m" "20h" "23h 25m" "1h 30m" "27h 40m" "16h 25m
" ...
## [1] "character"
train$dur1=hm(train$dur)
str(train$dur1)
sum(is.na(train$dur1))
## [1] 1
class(train$dur1)
## [1] "Period"
## attr(,"package")
## [1] "lubridate"
summary(train$dur1)
#Variable 9: Total_Stops:
unique(train$Total_Stops)
## [1] "1 stop" "non-stop" "2 stops" "3 stops" "4 stops"
#Creating morning, day, evening, night, midnight timestamp using dep_hour variable:
str(train$dep_hour)
## chr [1:10659] "00" "00" "00" "00" "02" "04" "04" "05" "05" "05" "05" "05"
...
# Bucket the departure hour into five named time-of-day slots:
#   < 5 Pre_Morning, < 10 Morning, < 17 Day_Time, < 22 Evening, else Late_Night.
# Fix: the original nested ifelse() was wrapped in the middle of identifiers
# and did not parse. cut() with left-closed intervals expresses the same
# thresholds, and missing hours stay NA exactly as with ifelse().
train$dep_hour <- as.numeric(train$dep_hour)
train$dep_time_slot <- cut(
  train$dep_hour,
  breaks = c(-Inf, 5, 10, 17, 22, Inf),
  labels = c("Pre_Morning", "Morning", "Day_Time", "Evening", "Late_Night"),
  right = FALSE,
  include.lowest = TRUE
)
# Rebuild the factor from character values so the levels are sorted
# alphabetically, which is what as.factor() produced on the character vector.
train$dep_time_slot <- factor(as.character(train$dep_time_slot))
summary(train$dep_time_slot)
#Thus created the flight departure time slot using the flight departure hour. Now the flights
#are scheduled as “Day_Time Evening Late_Night Morning Pre_Morning”.
#Boxplot of each dep_time_slot vs price:
# Price distribution per departure time slot.
# Fixes: the call was wrapped inside `data` and inside the title string;
# columns are now referenced by bare name inside aes() instead of `train$...`
# (so axis labels read "dep_time_slot"/"Price"); and the fill is mapped to the
# slot, because a continuous fill (Price) cannot colour one box per slot.
ggplot(train, aes(x = dep_time_slot, y = Price, fill = dep_time_slot)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Boxplot of each level in dep_time_slot with price")
summary(train)
#Since the “Total_Stops” and “Route” variables denote the same thing, I’m removing the “Route”
#variable from the data:
# Assigning NULL to a data-frame column removes that column.
train1$Route <- NULL
#Summary of Train1
summary(train1)
str(train1)
#Data Visualization:
# Plotting frequency count of each airline.
# Fixes: the title string and `position_dodge` were split by line wraps;
# aes() now uses bare column names instead of `train1$...`; and the
# deprecated `..count..` is replaced by after_stat(count).
ggplot(train1, aes(x = Airline, fill = Airline)) +
  geom_bar(position = "dodge") +
  labs(title = "Counts of each Airline") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.2
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Plotting mean price of each airline.
# Fixes: `fun = "mean"` and `title` were split by line wraps, so the call did
# not parse; aes() now uses bare column names instead of `train1$...`.
ggplot(train1, aes(x = Airline, y = Price)) +
  stat_summary(fun = "mean", geom = "bar") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Mean price of each airline")
# Plotting mean price of each Source.
# Fixes: `fun = "mean"` and `title` were split by line wraps, so the call did
# not parse; aes() now uses bare column names instead of `train1$...`.
ggplot(train1, aes(x = Source, y = Price)) +
  stat_summary(fun = "mean", geom = "bar") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Mean price of each Source")
# Number of flights per Source, split by Destination.
# Fixes: `position_dodge` was split by a line wrap; aes() now uses bare
# column names instead of `train1$...`; the deprecated `..count..` is
# replaced by after_stat(count).
ggplot(train1, aes(x = Source, fill = Destination)) +
  geom_bar(position = "dodge") +
  labs(title = "Source vs Destination") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.2
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
#It’s clear that: Banglore flights go only to Delhi; Chennai flights go only to Kolkata; Delhi
#flights go only to Cochin; Kolkata flights go only to Banglore; Mumbai flights go only to
#Hyderabad.
# Plotting frequency count of each airline vs source.
# Fixes: the title string and `position_dodge` were split by line wraps;
# aes() now uses bare column names instead of `train1$...`; the deprecated
# `..count..` is replaced by after_stat(count).
ggplot(train1, aes(x = Airline, fill = Source)) +
  geom_bar(position = "dodge") +
  labs(title = "Counts of each Airline from Source") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.2
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Base-graphics scatter plot of ticket price against the departure column.
plot(train1$departure,train1$Price)
unique(train2$Total_Stops)
## [1] "1 stop" "non-stop" "2 stops" "3 stops" "4 stops"
train3=train2
colnames(train3)
# Again removing the original variables, since the dummy variables now exist.
# Dropping columns 1-4 and then columns 3-4 of what remains is the same as
# dropping columns 1, 2, 3, 4, 7 and 8 of the starting data frame in one step.
# NOTE(review): these are positional indexes — confirm them against
# colnames(train3) if the column order ever changes.
train3 <- train3[, -c(1:4, 7, 8)]
summary(train3)
str(train3)
dim(train3)
## [1] 10659 64
sum(is.na(train3))
## [1] 1
train3=na.omit(train3)
#TEST DATA: Data preparation is done on the test data separately, because in real time we will get
#new test data separately, after preparing the train data and modelling.
# Load the test set from Excel.
# Fixes: the file name was split across two lines inside the string literal,
# and read_excel() is namespace-qualified because readxl is not attached by
# the library() calls at the top of this script.
# NOTE(review): this is an absolute, machine-specific path — consider a
# project-relative path so the script runs on other machines.
test <- readxl::read_excel(
  file.path("C:/Users/jasvi/Desktop/Capstone Project", "FlightPrice_test.xlsx")
)
#Doing the same data preparation on the test data as was done on the train data:
View(test)
summary(test)
str(test)
dim(test)
## [1] 2671 10
## [1] 0
# Count the missing values in every column of `test`.
# apply() coerces the data frame to a matrix before iterating over columns;
# the single column of `mv_test` gets its name automatically from this expression.
mv_test=data.frame(apply(test, 2, function(x){sum(is.na(x))}))
mv_test
## apply.test..2..function.x...
## Airline 0
## Date_of_Journey 0
## Source 0
## Destination 0
## Route 0
## Dep_Time 0
## Arrival_Time 0
## Duration 0
## Total_Stops 0
## Additional_Info 0
#True jet does not appear at all in test$Airline. Also removing from test the levels which I
#removed from train. Subset:
# Keep only the rows whose Airline is not one of the three excluded levels.
# The explicit is.na() guard keeps the treatment of missing Airline values
# the same as with `!=` comparisons, where subset() drops NA conditions.
test <- subset(
  test,
  !is.na(Airline) &
    !(Airline %in% c(
      "Vistara Premium economy",
      "Jet Airways Business",
      "Multiple carriers Premium economy"
    ))
)
unique(test$Airline)
#Variable 2: Date_of_Journey:
unique(test$Date_of_Journey)
## [1] "6/06/2019" "12/05/2019" "21/05/2019" "24/06/2019" "12/06/2019"
## [6] "12/03/2019" "1/05/2019" "15/03/2019" "18/05/2019" "21/03/2019"
## [11] "15/06/2019" "15/05/2019" "3/06/2019" "06/03/2019" "24/03/2019"
## [16] "6/03/2019" "9/05/2019" "18/03/2019" "6/04/2019" "1/06/2019"
## [21] "3/03/2019" "27/03/2019" "9/06/2019" "3/05/2019" "1/04/2019"
## [26] "18/06/2019" "15/04/2019" "6/05/2019" "9/03/2019" "3/04/2019"
## [31] "27/06/2019" "21/06/2019" "21/04/2019" "18/04/2019" "9/04/2019"
## [36] "24/05/2019" "01/03/2019" "09/03/2019" "27/05/2019" "03/03/2019"
## [41] "27/04/2019" "1/03/2019" "24/04/2019" "12/04/2019"
str(test$Date_of_Journey)
#Variable 3: Source:
unique(test$Source)
#Variable 4: Destination:
unique(test$Destination)
#Variable 5: Route:
unique(test$Route)
## [1] "DEL <U+2192> BOM <U+2192> COK" "CCU <U+2192> MAA <U+2192> BLR"
Continued**
#Variable 7: Arrival_Time:
unique(test$Arrival_Time)
#Variable 8 :Duration:
unique(test$Duration)
## [1] "1h 30m" "2h 50m" "17h 5m" "12h 50m" "2h 45m" "5h 55m" "40h 40m
"
Continued**
, "h", ".00")
class(test$dur)
## [1] "character"
test$dur1=hm(test$dur)
str(test$dur1)
sum(is.na(test$dur1))
## [1] 1
class(test$dur1)
## [1] "Period"
## attr(,"package")
## [1] "lubridate"
summary(test$dur1)
# Important: convert the parsed Period to a Duration, divide by a one-hour
# Duration to get a plain number of hours, then round to whole hours.
test$duration <- round(as.duration(test$dur1) / dhours(1))
#Variable 9: Total_Stops:
unique(test$Total_Stops)
## [1] "non-stop" "4 stops" "2 stops" "1 stop" "3 stops"
#Derived Variable: Extract the hour and day data from the departure time
test$dep_hour <- format(test$departure, "%H")
#Creating morning, day, evening, night, midnight timestamp using dep_hour variable:
str(test$dep_hour)
## chr [1:2664] "02" "05" "05" "05" "05" "06" "06" "06" "07" "07" "08" "08"
...
# Bucket the departure hour into five named time-of-day slots:
#   < 5 Pre_Morning, < 10 Morning, < 17 Day_Time, < 22 Evening, else Late_Night.
# Fix: the original nested ifelse() was wrapped in the middle of identifiers
# and did not parse. cut() with left-closed intervals expresses the same
# thresholds, and missing hours stay NA exactly as with ifelse().
test$dep_hour <- as.numeric(test$dep_hour)
test$dep_time_slot <- cut(
  test$dep_hour,
  breaks = c(-Inf, 5, 10, 17, 22, Inf),
  labels = c("Pre_Morning", "Morning", "Day_Time", "Evening", "Late_Night"),
  right = FALSE,
  include.lowest = TRUE
)
# Rebuild the factor from character values so the levels are sorted
# alphabetically, which is what as.factor() produced on the character vector.
test$dep_time_slot <- factor(as.character(test$dep_time_slot))
summary(test$dep_time_slot)
summary(test)
# Build `test1` as a copy of `test` without the raw and intermediate columns.
# Names that are not present in `test` are simply ignored, just as assigning
# NULL to a non-existent column is a no-op.
test1 <- test[
  ,
  !(names(test) %in% c(
    "Route", "Date_of_Journey", "Dep_Time", "Arrival_Time", "Duration",
    "dur", "dep_hour", "Additional_Info", "dur1", "departure"
  )),
  drop = FALSE
]
test3=test2
colnames(test3)
summary(test3)
str(test3)
dim(test3)
## [1] 2664 69
sum(is.na(test3))
## [1] 1
test3=na.omit(test3)
colnames(test3)
##
## Call:
## lm(formula = Price ~ ., data = train3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10368 -1264 -119 1255 42354
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5618.028 228.843 24.550 < 2e-16 ***
## duration -15.237 4.424 -3.444 0.000576 ***
## `Airline_Air India` 1877.469 153.411 12.238 < 2e-16 ***
## Airline_GoAir 140.971 219.458 0.642 0.520654
## Airline_IndiGo 370.118 145.660 2.541 0.011069 *
## `Airline_Jet Airways` 4553.184 145.208 31.356 < 2e-16 ***
## `Airline_Multiple carriers` 3460.609 160.002 21.629 < 2e-16 ***
## Airline_SpiceJet 85.329 160.690 0.531 0.595417
## Airline_Vistara 2388.100 177.005 13.492 < 2e-16 ***
## Source_Chennai 125.282 139.297 0.899 0.368465
## Source_Delhi 247.660 91.537 2.706 0.006830 **
## Source_Kolkata 405.453 84.780 4.782 1.76e-06 ***
## Source_Mumbai -1605.191 108.144 -14.843 < 2e-16 ***
## Destination_Cochin NA NA NA NA
## Destination_Delhi NA NA NA NA
## Destination_Hyderabad NA NA NA NA
## Destination_Kolkata NA NA NA NA
## `Total_Stops_2 stops` 2368.622 82.500 28.711 < 2e-16 ***
## `Total_Stops_3 stops` 3291.665 363.980 9.044 < 2e-16 ***
## `Total_Stops_4 stops` -901.178 2387.767 -0.377 0.705873
## `Total_Stops_non-stop` -3382.937 91.935 -36.797 < 2e-16 ***
## dep_time_slot_Evening -79.450 64.014 -1.241 0.214589
## dep_time_slot_Late_Night 412.940 113.645 3.634 0.000281 ***
## dep_time_slot_Morning -218.890 59.102 -3.704 0.000214 ***
## dep_time_slot_Pre_Morning 31.341 121.576 0.258 0.796571
## dep_day_01Jun 1518.250 201.216 7.545 4.88e-14 ***
## dep_day_01Mar 11767.683 233.231 50.455 < 2e-16 ***
## dep_day_01May 1351.393 206.350 6.549 6.06e-11 ***
## dep_day_03Apr 127.696 274.203 0.466 0.641440
## dep_day_03Jun 1471.523 202.372 7.271 3.81e-13 ***
## dep_day_03Mar 5351.423 204.900 26.117 < 2e-16 ***
## dep_day_03May 1424.722 295.252 4.825 1.42e-06 ***
## dep_day_06Apr -134.897 284.372 -0.474 0.635249
## dep_day_06Jun 1419.850 184.259 7.706 1.42e-14 ***
## dep_day_06Mar 5735.347 195.159 29.388 < 2e-16 ***
## dep_day_06May 1344.926 205.575 6.542 6.34e-11 ***
## dep_day_09Apr 251.092 262.285 0.957 0.338425
## dep_day_09Jun 1463.604 184.464 7.934 2.33e-15 ***
## dep_day_09Mar 3568.481 205.953 17.327 < 2e-16 ***
## dep_day_09May 1523.929 185.543 8.213 2.40e-16 ***
## dep_day_12Apr 1677.626 338.481 4.956 7.29e-07 ***
## dep_day_12Jun 937.374 184.882 5.070 4.04e-07 ***
## dep_day_12Mar 3374.598 256.937 13.134 < 2e-16 ***
## dep_day_12May 1491.453 209.739 7.111 1.23e-12 ***
## dep_day_15Apr 632.694 295.758 2.139 0.032440 *
## dep_day_15Jun 1048.363 203.007 5.164 2.46e-07 ***
## dep_day_15Mar 1566.667 243.330 6.438 1.26e-10 ***
## dep_day_15May 1351.489 191.071 7.073 1.61e-12 ***
## dep_day_18Apr 1782.013 330.756 5.388 7.29e-08 ***
## dep_day_18Jun 1314.179 279.684 4.699 2.65e-06 ***
## dep_day_18Mar 2471.817 245.941 10.050 < 2e-16 ***
## dep_day_18May 1730.269 184.218 9.393 < 2e-16 ***
## dep_day_21Apr 1236.848 305.272 4.052 5.12e-05 ***
## dep_day_21Jun 1494.800 276.092 5.414 6.29e-08 ***
## dep_day_21Mar -65.375 193.860 -0.337 0.735953
## dep_day_21May 1658.359 184.534 8.987 < 2e-16 ***
## dep_day_24Apr 592.712 292.320 2.028 0.042624 *
## dep_day_24Jun 901.095 200.414 4.496 6.99e-06 ***
## dep_day_24Mar 2303.058 199.418 11.549 < 2e-16 ***
## dep_day_24May 1499.174 204.747 7.322 2.62e-13 ***
## dep_day_27Apr 1031.952 290.669 3.550 0.000387 ***
## dep_day_27Jun 929.576 199.994 4.648 3.39e-06 ***
## dep_day_27Mar -412.390 207.823 -1.984 0.047245 *
## dep_day_27May 1594.541 197.109 8.090 6.64e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2380 on 10598 degrees of freedom
## Multiple R-squared: 0.7165, Adjusted R-squared: 0.7149
## F-statistic: 453.9 on 59 and 10598 DF, p-value: < 2.2e-16
summary(model2)
##
## Call:
## lm(formula = Price ~ duration + `Airline_Air India` + Airline_IndiGo +
## `Airline_Jet Airways` + Airline_SpiceJet + `Airline_Multiple carriers`
+
## Airline_Vistara + Airline_GoAir + Source_Mumbai + Source_Kolkata +
## `Total_Stops_non-stop` + `Total_Stops_2 stops` + `Total_Stops_3 stops`
+
## dep_time_slot_Morning + dep_time_slot_Late_Night + dep_day_03Mar +
## dep_day_06Mar + dep_day_09Mar + dep_day_12Mar + dep_day_15Mar +
## dep_day_18Mar + dep_day_21Mar + dep_day_24Mar + dep_day_27Mar +
## dep_day_03Apr + dep_day_06Apr + dep_day_09Apr + dep_day_12Apr +
## dep_day_15Apr + dep_day_18Apr + dep_day_21Apr + dep_day_24Apr +
## dep_day_27Apr + dep_day_01May + dep_day_03May + dep_day_06May +
## dep_day_09May + dep_day_12May + dep_day_15May + dep_day_18May +
## dep_day_21May + dep_day_24May + dep_day_27May + dep_day_01Jun +
## dep_day_03Jun + dep_day_06Jun + dep_day_09Jun + dep_day_12Jun +
## dep_day_15Jun + dep_day_18Jun + dep_day_21Jun + dep_day_24Jun +
## dep_day_27Jun, data = train3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10774 -1384 -221 1256 42172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11055.688 211.901 52.174 < 2e-16 ***
## duration -14.080 4.917 -2.863 0.004200 **
## `Airline_Air India` 1897.934 170.431 11.136 < 2e-16 ***
## Airline_IndiGo 363.116 161.905 2.243 0.024932 *
## `Airline_Jet Airways` 4439.613 161.033 27.570 < 2e-16 ***
## Airline_SpiceJet 66.008 177.567 0.372 0.710098
## `Airline_Multiple carriers` 3351.805 177.742 18.858 < 2e-16 ***
## Airline_Vistara 2150.000 195.550 10.995 < 2e-16 ***
## Airline_GoAir -12.468 244.213 -0.051 0.959285
## Source_Mumbai -1803.485 114.709 -15.722 < 2e-16 ***
## Source_Kolkata -266.442 69.215 -3.849 0.000119 ***
## `Total_Stops_non-stop` -3555.842 87.839 -40.481 < 2e-16 ***
## `Total_Stops_2 stops` 2303.779 90.948 25.331 < 2e-16 ***
## `Total_Stops_3 stops` 3264.067 405.615 8.047 9.38e-16 ***
## dep_time_slot_Morning -163.297 55.761 -2.928 0.003413 **
## dep_time_slot_Late_Night 652.809 121.388 5.378 7.70e-08 ***
## dep_day_03Mar 155.585 197.867 0.786 0.431705
## dep_day_06Mar 535.831 185.209 2.893 0.003822 **
## dep_day_09Mar -1627.429 199.256 -8.168 3.51e-16 ***
## dep_day_12Mar -1959.766 257.640 -7.607 3.05e-14 ***
## dep_day_15Mar -3679.773 244.597 -15.044 < 2e-16 ***
## dep_day_18Mar -2756.340 247.837 -11.122 < 2e-16 ***
## dep_day_21Mar -5244.986 183.565 -28.573 < 2e-16 ***
## dep_day_24Mar -2640.551 194.111 -13.603 < 2e-16 ***
## dep_day_27Mar -5532.476 200.611 -27.578 < 2e-16 ***
## dep_day_03Apr -4969.375 284.134 -17.490 < 2e-16 ***
## dep_day_06Apr -5212.902 296.279 -17.595 < 2e-16 ***
## dep_day_09Apr -4864.394 269.610 -18.042 < 2e-16 ***
## dep_day_12Apr -3377.133 360.304 -9.373 < 2e-16 ***
## dep_day_15Apr -4453.249 309.804 -14.374 < 2e-16 ***
## dep_day_18Apr -3281.154 350.966 -9.349 < 2e-16 ***
## dep_day_21Apr -3850.110 320.836 -12.000 < 2e-16 ***
## dep_day_24Apr -4475.868 305.727 -14.640 < 2e-16 ***
## dep_day_27Apr -4068.156 303.462 -13.406 < 2e-16 ***
## dep_day_01May -3537.015 203.451 -17.385 < 2e-16 ***
## dep_day_03May -3665.521 309.606 -11.839 < 2e-16 ***
## dep_day_06May -3547.772 202.445 -17.525 < 2e-16 ***
## dep_day_09May -3484.382 174.426 -19.976 < 2e-16 ***
## dep_day_12May -3402.623 207.700 -16.382 < 2e-16 ***
## dep_day_15May -3639.648 182.433 -19.951 < 2e-16 ***
## dep_day_18May -3278.983 172.714 -18.985 < 2e-16 ***
## dep_day_21May -3349.419 173.262 -19.332 < 2e-16 ***
## dep_day_24May -3396.886 201.366 -16.869 < 2e-16 ***
## dep_day_27May -3546.017 187.085 -18.954 < 2e-16 ***
## dep_day_01Jun -3617.753 192.556 -18.788 < 2e-16 ***
## dep_day_03Jun -3661.461 193.991 -18.874 < 2e-16 ***
## dep_day_06Jun -3586.539 172.855 -20.749 < 2e-16 ***
## dep_day_09Jun -3535.248 173.297 -20.400 < 2e-16 ***
## dep_day_12Jun -4062.389 173.779 -23.377 < 2e-16 ***
## dep_day_15Jun -4070.925 194.994 -20.877 < 2e-16 ***
## dep_day_18Jun -3773.269 291.003 -12.966 < 2e-16 ***
## dep_day_21Jun -3583.450 286.687 -12.500 < 2e-16 ***
## dep_day_24Jun -4229.439 191.433 -22.094 < 2e-16 ***
## dep_day_27Jun -4201.964 190.942 -22.006 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2654 on 10604 degrees of freedom
## Multiple R-squared: 0.6472, Adjusted R-squared: 0.6454
## F-statistic: 367 on 53 and 10604 DF, p-value: < 2.2e-16
library(car)
model2$coefficients
## (Intercept) duration
## 11055.68805 -14.07963
## `Airline_Air India` Airline_IndiGo
## 1897.93423 363.11600
## `Airline_Jet Airways` Airline_SpiceJet
## 4439.61279 66.00766
## `Airline_Multiple carriers` Airline_Vistara
## 3351.80540 2150.00034
## Airline_GoAir Source_Mumbai
## -12.46773 -1803.48536
## Source_Kolkata `Total_Stops_non-stop`
## -266.44157 -3555.84209
## `Total_Stops_2 stops` `Total_Stops_3 stops`
## 2303.77949 3264.06732
## dep_time_slot_Morning dep_time_slot_Late_Night
## -163.29670 652.80944
## dep_day_03Mar dep_day_06Mar
## 155.58477 535.83117
## dep_day_09Mar dep_day_12Mar
## -1627.42863 -1959.76598
## dep_day_15Mar dep_day_18Mar
## -3679.77324 -2756.33985
## dep_day_21Mar dep_day_24Mar
## -5244.98551 -2640.55079
## dep_day_27Mar dep_day_03Apr
## -5532.47568 -4969.37514
## dep_day_06Apr dep_day_09Apr
## -5212.90185 -4864.39387
## dep_day_12Apr dep_day_15Apr
## -3377.13272 -4453.24886
## dep_day_18Apr dep_day_21Apr
## -3281.15398 -3850.11046
## dep_day_24Apr dep_day_27Apr
## -4475.86771 -4068.15568
## dep_day_01May dep_day_03May
## -3537.01511 -3665.52117
## dep_day_06May dep_day_09May
## -3547.77164 -3484.38234
## dep_day_12May dep_day_15May
## -3402.62319 -3639.64846
## dep_day_18May dep_day_21May
## -3278.98336 -3349.41947
## dep_day_24May dep_day_27May
## -3396.88592 -3546.01709
## dep_day_01Jun dep_day_03Jun
## -3617.75298 -3661.46089
## dep_day_06Jun dep_day_09Jun
## -3586.53887 -3535.24772
## dep_day_12Jun dep_day_15Jun
## -4062.38900 -4070.92460
## dep_day_18Jun dep_day_21Jun
## -3773.26938 -3583.45018
## dep_day_24Jun dep_day_27Jun
## -4229.43878 -4201.96394
# In-sample error of model2: actual training prices vs. the model's fitted
# values. Fix: Metrics is attached before mape() is called; the original
# loaded it only after the first use.
library(Metrics)
mape(train3$Price, model2$fitted.values)
## [1] 0.2251769
rmse(train3$Price, model2$fitted.values)
## [1] 2647.094
## 1 2 3 4 5 6
## 14224.45829 14196.29903 12672.02972 12629.79084 6031.31735 12559.39270
## 7 8 9 10 11 12
Continued**
#TOP MOST FEATURES IMPACTING THE FLIGHT PRICE ARE (FROM LINEAR REGRESSION MODEL):
summary(model2)
##
## Call:
## lm(formula = Price ~ duration + `Airline_Air India` + Airline_IndiGo +
## `Airline_Jet Airways` + Airline_SpiceJet + `Airline_Multiple carriers`
+
## Airline_Vistara + Airline_GoAir + Source_Mumbai + Source_Kolkata +
## `Total_Stops_non-stop` + `Total_Stops_2 stops` + `Total_Stops_3 stops`
+
## dep_time_slot_Morning + dep_time_slot_Late_Night + dep_day_03Mar +
## dep_day_06Mar + dep_day_09Mar + dep_day_12Mar + dep_day_15Mar +
## dep_day_18Mar + dep_day_21Mar + dep_day_24Mar + dep_day_27Mar +
## dep_day_03Apr + dep_day_06Apr + dep_day_09Apr + dep_day_12Apr +
## dep_day_15Apr + dep_day_18Apr + dep_day_21Apr + dep_day_24Apr +
## dep_day_27Apr + dep_day_01May + dep_day_03May + dep_day_06May +
## dep_day_09May + dep_day_12May + dep_day_15May + dep_day_18May +
## dep_day_21May + dep_day_24May + dep_day_27May + dep_day_01Jun +
## dep_day_03Jun + dep_day_06Jun + dep_day_09Jun + dep_day_12Jun +
## dep_day_15Jun + dep_day_18Jun + dep_day_21Jun + dep_day_24Jun +
## dep_day_27Jun, data = train3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10774 -1384 -221 1256 42172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11055.688 211.901 52.174 < 2e-16 ***
## duration -14.080 4.917 -2.863 0.004200 **
## `Airline_Air India` 1897.934 170.431 11.136 < 2e-16 ***
## Airline_IndiGo 363.116 161.905 2.243 0.024932 *
## `Airline_Jet Airways` 4439.613 161.033 27.570 < 2e-16 ***
## Airline_SpiceJet 66.008 177.567 0.372 0.710098
## `Airline_Multiple carriers` 3351.805 177.742 18.858 < 2e-16 ***
## Airline_Vistara 2150.000 195.550 10.995 < 2e-16 ***
## Airline_GoAir -12.468 244.213 -0.051 0.959285
## Source_Mumbai -1803.485 114.709 -15.722 < 2e-16 ***
## Source_Kolkata -266.442 69.215 -3.849 0.000119 ***
## `Total_Stops_non-stop` -3555.842 87.839 -40.481 < 2e-16 ***
## `Total_Stops_2 stops` 2303.779 90.948 25.331 < 2e-16 ***
## `Total_Stops_3 stops` 3264.067 405.615 8.047 9.38e-16 ***
## dep_time_slot_Morning -163.297 55.761 -2.928 0.003413 **
## dep_time_slot_Late_Night 652.809 121.388 5.378 7.70e-08 ***
## dep_day_03Mar 155.585 197.867 0.786 0.431705
## dep_day_06Mar 535.831 185.209 2.893 0.003822 **
## dep_day_09Mar -1627.429 199.256 -8.168 3.51e-16 ***
## dep_day_12Mar -1959.766 257.640 -7.607 3.05e-14 ***
## dep_day_15Mar -3679.773 244.597 -15.044 < 2e-16 ***
## dep_day_18Mar -2756.340 247.837 -11.122 < 2e-16 ***
## dep_day_21Mar -5244.986 183.565 -28.573 < 2e-16 ***
## dep_day_24Mar -2640.551 194.111 -13.603 < 2e-16 ***
## dep_day_27Mar -5532.476 200.611 -27.578 < 2e-16 ***
## dep_day_03Apr -4969.375 284.134 -17.490 < 2e-16 ***
## dep_day_06Apr -5212.902 296.279 -17.595 < 2e-16 ***
## dep_day_09Apr -4864.394 269.610 -18.042 < 2e-16 ***
## dep_day_12Apr -3377.133 360.304 -9.373 < 2e-16 ***
## dep_day_15Apr -4453.249 309.804 -14.374 < 2e-16 ***
## dep_day_18Apr -3281.154 350.966 -9.349 < 2e-16 ***
## dep_day_21Apr -3850.110 320.836 -12.000 < 2e-16 ***
## dep_day_24Apr -4475.868 305.727 -14.640 < 2e-16 ***
## dep_day_27Apr -4068.156 303.462 -13.406 < 2e-16 ***
## dep_day_01May -3537.015 203.451 -17.385 < 2e-16 ***
## dep_day_03May -3665.521 309.606 -11.839 < 2e-16 ***
## dep_day_06May -3547.772 202.445 -17.525 < 2e-16 ***
## dep_day_09May -3484.382 174.426 -19.976 < 2e-16 ***
## dep_day_12May -3402.623 207.700 -16.382 < 2e-16 ***
## dep_day_15May -3639.648 182.433 -19.951 < 2e-16 ***
## dep_day_18May -3278.983 172.714 -18.985 < 2e-16 ***
## dep_day_21May -3349.419 173.262 -19.332 < 2e-16 ***
## dep_day_24May -3396.886 201.366 -16.869 < 2e-16 ***
## dep_day_27May -3546.017 187.085 -18.954 < 2e-16 ***
## dep_day_01Jun -3617.753 192.556 -18.788 < 2e-16 ***
## dep_day_03Jun -3661.461 193.991 -18.874 < 2e-16 ***
## dep_day_06Jun -3586.539 172.855 -20.749 < 2e-16 ***
## dep_day_09Jun -3535.248 173.297 -20.400 < 2e-16 ***
## dep_day_12Jun -4062.389 173.779 -23.377 < 2e-16 ***
## dep_day_15Jun -4070.925 194.994 -20.877 < 2e-16 ***
## dep_day_18Jun -3773.269 291.003 -12.966 < 2e-16 ***
## dep_day_21Jun -3583.450 286.687 -12.500 < 2e-16 ***
## dep_day_24Jun -4229.439 191.433 -22.094 < 2e-16 ***
## dep_day_27Jun -4201.964 190.942 -22.006 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2654 on 10604 degrees of freedom
## Multiple R-squared: 0.6472, Adjusted R-squared: 0.6454
## F-statistic: 367 on 53 and 10604 DF, p-value: < 2.2e-16
model2$coefficients
## (Intercept) duration
## 11055.68805 -14.07963
## `Airline_Air India` Airline_IndiGo
## 1897.93423 363.11600
## `Airline_Jet Airways` Airline_SpiceJet
## 4439.61279 66.00766
## `Airline_Multiple carriers` Airline_Vistara
## 3351.80540 2150.00034
## Airline_GoAir Source_Mumbai
## -12.46773 -1803.48536
## Source_Kolkata `Total_Stops_non-stop`
## -266.44157 -3555.84209
## `Total_Stops_2 stops` `Total_Stops_3 stops`
## 2303.77949 3264.06732
## dep_time_slot_Morning dep_time_slot_Late_Night
## -163.29670 652.80944
## dep_day_03Mar dep_day_06Mar
## 155.58477 535.83117
## dep_day_09Mar dep_day_12Mar
## -1627.42863 -1959.76598
## dep_day_15Mar dep_day_18Mar
## -3679.77324 -2756.33985
## dep_day_21Mar dep_day_24Mar
## -5244.98551 -2640.55079
## dep_day_27Mar dep_day_03Apr
## -5532.47568 -4969.37514
## dep_day_06Apr dep_day_09Apr
## -5212.90185 -4864.39387
## dep_day_12Apr dep_day_15Apr
## -3377.13272 -4453.24886
## dep_day_18Apr dep_day_21Apr
## -3281.15398 -3850.11046
## dep_day_24Apr dep_day_27Apr
## -4475.86771 -4068.15568
## dep_day_01May dep_day_03May
## -3537.01511 -3665.52117
## dep_day_06May dep_day_09May
## -3547.77164 -3484.38234
## dep_day_12May dep_day_15May
## -3402.62319 -3639.64846
## dep_day_18May dep_day_21May
## -3278.98336 -3349.41947
## dep_day_24May dep_day_27May
## -3396.88592 -3546.01709
## dep_day_01Jun dep_day_03Jun
## -3617.75298 -3661.46089
## dep_day_06Jun dep_day_09Jun
## -3586.53887 -3535.24772
## dep_day_12Jun dep_day_15Jun
## -4062.38900 -4070.92460
## dep_day_18Jun dep_day_21Jun
## -3773.26938 -3583.45018
## dep_day_24Jun dep_day_27Jun
## -4229.43878 -4201.96394
#CROSS VALIDATION:
# Fix the RNG seed, then define the resampling scheme for k-fold
# cross validation: plain 10-fold CV.
set.seed(100)
ctrl <- trainControl(method = "cv", number = 10)
summary(model_cv)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10368 -1264 -119 1255 42354
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5618.028 228.843 24.550 < 2e-16 **
*
## duration -15.237 4.424 -3.444 0.000576 **
*
## `\\`Airline_Air India\\`` 1877.469 153.411 12.238 < 2e-16 **
*
## Airline_GoAir 140.971 219.458 0.642 0.520654
## Airline_IndiGo 370.118 145.660 2.541 0.011069 *
## `\\`Airline_Jet Airways\\`` 4553.184 145.208 31.356 < 2e-16 **
*
## `\\`Airline_Multiple carriers\\`` 3460.609 160.002 21.629 < 2e-16 **
*
## Airline_SpiceJet 85.329 160.690 0.531 0.595417
## Airline_Vistara 2388.100 177.005 13.492 < 2e-16 **
*
## Source_Chennai 125.282 139.297 0.899 0.368465
## Source_Delhi 247.660 91.537 2.706 0.006830 **
## Source_Kolkata 405.453 84.780 4.782 1.76e-06 **
*
## Source_Mumbai -1605.191 108.144 -14.843 < 2e-16 **
*
## Destination_Cochin NA NA NA NA
## Destination_Delhi NA NA NA NA
## Destination_Hyderabad NA NA NA NA
## Destination_Kolkata NA NA NA NA
## `\\`Total_Stops_2 stops\\`` 2368.622 82.500 28.711 < 2e-16 **
*
## `\\`Total_Stops_3 stops\\`` 3291.665 363.980 9.044 < 2e-16 **
*
## `\\`Total_Stops_4 stops\\`` -901.178 2387.767 -0.377 0.705873
## `\\`Total_Stops_non-stop\\`` -3382.937 91.935 -36.797 < 2e-16 **
*
## dep_time_slot_Evening -79.450 64.014 -1.241 0.214589
## dep_time_slot_Late_Night 412.940 113.645 3.634 0.000281 **
*
## dep_time_slot_Morning -218.890 59.102 -3.704 0.000214 **
*
## dep_time_slot_Pre_Morning 31.341 121.576 0.258 0.796571
## dep_day_01Jun 1518.250 201.216 7.545 4.88e-14 **
*
## dep_day_01Mar 11767.683 233.231 50.455 < 2e-16 **
*
## dep_day_01May 1351.393 206.350 6.549 6.06e-11 **
*
## dep_day_03Apr 127.696 274.203 0.466 0.641440
## dep_day_03Jun 1471.523 202.372 7.271 3.81e-13 **
*
## dep_day_03Mar 5351.423 204.900 26.117 < 2e-16 **
*
## dep_day_03May 1424.722 295.252 4.825 1.42e-06 **
*
## dep_day_06Apr -134.897 284.372 -0.474 0.635249
## dep_day_06Jun 1419.850 184.259 7.706 1.42e-14 **
*
## dep_day_06Mar 5735.347 195.159 29.388 < 2e-16 **
*
## dep_day_06May 1344.926 205.575 6.542 6.34e-11 **
*
## dep_day_09Apr 251.092 262.285 0.957 0.338425
## dep_day_09Jun 1463.604 184.464 7.934 2.33e-15 **
*
## dep_day_09Mar 3568.481 205.953 17.327 < 2e-16 **
*
## dep_day_09May 1523.929 185.543 8.213 2.40e-16 **
*
## dep_day_12Apr 1677.626 338.481 4.956 7.29e-07 **
*
## dep_day_12Jun 937.374 184.882 5.070 4.04e-07 **
*
## dep_day_12Mar 3374.598 256.937 13.134 < 2e-16 **
*
## dep_day_12May 1491.453 209.739 7.111 1.23e-12 **
*
## dep_day_15Apr 632.694 295.758 2.139 0.032440 *
## dep_day_15Jun 1048.363 203.007 5.164 2.46e-07 **
*
## dep_day_15Mar 1566.667 243.330 6.438 1.26e-10 **
*
## dep_day_15May 1351.489 191.071 7.073 1.61e-12 **
*
## dep_day_18Apr 1782.013 330.756 5.388 7.29e-08 **
*
## dep_day_18Jun 1314.179 279.684 4.699 2.65e-06 **
*
## dep_day_18Mar 2471.817 245.941 10.050 < 2e-16 **
*
## dep_day_18May 1730.269 184.218 9.393 < 2e-16 **
*
## dep_day_21Apr 1236.848 305.272 4.052 5.12e-05 **
*
## dep_day_21Jun 1494.800 276.092 5.414 6.29e-08 **
*
## dep_day_21Mar -65.375 193.860 -0.337 0.735953
## dep_day_21May 1658.359 184.534 8.987 < 2e-16 **
*
## dep_day_24Apr 592.712 292.320 2.028 0.042624 *
## dep_day_24Jun 901.095 200.414 4.496 6.99e-06 **
*
## dep_day_24Mar 2303.058 199.418 11.549 < 2e-16 **
*
## dep_day_24May 1499.174 204.747 7.322 2.62e-13 **
*
## dep_day_27Apr 1031.952 290.669 3.550 0.000387 **
*
## dep_day_27Jun 929.576 199.994 4.648 3.39e-06 **
*
## dep_day_27Mar -412.390 207.823 -1.984 0.047245 *
## dep_day_27May 1594.541 197.109 8.090 6.64e-16 **
*
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2380 on 10598 degrees of freedom
## Multiple R-squared: 0.7165, Adjusted R-squared: 0.7149
## F-statistic: 453.9 on 59 and 10598 DF, p-value: < 2.2e-16
# Predict with the cross-validated model on `x_test`, then print the result.
pred2 <- predict(model_cv, newdata = x_test)
pred2
#REGULARISATION:
library(glmnet)
library(ISLR)
library(dplyr)
library(tidyr)
library(Metrics)
# Fix the RNG seed and build matrix versions of the modelling data.
# NOTE(review): the positional indexes assume column 1 of train3 is the
# response and columns 2-64 are the predictors — confirm against
# colnames(train3).
set.seed(100)
train_x <- as.matrix(train3[, 2:64])
train_y <- as.matrix(train3[, 1])
test_x <- as.matrix(test3)

# Resampling scheme for the glmnet fits: 10-fold CV repeated 5 times,
# with per-iteration progress logging.
custom <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  verboseIter = TRUE
)
#RIDGE REGRESSION:
# Ridge regression (alpha = 0) on all predictors, tuned over five evenly
# spaced lambda values using the `custom` resampling scheme.
# Fix: the `lambda` argument name was split across two lines and did not parse.
# NOTE(review): the resampling results reported for this grid are identical
# at every lambda, which suggests the grid does not span a useful range —
# consider a wider (e.g. log-spaced) grid.
ridge <- train(
  Price ~ .,
  data = train3,
  method = "glmnet",
  tuneGrid = expand.grid(alpha = 0, lambda = seq(0.001, 1, length.out = 5)),
  trControl = custom
)
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0, lambda = 1 on full training set
plot(ridge)
ridge
## glmnet
##
## 10658 samples
## 63 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 9592, 9592, 9592, 9592, 9592, 9594, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.00100 2409.696 0.7088255 1722.293
## 0.25075 2409.696 0.7088255 1722.293
## 0.50050 2409.696 0.7088255 1722.293
## 0.75025 2409.696 0.7088255 1722.293
## 1.00000 2409.696 0.7088255 1722.293
##
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 1.
# Coefficient paths of the final ridge fit, against lambda and against the
# fraction of deviance explained, with the curves labelled.
plot(ridge$finalModel, xvar = "lambda", label = TRUE)
plot(ridge$finalModel, xvar = "dev", label = TRUE)
# Scaled variable importance of the ridge model.
plot(varImp(ridge, scale = TRUE))
# Predictions of the ridge model for the test matrix; printed for inspection.
pred3 <- predict(ridge, newdata = test_x)
pred3
# LASSO REGRESSION:
# Re-seed so the resampling for this fit is reproducible.
set.seed(100)
# Fit Price on every other column of train3 with glmnet through caret, holding
# alpha at 1 (lasso penalty) and tuning lambda over five evenly spaced values
# in [0.001, 0.5] under the `custom` resampling scheme.
# The `lambda` argument name was previously split across two lines
# ("lambd" / "a="), which does not parse; it is written out in full here.
lasso <- train(
  Price ~ .,
  data = train3,
  method = "glmnet",
  tuneGrid = expand.grid(
    alpha = 1,
    lambda = seq(0.001, 0.5, length.out = 5)
  ),
  trControl = custom
)
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.5 on full training set
# Plot the resampled performance across the lambda grid, then print the
# tuning summary for the lasso fit.
plot(lasso)
print(lasso)
## glmnet
##
## 10658 samples
## 63 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 9592, 9592, 9592, 9592, 9592, 9594, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.00100 2380.943 0.7148115 1705.007
## 0.12575 2380.943 0.7148115 1705.007
## 0.25050 2380.943 0.7148115 1705.007
## 0.37525 2380.943 0.7148115 1705.007
## 0.50000 2380.939 0.7148124 1704.999
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.5.
# Diagnostics for the lasso fit: two plots of the final glmnet model (x-axis
# chosen by `xvar`, curves labelled), followed by scaled variable importance.
plot(lasso$finalModel, xvar = "lambda", label = TRUE)
plot(lasso$finalModel, xvar = "dev", label = TRUE)
# Variable importance must come from the lasso model; this line previously
# passed `ridge`, so the lasso section re-plotted the ridge importances.
plot(varImp(lasso, scale = TRUE))
# Predict on the test matrix with the tuned lasso model and display the result.
pred4 <- predict(lasso, newdata = test_x)
pred4
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.111, lambda = 1 on full training set
# Plot the resampled performance over the alpha/lambda grid, then print the
# tuning summary for the elastic-net fit.
plot(elastic)
print(elastic)
## glmnet
##
## 10658 samples
## 63 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 9592, 9592, 9592, 9592, 9592, 9594, ...
## Resampling results across tuning parameters:
##
## alpha lambda RMSE Rsquared MAE
## 0.0000000 0.000100 2409.696 0.7088255 1722.293
## 0.0000000 0.250075 2409.696 0.7088255 1722.293
## 0.0000000 0.500050 2409.696 0.7088255 1722.293
## 0.0000000 0.750025 2409.696 0.7088255 1722.293
## 0.0000000 1.000000 2409.696 0.7088255 1722.293
## 0.1111111 0.000100 2380.899 0.7148185 1704.944
## 0.1111111 0.250075 2380.899 0.7148185 1704.944
## 0.1111111 0.500050 2380.899 0.7148185 1704.944
## 0.1111111 0.750025 2380.899 0.7148185 1704.944
## 0.1111111 1.000000 2380.899 0.7148185 1704.944
## 0.2222222 0.000100 2381.000 0.7147973 1704.937
## 0.2222222 0.250075 2381.000 0.7147973 1704.937
## 0.2222222 0.500050 2381.000 0.7147973 1704.937
## 0.2222222 0.750025 2381.000 0.7147973 1704.937
## 0.2222222 1.000000 2381.000 0.7147973 1704.937
## 0.3333333 0.000100 2381.008 0.7147943 1704.979
## 0.3333333 0.250075 2381.008 0.7147943 1704.979
## 0.3333333 0.500050 2381.008 0.7147943 1704.979
## 0.3333333 0.750025 2381.008 0.7147943 1704.979
## 0.3333333 1.000000 2381.008 0.7147943 1704.979
## 0.4444444 0.000100 2380.950 0.7148072 1704.945
## 0.4444444 0.250075 2380.950 0.7148072 1704.945
## 0.4444444 0.500050 2380.950 0.7148072 1704.945
## 0.4444444 0.750025 2380.950 0.7148072 1704.945
## 0.4444444 1.000000 2380.947 0.7148079 1704.941
## 0.5555556 0.000100 2380.919 0.7148163 1704.966
## 0.5555556 0.250075 2380.919 0.7148163 1704.966
## 0.5555556 0.500050 2380.919 0.7148163 1704.966
## 0.5555556 0.750025 2380.919 0.7148163 1704.966
## 0.5555556 1.000000 2380.939 0.7148113 1704.926
## 0.6666667 0.000100 2380.963 0.7148051 1705.020
## 0.6666667 0.250075 2380.963 0.7148051 1705.020
## 0.6666667 0.500050 2380.963 0.7148051 1705.020
## 0.6666667 0.750025 2380.957 0.7148067 1704.998
## 0.6666667 1.000000 2380.982 0.7148006 1704.884
## 0.7777778 0.000100 2380.988 0.7148001 1705.015
## 0.7777778 0.250075 2380.988 0.7148001 1705.015
## 0.7777778 0.500050 2380.988 0.7148001 1705.015
## 0.7777778 0.750025 2380.991 0.7147989 1704.946
## 0.7777778 1.000000 2381.026 0.7147896 1704.821
## 0.8888889 0.000100 2380.919 0.7148164 1704.999
## 0.8888889 0.250075 2380.919 0.7148164 1704.999
## 0.8888889 0.500050 2380.919 0.7148164 1704.999
## 0.8888889 0.750025 2380.952 0.7148079 1704.893
## 0.8888889 1.000000 2381.058 0.7147819 1704.767
## 1.0000000 0.000100 2380.943 0.7148115 1705.007
## 1.0000000 0.250075 2380.943 0.7148115 1705.007
## 1.0000000 0.500050 2380.939 0.7148124 1704.999
## 1.0000000 0.750025 2380.990 0.7147990 1704.851
## 1.0000000 1.000000 2381.117 0.7147674 1704.701
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0.1111111 and lambda = 1.
# Diagnostics for the elastic-net fit: two plots of the final glmnet model
# (x-axis chosen by `xvar`, curves labelled), followed by scaled variable
# importance.
elastic_fit <- elastic$finalModel
plot(elastic_fit, xvar = "lambda", label = TRUE)
plot(elastic_fit, xvar = "dev", label = TRUE)
plot(varImp(elastic, scale = TRUE))
# Predict on the test matrix with the tuned elastic-net model and display it.
pred5 <- predict(elastic, newdata = test_x)
print(pred5)
##
## Call:
## summary.resamples(object = result)
##
## Models: Ridge, Lasso, Elasticnet
## Number of resamples: 50
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Ridge 1624.772 1692.027 1726.927 1722.293 1752.307 1854.102 0
## Lasso 1606.835 1668.504 1705.873 1704.999 1737.276 1823.627 0
## Elasticnet 1607.009 1668.361 1705.925 1704.944 1737.141 1823.500 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Ridge 2139.305 2274.853 2348.793 2409.696 2567.539 2859.873 0
## Lasso 2109.680 2242.681 2319.599 2380.939 2526.477 2823.308 0
## Elasticnet 2109.715 2243.130 2319.699 2380.899 2526.683 2823.606 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Ridge 0.6486506 0.6912938 0.7129501 0.7088255 0.7271613 0.7570388 0
## Lasso 0.6582928 0.6920968 0.7219713 0.7148124 0.7335355 0.7636205 0
## Elasticnet 0.6579882 0.6921236 0.7219617 0.7148185 0.7334666 0.7636060 0
# Box-and-whisker plot of the resampled metrics held in `result`, the object
# summarised above for the Ridge, Lasso and Elasticnet models.
bwplot(result)
## alpha lambda
## 10 0.1111111 1
# Keep the final glmnet fit selected by the elastic-net tuning.
best <- elastic$finalModel
# Coefficients from the elastic net model, taken at the tuned lambda.
coef(best, s = elastic$bestTune$lambda)