1) Delivery_time -> Predict delivery time using sorting time
2) Salary_hike -> Build a prediction model for Salary_hike
------------------------------------------------------------
Build a simple linear regression model: perform EDA, apply any necessary
transformations, and select the best model using R or Python.
Q.1) Delivery_time -> Predict delivery time using sorting time
# Q.1 workflow: load the delivery data, explore it, fit an ordinary
# least-squares line delivery_time ~ sorting_time, and predict new values.
# The bare expressions (e.g. `data`, `model.summary()`) are notebook-style
# cell outputs; they display a value in a notebook and do nothing in a script.
# NOTE(review): the attribute calls in this section were lost in text
# extraction ("[Link]" placeholders) and have been reconstructed from
# context -- confirm against the original notebook.

# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

# import dataset
data = pd.read_csv('delivery_time.csv')
data

# EDA and Data Visualization
data.info()
# NOTE(review): the original plotting call is unknown; histplot with a KDE
# overlay is used as the current replacement for the deprecated distplot.
sns.histplot(data['Delivery Time'], kde=True)
plt.show()  # finish this figure so the next plot does not share its axes
sns.histplot(data['Sorting Time'], kde=True)
plt.show()

# Renaming Columns
# Formula terms must be valid identifiers, so the spaces are replaced.
dataset = data.rename(
    columns={
        'Delivery Time': 'delivery_time',
        'Sorting Time': 'sorting_time',
    }
)
dataset

# Correlation Analysis
dataset.corr()
sns.regplot(x=dataset['sorting_time'], y=dataset['delivery_time'])
plt.show()

# Model Building
model = smf.ols("delivery_time~sorting_time", data=dataset).fit()
model.summary()

# Model Testing
# Finding Coefficient parameters
model.params
# Finding tvalues and pvalues
model.tvalues, model.pvalues
# Finding Rsquared Values
model.rsquared, model.rsquared_adj

# Model Predictions
# Manual prediction for say sorting time 5
# The coefficients are read from the fitted model rather than typed in
# (previously hard-coded as 6.582734 and 1.649020), so the result stays
# in sync if the data or the model changes.
intercept = model.params['Intercept']
slope = model.params['sorting_time']
delivery_time = intercept + slope * 5
delivery_time

# Automatic Prediction for say sorting time 5, 8
new_data = pd.Series([5, 8])
new_data
# The column name must match the predictor name used in the formula.
data_pred = pd.DataFrame({'sorting_time': new_data})
data_pred
model.predict(data_pred)
Q.2) Salary_hike -> Build a prediction model for Salary_hike
# Q.2 workflow: load the salary data, explore it, fit an ordinary
# least-squares line Salary ~ YearsExperience, and predict new values.
# The bare expressions (e.g. `data`, `model.summary()`) are notebook-style
# cell outputs; they display a value in a notebook and do nothing in a script.
# NOTE(review): the attribute calls in this section were lost in text
# extraction ("[Link]" placeholders) and have been reconstructed from
# context -- confirm against the original notebook.

# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

# import dataset
data = pd.read_csv('Salary_Data.csv')
data

# EDA and Data Visualization
data.info()
# NOTE(review): the original plotting call is unknown; histplot with a KDE
# overlay is used as the current replacement for the deprecated distplot.
sns.histplot(data['YearsExperience'], kde=True)
plt.show()  # finish this figure so the next plot does not share its axes
sns.histplot(data['Salary'], kde=True)
plt.show()

# Renaming Columns
# This renamed copy is only used for display and plotting; its label
# contains spaces, so it cannot be used as a bare term in a model formula.
dataset1 = data.rename(columns={'YearsExperience': 'Experiance in year'})
dataset1

# Correlation Analysis
dataset1.corr()
sns.regplot(x=dataset1['Experiance in year'], y=dataset1['Salary'])
plt.show()

# Model Building
# Fit on `data`, which still has the `YearsExperience` column named in the
# formula. The previous code passed `data1`, a name that is never defined.
model = smf.ols("Salary~YearsExperience", data=data).fit()
model.summary()

# Model Testing
# Finding Coefficient parameters
model.params
# Finding tvalues and pvalues
model.tvalues, model.pvalues
# Finding Rsquared Values
model.rsquared, model.rsquared_adj

# Model Predictions
# Manual prediction for say 3 Years Experience
# The coefficients are read from the fitted model rather than typed in
# (previously hard-coded as 25792.200199 and 9449.962321), so the result
# stays in sync if the data or the model changes.
intercept = model.params['Intercept']
slope = model.params['YearsExperience']
Salary = intercept + slope * 3
Salary

# Automatic Prediction for say 5 and 8 years of experience
new_data = pd.Series([5, 8])
new_data
# The column name must match the predictor name used in the formula.
data_pred = pd.DataFrame({'YearsExperience': new_data})
data_pred
model.predict(data_pred)