0% found this document useful (0 votes)

35 views6 pages

Practical 1

Uploaded by

manasishivarkar

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

35 views6 pages

Practical 1

Uploaded by

manasishivarkar

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 6

import pandas as pd

import seaborn as sns

# Load the ride records from 'uber.csv' into a DataFrame.
df = pd.read_csv('uber.csv')
# Preview the first rows to check the columns and value formats.
df.head()

Unnamed: 0 key fare_amount \

0 24238194 2015-05-07 19:52:06.0000003 7.5
1 27835199 2009-07-17 20:04:56.0000002 7.7
2 44984355 2009-08-24 21:45:00.00000061 12.9
3 25894730 2009-06-26 08:22:21.0000001 5.3
4 17610152 2014-08-28 17:47:00.000000188 16.0

pickup_datetime pickup_longitude pickup_latitude \

0 2015-05-07 19:52:06 UTC -73.999817 40.738354
1 2009-07-17 20:04:56 UTC -73.994355 40.728225
2 2009-08-24 21:45:00 UTC -74.005043 40.740770
3 2009-06-26 08:22:21 UTC -73.976124 40.790844
4 2014-08-28 17:47:00 UTC -73.925023 40.744085

dropoff_longitude dropoff_latitude passenger_count

0 -73.999512 40.723217 1
1 -73.994710 40.750325 1
2 -73.962565 40.772647 1
3 -73.965316 40.803349 3
4 -73.973082 40.761247 5

# Count the missing values in each column.
df.isnull().sum()

Unnamed: 0 0
key 0
fare_amount 0
pickup_datetime 0
pickup_longitude 0
pickup_latitude 0
dropoff_longitude 1
dropoff_latitude 1
passenger_count 0
dtype: int64

# Remove the columns that are not used as model features: 'Unnamed: 0'
# (presumably a saved row index -- verify against the CSV), the 'key'
# identifier, and both drop-off coordinates.
# Fix: the column list was previously split in the middle of the
# 'Unnamed: 0' string literal, which is a syntax error.
df = df.drop(
    columns=['Unnamed: 0', 'key', 'dropoff_longitude', 'dropoff_latitude']
)

# Inspect the dtypes of the remaining columns.
df.dtypes

fare_amount float64
pickup_datetime object
pickup_longitude float64
pickup_latitude float64
passenger_count int64
dtype: object
# Cast the fare to an integer dtype.
# NOTE(review): this discards the fractional part of every fare (7.5 is
# shown as 7 in the frame displayed later), so the regression target
# loses precision -- confirm this is intended.
df['fare_amount'] = df['fare_amount'].astype('int')
# The same cast for the pickup coordinates is left disabled.
# df['pickup_longitude'] = df['pickup_longitude'].astype('int')
# df['pickup_latitude'] = df['pickup_latitude'].astype('int')

# Confirm the dtype change.
df.dtypes

fare_amount int32
pickup_datetime object
pickup_longitude float64
pickup_latitude float64
passenger_count int64
dtype: object

from datetime import datetime

# Parse the pickup timestamp strings into datetimes. Values that cannot be
# parsed become NaT because of errors='coerce'.
# Fix: the assignment was previously broken after the '=' sign, which is a
# syntax error; the right-hand side now sits inside parentheses.
df['pickup_datetime'] = pd.to_datetime(
    df['pickup_datetime'], errors='coerce'
)

# Derive calendar features from the parsed timestamp.
df['year'] = df['pickup_datetime'].dt.year
df['month'] = df['pickup_datetime'].dt.month
df['day'] = df['pickup_datetime'].dt.day
df['hours'] = df['pickup_datetime'].dt.hour
df['weekday'] = df['pickup_datetime'].dt.weekday

# The raw timestamp is no longer needed once its components are extracted.
df = df.drop(columns=['pickup_datetime'])
df

fare_amount pickup_longitude pickup_latitude passenger_count year \
0 7 -73.999817 40.738354 1 2015
1 7 -73.994355 40.728225 1 2009
2 12 -74.005043 40.740770 1 2009
3 5 -73.976124 40.790844 3 2009
4 16 -73.925023 40.744085 5 2014
... ... ... ... ... ...
199995 3 -73.987042 40.739367 1 2012
199996 7 -73.984722 40.736837 1 2014
199997 30 -73.986017 40.756487 2 2009
199998 14 -73.997124 40.725452 1 2015
199999 14 -73.984395 40.720077 1 2010
month day hours weekday
0 5 7 19 3
1 7 17 20 4
2 8 24 21 0
3 6 26 8 4
4 8 28 17 3
... ... ... ... ...
199995 10 28 10 6
199996 3 14 1 4
199997 6 29 0 0
199998 5 20 14 2
199999 5 15 4 5

[200000 rows x 9 columns]

# Identify outliers: draw a box plot of the fare_amount column.
import seaborn as sns
import matplotlib.pyplot as plt
sns.boxplot(x=df['fare_amount'])
plt.show()

# Pairwise correlation between every pair of columns in the frame.
df.corr()
fare_amount pickup_longitude pickup_latitude \
fare_amount 1.000000 0.010532 -0.008573
pickup_longitude 0.010532 1.000000 -0.816461
pickup_latitude -0.008573 -0.816461 1.000000
passenger_count 0.010205 -0.000414 -0.001560
year 0.127932 0.009966 -0.010233
month 0.024222 -0.004665 0.004625
day 0.001224 0.005184 -0.008264
hours -0.021455 0.002433 -0.003822
weekday 0.007641 0.000825 -0.002455

passenger_count year month day hours \
fare_amount 0.010205 0.127932 0.024222 0.001224 -0.021455
pickup_longitude -0.000414 0.009966 -0.004665 0.005184 0.002433
pickup_latitude -0.001560 -0.010233 0.004625 -0.008264 -0.003822
passenger_count 1.000000 0.004798 0.009773 0.003252 0.013196
year 0.004798 1.000000 -0.115859 -0.012170 0.002156
month 0.009773 -0.115859 1.000000 -0.017360 -0.003926
day 0.003252 -0.012170 -0.017360 1.000000 0.004677
hours 0.013196 0.002156 -0.003926 0.004677 1.000000
weekday 0.033196 0.006113 -0.008786 0.005617 -0.086947

weekday
fare_amount 0.007641
pickup_longitude 0.000825
pickup_latitude -0.002455
passenger_count 0.033196
year 0.006113
month -0.008786
day 0.005617
hours -0.086947
weekday 1.000000

# Visualise the correlation matrix as a heatmap with each cell annotated
# by its coefficient.
sns.heatmap(df.corr(), annot=True)
# Fix: call plt.show(). Without the parentheses the function was only
# referenced, never called, and the cell echoed the function object.
plt.show()

# Separate the feature columns from the regression target (fare_amount).
x = df.drop('fare_amount', axis=1)
y = df['fare_amount']

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Fix 1: the train_test_split assignment was previously broken after the
# '=' sign, which is a syntax error.
# Fix 2: split BEFORE scaling. The scaler used to be fitted on the whole
# dataset, so the min/max of the test rows leaked into the training
# features. The split depends only on the row count and random_state, so
# the same rows land in each partition as before.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=43
)

# Learn the per-column min/max from the training rows only, then apply the
# same transformation to the held-out rows. Test values may therefore fall
# slightly outside [0, 1].
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

# Kept so that any later cell referring to x_scale still works: the full
# feature matrix scaled with the training-fitted scaler.
x_scale = scaler.transform(x)

from sklearn.linear_model import LinearRegression

# Fit a linear regression model on the training partition.
lr = LinearRegression()
lr.fit(x_train,y_train)

LinearRegression()

# Predict fares for the held-out rows with the linear model.
y_pred = lr.predict(x_test)
y_pred
array([10.55116531, 12.61965214, 12.59597574, ..., 9.48363077,
9.93822128, 12.56229707])

from sklearn.ensemble import RandomForestRegressor

# Fit a 100-tree random forest on the same training partition; the fixed
# random_state keeps the run reproducible.
rf = RandomForestRegressor(n_estimators = 100, random_state=43)
rf.fit(x_train,y_train)

RandomForestRegressor(random_state=43)

# Predict fares for the held-out rows with the random forest.
y_pred1 = rf.predict(x_test)

y_pred1

array([ 7.29, 8.48, 10.92, ..., 29.27, 8.97, 12.41])

from sklearn.metrics import mean_squared_error, r2_score

import numpy as np

# Evaluate the linear model on the held-out rows.
# NOTE: despite its name, mse_lr holds the ROOT mean squared error (the
# square root of the MSE), which is what the printed label says.
mse_lr = np.sqrt(mean_squared_error(y_test,y_pred))
print('RMSE of Linear Regression: ',mse_lr)
# Coefficient of determination of the linear model's predictions.
r2_lr = r2_score(y_test,y_pred)
print('R2-Score of Linear Regression: ',r2_lr)

RMSE of Linear Regression: 9.827949026067758

R2-Score of Linear Regression: 0.019287259101657295

# Evaluate the random forest on the held-out rows.
# NOTE: despite its name, mse_rf holds the ROOT mean squared error (the
# square root of the MSE), which is what the printed label says.
mse_rf = np.sqrt(mean_squared_error(y_test,y_pred1))
print('RMSE of Random Forest: ',mse_rf)
# Coefficient of determination of the forest's predictions.
r2_rf = r2_score(y_test,y_pred1)
print('R2-Score of Random Forest: ',r2_rf)

RMSE of Random Forest: 8.589228089760512

R2-Score of Random Forest: 0.2509267327753276

Analyzing Taxi Trends
No ratings yet
Analyzing Taxi Trends
43 pages
Step 16 Chapter4
No ratings yet
Step 16 Chapter4
64 pages
ML All Prints
No ratings yet
ML All Prints
25 pages
Merged
No ratings yet
Merged
47 pages
Delhivery Case Study Compressed
No ratings yet
Delhivery Case Study Compressed
31 pages
Rainfall
No ratings yet
Rainfall
31 pages
Assignment No 1 Output
No ratings yet
Assignment No 1 Output
42 pages
ML Code Output
No ratings yet
ML Code Output
38 pages
Bose A S
No ratings yet
Bose A S
37 pages
2016MIS013
No ratings yet
2016MIS013
36 pages
House - Price - Prediction
No ratings yet
House - Price - Prediction
16 pages
Report
No ratings yet
Report
25 pages
Assignment 1, Codeandssfile
No ratings yet
Assignment 1, Codeandssfile
29 pages
Delhivery Feature Engineering Cs
No ratings yet
Delhivery Feature Engineering Cs
46 pages
Delhivery
No ratings yet
Delhivery
20 pages
ML#05
No ratings yet
ML#05
35 pages
Airlanes Booking Analys
No ratings yet
Airlanes Booking Analys
26 pages
ML - Practical - 1 - Jupyter Notebook
No ratings yet
ML - Practical - 1 - Jupyter Notebook
15 pages
ML Practical 1
No ratings yet
ML Practical 1
15 pages
Outlook Module3
No ratings yet
Outlook Module3
21 pages
Predict The Price of The Uber Ride From A Given Pickup Point To The Agreed Drop-Off Location
No ratings yet
Predict The Price of The Uber Ride From A Given Pickup Point To The Agreed Drop-Off Location
9 pages
Airline Passenger Booking Analyze
No ratings yet
Airline Passenger Booking Analyze
26 pages
ML 1 16
No ratings yet
ML 1 16
13 pages
Airfare ML - Predicting Flight Fares
No ratings yet
Airfare ML - Predicting Flight Fares
21 pages
AIML Lab Ex 3-5 - 1
No ratings yet
AIML Lab Ex 3-5 - 1
31 pages
Lab1.ipynb - Colaboratory
No ratings yet
Lab1.ipynb - Colaboratory
9 pages
Name: Siddhesh Asati: #Group: B (ML) #Assignment: 6
No ratings yet
Name: Siddhesh Asati: #Group: B (ML) #Assignment: 6
9 pages
Scaffold FG
No ratings yet
Scaffold FG
13 pages
ML Assignment Presentation
No ratings yet
ML Assignment Presentation
37 pages
Ml-Exp-1 - Jupyter Notebook
No ratings yet
Ml-Exp-1 - Jupyter Notebook
8 pages
ML 1 Um
No ratings yet
ML 1 Um
5 pages
ARIMA
No ratings yet
ARIMA
11 pages
Uber ml1 - Jupyter Notebook
No ratings yet
Uber ml1 - Jupyter Notebook
10 pages
SourceCode Assignment1
No ratings yet
SourceCode Assignment1
9 pages
ML - 2 - Jupyter Notebook
No ratings yet
ML - 2 - Jupyter Notebook
6 pages
Supervised Regression
No ratings yet
Supervised Regression
24 pages
Data Cleaning On Melbourne Housing
No ratings yet
Data Cleaning On Melbourne Housing
16 pages
Ds Pract 5 Data Analytics1 Vedanti
No ratings yet
Ds Pract 5 Data Analytics1 Vedanti
7 pages
P1) Code Uber
No ratings yet
P1) Code Uber
6 pages
SPPUML1
No ratings yet
SPPUML1
8 pages
Praktikum 5
No ratings yet
Praktikum 5
20 pages
Bike Sharing Data Analysis
No ratings yet
Bike Sharing Data Analysis
24 pages
Uber
No ratings yet
Uber
7 pages
ML Practical 1
No ratings yet
ML Practical 1
15 pages
Case Study 1 Exercise R Script
No ratings yet
Case Study 1 Exercise R Script
5 pages
Divvy Exercise R Script
No ratings yet
Divvy Exercise R Script
5 pages
Loading The Dataset: First We Load The Dataset and Find Out The Number of Columns, Rows, NULL Values, Etc
100% (1)
Loading The Dataset: First We Load The Dataset and Find Out The Number of Columns, Rows, NULL Values, Etc
8 pages
How To Convert Casuals To Members?": Google Data Analytics Course Capstone Project: Case Study 1 "Cyclistic"
No ratings yet
How To Convert Casuals To Members?": Google Data Analytics Course Capstone Project: Case Study 1 "Cyclistic"
18 pages
Institute of Technology Management & Research
No ratings yet
Institute of Technology Management & Research
10 pages
ML Practical 1 Code
100% (1)
ML Practical 1 Code
1 page
SN Travel Jupyter Notebook PDF
No ratings yet
SN Travel Jupyter Notebook PDF
28 pages
MTA Project
No ratings yet
MTA Project
1 page
EDA Optimising NYC Taxis GautamTiwari - Cleanup
No ratings yet
EDA Optimising NYC Taxis GautamTiwari - Cleanup
1 page
Geography and Language
No ratings yet
Geography and Language
2 pages
House Price Prediction
No ratings yet
House Price Prediction
1 page
Uber Drive Practice DP PDF
No ratings yet
Uber Drive Practice DP PDF
10 pages
Data Analysis Dummy Report: 0. Data Import and Cleaning
No ratings yet
Data Analysis Dummy Report: 0. Data Import and Cleaning
1 page
RATIO ANALYSIS OF Menraj Chaudhary
No ratings yet
RATIO ANALYSIS OF Menraj Chaudhary
25 pages
Yulu Case Study
No ratings yet
Yulu Case Study
1 page
XLSTAT - Statistical Analysis Software
No ratings yet
XLSTAT - Statistical Analysis Software
43 pages
Coding Questions
No ratings yet
Coding Questions
124 pages
Business and Economics Undergraduate Course Guide 2014 PDF
No ratings yet
Business and Economics Undergraduate Course Guide 2014 PDF
46 pages
Module 2 Part 1 - Types of Forecasting Models and Simple Linear Regression
No ratings yet
Module 2 Part 1 - Types of Forecasting Models and Simple Linear Regression
71 pages
Automobile Mechatronics
No ratings yet
Automobile Mechatronics
68 pages
The Challenges of Nursing Students in The Clinical Learning Environment
No ratings yet
The Challenges of Nursing Students in The Clinical Learning Environment
20 pages
Data Analysis Midterm Exam
No ratings yet
Data Analysis Midterm Exam
3 pages
EDA Case Study
No ratings yet
EDA Case Study
14 pages
Preparing The Action Research Proposal
No ratings yet
Preparing The Action Research Proposal
19 pages
Statistics Using Excel PDF
No ratings yet
Statistics Using Excel PDF
63 pages
Chapter 15
No ratings yet
Chapter 15
43 pages
IEEE Guide For The Statistical Analysis of Thermal Life Test Data
No ratings yet
IEEE Guide For The Statistical Analysis of Thermal Life Test Data
34 pages
Parametric Tests
No ratings yet
Parametric Tests
16 pages
Article: Science Mapping and Visualization Tools Used in Bibliometric & Scientometric Studies: An Overview
No ratings yet
Article: Science Mapping and Visualization Tools Used in Bibliometric & Scientometric Studies: An Overview
15 pages
Food Analysis
No ratings yet
Food Analysis
16 pages
12.4 - Standard Deviation
100% (1)
12.4 - Standard Deviation
12 pages
Influence of Devolved Dispute Resolution Mechanisms On Job Satisfaction of Health Care Workers in Nakuru County Kenya
No ratings yet
Influence of Devolved Dispute Resolution Mechanisms On Job Satisfaction of Health Care Workers in Nakuru County Kenya
13 pages
Algoritma K-Means Clustering Dan Contoh Soal - KETUTRARE
No ratings yet
Algoritma K-Means Clustering Dan Contoh Soal - KETUTRARE
17 pages
AIML Practical Exam Codes 1
No ratings yet
AIML Practical Exam Codes 1
7 pages
PSYBSC1619 Kalyani Joshi
No ratings yet
PSYBSC1619 Kalyani Joshi
52 pages
IFN645Lecture4 - Feature Selection - 2021
No ratings yet
IFN645Lecture4 - Feature Selection - 2021
39 pages
CHAPTER 3 NALANG KULANG AYOS GUYzzzzz
No ratings yet
CHAPTER 3 NALANG KULANG AYOS GUYzzzzz
38 pages
Chapter 10: Correlation and Regression Chapter 13: Nonparametric Statistics
No ratings yet
Chapter 10: Correlation and Regression Chapter 13: Nonparametric Statistics
27 pages
The Use of Undercover Game Application To Improve Students' Vocabulary
No ratings yet
The Use of Undercover Game Application To Improve Students' Vocabulary
16 pages
0 0 1 1 1 W A P 1 N N I 1 I X I N 1 N N I 1 I 2
No ratings yet
0 0 1 1 1 W A P 1 N N I 1 I X I N 1 N N I 1 I 2
2 pages
Examples of Business Analytics in Action - HBS Online
No ratings yet
Examples of Business Analytics in Action - HBS Online
5 pages
A Comprehensive Study On Using Data Mining in ERP Systems
No ratings yet
A Comprehensive Study On Using Data Mining in ERP Systems
7 pages
Kavya Data Analyst
No ratings yet
Kavya Data Analyst
1 page
Swot Analysis Tows Matrix: Strenghts (S) Weaknesses (W)
No ratings yet
Swot Analysis Tows Matrix: Strenghts (S) Weaknesses (W)
4 pages

Practical 1

Uploaded by

Practical 1

Uploaded by

import pandas as pd

import seaborn as sns

Unnamed: 0 key fare_amount \

pickup_datetime pickup_longitude pickup_latitude \

dropoff_longitude dropoff_latitude passenger_count

from datetime import datetime

fare_amount pickup_longitude pickup_latitude

[200000 rows x 9 columns]

passenger_count year month day

<function matplotlib.pyplot.show(close=None, block=None)>

from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.ensemble import RandomForestRegressor

array([ 7.29, 8.48, 10.92, ..., 29.27, 8.97, 12.41])

from sklearn.metrics import mean_squared_error, r2_score

RMSE of Linear Regression: 9.827949026067758

RMSE of Random Forest: 8.589228089760512

You might also like