Airbnb Case - SHIRIN MAHAJAN Python
Airbnb Case - SHIRIN MAHAJAN Python
In [1]:
#1 Read the CSV file and create pandas dataframe object airbnb.
import numpy as np
import pandas as pd
bnb=pd.read_csv('Airbnbcase.csv')
bnb
Shared De Pijp /
0 10176931 1476 49180562 NaN Amsterdam NaN
room Rivierenbuurt
Shared
1 8935871 1476 46718394 NaN Amsterdam NaN Centrum West
room
Shared
2 14011697 1476 10346595 NaN Amsterdam NaN Watergraafsmeer
room
Shared
3 6137978 1476 8685430 NaN Amsterdam NaN Centrum West
room
Shared De Baarsjes /
4 18630616 1476 70191803 NaN Amsterdam NaN
room Oud West
Private
18718 17789893 1476 47501089 NaN Amsterdam NaN Bijlmer Centrum
room
Private
18719 16877166 1476 67093870 NaN Amsterdam NaN Bijlmer Centrum
room
Private Geuzenveld /
18720 19859427 1476 29724632 NaN Amsterdam NaN
room Slotermeer
Private
18721 17132164 1476 115156569 NaN Amsterdam NaN Centrum West
room
Private
18722 7605782 1476 39503013 NaN Amsterdam NaN Centrum West
room
In [2]:
#2 Review first 5 rows
bnb.head(5)
Out[2]: room_id survey_id host_id room_type country city borough neighborhood revie
Shared De Pijp /
0 10176931 1476 49180562 NaN Amsterdam NaN
room Rivierenbuurt
Shared
1 8935871 1476 46718394 NaN Amsterdam NaN Centrum West
room
Shared
2 14011697 1476 10346595 NaN Amsterdam NaN Watergraafsmeer
room
Shared
3 6137978 1476 8685430 NaN Amsterdam NaN Centrum West
room
Shared De Baarsjes /
4 18630616 1476 70191803 NaN Amsterdam NaN
room Oud West
In [3]:
#3 What are the room_type available
bnb['room_type'].value_counts()
Shared room 63
In [4]:
#4 How many rows and columns are present in the dataset
rows=len(bnb.axes[0])
cols=len(bnb.axes[1])
print('no. of rows:',rows)
print('no. of col:',cols)
no. of col: 20
In [5]:
#5 Count the number of locations where the price is below 100 in shared rooms
Out[5]: room_id survey_id host_id room_type country city borough neighborhood revie
Shared De Pijp /
0 10176931 1476 49180562 NaN Amsterdam NaN
room Rivierenbuurt
Shared
1 8935871 1476 46718394 NaN Amsterdam NaN Centrum West
room
Shared
2 14011697 1476 10346595 NaN Amsterdam NaN Watergraafsmeer
room
Shared
3 6137978 1476 8685430 NaN Amsterdam NaN Centrum West
room
Shared De Baarsjes /
4 18630616 1476 70191803 NaN Amsterdam NaN
room Oud West
5 rows × 21 columns
In [6]:
(bnb['low_price']==True).sum()
Out[6]: 3376
In [7]:
#6 Drop the shared room type as its count is very low compare to others
bnb.tail(5)
Private Bijlmer
18718 17789893 1476 47501089 NaN Amsterdam NaN
room Centrum
Private Bijlmer
18719 16877166 1476 67093870 NaN Amsterdam NaN
room Centrum
Private Geuzenveld /
18720 19859427 1476 29724632 NaN Amsterdam NaN
room Slotermeer
Private
18721 17132164 1476 115156569 NaN Amsterdam NaN Centrum West
room
Private
18722 7605782 1476 39503013 NaN Amsterdam NaN Centrum West
room
5 rows × 21 columns
In [8]:
(bnb['room_type']=='Private room').sum()
Out[8]: 3682
In [9]:
(bnb['room_type']=='Entire home/apt').sum()
Out[9]: 14978
In [10]:
#7Take a count of each neighborhood to show how popular they are w.r.t to airbnb apa
bnb['neighborhood'].value_counts()
Westerpark 1430
Watergraafsmeer 517
Slotervaart 349
Osdorp 163
Bijlmer Centrum 99
Bijlmer Oost 97
Gaasperdam / Driemond 42
Westpoort 15
In [11]:
#8 What is the overall_satisfication count by each rating
os = bnb.groupby('overall_satisfaction', dropna=False).reviews.count()
os
Out[11]: overall_satisfaction
0.0 5748
1.0 1
1.5 1
2.5 1
3.0 19
3.5 109
4.0 577
4.5 4559
5.0 7708
In [25]:
bnb.drop(['pricevalue','low_price',],axis=1,inplace=True)
In [ ]:
bnb.drop(['low price',],axis=1,inplace=True)
In [13]:
#9 Create a sub dataset where overall satisfication is 0
rating = bnb[bnb.overall_satisfaction == 0]
rating.sample(10)
Entire
10940 3172092 1476 16083490 NaN Amsterdam NaN Slotervaart
home/apt
Entire
9836 12983144 1476 71484940 NaN Amsterdam NaN Watergraafsmeer
home/apt
Entire
7350 17819035 1476 8986411 NaN Amsterdam NaN Westerpark
home/apt
Entire De Pijp /
9202 19888201 1476 10366758 NaN Amsterdam NaN
home/apt Rivierenbuurt
Entire Buitenveldert /
1709 14366189 1476 46858505 NaN Amsterdam NaN
home/apt Zuidas
Entire
6544 15780441 1476 99163836 NaN Amsterdam NaN Oud Oost
home/apt
Entire Noord-West /
5272 18559464 1476 8874273 NaN Amsterdam NaN
home/apt Noord-Midden
Entire
12714 14553702 1476 38953420 NaN Amsterdam NaN Westerpark
home/apt
Entire De Baarsjes /
11472 14165297 1476 85697893 NaN Amsterdam NaN
home/apt Oud West
Entire
4347 6877495 1476 36039260 NaN Amsterdam NaN Centrum West
home/apt
10 rows × 21 columns
In [14]:
#10 What is the satisfication ratings count where bedroom is 3
a= bnb[bnb.bedrooms == 3].bedrooms.count()
b = bnb[bnb.bedrooms == 3].overall_satisfaction.count()
print('with ratings:',b)
In [16]:
#11 On which price most of the properties are available
pl = bnb.groupby('price', dropna=False).room_id.count()
pl.sort_values(ascending = False)
Out[16]: price
119.0 1023
180.0 1001
144.0 887
150.0 621
132.0 588
...
373.0 1
371.0 1
369.0 1
367.0 1
6000.0 1
In [17]:
#12 Drop the values where bedroom are equal to zero
bnb
Shared De Pijp /
0 10176931 1476 49180562 NaN Amsterdam NaN
room Rivierenbuurt
Shared
1 8935871 1476 46718394 NaN Amsterdam NaN Centrum West
room
Shared
2 14011697 1476 10346595 NaN Amsterdam NaN Watergraafsmeer
room
Shared
3 6137978 1476 8685430 NaN Amsterdam NaN Centrum West
room
Shared De Baarsjes /
4 18630616 1476 70191803 NaN Amsterdam NaN
room Oud West
Private
18718 17789893 1476 47501089 NaN Amsterdam NaN Bijlmer Centrum
room
Private
18719 16877166 1476 67093870 NaN Amsterdam NaN Bijlmer Centrum
room
Private Geuzenveld /
18720 19859427 1476 29724632 NaN Amsterdam NaN
room Slotermeer
Private
18721 17132164 1476 115156569 NaN Amsterdam NaN Centrum West
room
Private
18722 7605782 1476 39503013 NaN Amsterdam NaN Centrum West
room
In [ ]:
In [18]:
#13 Price column has large variation, so normalize the price column.
bnbscaled = bnb.copy()
Shared De Pijp /
0 10176931 1476 49180562 NaN Amsterdam NaN
room Rivierenbuurt
Shared
1 8935871 1476 46718394 NaN Amsterdam NaN Centrum West
room
Shared
2 14011697 1476 10346595 NaN Amsterdam NaN Watergraafsmeer
room
Shared
3 6137978 1476 8685430 NaN Amsterdam NaN Centrum West
room
Shared De Baarsjes /
4 18630616 1476 70191803 NaN Amsterdam NaN
room Oud West
Private
18718 17789893 1476 47501089 NaN Amsterdam NaN Bijlmer Centrum
room
Private
18719 16877166 1476 67093870 NaN Amsterdam NaN Bijlmer Centrum
room
Private Geuzenveld /
18720 19859427 1476 29724632 NaN Amsterdam NaN
room Slotermeer
Private
18721 17132164 1476 115156569 NaN Amsterdam NaN Centrum West
room
Private
18722 7605782 1476 39503013 NaN Amsterdam NaN Centrum West
room
In [19]:
bnb['price'].mean()
Out[19]: 168.9025556377711
In [20]:
bnb['price'].std()
Out[20]: 110.98841382800018
In [21]:
#14 Change the price variable to three levels- Basic, medium and luxury
bnbscaled['price'].max()
Out[21]: 1.0
In [22]:
bnbscaled['price'].min()
Out[22]: 0.0
In [ ]:
conditions = [
(bnbscaled['price'] == 0),
(bnbscaled['price'] == 1)
values = ['Basic','medium','Luxury']
# create a new column and use np.select to assign values to it using our lists as ar
bnbscaled['Newprice'] = np.select(conditions,values)
bnbscaled.head()
In [23]:
#15 Drop the last 3 columns and save the data frame.
In [24]:
bnbscaled
Shared De Pijp /
0 10176931 1476 49180562 NaN Amsterdam NaN
room Rivierenbuurt
Shared
1 8935871 1476 46718394 NaN Amsterdam NaN Centrum West
room
Shared
2 14011697 1476 10346595 NaN Amsterdam NaN Watergraafsmeer
room
Shared
3 6137978 1476 8685430 NaN Amsterdam NaN Centrum West
room
Shared De Baarsjes /
4 18630616 1476 70191803 NaN Amsterdam NaN
room Oud West
Private
18718 17789893 1476 47501089 NaN Amsterdam NaN Bijlmer Centrum
room
Private
18719 16877166 1476 67093870 NaN Amsterdam NaN Bijlmer Centrum
room
Private Geuzenveld /
18720 19859427 1476 29724632 NaN Amsterdam NaN
room Slotermeer
Private
18721 17132164 1476 115156569 NaN Amsterdam NaN Centrum West
room
Private
18722 7605782 1476 39503013 NaN Amsterdam NaN Centrum West
room
In [ ]: