Data Analytics Using Python
Data Analytics Using Python
23 - Jupyter Notebook
In [1]:
1 import pandas as pd
2 ipl=pd.read_csv('IPL Matches 2008-2020.csv')
3 ipl
Out[1]:
M Royal
2008-
0 335982 Bangalore BB McCullum Chinnaswamy 0 Challengers
04-18
Stadium Bangalore
Punjab
Cricket
2008- Kings XI
1 335983 Chandigarh MEK Hussey Association 0
04-19 Punjab
Stadium,
Mohali
Kolkata
2008- Eden
4 335986 Kolkata DJ Hussey 0 Knight
04-20 Gardens
Riders
Dubai
Royal
2020- International
811 1216547 Dubai AB de Villiers 0 Challengers
09-28 Cricket
Bangalore
Stadium
Dubai
2020- International Mumbai
812 1237177 Dubai JJ Bumrah 0
11-05 Cricket Indians
Stadium
Royal
2020- Sheikh Zayed
813 1237178 Abu Dhabi KS Williamson 0 Challengers
11-06 Stadium
Bangalore
Dubai
2020- International Delhi
815 1237181 Dubai TA Boult 0
11-10 Cricket Capitals
Stadium
In [2]:
1 print(type(ipl))
<class 'pandas.core.frame.DataFrame'>
In [3]:
1 ipl.columns
Out[3]:
In [4]:
C:\ProgramData\Anaconda3\lib\site-packages\scipy\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.1
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
In [6]:
1 sns.countplot(data=ipl,y='player_of_match')
Out[6]:
<AxesSubplot:xlabel='count', ylabel='player_of_match'>
In [7]:
1 ipl['player_of_match'].value_counts()
Out[7]:
AB de Villiers 23
CH Gayle 22
RG Sharma 18
DA Warner 17
MS Dhoni 17
..
S Anirudha 1
M Kartik 1
R McLaren 1
RJ Harris 1
PJ Cummins 1
Name: player_of_match, Length: 233, dtype: int64
In [8]:
1 ipl['winner'].value_counts()
Out[8]:
In [9]:
1 sns.countplot(data=ipl,y='winner')
Out[9]:
<AxesSubplot:xlabel='count', ylabel='winner'>
In [10]:
1 sns.countplot(data=ipl,y='winner',
2 order=ipl['winner'].value_counts().index)
Out[10]:
<AxesSubplot:xlabel='count', ylabel='winner'>
In [11]:
1 ipl.isnull()
Out[11]:
... ... ... ... ... ... ... ... ... ...
811 False False False False False False False False False
812 False False False False False False False False False
813 False False False False False False False False False
814 False False False False False False False False False
815 False False False False False False False False False
In [12]:
1 ipl.isnull().sum()
Out[12]:
id 0
city 13
date 0
player_of_match 4
venue 0
neutral_venue 0
team1 0
team2 0
toss_winner 0
toss_decision 0
winner 4
result 4
result_margin 17
eliminator 4
method 797
umpire1 0
umpire2 0
dtype: int64
In [14]:
1 ipl['city'].mode()
Out[14]:
0 Mumbai
dtype: object
In [15]:
1 ipl['city']=ipl['city'].fillna('Mumbai')
In [16]:
1 ipl.isnull().sum()
Out[16]:
id 0
city 0
date 0
player_of_match 4
venue 0
neutral_venue 0
team1 0
team2 0
toss_winner 0
toss_decision 0
winner 4
result 4
result_margin 17
eliminator 4
method 797
umpire1 0
umpire2 0
dtype: int64
In [17]:
1 ipl[ipl['winner'].isnull()]
Out[17]:
Royal
2015- M Chinnaswamy
486 829763 Bangalore NaN 0 Challengers
04-29 Stadium
Bangalore
Royal
2015- M Chinnaswamy
511 829813 Bangalore NaN 0 Challengers
05-17 Stadium
Bangalore
Royal
2019- M.Chinnaswamy
744 1178424 Bengaluru NaN 0 Challengers
04-30 Stadium
Bangalore
In [18]:
1 ipl[ipl['winner'].isnull()].index
Out[18]:
In [19]:
1 ipl.drop(ipl[ipl['winner'].isnull()].index)
Out[19]:
M Royal
2008-
0 335982 Bangalore BB McCullum Chinnaswamy 0 Challengers
04-18
Stadium Bangalore
Punjab
Cricket
2008- Kings XI
1 335983 Chandigarh MEK Hussey Association 0
04-19 Punjab
Stadium,
Mohali
Kolkata
2008- Eden
4 335986 Kolkata DJ Hussey 0 Knight
04-20 Gardens
Riders
Dubai
Royal
2020- International
811 1216547 Dubai AB de Villiers 0 Challengers
09-28 Cricket
Bangalore
Stadium
Dubai
2020- International Mumbai
812 1237177 Dubai JJ Bumrah 0
11-05 Cricket Indians
Stadium
Royal
2020- Sheikh Zayed
813 1237178 Abu Dhabi KS Williamson 0 Challengers
11-06 Stadium
Bangalore
Dubai
2020- International Delhi
815 1237181 Dubai TA Boult 0
11-10 Cricket Capitals
Stadium
In [20]:
1 ipl
Out[20]:
M Royal
2008-
0 335982 Bangalore BB McCullum Chinnaswamy 0 Challengers
04-18
Stadium Bangalore
Punjab
Cricket
2008- Kings XI
1 335983 Chandigarh MEK Hussey Association 0
04-19 Punjab
Stadium,
Mohali
Kolkata
2008- Eden
4 335986 Kolkata DJ Hussey 0 Knight
04-20 Gardens
Riders
Dubai
Royal
2020- International
811 1216547 Dubai AB de Villiers 0 Challengers
09-28 Cricket
Bangalore
Stadium
Dubai
2020- International Mumbai
812 1237177 Dubai JJ Bumrah 0
11-05 Cricket Indians
Stadium
Royal
2020- Sheikh Zayed
813 1237178 Abu Dhabi KS Williamson 0 Challengers
11-06 Stadium
Bangalore
Dubai
2020- International Delhi
815 1237181 Dubai TA Boult 0
11-10 Cricket Capitals
Stadium
In [21]:
1 ipl=ipl.drop(ipl[ipl['winner'].isnull()].index)
In [22]:
1 ipl
Out[22]:
M Royal
2008-
0 335982 Bangalore BB McCullum Chinnaswamy 0 Challengers
04-18
Stadium Bangalore
Punjab
Cricket
2008- Kings XI
1 335983 Chandigarh MEK Hussey Association 0
04-19 Punjab
Stadium,
Mohali
Kolkata
2008- Eden
4 335986 Kolkata DJ Hussey 0 Knight
04-20 Gardens
Riders
Dubai
Royal
2020- International
811 1216547 Dubai AB de Villiers 0 Challengers
09-28 Cricket
Bangalore
Stadium
Dubai
2020- International Mumbai
812 1237177 Dubai JJ Bumrah 0
11-05 Cricket Indians
Stadium
Royal
2020- Sheikh Zayed
813 1237178 Abu Dhabi KS Williamson 0 Challengers
11-06 Stadium
Bangalore
Dubai
2020- International Delhi
815 1237181 Dubai TA Boult 0
11-10 Cricket Capitals
Stadium
In [23]:
1 ipl.isnull().sum()
Out[23]:
id 0
city 0
date 0
player_of_match 0
venue 0
neutral_venue 0
team1 0
team2 0
toss_winner 0
toss_decision 0
winner 0
result 0
result_margin 13
eliminator 0
method 793
umpire1 0
umpire2 0
dtype: int64
In [24]:
1 ipl['method']=ipl['method'].fillna('NA')
In [25]:
1 ipl.isnull().sum()
Out[25]:
id 0
city 0
date 0
player_of_match 0
venue 0
neutral_venue 0
team1 0
team2 0
toss_winner 0
toss_decision 0
winner 0
result 0
result_margin 13
eliminator 0
method 0
umpire1 0
umpire2 0
dtype: int64
In [26]:
1 ipl['result_margin'].mean()
Out[26]:
17.321652065081352
In [27]:
1 ipl['result_margin']=ipl['result_margin'].fillna(ipl['result_margin'].mean())
In [29]:
1 ipl.isnull().sum()
Out[29]:
id 0
city 0
date 0
player_of_match 0
venue 0
neutral_venue 0
team1 0
team2 0
toss_winner 0
toss_decision 0
winner 0
result 0
result_margin 0
eliminator 0
method 0
umpire1 0
umpire2 0
dtype: int64
Winner Count
In [30]:
1 ipl['winner'].value_counts()
Out[30]:
In [31]:
In [32]:
1 ipl['winner'].value_counts()
Out[32]:
In [33]:
1 titanic=pd.read_csv('titanic.csv')
In [34]:
1 titanic
Out[34]:
Braund,
0 1 0 3 Mr. Owen male 22.0 1 0 A/5 21171 7.250
Harris
Cumings,
Mrs. John
Bradley
1 2 1 1 female 38.0 1 0 PC 17599 71.283
(Florence
Briggs
Th...
Heikkinen,
STON/O2.
2 3 1 3 Miss. female 26.0 0 0 7.925
3101282
Laina
Futrelle,
Mrs.
Jacques
3 4 1 1 female 35.0 1 0 113803 53.100
Heath
(Lily May
Peel)
Allen, Mr.
4 5 0 3 William male 35.0 0 0 373450 8.050
Henry
... ... ... ... ... ... ... ... ... ...
Montvila,
886 887 0 2 Rev. male 27.0 0 0 211536 13.000
Juozas
Graham,
Miss.
887 888 1 1 female 19.0 0 0 112053 30.000
Margaret
Edith
Johnston,
Miss.
W./C.
888 889 0 3 Catherine female NaN 1 2 23.450
6607
Helen
"Carrie"
Behr, Mr.
889 890 1 1 Karl male 26.0 0 0 111369 30.000
Howell
Dooley,
890 891 0 3 Mr. male 32.0 0 0 370376 7.750
Patrick
In [35]:
1 titanic.columns
Out[35]:
In [36]:
1 import plotly.express as ex
In [ ]:
1 ex.box(data_frame=titanic)