EDA IPL Data
EDA IPL Data
csv')
# Peek at the first rows and the number of distinct values in each column.
df.head()
df.nunique()
winners = df['winner']
# Combined count of the three most frequent values in the `winner` column.
winners.value_counts().head(3).sum()
# Number of non-null entries in `winner`.
total = winners.count()
total
# Distinct `winner` values, then how often each one occurs.
winners.unique()
winners.value_counts()
#Q8
# Frequency of each raw `venue` value, most frequent first.
df['venue'].value_counts().sort_values(ascending=False)
# Map alternate venue spellings (keys) onto the single name to keep (values).
# Each string literal must sit on one line: a line break inside the quotes is
# a syntax error, and any changed character stops the key from matching.
# NOTE(review): 'M. A. Chidambaram Stadium' and 'ACA-VDCA Stadium' were rejoined
# from hard-wrapped lines -- confirm they match the spellings in the data.
dict_st = {
    'Feroz Shah Kotla Ground': 'Feroz Shah Kotla',
    'MA Chidambaram Stadium, Chepauk': 'M. A. Chidambaram Stadium',
    'M. Chinnaswamy Stadium': 'M Chinnaswamy Stadium',
    'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi Intl. Cricket Stadium',
    'ACA-VDCA Stadium': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
}
# `stadium` is `venue` with the listed spellings replaced; other values pass through.
df['stadium'] = df['venue'].replace(dict_st)
df['stadium'].value_counts().sort_values(ascending=False)
# Number of distinct stadium names after the replacement.
df['stadium'].nunique()
#Q9
df.head()
# Appearances per umpire across both umpire columns.
# Series.add(..., fill_value=0) is used instead of `+`: `+` aligns the two
# count tables on their index and yields NaN for any umpire that appears in
# only one of the columns, silently dropping them from the totals below.
ump = (
    df['umpire1'].value_counts()
    .add(df['umpire2'].value_counts(), fill_value=0)
    .astype(int)  # alignment can promote the counts to float; restore integers
)
# Combined appearances of the three umpires with the highest totals.
ump.sort_values(ascending=False).head(3).sum()
#Q10
# Same totals as a two-column table (umpire name, count).
ump.reset_index()
#Q11
# Inner-join df with df1, matching df['id'] to df1['match_id'], so every df1
# row also carries the columns of its df row.
df_j = pd.merge(df, df1, left_on='id', right_on='match_id')
df_j.head()
# Sum of `total_runs` minus the `batsman_runs` from rows where the batsman
# scored exactly 4 or exactly 6. The parentheses keep the expression valid
# across lines (a line ending in a bare `-` is a syntax error).
(
    df_j['total_runs'].sum()
    - df_j.loc[df_j['batsman_runs'].isin([4, 6]), 'batsman_runs'].sum()
)
#Q12
# Per-season sum of `total_runs` minus the per-season `batsman_runs` from rows
# where the batsman scored exactly 4 or exactly 6.
# Written as one parenthesised chain so the `['batsman_runs']` selection stays
# attached to its groupby; fill_value=0 keeps a season that has no such rows
# from turning into NaN during index alignment.
(
    df_j.groupby('season')['total_runs'].sum()
    .sub(
        df_j[df_j['batsman_runs'].isin([4, 6])]
        .groupby('season')['batsman_runs']
        .sum(),
        fill_value=0,
    )
)
#Q 13
# Largest `win_by_runs` value within each season, computed once for both answers.
_season_best = df.groupby('season')['win_by_runs'].max()
# Sum of those per-season maxima.
max_win_margin_runs = _season_best.sum()
max_win_margin_runs
#Q14
# Sorted ascending, so the last row is the season holding the largest maximum.
_season_best.sort_values().tail(1)
#Q 15
#df[(df['team1']=='Mumbai Indians') & (df['team2']=='Chennai Super Kings')]
#Q16
#Q17
# Total `batsman_runs` for each (batsman, match_id) pair, ordered from the
# highest total down, with a fresh 0..n-1 index.
# The chain is parenthesised so it can span lines; without that, `temp_df`
# is bound to the bare GroupBy and the remaining steps are never applied.
temp_df = (
    df1.groupby(['batsman', 'match_id'])['batsman_runs']
    .agg('sum')
    .reset_index()
    .sort_values(by='batsman_runs', ascending=False)
    .reset_index(drop=True)
)
#Q18
scores = temp_df['batsman_runs']
# Rows of temp_df with a total of at least 50 but below 100.
fifty_to_99 = (scores >= 50) & (scores < 100)
# Distinct batsmen having at least one such row.
temp_df.loc[fifty_to_99, 'batsman'].nunique()
#Q19
# Number of temp_df rows with a total of 100 or more.
scores[scores >= 100].count()
#Q20