In [11]: import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [4]: df = pd.read_csv("C:\\Users\\admin\\Downloads\\71 Centuries of Virat Kohli.csv")
df
Out[4]: Out/Not Batting Strike Man of the Unnamed:
Score Against Inn. Venue Column1 H/A Date Result Format Captain
Out Order Rate Match 14
24-01-
0 116 Out Australia 6 2 NaN Adelaide Oval Adelaide Away Lost Test No No NaN
2012
New 31-08-
1 103 Out 5 2 NaN M. Chinnaswamy Stadium Bangalore Home Won Test Yes No NaN
Zealand 2012
Vidarbha Cricket Association 13-12-
2 103 Out England 5 2 NaN Nagpur Home Drawn Test No No NaN
Stadium 2012
22-02-
3 107 Out Australia 5 2 NaN M. A. Chidambaram Stadium Chennai Home Won Test No No NaN
2013
18-12-
4 119 Out South Africa 4 1 NaN Wanderers Stadium Johannesburg Away Drawn Test No No NaN
2013
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Vidarbha Cricket Association 05-03-
66 116 Out Australia 3 1 96.67 Nagpur Home Won ODI Yes Yes NaN
Stadium 2019
08-03-
67 123 Out Australia 3 2 129.47 JSCA International Stadium Ranchi Home Lost ODI No Yes NaN
2019
11-08-
68 120 Out West Indies 3 1 96.00 Queen's Park Oval Port of Spain Away Won ODI Yes Yes NaN
2019
14-08-
69 114 Not Out West Indies 3 2 115.15 Queen's Park Oval Port of Spain Away Won ODI Yes Yes NaN
2019
Dubai International Cricket 08-09-
70 122 Not Out Afganistan 1 1 200.00 Dubai Away Won T20I Yes No NaN
Stadium 2022
71 rows × 15 columns
In [7]: df.info()  # per-column dtype and non-null count; shows which columns have missing values
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Score 71 non-null int64
1 Out/Not Out 71 non-null object
2 Against 71 non-null object
3 Batting Order 71 non-null int64
4 Inn. 71 non-null int64
5 Strike Rate 44 non-null float64
6 Venue 71 non-null object
7 Column1 71 non-null object
8 HA 71 non-null category
9 Date 71 non-null object
10 Result 71 non-null object
11 Format 71 non-null object
12 Man of the Match 71 non-null object
13 Captain 71 non-null object
14 Unnamed: 14 0 non-null float64
dtypes: category(1), float64(2), int64(3), object(9)
memory usage: 8.1+ KB
In [9]: df.shape  # (rows, columns) of the loaded frame
Out[9]: (71, 15)
In [10]: df.columns  # column labels of df
Out[10]: Index(['Score', 'Out/Not Out', 'Against', 'Batting Order', 'Inn.',
'Strike Rate', 'Venue', 'Column1', 'HA', 'Date', 'Result', 'Format',
'Man of the Match', 'Captain', 'Unnamed: 14'],
dtype='object')
In [12]: df.describe()  # count, mean, std, min, quartiles and max for the numeric columns
Out[12]: Score Batting Order Inn. Strike Rate Unnamed: 14
count 71.000000 71.000000 71.000000 44.000000 0.0
mean 132.140845 3.521127 1.732394 114.019545 NaN
std 35.911119 0.714326 0.675230 25.257567 NaN
min 100.000000 1.000000 1.000000 84.900000 NaN
25% 107.000000 3.000000 1.000000 96.632500 NaN
50% 119.000000 3.000000 2.000000 108.935000 NaN
75% 139.500000 4.000000 2.000000 120.787500 NaN
max 254.000000 6.000000 4.000000 200.000000 NaN
In [17]: # replace null values in strike rate column with mean of the data
mean = df['Strike Rate'].mean()
mean
Out[17]: 114.01954545454545
In [19]: df['Strike Rate']=df['Strike Rate'].replace(np.nan,mean)
df
Out[19]: Out/Not Batting Man of the Unnamed:
Score Against Inn. Strike Rate Venue Column1 HA Date Result Format Captain
Out Order Match 14
24-01-
0 116 Out Australia 6 2 114.019545 Adelaide Oval Adelaide Away Lost Test No No NaN
2012
New 31-08-
1 103 Out 5 2 114.019545 M. Chinnaswamy Stadium Bangalore Home Won Test Yes No NaN
Zealand 2012
Vidarbha Cricket Association 13-12-
2 103 Out England 5 2 114.019545 Nagpur Home Drawn Test No No NaN
Stadium 2012
22-02-
3 107 Out Australia 5 2 114.019545 M. A. Chidambaram Stadium Chennai Home Won Test No No NaN
2013
18-12-
4 119 Out South Africa 4 1 114.019545 Wanderers Stadium Johannesburg Away Drawn Test No No NaN
2013
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Vidarbha Cricket Association 05-03-
66 116 Out Australia 3 1 96.670000 Nagpur Home Won ODI Yes Yes NaN
Stadium 2019
08-03-
67 123 Out Australia 3 2 129.470000 JSCA International Stadium Ranchi Home Lost ODI No Yes NaN
2019
11-08-
68 120 Out West Indies 3 1 96.000000 Queen's Park Oval Port of Spain Away Won ODI Yes Yes NaN
2019
14-08-
69 114 Not Out West Indies 3 2 115.150000 Queen's Park Oval Port of Spain Away Won ODI Yes Yes NaN
2019
Dubai International Cricket 08-09-
70 122 Not Out Afganistan 1 1 200.000000 Dubai Away Won T20I Yes No NaN
Stadium 2022
71 rows × 15 columns
In [20]: sns.histplot(df['Score'])
Out[20]: <Axes: xlabel='Score', ylabel='Count'>
In [21]: df[df['Score'] == df['Score'].max()]
Out[21]: Out/Not Batting Man of the Unnamed:
Score Against Inn. Strike Rate Venue Column1 HA Date Result Format Captain
Out Order Match 14
South Maharashtra Cricket Association 10-10-
25 254 Not Out 4 1 114.019545 Pune Home Won Test Yes Yes NaN
Africa Stadium 2019
In [22]: sns.countplot(x=df['Format'])
plt.show()
In [23]: plt.figure(figsize=(12,6))
sns.countplot(x=df['Against'])
plt.show()
In [24]: df.head(10)  # first 10 rows, after the strike-rate fill above
Out[24]: Out/Not Batting Man of the Unnamed:
Score Against Inn. Strike Rate Venue Column1 HA Date Result Format Captain
Out Order Match 14
24-01-
0 116 Out Australia 6 2 114.019545 Adelaide Oval Adelaide Away Lost Test No No NaN
2012
New 31-08-
1 103 Out 5 2 114.019545 M. Chinnaswamy Stadium Bangalore Home Won Test Yes No NaN
Zealand 2012
Vidarbha Cricket Association 13-12-
2 103 Out England 5 2 114.019545 Nagpur Home Drawn Test No No NaN
Stadium 2012
22-02-
3 107 Out Australia 5 2 114.019545 M. A. Chidambaram Stadium Chennai Home Won Test No No NaN
2013
18-12-
4 119 Out South Africa 4 1 114.019545 Wanderers Stadium Johannesburg Away Drawn Test No No NaN
2013
New 14-02-
5 105 Not Out 4 4 114.019545 Basin Reserve Wellington Away Drawn Test No No NaN
Zealand 2014
09-12-
6 115 Out Australia 4 2 114.019545 Adelaide Oval Adelaide Away Lost Test No Yes NaN
2014
09-12-
7 141 Out Australia 4 4 114.019545 Adelaide Oval Adelaide Away Lost Test No Yes NaN
2014
26-12-
8 169 Out Australia 4 2 114.019545 Melbourne Cricket Ground Melbourne Away Drawn Test No No NaN
2014
06-01-
9 147 Out Australia 4 2 114.019545 Sydney Cricket Ground Sydney Away Drawn Test No Yes NaN
2015
In [25]: ## Kohli scores more centuries in Adelaide oval stadium which is in Australia
plt.figure(figsize=(12,6))
sns.countplot(x=df['Venue'])
plt.xticks(rotation="vertical")
plt.show()
In [28]: new=df.groupby('HA').size().reset_index().rename(columns={0:"Centuries_count"})
new
Out[28]: HA Centuries_count
0 Away 39
1 Home 32
In [30]: ## Kohli has good record on overseas because he score more centuries in overseas rather than home
plt.pie('Centuries_count',labels='HA',data=new,startangle=90,autopct="%1.0f%%")
plt.show()
In [31]: ## Kohli scores more number of centuries in tests when he became captain
sns.countplot(x=df['Format'],hue=df['Captain'])
plt.show()
In [32]: win=df.groupby(df['Result']).size().reset_index().rename(columns={0:"Counts"})
win
Out[32]: Result Counts
0 Drawn 7
1 Lost 13
2 Lost (D/L) 1
3 Tied 1
4 Won 48
5 Won (D/L) 1
In [37]: plt.pie('Counts',labels='Result',data=win,autopct="%1.0f%%")
plt.show()
In [ ]: