‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [1]:
#Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
2D Line Plot
In [3]:
# Bivariate Analysis
# For Categorical-Numerical and Numerical-Numerical data.
# It is typically plotted on time-series data.
In [2]:
# Plotting a simple graph
price = [55000, 49000, 62000, 50000]
year = [2015, 2016, 2017, 2018]
# plt.plot(x-axis, y-axis)
plt.plot(year, price)
plt.grid()  # draw grid lines behind the curve
plt.show()
‘2000
0000
58000
56000,
54000
52000
50000
2150 2155 20160 20165 2170 2175 2180
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
at‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [7]
# Plotting by loading a dataset
# Read the table from a CSV addressed by a relative path.
Batsman= pd.read_csv("sharma-kohli.csv")
# Preview the first rows; the plotting cells below index this frame by
# the columns "index", "RG Sharma" and "V Kohli".
Batsman.head()
out[7]:
Index RG Sharma V Kohli
© 2008 404165
1 2008 362246,
2 2010 404307
3 2011 372887
4 2012 433364
In [16]:
# Line plot of the "V Kohli" column against the "index" column.
seasons = Batsman["index"]
kohli_runs = Batsman["V Kohli"]
plt.plot(seasons, kohli_runs)
out [10]:
[
]
1000
so
800
700
«00
s00
400
300
200
2008 2010 iz 2014 216
locathost 8888/notebooks!ard Semester/My Practce/MatplotiiyMatpotibjpynb 261‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In (11):
# Plotting multiple graphs: consecutive plt.plot calls draw on the same axes
plt.plot(Batsman["index"], Batsman["V Kohli"])
plt.plot(Batsman["index"], Batsman["RG Sharma"])
out[11]:
[]
1000
so
800
700
«00
s00
400
300
200
72008 2010 iz 2014 216
In [14]:
# Plotting with title and axis labels
plt.plot(Batsman["index"], Batsman["V Kohli"])
plt.plot(Batsman["index"], Batsman["RG Sharma"])
plt.title("Rohit Sharma V/S Virat Kohli")
plt.xlabel("Season")
plt.ylabel("Runs Scored")
out [14]:
Text(0, 0.5, ‘Runs Scored")
Rohit Sharma W/S Virat Kohli
3000
00
00
700
00
500
Runs Scored
400
300
200
2008 2010 2012 2014 2016
‘Season
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
301187128, 11:27 PM Matploil-Jupyter Notebook
In [15]:
# Changing the colour (named colours are used here; hex colour strings are accepted too)
plt.plot(Batsman["index"], Batsman["V Kohli"], color="green")
plt.plot(Batsman["index"], Batsman["RG Sharma"], color="blue")
plt.title("Rohit Sharma V/S Virat Kohli")
plt.xlabel("Season")
plt.ylabel("Runs Scored")
out [15]:
Text(®, 8.5, ‘Runs Scored")
Rohit Sharma /S Virat Kohli
3000
09
00
709
«00
500
Runs Scored
300
200
2008 2010 2012 214 2016
‘Season
localhost 8888 /notebooks/3rd Semester/My Practce/MatpotibyMatpotibipyno 401187128, 11:27 PM Matploil-Jupyter Notebook
In [16]:
# Other than solid line (dashed, dotted, dashdot)
plt.plot(Batsman["index"], Batsman["V Kohli"], color="green", linestyle="dashed")
plt.plot(Batsman["index"], Batsman["RG Sharma"], color="blue", linestyle="dashed")
plt.title("Rohit Sharma V/S Virat Kohli")
plt.xlabel("Season")
plt.ylabel("Runs Scored")
out [16]:
Text(®, 8.5, ‘Runs Scored’)
Rohit Sharma /S Virat Kohli
Runs Scored
2008 2010 2012 214 2016
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
501187128, 11:27 PM Matploil-Jupyter Notebook
In [26]:
# Line width
# NOTE(review): the linewidth values were cut off in the exported page;
# 3 is a placeholder - confirm against the original notebook.
plt.plot(Batsman["index"], Batsman["V Kohli"], color="green", linestyle="solid", linewidth=3)
plt.plot(Batsman["index"], Batsman["RG Sharma"], color="blue", linestyle="dashed", linewidth=3)
plt.title("Rohit Sharma V/S Virat Kohli")
plt.xlabel("Season")
plt.ylabel("Runs Scored")
out [20]:
Text(®, 0.5, ‘Runs Scored")
Rohit Sharma /S Virat Kohli
3000
Runs Scored
locathost 8888/notebooks!ard Semester/My Practce/MatplotiiyMatpotibjpynb 301187128, 11:27 PM Matploil-Jupyter Notebook
In [28]:
# Marker: draw a point marker at every data point of the first series
plt.plot(Batsman["index"], Batsman["V Kohli"], color="green", marker=".", markersize=10)
plt.plot(Batsman["index"], Batsman["RG Sharma"], color="blue", linestyle="dashed")
plt.title("Rohit Sharma V/S Virat Kohli")
plt.xlabel("Season")
plt.ylabel("Runs Scored")
out [28]:
Text(®, 8.5, ‘Runs Scored")
Rohit Sharma /S Virat Kohli
3000
Runs Scored
localhost 8888 /notebooks/3rd Semester/My Practce/MatpotibyMatpotibipyno 781187128, 11:27 PM Matploil-Jupyter Notebook
In [30]:
# Using legend: each series gets a label, plt.legend() renders them
plt.plot(Batsman["index"], Batsman["V Kohli"], color="green", label="Virat")
plt.plot(Batsman["index"], Batsman["RG Sharma"], color="blue", label="Rohit")
plt.title("Rohit Sharma V/S Virat Kohli")
plt.xlabel("Season")
plt.ylabel("Runs Scored")
plt.legend()
Out [3@]:
_______Rohit Sharma V/S Virat Kohli
— Wat
900 | — Rohit
3000
200
700
600
Runs Scored
500
400
300
200
2008 2010 2012 2014 2016
‘Season
locathost 8888/notebooks!ard Semester/My Practce/MatplotiiyMatpotibjpynb
31187128, 11:27 PM Matploil-Jupyter Notebook
In [31]:
# Limiting axes (mainly used for outliers)
# (Because of outliers a graph can look flat or misleading; trimming the axes helps.)
# The last price is a deliberate outlier so the other points get squashed.
price = [55000, 49000, 62000, 5000000]
year = [2015, 2016, 2017, 2018]
# plt.plot(x-axis, y-axis)
plt.plot(year, price)
plt.show()
2150 20155 2160 2165 2170 2175 20180
price = [55000, 49000, 62000, 5000000]
year = [2015, 2016, 2017, 2018]
# plt.plot(x-axis, y-axis)
plt.plot(year, price)
# Clip the y-axis so the outlier no longer flattens the other points.
plt.ylim(0, 100000)
plt.show()
300000
0000
2000
0000
20000
°
2150 20155 2160 2165 2170 20175 20180
locathost 8888/notebooks!ard Semester/My Practce/MatplotiiyMatpotibjpynb 931‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [33]:
price = [55000, 49000, 62000, 5000000]
year = [2015, 2016, 2017, 2018]
# plt.plot(x-axis, y-axis)
plt.plot(year, price)
# Restrict both axes: y to hide the outlier's magnitude, x to the 2016-2018 window.
plt.ylim(0, 100000)
plt.xlim(2016, 2018)
plt.show()
300000
0000
2000
40000
2000
°
2016 002016 25 2016,502016 752017 002017 25 2017 502017.75 2018.00
In [34]:
# Grid
plt.plot(Batsman["index"], Batsman["V Kohli"], color="green", label="Virat")
plt.plot(Batsman["index"], Batsman["RG Sharma"], color="blue", label="Rohit")
plt.title("Rohit Sharma V/S Virat Kohli")
plt.xlabel("Season")
plt.ylabel("Runs Scored")
plt.legend()
plt.grid()  # overlay grid lines to make values easier to read off
Rohit Sharma V/S Virat Kohli
3000
— Wat
200 | — Rohit
200
700
600
500
Runs Scored
400
300
200
2008 2010 2012 2014 2016
‘Season
locathost 8888/notebooks!ard Semester/My Practce/MatplotiiyMatpotibjpynb
10831‘HR, 11:27 PM Maiplotity - Jupyter Notebook
Scatter Plot
In [3]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [4]:
# Bivariate Analysis
# Numerical -Numerical
# Finding Correlation between two quantities
# (2d plot is the main output of scatter plot)
In [5]:
# Plotting simple scatter plot
# 50 evenly spaced x values; y is a linear trend plus integer noise in [0, 300).
# x must have 50 points so that it lines up with the 50 random offsets.
x = np.linspace(-10, 10, 50)
y = 10 * x + 3 + np.random.randint(0, 300, 50)
plt.scatter(x, y)
out[5]:
300 ° .
Bo . o
200
150
100
00 75 50 25 00 25 so 75 100
locathost 8888/notebooks!ard Semester/My Practce/MatplotiiyMatpotibjpynb
m1‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [6]
# Scatter plot on pandas dataframe
Data = pd.read_csv("batter.csv")
# Keep only the first 50 rows for plotting.
df = Data.head(50)
df
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb 12817187129, 11:27 PM
out[s]:
batter
° V Koh
1 SDhawan
2 DAWamer
3 R@Sharma
4 SKRaina
5 AB4e Villers
6 CHGayle
7 MS Dhoni
8 RVUthappa
9 KO Karthik
40 GGambhir
11 ATRayudu
42 AMRahane
43° KLRehul
14 SRWatson
15 MK Pandey
46 SVSamson
47 KAPolard
18 F duPlessis
49 YK Pathan
20 BB McCullum
21 RRPant
22 ——~PAPatel
23 sc Butter
4 8S ler
25 QdeKock
26 Yuvraj Singh
27 VSehwag
28 SAYadav
2 MViay
30 RAJadeja
31 SPD Smith
32 SEMarsh
33 AMiler
34 JH Kallis
35 WPSaha
36 DRSmith
6634
e244
5983
5281
5536
5181
4907
4078
4954
477
a2t7
4190
407s
3895
3880
3657
3526
2437
3403
3222
2082
2251
2ada
2232
2780
2767
2754
27128
2644
2619
2502
2498
2489
2485
2427
2azr
2385
ag
36.251966
34.982682
44.42987
30314433,
32374269
so.esse46
39658730
9.196860
27522222
26852761
31.007353,
28,896552
30,863636
aor
30.793651
29.731707
29.140496
20.404959,
aaarara7
29.290909
27.711598
4.768293,
2.603175,
39.399933,
31.236955,
31.204598
2410811
27555556,
29707865
25.930693,
2.617021
34,652778
39.507937
36.102041
28.552041
25281250
28.392867
strike_rate
125977972
122.4082
13640157
126.964504
132.595312
148 580842
142.121729
130.931089
126,152279
120.267572
119,665153
124,148148
197.575758
132.799182
134.169209,
117.739858
132.407060
140.457703,
s27.167414
138,088272
126.a4a592
142.550000
116625717
144.259035,
121.192898
130951254
124.784776
148.827059
134009123,
118.614130
122,108346,
124,812808,
130,109775
133,569097
105.936272
124,397745,
132.279534
Maiplotity - Jupyter Notebook
localhost 8888 /notebooks/3rd Semester/My Practce/MatpotibyMatpotibipyn
139317187129, 11:27 PM
batter
37 MAAgarwal
38 SR Tendulkar
39 GJ Maxwell
40 NRana
a R Dravid
42. KS Williamson
“a AJ Finch
44 AC Gilchrist
45 AD Russell
46 JP Duminy
47 MEK Hussey
48 HH Pandya
Up [73hubman Gil
runs
2335,
2334
2320
2181
2174
2105
2092
2089
2039
2029
1977
1972
1900
plt.scatter(d#["avg
out[7]:
avg
2.669903
3.826087
25.494505
27.961538
28.233766
36.293103
24,904762
27.223684
29.985294
39,784314
8.764706
29.878788
2.203390
,d#["strike_rate"])
strike_rate
129.506378
114187867
147.676639
130.053667
113347237
123.315759
123.349057
133.054662
168.234323
120.773610
119,963592
140.256046
122.186495
Maiplotity - Jupyter Notebook
x70 ai
160
150 0
.
uo
.
wofe .
0 oe
uo
.
% 30 3 «0 6
localhost 8888 /notebooks/3rd Semester/My Practce/MatpotibyMatpotibipyno
14st‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [8]:
# Scatter of strike rate against average, with both axes labelled.
avg = df["avg"]
strike_rate = df["strike_rate"]
plt.scatter(avg, strike_rate)
plt.xlabel("Average")
plt.ylabel("Strike_rate")
plt.show()
70 a
160
150 AO
‘
# uo °
g . °
RB} . .
20 .
.
no
:
z ” s o &
nverage
In [9]
# Labeling
plt.scatter(df["avg"], df["strike_rate"])
plt.xlabel("Average")
plt.ylabel("Strike_rate")
plt.show()
o
Buel s
wo
uo
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
1531‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In (16):
# Using Marker
# NOTE(review): the marker value was cut off in the exported page;
# "*" is a placeholder - confirm against the original notebook.
plt.scatter(df["avg"], df["strike_rate"], marker="*")
plt.xlabel("Average")
plt.ylabel("Strike_rate")
plt.show()
70
160
‘Strike_rate
In [11]:
# Legend - location of legend
plt.scatter(df["avg"], df["strike_rate"], label="average")
plt.xlabel("Average")
plt.ylabel("Strike_rate")
plt.legend(loc="upper left")  # pin the legend to the top-left corner
plt.show()
170
emerge
160
180
Eso
g 0
FB} .
0
.
uo
:
B 2 s 0 5
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
16131‘HR, 11:27 PM
In
#Size- LOADING A DATASET
[22]:
# Load seaborn's "tips" example dataset into a DataFrame.
# NOTE(review): load_dataset presumably needs network access or a local cache - confirm.
tips= sns.load_dataset("tips")
# Display the frame; the scatter cells below use its total_bill, tip and size columns.
tips
out[12]:
239
240
2a
2a
243
total_bill
16.99
10.34
21.01
23.68
2459
2003
27.18
281
17.82
18.78
tip
401
1.66
3.50
331
361
5.92
2.00
2.00
475
3.00
Female
Malo
Malo
Malo
Female
Male
Female
Male
Male
Female
244 rows * 7 columns
In [13]:
# Scatter of tip against total bill.
bill, tip = tips["total_bill"], tips["tip"]
plt.scatter(bill, tip)
out[13]:
smoker
No
No
No
No
No
No
Yes
Yes
No
No
aay
Sun
sun
sun
sun
sun
sat
sat
Sat
Sat
Thur
time
Dinner
Dinner
Dinner
Dinner
Dinner
Dinner
Dinner
Dinner
Dinner
Dinner
Maiplotity - Jupyter Notebook
»
10
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
a3‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [14]:
# The "size" column is the number of people the customer came with;
# it is used here as the marker size of every point.
plt.scatter(x=tips["total_bill"], y=tips["tip"], s=tips["size"])
out [14]:
» ji
8
6
4
2
Fd » » a 50
In [15]:
# Multiplying it by 20 to increase the size of the points
plt.scatter(tips["total_bill"], tips["tip"], s=tips["size"] * 20)
plt.xlabel("Total_bill")
plt.ylabel("Tips")
out[15]:
Text(@, 0.5, ‘Tips')
10 e
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb 1831187128, 11:27 PM Maiplotib-Jupyter Notebook
In [16]:
# plt.plot joins the points with a line in row order (contrast with the scatter above).
plt.plot(tips["total_bill"], tips["tip"])
out [16]:
[]
»
In [17]:
# Quicker way to get a scatter-style plot: plt.plot with the "o" format string
bill = tips["total_bill"]
tip = tips["tip"]
plt.plot(bill, tip, "o")
out[17}:
[]
» °
In [18]:
# Note- we should not use plt.plot with "o" because there are many scatter parameters we cannot use with it.
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb 19931‘HR, 11:27 PM Maiplotity - Jupyter Notebook
Bar Plot
In [19]:
#x axis- categories
#y axis- Numerical
#Bivariate analysis
# Use case- Aggregate analysis of groups
# Numerical vs Categorical
In [4]:
# Simple bar chart: one bar per category, height taken from `children`
children = [2, 5, 6, 9, 10]
color = ["red", "blue", "green", "white", "yellow"]
plt.bar(color, children)
out[4]:
»
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
20131‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [5]:
# Color: the `color` keyword sets the fill of every bar
children = [2, 5, 6, 9, 10]
color = ["red", "blue", "green", "white", "yellow"]
plt.bar(color, children, color="Yellow")
out[5]:
»
In (6):
# Horizontal bar chart (barh)
children = [2, 5, 6, 9, 10]
color = ["red", "blue", "green", "white", "yellow"]
plt.barh(color, children)
out[s]:
yellow
white
geen
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
Fy
21131‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [7]:
# Colour
# Load the per-batsman season table used by the bar-chart cells below.
# Note: this rebinds `df`, which earlier cells used for a different frame.
df= pd.read_csv("batsman_season_record.csv")
df
out [7]:
batsman 2015 2016 2017
0 ABde Villiers 513 687 216
1 DAWamer 562 848 641
2 MSDhoni 372 284 290
3 RGShama 482 489 333
4 VKohli 505 973 308
In [9]:
# Bar chart of the "2015" column, one bar per batsman.
names = df["batsman"]
runs_2015 = df["2015"]
plt.bar(names, runs_2015)
out [9]:
8 8 8 8 8
‘AB de Villiers OA Warner MS Dhoni RG-Sharma — V Kohl,
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb773, 14:27 PM Matploil-Jupyter Notebook
In (11):
# Plot multiple graphs - first step: narrow the bars with `width`
plt.bar(df["batsman"], df["2015"], width=.1)
out (11):
500
200
100
°
AB de Villers DAWamner — MSDhoni RGSharma WV Kohli
In [24]:
# Grouped bar chart: three seasons side by side for every batsman.
positions = np.arange(df.shape[0])  # one x slot per row of df
bar_width = 0.2
# Shift the first and last series by one bar width so the three bars do not overlap.
plt.bar(positions - bar_width, df["2015"], width=bar_width, color="Red")
plt.bar(positions, df["2016"], width=bar_width, color="Yellow")
plt.bar(positions + bar_width, df["2017"], width=bar_width, color="Blue")
# Replace the numeric tick positions with the batsman names.
plt.xticks(positions, df["batsman"], rotation=60)
plt.show()
1000
0
ao
0
m0
ol
é ¢ $$ § #?
ef ¢ bi &
localhost 8888 /notebooks/3rd Semester/My Practce/MatpotibyMatpotibipyno
23931‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [25]:
# Stacked bar chart
# Re-display the frame that the stacked bars below are built from.
df
out [25]:
batsman 2015 2016 2017
© ABde Villiers 513 687 216
1 DAWamer 562 848 641
2 MSDhoni 972 284 290
3° RGSharma 482 489 333
4 VKohli 505 973 308
In [29]:
# Stack the three seasons: each layer starts where the layers beneath it end.
batsmen = df["batsman"]
runs_2015, runs_2016, runs_2017 = df["2015"], df["2016"], df["2017"]
plt.bar(batsmen, runs_2015)
plt.bar(batsmen, runs_2016, bottom=runs_2015)
plt.bar(batsmen, runs_2017, bottom=runs_2016 + runs_2015)
out[29]:
‘AB de Villiers DA Warner MS Dhoni RG Sharma V Kohli,
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb 24031‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [30]:
# Stacked bar chart with a legend entry per season.
plt.bar(df["batsman"], df["2015"], label="2015")
plt.bar(df["batsman"], df["2016"], bottom=df["2015"], label="2016")
plt.bar(df["batsman"], df["2017"], bottom=df["2016"] + df["2015"], label="2017")
plt.legend()
plt.show()  # must be called; a bare `plt.show` only echoes the function object
out[3@]:
)
200
5
150
13s
100
07s
050
025
0.00
10 % 0 0 50 ©
In [22]:
# Using bins: pass explicit bin edges to plt.hist
data = [32, 45, 56, 10, 15, 27, 61]
bin_edges = [10, 25, 40, 55, 70]  # bin size is large
plt.hist(data, bins=bin_edges)
out [22]:
(array([2., 2., 1., 2.]),
array([10, 25, 40, 55, 70]),
)
200
17s
150
125
100
075
050
025
0.00
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
26131‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [23]:
# Load the per-match runs table and display it.
df = pd.read_csv("vk.csv")
df
out (23):
match_id batsman_runs
° 12 62
1 7 28
2 20 64
3 a °
4 30 10
136 624 75
137 626 13
138 632 4
139 633 °
140 636 84
141 rows * 2 columns
In [25]:
# Histogram of runs per match in bins of width 10.
# NOTE(review): the frame name is garbled in the exported page; `df` is the
# frame loaded from vk.csv in the preceding cell - confirm against the original.
plt.hist(df["batsman_runs"], bins=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
plt.show()
Bom Bm 8
In [26]:
# Handling Bins
# Load a saved NumPy array from disk and display it.
Arr = np.load("big-array.npy")
Arr
out[26]:
array([33, 39, 37, ..., 33, 38, 39], dtype
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
27131187128, 11:27 PM Maiplotib-Jupyter Notebook
In [27]:
# Dimensions of the array loaded above.
Arr.shape
out 27]:
(11949, )
In [29]:
# Histogram of the whole array with matplotlib's default binning.
plt.hist(Arr)
plt.show()
000
$88
0 2» 2” 40 0 © 7
In [30]:
# Some bins contain so many values that the other bins are not rendered properly
Pie chart
In [32]:
# Univariate/Bi-variate
#Categorical Vs Numerical
#Use case- To find the contribution on a standard scale
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
28131187128, 11:27 PM Matploil-Jupyter Notebook
In [33]:
# Simple data: each number becomes one wedge of the pie
df2 = [23, 45, 100, 20, 49]
plt.pie(x=df2)
plt.show()
In [36]:
# Labels: one label per wedge, in the same order as the values
df2 = [23, 45, 100, 20, 49]
Subjects = ["Eng", "Hindi", "Maths", "Science", "History"]
plt.pie(df2, labels=Subjects)
plt.show()
History
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
29131‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [37]:
# Dataset
df3 = pd.read_csv("gayle-175.csv")
df3
out[37]:
batsman batsman_runs
© AB4de Villiers 3
1 CHGayie 175
2 RRampaul 0
3 SS Tiwary 2
4 TMDilshan 33
5 V Koh 1"
In [40]:
# Pie of runs per batsman, labelled with the batsman names.
plt.pie(df3["batsman_runs"], labels=df3["batsman"])
plt.show()
AB de Villers
cH Gayle
In [42]:
# Showing percentage: autopct formats each wedge's share with one decimal place
plt.pie(df3["batsman_runs"], labels=df3["batsman"], autopct="%0.1f%%")
plt.show()
AB de Villers
CH Gayle
V koh
‘Tw Dilshan
SRAmpay
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb
30031‘HR, 11:27 PM Maiplotity - Jupyter Notebook
In [44]:
# Custom wedge colours.
# NOTE(review): the colours list was cut off in the exported page after
# "blue", "g..."; the remaining entries are placeholders - confirm against
# the original notebook.
wedge_colors = ["blue", "green", "yellow", "pink", "cyan", "brown"]
plt.pie(df3["batsman_runs"], labels=df3["batsman"], autopct="%0.1f%%", colors=wedge_colors)
plt.show()
AB de Villers
cH Gayle
In [47]:
# Exploding: offset selected wedges from the centre of the pie
# NOTE(review): the explode list was cut off in the exported page after
# [0, 0, 0.3, 0; it is padded with zeros to one entry per displayed row of
# df3 (six rows) - confirm against the original notebook.
plt.pie(df3["batsman_runs"], labels=df3["batsman"], autopct="%0.1f%%", explode=[0, 0, 0.3, 0, 0, 0])
plt.show()
AB de Villers
cH Gayle
‘Tw Dilshan
SS Twary
RRampaul
In[ ]:
locahost 8888/notebooksiard Semester?My Practce/MatplotiiyMatpotibipynb 31131