Matplotlib Notes
Matplotlib Notes
Numerical Data
Categorical Data
In [ ]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
# Load seaborn's 'tips' example dataset and keep its first five rows (df1)
# so the demo plots below stay small.
In [ ]: df = sns.load_dataset('tips')
In [ ]: df.head()
In [ ]: df1 = df.head(5)
# Basic line plot: total_bill on x, tip on y, points joined in row order.
In [ ]: plt.plot(df1['total_bill'], df1['tip'])
plt.show()
In [ ]: # line style
plt.plot(df1['total_bill'], df1['tip'], color='green', linestyle='dotted'
plt.plot(df1['total_bill'], df1['size'], linestyle='dashed')
plt.title('Graph')
plt.xlabel('total bill')
plt.ylabel('tip')
plt.show()
# Rebind df/df1 to the iris dataset; the line-plot cells that follow read df1.
In [ ]: df = sns.load_dataset('iris')
df.head()
df1 = df.head(5)
In [ ]: plt.plot(df1['sepal_length'], df1['sepal_width'],
color='green', linestyle='dashdot')
plt.title('Graph')
plt.xlabel('total bill')
plt.ylabel('tip')
plt.show()
In [ ]: plt.plot(df1['sepal_length'], df1['sepal_width'],
color='green', linestyle='dashdot', linewidth=3, marker='o')
plt.title('Graph')
plt.xlabel('total bill')
plt.ylabel('tip')
plt.show()
In [ ]: # label
# NOTE(review): `year` and `price` are not defined anywhere in the visible
# notes, and the df/df1 loaded here are not used by this cell. The lines that
# built the plotted data appear to be missing from the export - TODO restore.
df = sns.load_dataset('tips')
df1 = df.head(5)
plt.plot(year, price)
# Pin the y-axis range to 0..100000.
plt.ylim(0, 100000)
plt.show()
Scatter
Bivariate Analysis
numerical vs numerical
Use case - finding correlation
# Reload iris; the scatter cells below use the full frame (df), not df1.
In [ ]: df = sns.load_dataset('iris')
df.head()
df1 = df.head(5)
df1
# Plain scatter of sepal_width against sepal_length.
In [ ]: plt.scatter(df['sepal_length'], df['sepal_width'])
plt.show()
In [ ]: # size
# Marker area varies per point: s is petal_width scaled by 40.
plt.scatter(df['sepal_length'], df['sepal_width'],
color='orange', marker='o', s=df['petal_width']*40)
plt.show()
# plt.plot with the 'o' format string draws markers only, giving a
# scatter-like picture without calling plt.scatter.
In [ ]: plt.plot(df['sepal_length'], df['sepal_width'], 'o')
plt.show()
**Bar Chart**
Bivariate Analysis
Numerical vs Categorical
Use case - Aggregate analysis of groups
In [ ]: plt.bar(df['species'], df['sepal_length'],
width=0.5, color=['orange'])
plt.show()
In [ ]: plt.barh(df['species'], df['sepal_length'])
plt.show()
# Summary statistics of the numeric columns, then a histogram of
# sepal_length with matplotlib's default binning.
In [ ]: df.describe()
In [ ]: plt.hist(df['sepal_length'])
plt.show()
In [ ]: # using bin
# Explicit, unevenly spaced bin edges (1, 4.5, 7, 8); bars get a red edge and
# the count axis is logarithmic (log=True).
plt.hist(df['sepal_length'], bins=[1, 4.5,
7, 8], edgecolor='r', log=True)
plt.show()
**Pie chart**
Univariate / Bivariate Analysis
Categorical vs Numerical
Use case - To find contribution on a standard scale
# NOTE(review): this repeats the earlier `plt.plot(year, price)` cell; `year`
# and `price` are not defined in the visible notes and no pie chart is drawn
# in this section. The original pie-chart cell appears to be missing from the
# export - TODO restore.
plt.plot(year, price)
plt.ylim(0, 100000)
plt.show()
# Scatter of sepal_width against sepal_length, then the distinct species values.
In [ ]: plt.scatter(df['sepal_length'], df['sepal_width'])
plt.show()
In [ ]: df['species'].unique()
array(['setosa', 'versicolor', 'virginica'], dtype=object)
Out[ ]:
# Replace the three species names with the integer codes 0/1/2 so the column
# can be passed as the colour value `c` below. This overwrites df['species'].
In [ ]: df['species'] = df['species'].replace(
{'setosa': 0, 'versicolor': 1, 'virginica': 2})
In [ ]: df.tail()
In [ ]: # color
# Colour each point by its species code through the 'winter' colormap and
# add a colorbar for the mapping.
plt.scatter(df['sepal_length'], df['petal_length'],
c=df['species'], cmap='winter')
plt.colorbar()
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\1043740856.py:4: Matplo
tlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh
() is deprecated since 3.5 and will be removed two minor releases later;
please call grid(False) first.
plt.colorbar()
In [ ]: # size
# Same colour-coded scatter on a 10x6 figure (figsize), with axis labels.
plt.figure(figsize=(10, 6))
plt.scatter(df['sepal_length'], df['petal_length'],
c=df['species'], cmap='winter')
plt.xlabel('Sepal Length')
plt.ylabel('petal length')
plt.colorbar()
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\2569873812.py:7: Matplo
tlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh
() is deprecated since 3.5 and will be removed two minor releases later;
please call grid(False) first.
plt.colorbar()
**Annotations**
In [ ]: x = [1, 2, 3, 4]
y = [5, 6, 7, 8]
plt.scatter(x, y)
plt.text(1, 5, 'Point 1')
plt.text(2, 6, 'Point 2')
plt.text(3, 7, 'Point 3')
plt.text(4, 8, 'Point 4')
# Colour-coded scatter with two reference lines: a red vertical line at
# x=6.2 (axvline) and a blue horizontal line at y=3.5 (axhline).
In [ ]: plt.figure(figsize=(10, 6))
plt.scatter(df['sepal_length'], df['petal_length'],
c=df['species'], cmap='winter')
plt.xlabel('Sepal Length')
plt.ylabel('petal length')
plt.axvline(6.2, color='r')
plt.axhline(3.5, color='blue')
plt.colorbar()
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\2047405343.py:8: Matplo
tlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh
() is deprecated since 3.5 and will be removed two minor releases later;
please call grid(False) first.
plt.colorbar()
**Subplot**
In [ ]: fig, ax = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10, 6))
ax[0].scatter(df['sepal_length'], df['petal_length'])
ax[0].set_xlabel('Sepal lenght')
ax1 = fig.add_subplot(2, 2, 1)
ax1.scatter(df['sepal_length'], df['petal_length'])
ax1.set_xlabel('sepal length')
ax2 = fig.add_subplot(2, 2, 2)
ax2.scatter(df['sepal_width'], df['petal_width'])
ax2.set_xlabel('sepal width')
ax3 = fig.add_subplot(2, 2, 3)
ax3.scatter(df['sepal_length'], df['sepal_width'], color='green')
ax3.set_xlabel('sepal len')
ax4 = fig.add_subplot(2, 2, 4)
ax4.hist(df['petal_length'], color='orange',
edgecolor='white', bins=[1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5,
ax4.set_xlabel('petal len')
plt.show()
# Switch the plotting style to 'ggplot' from this cell onward, then draw a
# 3D scatter of three iris columns on an axes created with projection='3d'.
In [ ]: plt.style.use('ggplot')
ax = plt.subplot(projection='3d')
ax.scatter3D(df['sepal_length'], df['sepal_width'],
df['petal_length'], marker='>', s=50)
ax.set_xlabel('sepal len')
ax.set_ylabel('sepal width')
ax.set_zlabel('petal len')
plt.show()
# Minimal 3D scatter from three hand-written coordinate lists.
In [ ]: x = [0, 1, 5]
y = [0, 10, 13]
z = [0, 13, 20]
plt.figure(figsize=(10, 6))
ax = plt.subplot(projection='3d')
ax.scatter(x, y, z, s=70)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
3D Line Plot
# 3D line plot: the same four points are drawn as red markers (scatter) and
# joined by a line (plot) on one 3D axes.
In [ ]: x = [0, 1, 5, 25]
y = [0, 10, 13, 0]
z = [0, 13, 20, 9]
plt.figure(figsize=(10, 6))
ax = plt.subplot(projection='3d')
ax.scatter(x, y, z, s=100, color='red')
ax.plot(x, y, z)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
xx, yy = np.meshgrid(x, y)
yy.shape
(100, 100)
Out[ ]:
In [ ]: z = xx**2 + yy**2
ax = plt.subplot(projection='3d')
plt.colorbar(p)
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\1838783719.py:7: Matplo
tlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh
() is deprecated since 3.5 and will be removed two minor releases later;
please call grid(False) first.
plt.colorbar(p)
**Contour Plots**
In [ ]: fig = plt.figure(figsize=(10, 6))
ax = plt.subplot()
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\3272041914.py:6: Matplo
tlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh
() is deprecated since 3.5 and will be removed two minor releases later;
please call grid(False) first.
fig.colorbar(p)