Lecture Material 8
Lecture Material 8
1 Correlation
[ ]: import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
[ ]: # Correlation: load the 'iris' dataset through seaborn
df=sns.load_dataset('iris')
[ ]: df.head()  # preview the first rows
[ ]: df=df[['sepal_length','sepal_width','petal_length','petal_width']]  # keep only the four measurement columns that df.corr() is applied to later
1
[ ]: <Axes: xlabel='sepal_length', ylabel='petal_length'>
[ ]: phool=sns.load_dataset('iris')  # second copy of iris, kept separate from the column-filtered df
phool.head(5)
[ ]: sns.regplot(x=phool['sepal_length'], y=phool['sepal_width'])  # scatter plot with a fitted linear regression line
2
[ ]: corr=df.corr(method="pearson")  # pairwise Pearson correlation matrix of df's columns
[ ]: sns.heatmap(corr)  # colour-coded view of the correlation matrix
[ ]: <Axes: >
3
[ ]: sns.heatmap(corr, annot= True)  # annot=True writes each coefficient inside its cell
[ ]: <Axes: >
4
[ ]: %pip install Jinja2
[ ]: corr.style.background_gradient(cmap='coolwarm')  # pandas Styler rendering depends on Jinja2, hence the install cell before this one
[ ]: <pandas.io.formats.style.Styler at 0x156de8600e0>
[ ]: sns.pairplot(corr)
[ ]: <seaborn.axisgrid.PairGrid at 0x156d5ef5880>
5
[ ]: penguins=sns.load_dataset('penguins')
penguins.head(-10)  # NOTE(review): a negative n shows every row except the last 10; use head(10) if a short preview was intended
6
332 Gentoo Biscoe 43.5 15.2 213.0
333 Gentoo Biscoe 51.5 16.3 230.0
body_mass_g sex
0 3750.0 Male
1 3800.0 Female
2 3250.0 Female
3 NaN NaN
4 3450.0 Female
.. … …
329 5500.0 Male
330 5000.0 Female
331 5950.0 Male
332 4650.0 Female
333 5500.0 Male
[ ]: sns.pairplot(penguins, hue='species')  # pairwise plots of the numeric columns, coloured by species
[ ]: <seaborn.axisgrid.PairGrid at 0x156e8d31cd0>
7
[ ]: sns.pairplot(penguins, hue='species', diag_kind='hist')  # same grid, with histograms on the diagonal
[ ]: <seaborn.axisgrid.PairGrid at 0x156de85be00>
8
[ ]: # Calculating pearson correlation
from scipy.stats import pearsonr
corr, _ = pearsonr(phool['sepal_length'], phool['petal_length'])
print('Pearsons correlation: %.3f' % corr)