Seaborn
Seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [ ]:
# Print the names of the example datasets that seaborn can load by name.
print(sns.get_dataset_names())
['anagrams', 'anscombe', 'attention', 'brain_networks', 'car_crashes', 'diamonds', 'dots', 'exercise', 'flights', 'fmri', 'gammas', 'geyser', 'iris', 'mpg', 'penguins', 'planets', 'taxis', 'tips', 'titanic']
In [ ]:
# Two-colour palette handed to `palette=` by a later plot.
color = ["red", "blue"]

# Global figure style for the notebook.
# Available styles: whitegrid, darkgrid, white, dark, ticks.
sns.set(style="darkgrid")
In [ ]:
# Load the "tips" example dataset and preview its last five rows.
df = sns.load_dataset("tips")
df.tail(n=5)
In [ ]:
# Summarise the tips frame: column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 total_bill 244 non-null float64
1 tip 244 non-null float64
2 sex 244 non-null category
3 smoker 244 non-null category
4 day 244 non-null category
5 time 244 non-null category
6 size 244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB
In [ ]:
# Number of rows recorded for each day.
df["day"].value_counts()
Sat 87
Out[ ]:
Sun 76
Thur 62
Fri 19
Name: day, dtype: int64
In [ ]:
# Number of rows for smokers versus non-smokers.
df["smoker"].value_counts()
No 151
Out[ ]:
Yes 93
Name: smoker, dtype: int64
In [ ]:
# Number of rows per meal time (Dinner / Lunch).
df["time"].value_counts()
Dinner 176
Out[ ]:
Lunch 68
Name: time, dtype: int64
In [ ]:
# Basic relational plot: tip against total bill.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
)
<seaborn.axisgrid.FacetGrid at 0x23361d547f0>
Out[ ]:
In [ ]:
# Same plot, with point colour mapped to the `sex` column.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="sex",
)
<seaborn.axisgrid.FacetGrid at 0x233624bfac0>
Out[ ]:
In [ ]:
# Colour by smoker status, using the custom `color` list as the palette.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="smoker",
    palette=color,
)
<seaborn.axisgrid.FacetGrid at 0x233625b06a0>
Out[ ]:
In [ ]:
# Colour encodes smoker status; marker style encodes meal time.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="smoker",
    style="time",
)
<seaborn.axisgrid.FacetGrid at 0x233625b0610>
Out[ ]:
In [ ]:
# Frequency of each value in the `size` column.
# Bracket access is required here: `df.size` is the DataFrame attribute, not the column.
df["size"].value_counts()
2 156
Out[ ]:
3 38
4 37
5 5
1 4
6 4
Name: size, dtype: int64
In [ ]:
sns.relplot(x="total_bill", y="tip",hue="smoker",data=df, style = "time", size="size")
# shows the size of the plot as the number of observations in the data set.
<seaborn.axisgrid.FacetGrid at 0x23362698af0>
Out[ ]:
In [ ]:
# Same encodings, with the marker-size range set explicitly through `sizes=(15, 200)`.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="smoker",
    style="time",
    size="size",
    sizes=(15, 200),
)
<seaborn.axisgrid.FacetGrid at 0x233627d1730>
Out[ ]:
In [ ]:
# Line version of the plot (kind="line"), with the `size` column mapped to the size semantic.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    size="size",
    kind="line",
)
<seaborn.axisgrid.FacetGrid at 0x23363a11a90>
Out[ ]:
In [ ]:
# Fix NumPy's global seed so the same random numbers are drawn on every run.
np.random.seed(0)

# 500-step random walk: `time` is 0..499, `value` is the running sum of
# standard-normal draws.
df1 = pd.DataFrame(
    {
        "time": np.arange(500),
        "value": np.cumsum(np.random.randn(500)),
    }
)
In [ ]:
# Preview the first five rows of the random walk.
df1.head(n=5)
0 0 1.764052
1 1 2.164210
2 2 3.142948
3 3 5.383841
4 4 7.251399
In [ ]:
# Line plot of the random walk: value against time.
sns.relplot(
    data=df1,
    x="time",
    y="value",
    kind="line",
)
<seaborn.axisgrid.FacetGrid at 0x23363c156a0>
Out[ ]:
In [ ]:
# Two independent random walks (cumulative sum down the rows), stored as the
# `time` and `value` columns. Here `time` is itself a random walk, so it is
# not monotonically increasing.
df2 = pd.DataFrame(
    np.cumsum(np.random.randn(500, 2), axis=0),
    columns=["time", "value"],
)
In [ ]:
# Axes-level line plot of df2 with lineplot's default settings.
sns.lineplot(
    data=df2,
    x="time",
    y="value",
)
<AxesSubplot:xlabel='time', ylabel='value'>
Out[ ]:
In [ ]:
# Same data with sort=False, so the rows are connected in the order they
# appear in df2 rather than being sorted by x first.
sns.lineplot(
    data=df2,
    x="time",
    y="value",
    sort=False,
)
<AxesSubplot:xlabel='time', ylabel='value'>
Out[ ]:
In [ ]:
# Facet the tips scatter plot: one row per sex, one column per smoker status,
# with points also coloured by sex.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="sex",
    row="sex",
    col="smoker",
)
<seaborn.axisgrid.FacetGrid at 0x23363e38340>
Out[ ]:
In [ ]:
# Load the "fmri" example dataset and preview its first five rows.
fmri = sns.load_dataset("fmri")
fmri.head(n=5)
In [ ]:
# Signal against timepoint as a line plot.
# `ci` is the confidence-interval setting; None turns it off.
sns.relplot(
    x="timepoint",
    y="signal",
    data=fmri,
    kind="line",
    ci=None,
)
<seaborn.axisgrid.FacetGrid at 0x23363d73c10>
Out[ ]:
In [ ]:
# Same line plot, with ci="sd" so the band is drawn from the standard deviation.
sns.relplot(
    x="timepoint",
    y="signal",
    data=fmri,
    kind="line",
    ci="sd",
)
<seaborn.axisgrid.FacetGrid at 0x23364307dc0>
Out[ ]:
In [ ]:
# One line per `event` value, distinguished by colour.
sns.relplot(
    x="timepoint",
    y="signal",
    data=fmri,
    kind="line",
    hue="event",
)
<seaborn.axisgrid.FacetGrid at 0x2336438e5b0>
Out[ ]:
In [ ]:
# Summary statistics (count, mean, std, quartiles, min/max) of the signal column.
fmri["signal"].describe()
count 1064.000000
Out[ ]:
mean 0.003540
std 0.093930
min -0.255486
25% -0.046070
50% -0.013653
75% 0.024293
max 0.564985
Name: signal, dtype: float64
In [ ]:
# Colour encodes `region`; line style encodes `event`.
sns.relplot(
    x="timepoint",
    y="signal",
    data=fmri,
    kind="line",
    hue="region",
    style="event",
)
<seaborn.axisgrid.FacetGrid at 0x2336469c6d0>
Out[ ]:
In [ ]:
# Load the "flights" example dataset and preview its first five rows.
flight = sns.load_dataset("flights")
flight.head(n=5)
In [ ]:
# Passengers per year, one coloured line per month.
sns.relplot(
    x="year",
    y="passengers",
    data=flight,
    kind="line",
    hue="month",
)
<seaborn.axisgrid.FacetGrid at 0x233647fd4c0>
Out[ ]:
In [ ]:
# Passengers per year, one coloured line per month, with a marker at every
# data point.
# `markers=True` only applies to levels of a `style` variable, so `month` is
# mapped to `style` as well as `hue`; without it no markers are drawn.
# `dashes=False` keeps every line solid, so the markers are the only visual
# addition compared with the previous plot.
sns.relplot(
    x="year",
    y="passengers",
    data=flight,
    kind="line",
    hue="month",
    style="month",
    markers=True,
    dashes=False,
)
<seaborn.axisgrid.FacetGrid at 0x23366b8f3d0>
Out[ ]:
In [ ]:
# Colour by region and style by event; markers=True adds a marker per style level.
sns.relplot(
    x="timepoint",
    y="signal",
    data=fmri,
    kind="line",
    hue="region",
    style="event",
    markers=True,
)
<seaborn.axisgrid.FacetGrid at 0x23366c72430>
Out[ ]:
In [ ]:
# Row counts per day, as a reminder before the categorical plots.
df["day"].value_counts()
Sat 87
Out[ ]:
Sun 76
Thur 62
Fri 19
Name: day, dtype: int64
In [ ]:
# catplot is the figure-level interface for categorical data.
# With no `kind` given it draws a strip plot.
sns.catplot(
    data=df,
    x="day",
    y="tip",
)
<seaborn.axisgrid.FacetGrid at 0x23366e2b880>
Out[ ]:
In [ ]:
# Same strip plot with jitter switched off.
sns.catplot(
    data=df,
    x="day",
    y="tip",
    jitter=False,
)
<seaborn.axisgrid.FacetGrid at 0x23366e91eb0>
Out[ ]:
In [ ]:
# Swarm plot of tip per day.
sns.catplot(
    data=df,
    x="day",
    y="tip",
    kind="swarm",
)
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 8.1% of the points cannot be placed; you may want to decrease
the size of the markers or use stripplot.
warnings.warn(msg, UserWarning)
<seaborn.axisgrid.FacetGrid at 0x23366eedd00>
Out[ ]:
In [ ]:
# Swarm plot of tip per day, with points coloured by the `size` column.
sns.catplot(
    data=df,
    x="day",
    y="tip",
    kind="swarm",
    hue="size",
)
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 8.1% of the points cannot be placed; you may want to decrease
the size of the markers or use stripplot.
warnings.warn(msg, UserWarning)
<seaborn.axisgrid.FacetGrid at 0x23368a260a0>
Out[ ]:
In [ ]:
# Tip by smoker status, with the category order given explicitly: No, then Yes.
sns.catplot(
    data=df,
    x="smoker",
    y="tip",
    order=["No", "Yes"],
)
<seaborn.axisgrid.FacetGrid at 0x23368aae1c0>
Out[ ]:
In [ ]:
# Box plot of tip for each day.
sns.catplot(
    data=df,
    x="day",
    y="tip",
    kind="box",
)
<seaborn.axisgrid.FacetGrid at 0x2336927f910>
Out[ ]:
In [ ]:
# Describe the `day` column: count, number of unique values, most frequent value and its frequency.
df["day"].describe()
count 244
Out[ ]:
unique 4
top Sat
freq 87
Name: day, dtype: object
In [ ]:
# Keep only the rows whose day is "Thur", then summarise them.
thur = df.loc[df["day"] == "Thur"]
thur.describe()
In [ ]:
# Box plot of Thursday tips only, split by sex through `hue`.
sns.catplot(
    data=thur,
    x="day",
    y="tip",
    kind="box",
    hue="sex",
)
<seaborn.axisgrid.FacetGrid at 0x2336a695640>
Out[ ]:
In [ ]:
# Thursday box plot again, with dodging of the hue levels disabled and the
# outlier (flier) marker size set to 10.
sns.catplot(
    data=thur,
    x="day",
    y="tip",
    kind="box",
    hue="sex",
    dodge=False,
    fliersize=10,
)
<seaborn.axisgrid.FacetGrid at 0x2336a658880>
Out[ ]:
In [ ]:
# Violin plot of tip for each day.
sns.catplot(
    data=df,
    x="day",
    y="tip",
    kind="violin",
)
<seaborn.axisgrid.FacetGrid at 0x2336a571070>
Out[ ]:
In [ ]:
# Violin plot with the interior drawn using inner="quartile".
sns.catplot(
    data=df,
    x="day",
    y="tip",
    kind="violin",
    inner="quartile",
)
<seaborn.axisgrid.FacetGrid at 0x2336a5d8d00>
Out[ ]:
In [ ]:
# Load the "diamonds" example dataset and show summary statistics for it.
dim = sns.load_dataset("diamonds")
dim.describe()
In [ ]:
# Boxen plot of diamond price for each cut.
sns.catplot(
    data=dim,
    x="cut",
    y="price",
    kind="boxen",
)
<seaborn.axisgrid.FacetGrid at 0x2336a5fd250>
Out[ ]:
In [ ]:
# Boxen plot of tip for each day.
sns.catplot(
    data=df,
    x="day",
    y="tip",
    kind="boxen",
)
<seaborn.axisgrid.FacetGrid at 0x2336cd4c0a0>
Out[ ]:
In [ ]:
# Bar plot of tip by sex.
sns.catplot(
    data=df,
    x="sex",
    y="tip",
    kind="bar",
)
<seaborn.axisgrid.FacetGrid at 0x2336ba22130>
Out[ ]:
In [ ]:
# Load the "titanic" example dataset into `t`.
t = sns.load_dataset("titanic")
In [ ]:
# Display the Titanic frame as the cell output (the rendering elides the middle rows with "...").
t
Out[ ]: survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
2 1 3 female 26.0 0 0 7.9250 S Third woman False NaN Southampton yes True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
886 0 2 male 27.0 0 0 13.0000 S Second man True NaN Southampton no True
887 1 1 female 19.0 0 0 30.0000 S First woman False B Southampton yes True
888 0 3 female NaN 1 2 23.4500 S Third woman False NaN Southampton no False
889 1 1 male 26.0 0 0 30.0000 C First man True C Cherbourg yes True
890 0 3 male 32.0 0 0 7.7500 Q Third man True NaN Queenstown no True
In [ ]:
# Bar plot of `survived` by sex, with one bar per passenger class via `hue`.
sns.catplot(
    data=t,
    x="sex",
    y="survived",
    kind="bar",
    hue="class",
)
<seaborn.axisgrid.FacetGrid at 0x2336ba2ee80>
Out[ ]:
In [ ]: