Seaborn Besant
Seaborn Besant
February 5, 2023
[78]: df = pd.read_csv("dm_office_sales.csv")
[79]: df.head()
salary sales
0 91684 372302
1 119679 495660
2 82045 320453
3 92949 377148
4 71280 312802
[80]: # Schema check: lists each column with its non-null count and dtype.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 division 1000 non-null object
1 level of education 1000 non-null object
2 training level 1000 non-null int64
3 work experience 1000 non-null int64
4 salary 1000 non-null int64
5 sales 1000 non-null int64
dtypes: int64(4), object(2)
memory usage: 47.0+ KB
1
Scatterplot
[81]: sns.scatterplot(x='salary',y='sales',data=df)
[82]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df)
2
[83]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,hue='division')
3
[84]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,hue='work experience')
4
[85]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,hue='work␣
↪experience',palette='viridis')
5
[86]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,size='work experience')
6
[87]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,s=200)
7
[88]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,s=200,linewidth=0,alpha=0.2)
8
[89]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,style='level of education')
9
[90]: plt.figure(figsize=(12,8))
# Sometimes its nice to do BOTH hue and style off the same column
sns.scatterplot(x='salary',y='sales',data=df,style='level of␣
↪education',hue='level of education',s=100)
10
[91]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,style='level of␣
↪education',hue='level of education',s=100)
11
[92]: # Rug plot: one tick per observation along the x axis, placed at its salary.
# The y axis carries no data here.
sns.rugplot(x='salary',data=df)
[92]: <AxesSubplot:xlabel='salary'>
12
[93]: sns.rugplot(x='salary',data=df,height=0.5)
[93]: <AxesSubplot:xlabel='salary'>
13
[96]: sns.displot(data=df,x='salary',kde=True)
14
[97]: sns.displot(data=df,x='salary')
15
[98]: sns.histplot(data=df,x='salary')
16
[99]: sns.histplot(data=df,x='salary',bins=10)
17
[100]: sns.histplot(data=df,x='salary',bins=100)
18
[101]: sns.set(style='darkgrid')
sns.histplot(data=df,x='salary',bins=100)
19
[102]: sns.set(style='white')
sns.histplot(data=df,x='salary',bins=100)
20
[103]: sns.displot(data=df,x='salary',bins=20,kde=False,
color='red',edgecolor='black',lw=4,ls='--')
21
[104]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='division',data=df)
22
[105]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='level of education',data=df)
[106]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='level of education',data=df,hue='training level')
23
[107]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='level of education',data=df,hue='training␣
↪level',palette='Set1')
sns.barplot(x='level of education',y='salary',data=df,estimator=np.mean,ci='sd')
24
[109]: plt.figure(figsize=(12,6))
sns.barplot(x='level of education',y='salary',data=df,estimator=np.
↪mean,ci='sd',hue='division')
25
[110]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
[111]: # NOTE(review): rebinds df to a different CSV; earlier cells that expect the
# office-sales columns would presumably fail if re-run after this point — confirm.
df = pd.read_csv("StudentsPerformance.csv")
[112]: df.head()
[113]: # One box per parental education level, showing the math score distribution.
plt.figure(figsize=(12,6))
sns.boxplot(x='parental level of education',y='math score',data=df)
26
[ ]:
27