0% found this document useful (0 votes)
92 views70 pages

Data Visualization

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
92 views70 pages

Data Visualization

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

1

Machine Learning Visualization from Basic to Advanced

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
2

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
3

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
4

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
5

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
6

Barplot:
sns.barplot(x='day', y='total_bill', data=tips, palette='tab10');

Boxplot
sns.boxplot(x='day', y='total_bill', hue='sex', data=tips, linewidth=2.5, palette='Dark2');

Kdeplot
sns.kdeplot(data=df , x='Age', hue='Sex', multiple='stack', palette='tab10');

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
7

Violinplot
sns.violinplot(x="day", y="total_bill", data=tips);

Stripplot
sns.stripplot(x="time", y="total_bill", hue="sex", data=tips);

Scatterplot
sns.scatterplot(x = 'total_bill', y = 'tip', hue = 'sex', data = tips);

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
8

Swarmplot
sns.swarmplot(x="day", y="total_bill", hue="sex", data=tips);

Boxenplot
sns.boxenplot( x='time', y="total_bill", hue='sex', data=tips);

Lineplot
sns.lineplot(x="size",y="total_bill",data=tips,hue='sex',markers=True);

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
9

Jointplot
sns.jointplot(x="chol", y="thalachh",data=heart,kind="hist",hue='sex');

Jointplot
sns.jointplot(x="chol",y="trtbps",data=heart,kind="kde",hue='sex');

JointGrid
g = sns.JointGrid(data=heart, x="age", y="chol", hue="output")
[Link]([Link], [Link]);

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
10

Lmplot
g= sns.lmplot(x="age", y="chol", hue="cp", data=heart)

Relplot
g = sns.relplot(x="age", y="chol", data=heart,hue='sex')

Heatmap
mask = [Link](np.ones_like([Link](), dtype=bool))
[Link]([Link](), mask = mask, annot=True, cmap='Dark2');

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
11

Catplot
sns.catplot(x='smoker', col='sex', kind='count', data=tips, palette="Dark2");

Violinplot
plt.violinplot([wine["alcohol"], wine['fixed acidity'],wine['free sulfur dioxide']],
positions=[1,2,3], showmeans=True);

Distplot
bar = sns.distplot(titanic["Age"],color='Blue',kde=True,bins=25)
[Link](["Skewness: {:.2f}".format(titanic['Age'].skew())])
[Link]("Age Distribution");

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
12

[Link]("Sex")["Age","Fare","Pclass"].mean().plot(kind='bar')

color = [Link]([Link](0, 1, 10))


[Link](['Embarked','Sex'])['Age'].count().plot(kind='bar',
width=.4,color='gold');

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
13

[Link](data=titanic, x="Age", kde=True, bins = 100,color = "red",
facecolor = "#3F7F7F",height = 5, aspect = 3.5);

[Link](tips['total_bill'],color='orange',bins=[10,15,25,30,50],
edgecolor='black',rwidth=0.5);

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
14

ht = pd.pivot_table(data=titanic, index="Pclass", aggfunc="median")


[Link](x=[Link],y=ht['Fare'])
[Link]("Proportion of Fare", fontweight="bold");

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
15

g = [Link](heart, col="fbs", hue="cp")


g.map_dataframe([Link], x="age", y="chol")
g.add_legend();

g = [Link](heart, col="cp")
g = [Link]([Link], "age");

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
16

fig = [Link](heart, hue="output", aspect=4)


[Link]([Link], 'age', shade=True)
oldest = heart['age'].max()
[Link](xlim=(0, oldest))
fig.add_legend()
[Link]()

[Link](titanic, hue='Sex', height=4).map([Link],


'Age').add_legend();

[Link](titanic['Sex'],titanic['Embarked']).plot(kind='bar',stacked=True);
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
17

grid = [Link](titanic, row='Embarked', aspect=1.6)


[Link]([Link], 'Pclass', 'Survived', 'Sex', palette='deep')
grid.add_legend();

[Link](figsize=(10,4))
print("Skewness: %f" % titanic['Fare'].skew())
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
18

print("Kurtosis: %f" % titanic['Fare'].kurt())


[Link](titanic['Fare'],bins=50,hist_kws={"edgecolor": (1,0,0,1)})
[Link]()

a=tips['total_bill']
mean=[Link]()
median=[Link](a)
mode=[Link]()
[Link](a,hist=False)
[Link](mean,color='r',label='mean')
[Link](median,color='b',label='median')
[Link](mode[0],color='g',label='mode')
[Link]()
[Link]()

[Link](a)
[Link](0.85,13,s='Q1',size=13)
[Link](0.85,17,s='Q2',size=13)
[Link](0.85,23,s='Q3',size=13)
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
19

[Link](1.1,16,s='IQR',rotation=0,size=10)
[Link]()

cat = ['Sex','Embarked']
sns.set_theme(rc = {'[Link]': 100, '[Link]': 12,
'[Link]': '#f0eee9', '[Link]': '#fffdfa',
'[Link]': '#e8e6e1'}, font_scale = 1.2)
fig, ax = [Link](5, 2, figsize = (12, 22))
for indx, (column, axes) in list(enumerate(list(zip(cat,
[Link]())))):

[Link](ax = axes, x = titanic[column], hue = titanic['Pclass'],


palette = 'magma', alpha = 0.8)

else:
[axes.set_visible(False) for axes in [Link]()[indx + 1:]]
plt.tight_layout()
[Link]()

num = wine.select_dtypes(include="number")
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
20

fig, ax = [Link](14, 1, figsize = (7, 30))


for indx, (column, axes) in list(enumerate(list(zip(num, [Link]())))):

[Link](ax = axes, y = wine[column].index, x = wine[column],hue =


wine['total sulfur dioxide'],
palette = 'magma', alpha = 0.8)

else:
[axes.set_visible(False) for axes in [Link]()[indx + 1:]]
plt.tight_layout()
[Link]()

num = heart.select_dtypes(include="number")
fig, ax = [Link](3, 2, figsize = (14, 15))
for indx, (column, axes) in list(enumerate(list(zip(num, [Link]())))):

[Link](ax = axes, x = heart[column],hue = heart['HeartDisease'],


palette = 'magma', alpha = 0.8, multiple = 'stack')

legend = axes.get_legend() # [Link] has some issues with legend


Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
21

handles = [Link]
[Link]()
[Link](handles, ['0', '1'], title = 'HeartDisease', loc = 'upper right')
Quantiles = [Link](heart[column], [0, 0.25, 0.50, 0.75, 1])

for q in Quantiles: [Link](x = q, linewidth = 0.5, color = 'r')


plt.tight_layout()
[Link]()

raw_df = raw_df [['name', 'year', 'selling_price', 'km_driven', 'fuel',


'seller_type',
'transmission', 'owner']]
def barw(ax):
for p in [Link]:
val = p.get_width() #height of the bar
x = p.get_x()+ p.get_width() # x- position
y = p.get_y() + p.get_height()/2 #y-position
[Link](round(val,2),(x,y))
[Link](figsize=(10,5))
ax0 = [Link](data = raw_df, y ='owner', order =
raw_df['owner'].value_counts().index)
barw(ax0)
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
22

#Correlation with Response Variable class


X = [Link](['HeartDisease'], axis=1)
y = heart['HeartDisease']

[Link](y).[Link](figsize=(16, 4), rot=90, grid=False)


[Link]('Correlation with heart',
fontsize=25,
color='Blue',
font='Times New Roman')
[Link]()

import matplotlib
[Link]({'[Link]': 12})
corr = [Link]()
mask = [Link](np.ones_like(corr, dtype=bool))
[Link](dpi=100)
[Link]('Correlation Analysis',
fontsize=15,
color='Blue',
font='Lucida Calligraphy')
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
23

[Link](corr,
mask=mask,
annot=True,
lw=0,
linecolor='white',
cmap='viridis',
fmt="0.2f")
[Link](rotation=90)
[Link](rotation=0)
[Link]()

[Link]({'[Link]': 15})
ax=heart['Sex'].value_counts().[Link](explode=[0.1,
0.1],autopct='%1.2f%%',shadow=True);
ax.set_title(label = "Sex", fontsize = 40,color='DarkOrange',font='Lucida Calligraphy');
[Link](labels=['M','F'])
[Link]('off');

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
24

#set configuration for charts


[Link]["[Link]"]=[18 , 7]
[Link]["[Link]"]=15
[Link]["[Link]"]="medium"
[Link]["[Link]"]="medium"

def plot_disribution(data , x ,color,bins ):


mean = data[x].mean()
std = data[x].std()
info=dict(data = data , x = x , color = color)
[Link](1 , 3 , 1 , title =f"Ditstribution of {x} column")
[Link](a=data[x] , bins = bins)
[Link](f"bins of {x}")
[Link](mean , label ="mean" , color ="red")
[Link]("frequency")
[Link](["${\sigma}$ = %d"%std , f"mean = {mean:.2f}"])
[Link](f"histogram of {x} column")
[Link](1 , 3 , 2)
[Link](**info)
[Link](f"{x}")
[Link](f"box plot of {x} column")
[Link](1 , 3 , 3)
[Link](**info)
[Link](f"{x}")
[Link](f"distribution of points in {x} column")
[Link](f"Distribution of {x} column" , fontsize =20 , color="red")
[Link]()

age_bins = [Link](29 , 77+5 , 5)


base_color = sns.color_palette()[4]
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
25

plot_disribution(data = heart , x ="Age" , color = base_color ,


bins=age_bins)

sns.set_style("white")
sns.set_context("poster",font_scale = 1.2)
palette = ["#1d7874","#679289","#f4c095","#ee2e31","#ffb563","#918450","#f85e00",
"#a41623","#9a031e","#d6d6d6","#ffee32","#ffd100","#333533","#202020"]
[Link](figsize=(20,8))
p = [Link](x=titanic["Pclass"][:14],y=titanic["Age"],palette=palette,
saturation=1, edgecolor = "#1c1c1c", linewidth = 2)
[Link].set_title("\nTop Anime Community\n", fontsize=25)
[Link]("Total Member" , fontsize = 20)
[Link]("\nAnime Name" , fontsize = 20)
[Link](rotation = 90)
for container in [Link]:
p.bar_label(container,label_type = "center",padding = 6,size = 25,color =
"black",rotation = 90,
bbox={"boxstyle": "round", "pad": 0.6, "facecolor": "orange", "edgecolor":
"black", "alpha": 1})

[Link](left=True, bottom=True)
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
26

countfeature = ["Survived", "Pclass", "Sex", "SibSp", "Parch", "Embarked"]


countlist = list(enumerate(countfeature))

[Link](figsize = (15,10))
[Link]("Countplot of Categorical Features", fontsize=25,color='Red')
for i in countlist:
[Link](2,3,i[0]+1)
[Link](data = titanic, x = i[1], hue = "Survived",
palette="rainbow")
[Link]("")
[Link](['Not Survived', 'Survived'], loc='upper center', prop={'size':
10})
plt.tight_layout()
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
27

numfeature = ["Age", "Fare"]


enumfeat = list(enumerate(numfeature))

[Link](figsize=(20,7))
[Link]("Distribution and Outliers of Numerical Data",
fontsize=25,color='Blue')
for i in enumfeat:
[Link](1,4,i[0]+1)
[Link](data = titanic[i[1]], palette="Dark2")
[Link](str(i[1]))
for i in enumfeat:
[Link](1,4,i[0]+3)
[Link](data = titanic[i[1]], palette="tab10", bins=15)
[Link](str(i[1]))
plt.tight_layout()
[Link]()

[Link](figsize=(15,7))
[Link]("Probability Distribution of numerical columns according to number of Survived",
fontsize = 25,color="Red")
for i in enumfeat:
[Link](1,2,i[0]+1)
[Link](data=titanic, x=i[1], hue="Survived")
plt.tight_layout()
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
28

[Link](figsize=(12,8))
data_4 = [Link]()["Fire Alarm"].sort_values(ascending=False)
indices = data_4.index
labels = []
corr = []
for i in range(1, len(indices)):
[Link](indices[i])
[Link](data_4[i])
[Link](x=corr, y=labels, palette='mako')
[Link]('Correlation coefficient between different features and Fire Alarm ')
[Link]()

education=df['parental level of education'].value_counts()


sns.set_palette('bright')
[Link](figsize=(10,7))
labels=[Link]
sizes=[Link]
[Link](sizes,labels=labels,autopct='%1.1f%%',
shadow=True,startangle=90)
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
29

import matplotlib
[Link]({'[Link]': 15})
[Link](figsize=(18,9))
cols_out = ["RestingBP", "Cholesterol", "MaxHR", "Age",'ChestPainType']
[Link](heart[cols_out], hue="ChestPainType", diag_kind="hist",
palette="tab10") # tab10
[Link]();

fig, ax = [Link](figsize = (18,8))


[Link](x= wine["quality"])
[Link]("Wine Quality Count",fontsize=20,color='#1a4441',font='Comic Sans Ms',pad=20)
[Link]("Quality ",fontsize=15,color='#1a4441',font='Comic Sans Ms')
[Link]("Count",fontsize=15,color='#1a4441',font='Comic Sans Ms');

total = len(wine)
for p in [Link]:
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
30

percentage = f'{100 * p.get_height() / total:.1f}%\n'


x = p.get_x() + p.get_width() / 2
y = p.get_height()
[Link](percentage, (x, y), ha='center', va='center')

print("Skewly distributed columns by skewness value:\n")


skew_df = [Link]().sort_values()

fig,ax = [Link](figsize=(25,7))
[Link](x = skew_df[(skew_df<2)& (skew_df>-2)].index, height =
skew_df[(skew_df<2)& (skew_df>-2)], color = "g", label= "Semi-normal distribition")
[Link](x = skew_df[skew_df>2].index, height = skew_df[skew_df>2], color
= "r", label = "Positively skewed features")
[Link](x = skew_df[skew_df<-2].index, height = skew_df[skew_df<-2], color
= "b", label = "Negatively skewed features")
[Link]()
[Link]("Skewness of numerical columns",fontsize = 20)
ax.tick_params(labelrotation=90);

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
31

from wordcloud import WordCloud, STOPWORDS


text = " ".join(Company for Company in df["Cuisines"])
#font = "[Link]"
word_cloud = WordCloud(width = 2300,
height = 800,
colormap = 'jet',
background_color = "white").generate(text)
[Link](figsize = (50, 8))
[Link](word_cloud, interpolation = "gaussian")
[Link]("off")
[Link]()

[Link](figsize=(10,5))
#plotting the values for people who have heart disease
[Link]([Link][[Link]==1],
[Link][[Link]==1],
c="tomato")
#plotting the values for people who doesn't have heart disease
[Link]([Link][[Link]==0],
[Link][[Link]==0],
c="lightgreen")
[Link]("Heart Disease w.r.t Age and Max Heart Rate")
[Link]("Age")
[Link](["Disease", "No Disease"])
[Link]("Max Heart Rate");
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
32

df2=[Link]('Type Of Restaurant')['Cost Per Head'].mean().sort_values(ascending=False)
[Link](figsize = (15,6))
color = [('b' if i < 500 else 'r') for i in df2]
[Link](color=color);

import math
cont_features=['fixed acidity', 'volatile acidity', 'citric acid',
'free sulfur dioxide','pH', 'alcohol']

y=3
x=[Link](len(cont_features)/y)

[Link](x,y,figsize=(15,10))
for i in range(1,len(cont_features)+1) :
[Link](x,y,i)
[Link](data=wine,y=cont_features[i-1],x='quality',
palette=['#e60000','#FAFAD2','#660000','#DEB078','#FF8C00','black'])
plt.tight_layout()
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
33

[Link]([Link](columns=['quality']),kind="reg",diag_kind='kde',
plot_kws={'line_kws':{'color':'red'}},corner=True)
plt.tight_layout()
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
34

features = ['fixed acidity','citric acid','volatile acidity']


fig, axs = [Link](1,3, figsize=(16,6))
for f, ax in zip(features,[Link]()):
[Link](wine, x=f, ax=ax)
[Link]()

corr_mat_train = [Link](columns = ['quality'], axis = 1).corr()


threshold = 0.3
corr_threshold_train = corr_mat_train[(corr_mat_train > threshold) | (corr_mat_train
< -threshold)]
[Link](figsize = (8, 6))
[Link](corr_threshold_train, annot = True, cmap = 'seismic', fmt = ".2f",
linewidths = 0.5, cbar_kws={'shrink': .5},annot_kws={'size':
8}).set_title('Correlations Among Features (in Train)');

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
35

import [Link] as plt


import seaborn as sns
%matplotlib inline

def missing_values(data, thresh = 20, color = 'black', edgecolor = 'black',


height = 3, width = 15):

[Link](figsize = (width, height))


percentage = ([Link]().mean()) * 100
percentage.sort_values(ascending = False).[Link](color = color,
edgecolor = edgecolor)
[Link](y = thresh, color = 'r', linestyle = '-')

[Link]('Missing values percentage per column', fontsize = 20, weight =


'bold' )

[Link](len([Link]().sum()/len(data))/1.7, thresh + 12.5,
f'Columns with more than {thresh}% missing values', fontsize = 12, color = 'crimson',
ha = 'left' ,va = 'top')
[Link](len([Link]().sum()/len(data))/1.7, thresh - 5,
f'Columns with less than {thresh}% missing values', fontsize=12, color='green',
ha = 'left' ,va = 'top')
[Link]('Columns', size = 15, weight = 'bold')
[Link]('Missing values percentage')
[Link](weight = 'bold')

return [Link]()

missing_values(titanic, thresh = 10, color = sns.color_palette('Reds',15))

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
36

# Pie chart
labels = df['listed_in(type)'].value_counts().index
sizes = df['listed_in(type)'].value_counts().values
# only "explode" the 2nd slice (i.e. 'Hogs')
explode = (0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1)
fig1, ax1 = [Link](figsize = (8, 8))
[Link](sizes, labels = labels,
shadow = True, startangle = 90, explode = explode, rotatelabels = True)
centre_circle = [Link]((0, 0), 0.70,fc = 'white')
fig = [Link]()
[Link]().add_artist(centre_circle)
[Link]('equal')
plt.tight_layout()
[Link]()

[Link]['[Link]'] = (18, 5)
Y = [Link](df['rate'], df['book_table'])
[Link]([Link](1).astype(float), axis = 0).plot(kind = 'bar', stacked =
True,color=['red','yellow'])
[Link]('table booking vs Normal rate', fontweight = 30, fontsize = 20)
[Link](loc="upper right")
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
37

# check distribution of Na_to_k (based on Drug_Type)


%matplotlib inline
[Link]('seaborn-notebook')
for i, label in enumerate(df.Drug_Type.unique().tolist()):
[Link]([Link][df2['Drug_Type'] == i+1, 'Na_to_K'],
label=label, shade=True)
[Link]('1. KDE of Na_to_k (based on Drug_Type)', fontdict=font,
pad=15)
[Link]([Link](0,46,2), rotation=90)
[Link]([0,46])
[Link]()
[Link]()

# draw countplot and pie plot of categorical data


for col in categorical:
fig, axes = [Link](1,2,figsize=(10,4))
# count of col (countplot)
[Link](data=df2, x=col, ax=axes[0])
for container in axes[0].containers:
axes[0].bar_label(container)
# count of col (pie chart)
slices = df2[col].value_counts().values
activities = [f"{i} ({var})" for i, var in zip(df2[col].value_counts().index,
df[col].value_counts().index)]
axes[1].pie(slices, labels=activities, shadow=True, autopct='%1.1f%%')
[Link](f'Count of Unique Value in {col}', y=1.09, **font)
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
38

# count of purchased based on Gender


%matplotlib inline
for col in ['Sex','BP','Cholesterol']:
ax = [Link](data=df, x='Drug_Type', hue=col)
for container in [Link]:
ax.bar_label(container)
[Link](f'Count of Drug (based on {col})', fontdict=font, pad=15)
[Link]()

# Mean of Age and Na_to_K based on each feature


for col in ['Sex', 'BP', 'Cholesterol']:
fig , ax= [Link](1,2, figsize=(10,4))
gp = [Link]([col])['Na_to_K'].mean().to_frame().reset_index()
[Link](data=gp, x=col, y='Na_to_K', ax=ax[0])
for container in ax[0].containers:
ax[0].bar_label(container)
ax[0].set_title(f'Mean of Na_to_K (based on {col})', y=1.09, **font)
[Link](data=df, x=col, y='Na_to_K', ax=ax[1])
ax[1].set_title(f'Boxplot of {col})', y=1.09, **font)
[Link]()
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
39

# use scatter plot for numerics feature (Age and Na_to_K)


fig, ax = [Link](2,2,figsize=(14,8))
for i, col in enumerate(['Sex', 'BP', 'Cholesterol', 'Drug_Type']):
[Link](data=df, x='Age', y='Na_to_K', hue=col, ax=ax[i//2, i%2],
palette='turbo')
ax[i//2, i%2].set_title(f'Na_to_K vs Age (based on {col}', y=1.09, **font)
ax[i//2, i%2].legend(loc='upper center', bbox_to_anchor=(1.2, 0.6),
fancybox=True, shadow=True)

fig.tight_layout()
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
40

fig, ax = [Link](3,2,figsize=(14,12))
[Link](data=df, x='Cholesterol', y='Na_to_K', hue='Drug_Type',
ax=ax[0,0])
[Link](data=df, x='Cholesterol', y='Age', hue='Drug_Type',
ax=ax[0,1])
[Link](data=df, x='BP', y='Na_to_K', hue='Drug_Type', ax=ax[1,0])
[Link](data=df, x='BP', y='Age', hue='Drug_Type', ax=ax[1,1])
[Link](data=df, x='Sex', y='Na_to_K', hue='Drug_Type', ax=ax[2,0])
[Link](data=df, x='Sex', y='Age', hue='Drug_Type', ax=ax[2,1])
ax[0,0].set_title('Swarmplot of Drug Type vs Na_to_K',y=1.05, **font)
ax[0,1].set_title('Swarmplot of Drug Type vs Age',y=1.05, **font)
plt.tight_layout()
[Link]()

# Mean of Income and CCAvg based on each feature


for i, col in enumerate(['Income', 'CCAvg','Mortgage']):
print('='*30, f"Mean of {col} in each categorical feature", '='*30)
for j, cat in enumerate(discrete_cols2):
fig , ax= [Link](1,2, figsize=(10,4))
gp = [Link]([cat])[col].mean().to_frame().reset_index()
[Link](data=gp, x=cat, y=col, ax=ax[0])
for container in ax[0].containers:
ax[0].bar_label(container)
ax[0].set_title(f'Mean of {col} (based on {cat})', y=1.09, **FONT)
[Link](data=df, x=cat, y=col, ax=ax[1])
ax[1].set_title(f'Boxplot of {cat} (Fig {i+11}-{j+1})', y=1.09,
**FONT)
[Link]()
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
41

continuous_cols = ['Age','Experience','CCAvg','Mortgage']

for i, col in enumerate(continuous_cols):


fig = px.scatter_3d(
data_frame= df,
x=[Link],
y=df[col],
z=df['Personal Loan'],
color=df['Personal Loan'].astype(str),
color_discrete_map={'1':'orange', '0':'red'},
template='ggplot2',
hover_name='Age',
# hover_data=
opacity=0.6,
# symbol='Transmission',
# symbol_map=
# log_x=True,
# log_z=True,
height=700,
title=f'3D scatter of features based on Personal Loan (Fig {i+1})')
fig.update_layout(
title_text="Box Plot Styling Outliers",
title_font=dict(color='orange', family='newtimeroman', size=25),
title_x=0.45,
paper_bgcolor='#145A32',
# plot_bgcolor='#DAF7A6',
font=dict(color='#DAF7A6', family='newtimeroman', size=16),
)
[Link](fig)

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
42

df["Type Of Restaurant"].value_counts()[:10].[Link](figsize = (10, 10),


autopct = '%1.0f%%')
[Link]("Pie Chart")
[Link](rotation = 90)
[Link]()

df['city_1'].value_counts().nlargest(n=20, keep='first').[Link](figsize = (10, 10),


autopct = '%1.0f%%')
[Link]("Pie Chart")
[Link](rotation = 90)
[Link]()
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
43

[Link](figsize=(10, 5))
sns.set_context("paper")

kdeplt = [Link](
data=heart_dft_chol_n0,
x="Cholesterol",
hue="Sex",
palette=sex_color,
alpha=0.7,
lw=2,
)
kdeplt.set_title("Cholesterol values distribution\n Male VS Female", fontsize=12)
kdeplt.set_xlabel("Cholesterol", fontsize=12)
[Link](x=Chol_mean_f, color="#c90076", ls="--", lw=1.3)
[Link](x=Chol_mean_m, color="#2986cc", ls="--", lw=1.3)
[Link](108, 0.00612, "Mean Cholesterol / Male", fontsize=10, color="#2986cc")
[Link](260, 0.006, "Mean Cholesterol / Female", fontsize=10, color="#c90076")
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
44

heart_df_fg = [Link](
data=heart_dft_chol_n0,
col="Sex",
hue="Sex",
row="HeartDisease",
height=4,
aspect=1.3,
palette=sex_color,
col_order=["Male", "Female"],
)
heart_df_fg.map_dataframe([Link], "Age", "MaxHR")
[Link]()

mean_SalePrice = usa_housing_df[["SalePrice"]].mean().squeeze()
median_SalePrice = usa_housing_df[["SalePrice"]].median().squeeze()

[Link](figsize=(10, 5))
sns.set_context("paper")

histplt = [Link](
data=usa_housing_df,
x="SalePrice",
color="#4f758f",
bins=60,
alpha=0.5,
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
45

lw=2,
)
histplt.set_title("SalePrice Distribution", fontsize=12)
histplt.set_xlabel("SalePrice", fontsize=12)

[Link](x=mean_SalePrice, color="#14967f", ls="--", lw=1.5)


[Link](x=median_SalePrice, color="#9b0f33", ls="--", lw=1.5)
[Link](mean_SalePrice + 5000, 175, "Mean SalePrice", fontsize=9,
color="#14967f")
[Link](
median_SalePrice - 115000, 175, "Median SalePrice", fontsize=9,
color="#9b0f33"
)
[Link].set_major_formatter([Link]())
[Link](0, 200)
[Link]()

df2 = titanic[['Survived','Pclass','Sex','Embarked','SibSp','Parch',"Age"]]

fig, axes = [Link](1, 2)


fig.set_figheight(10)
fig.set_figwidth(20)
for i,col in enumerate(df2.select_dtypes('object')):
[Link](x="Age", y=col, data=df2,
whis=[0, 100], width=.6,ax=axes[i])

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
46

df2 = titanic[['Survived','Pclass','Sex','Embarked','SibSp','Parch',"Age"]]
#create the subplots
f, (ax_box, ax_hist) = [Link](2, sharex=True,
gridspec_kw={"height_ratios": (.15, .85)})
#title
ax_box.title.set_text('Price countplot and Boxplot')
# assigning a graph to each ax
[Link](df2["Age"], orient="h" ,ax=ax_box)
[Link](data=df2, x="Age", ax=ax_hist)
# Remove x axis name for the boxplot
ax_box.set(xlabel='')
[Link]()

NUMERICAL = wine[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',


'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
'pH', 'sulphates', 'alcohol']]
fig, axes = [Link](2, 4)
fig.set_figheight(12)
fig.set_figwidth(16)
for i,col in enumerate(NUMERICAL):
[Link](wine[col],ax=axes[(i // 4) -1 ,(i % 4)], kde = True)
axes[(i // 4) -1 ,(i % 4)].axvline(wine[col].mean(), color='k', linestyle='dashed',
linewidth=1)

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
47

fig, axes = [Link](1, 3)


fig.set_figheight(7)
fig.set_figwidth(20)
[Link](data=titanic, x="Age", y="Fare", hue="Survived", size="Survived",
ax=axes[0])
[Link](data=titanic, x="Age", y="Fare", hue="Pclass", size="Pclass",
ax=axes[1])
[Link](data=titanic, x="Age", y="Fare", hue="SibSp", size="SibSp",
ax=axes[2]);

color = list([Link](12, 'grey'))


color[2], color[10] = 'orange', 'orange'
[Link]('month').mean().active_power.plot(kind='bar',
title='Average of Active Power of each Months', color=color, rot=0)
[Link]('Active Power [kW]');

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
48

[Link]('Actual Power vs Theoretical Power')


[Link](df.theor_power, df.active_power, 'o', markersize= 1)
[Link]('both')
[Link]('Theoretcial Power (kW)')
[Link]('Actual Power (kW)')
[Link]([0,3650], [0,3650], '-', c= 'k')
[Link]()

group_hours = df_demand['load'].groupby([Link](freq='D', how='mean'))


fig, axs = [Link](1,1, figsize=(12,5))
year_demands = [Link]()
for name, group in group_hours:
year_demands[[Link]] = [Link]([Link])
year_demands.plot(ax=axs)
axs.set_xlabel('Hour of the day')
axs.set_ylabel('Energy Demanded MWh')
axs.set_title('Mean yearly energy demand by hour of the day ');

plot , ax = [Link](1 , 3 , figsize=(14,4))


[Link](data = train_data.loc[train_data["Pclass"]==1] , x = "Age" , hue = "Survived",
binwidth=5,ax = ax[0],palette = sns.color_palette(["yellow" , "green"]),
multiple = "stack").set_title("1-Pclass")
[Link](data = train_data.loc[train_data["Pclass"]==2] , x = "Age" , hue = "Survived",
binwidth=5,ax = ax[1],palette = sns.color_palette(["yellow" , "green"]),
multiple = "stack").set_title("2-Pclass")
[Link](data = train_data.loc[train_data["Pclass"]==3] , x = "Age" , hue = "Survived",
binwidth=5,ax = ax[2],palette = sns.color_palette(["yellow" , "green"]),
multiple = "stack").set_title("3-Pclass")
[Link]()
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
49

#Plotting the distributions of the numerical variables


color_plot =
['#de972c','#74c91e','#1681de','#e069f5','#f54545','#f0ea46','#7950cc']

fig,ax = [Link](4,2,figsize=(20,20))
[Link](df['HeartDisease'],color=[Link](color_plot), ax=ax[0][0],
shade=True)
[Link](df['Oldpeak'],color=[Link](color_plot), ax=ax[0][1],
shade=True)
[Link](df['Age'],color=[Link](color_plot), ax=ax[1][0],
shade=True)
[Link](df['FastingBS'],color=[Link](color_plot), ax=ax[1][1],
shade=True)
[Link](df['RestingBP'],color=[Link](color_plot),
ax=ax[2][0],shade=True)
[Link](df['Cholesterol'],color=[Link](color_plot), ax=ax[2][1],
shade=True)
[Link](df['MaxHR'],color=[Link](color_plot),
ax=ax[3][0],shade=True)
[Link](ax[3][1])

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
50

hm= [Link]('id', axis =1)


mask = np.zeros_like([Link](), dtype=[Link])
mask[np.triu_indices_from(mask)]= True

[Link]('Correlation', size = 20, weight='bold')

ax = [Link]([Link](), linewidths = 0.9, linecolor = 'white', cbar = True,mask


=mask, cmap=heatmap)

[Link]('Low Correlation',
fontsize=10,fontweight='bold',
xy=(1.3, 3.5), xycoords='data',
xytext=(0.6, 0.95), textcoords='axes fraction',
arrowprops=dict(
facecolor=heatmap[0], shrink=0.025,
connectionstyle='arc3, rad=0.50'),
horizontalalignment='left', verticalalignment='top'
)

[Link]('High Correlation',
fontsize=10,fontweight='bold',
xy=(3.3, 7.5), xycoords='data',
xytext=(0.8, 0.4), textcoords='axes fraction',
arrowprops=dict(
facecolor=heatmap[0], shrink=0.025,
connectionstyle='arc3, rad=-0.6'),
horizontalalignment='left', verticalalignment='top'
)
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
51

fig = [Link]( figsize=(8, 6))


ax = fig.add_axes([0,0,1,1])
[Link](ax=ax, data=df, x='TARGET', y='LDH')#,flierprops=dict(marker='o', markersize=6),fliersize=2)

[Link](y=550,color='b')
[Link](y=650,color='orange')
[Link](y=1200,color='g')

[Link]('Target Variable', size = 20, weight='bold')

song_popularity = df['song_popularity'].map({0:'UnPopular', 1:'Popular'})

a = [Link](data = df, x =song_popularity,palette=theme)


plt.tick_params(axis="x", colors=theme[0],labelsize=15)

for p in [Link]:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()
[Link](f'{height/[Link][0]*100} %', (x + width/2, y + height*1.02), ha='center')

[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
52

cont = ['song_duration_ms', 'acousticness', 'danceability', 'energy',


'instrumentalness', 'liveness', 'loudness',
'speechiness', 'tempo', 'audio_valence']
cat = [ 'key', 'audio_mode', 'time_signature']

a = 4 # number of rows
b = 3 # number of columns
c = 1 # initialize plot counter

[Link](figsize= (18,18))

for i in cont:
[Link]('Distribution of Features', size = 20, weight='bold')
[Link](a, b, c)
A=[Link](data= df, x=i,hue=song_popularity,palette=theme[:-2],
linewidth = 1.3,shade=True, alpha=0.35)
[Link](i)
[Link](" ")
c=c+1

#plotting
fig, (ax1, ax2) = [Link](1, 2, figsize=(18, 9))
Syed Afroz Ali
Data Scientist (Kaggle Grandmaster)
53

[Link](' Highest and Lowest Correlation ', size = 20, weight='bold')


axs = [ax1, ax2]

#kdeplot
[Link](data=df, y='energy', x='acousticness', ax=ax1, color=heatmap[0])
ax1.set_title('Energy vs Acousticness', size = 14, weight='bold', pad=20)

#kdeplot
[Link](data=df, y='energy', x='loudness', ax=ax2, color=heatmap[4])
ax2.set_title('Energy vs Loudness', size = 14, weight='bold', pad=20);

#Parameters for Plots


[Link]['[Link]'] = (10,6)
[Link]['[Link]'] = 'black'
[Link]['[Link]'] = 1.5
[Link]['[Link]'] = True
[Link]['[Link]'] = False
[Link]['[Link]'] = False
[Link]["[Link]"] = "monospace";

#Colors for charts


colors = ["#e9d9c8","#cca383","#070c23","#f82d06","#e8c195","#cd7551",
"#a49995","#a3a49c","#6c7470"]
[Link](sns.color_palette(colors))

#plot
A = [Link](train_df['case_num'],
color=colors[1],
edgecolor='white',
linewidth=1.5,
saturation=1.5)

#Patch
patch_h = []
for patch in [Link]:
reading = patch.get_height()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
54

patch_h.append(reading)

idx_tallest = [Link](patch_h)
[Link][idx_tallest].set_facecolor(colors[3])

#Lables
[Link]('Count', weight='semibold', fontname = 'Georgia')
[Link]('Cases', weight='semibold', fontname = 'Georgia')
[Link]('Number of Cases', fontname = 'Georgia', weight='bold', size = 18, color = colors
[2])
A.bar_label([Link][0], label_type='edge')

[Link]()

import matplotlib as mlb


import [Link] as mpimg
from [Link] import AnnotationBbox, OffsetImage

#plotting
fig, (ax1, ax2) = [Link](1, 2, figsize=(16, 11))
[Link](' Potablity of Water Quality ', size = 26, color = theme[3], weight='bold')
axs = [ax1, ax2]

#Count-Plot
[Link](water_df['Potability'], ax=ax1, palette='husl')
ax1.set_title('Count Plot', size = 14, color = theme[3], weight='bold', pad=20)

#Data-2
names = ["Not Potable", "Potable"]
values = water_df['Potability'].value_counts()
colors = ["#E68193","#459E97"]
explode = (0.01, 0.01)

#Doughnut-chart

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
55

[Link](x= values,labels =names, colors=colors,autopct='%1.0f%%', pctdistance=


0.8,explode=explode)

#draw-circle
centre_circle = [Link]((0,0),0.62,fc='white')
ax2.add_artist(centre_circle)
[Link]('equal')

ax2.set_title('Pie Chart', size = 14, color = theme[3], weight='bold', pad=20)

#Image

path = [Link]('../input/water/water [Link]')


imagebox = OffsetImage(path , zoom=0.3)
xy = (0.5, 0.7)
ab = AnnotationBbox(imagebox, xy, frameon=False, pad=1, xybox=(0.02, 0.05))
ax2.add_artist(ab)

plt.subplots_adjust(left=None, bottom=None, right=None, top=0.8, wspace=0.4, hspace=None);

fig, ax = [Link](ncols=3, figsize=(18,6))

colors = [['#ADEFD1FF', '#00203FFF'], ['#97BC62FF', '#2C5F2D'], ['#F5C7B8FF', '#FFA177FF']]
explode = [0, 0.2]
columns = ['Parking', 'Warehouse', 'Elevator']
for i in range(3):
data = df[columns[i]].value_counts()
ax[i].pie(data, labels=[Link], explode=explode, colors=colors[i], shadow
=True)
ax[i].legend(labels=[Link], fontsize='large')
ax[i].set_title('{} distribution'.format(columns[i]))

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
56

def plot_hist(feature):
fig, ax = [Link](2, 1, figsize=(17, 12))
[Link](data = titanic[feature], kde = True, ax =
ax[0],color="Brown")
ax[0].axvline(x = titanic[feature].mean(), color = 'r', linestyle = '--',
linewidth = 2, label = 'Mean: {}'.format(round(titanic[feature].mean(), 3)))
ax[0].axvline(x = titanic[feature].median(), color = 'orange', linewidth =
2, label = 'Median: {}'.format(round(titanic[feature].median(), 3)))
ax[0].axvline(x = [Link](titanic[feature]), color = 'yellow',
linewidth = 2, label = 'Mode: {}'.format([Link](titanic[feature])))
ax[0].legend()

[Link](x = titanic[feature], ax = ax[1],color="Brown")


[Link]()
plot_hist('Age')

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
57

[Link](figsize=(12,5))
[Link]('top categories')
[Link]('item_price')
[Link]('Embarked')['Fare'].mean().sort_values(ascending=False)[0:15].plot(kind='line', marker='*', color='red', ms=10)
[Link]('Embarked')['Fare'].mean().sort_values(ascending=False)[0:15].plot(kind='bar',color=sns.color_palette("inferno_r", 7))
[Link]()

import [Link] as plt


import seaborn as sns

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
58

[Link](x=[Link][:,0], y=[Link][:,1], hue=y)


[Link]("KD65", ([Link][64,0], [Link][64,1]), (8*1e6, 1), arrowprops=dict(arrowstyle="->"), fontsize="xx-large",c='red')
[Link]("KD99", ([Link][98,0], [Link][98,1]), (8*1e6, 2*1e6), arrowprops=dict(arrowstyle="->"), fontsize="xx-large",c='red')
[Link]("control3", ([Link][107,0], [Link][107,1]), (8*1e6, 3*1e6), arrowprops=dict(arrowstyle="->"), fontsize="xx-large",c='red')
[Link]("control13", ([Link][117,0], [Link][117,1]), (8*1e6, 4*1e6), arrowprops=dict(arrowstyle="->"), fontsize="xx-large",c='red')

l = df_current['Q3'].value_counts(normalize=True).mul(100).tolist()[1]-df_old['Q2'].value_counts(normalize=True).mul(100).tolist()[1]

print(5*'\n',"\033[1;32m Increase in Woman is only\033[1;32m",round(l, 2),'%\033[1;32m Over Last Year\033[1;32m',5*'\n')

fig, ax = [Link](1, 2, figsize=(20,8))


[Link](0.1, 0.95, "Visualisation of Gender Distribution for 2022 and 2021", fontsize=15, fontweight='bold')

[Link](x='Q3', data=df_current,palette="Dark2", ax=ax[0]); #Current Year


[Link](x='Q2', data=df_old,palette="Dark2",ax=ax[1]); #Last Year

for i, ax in enumerate([Link]()):
[Link](axis='y', linestyle='-', alpha=0.4)
if i==0:t=shape;year = 2022
else:t=shape_21;year =2021
for p in [Link]:
percentage = f'{100 * p.get_height() / t:.2f}%\n'
[Link](percentage, (p.get_x() + p.get_width() / 2,p.get_height()), ha='center', va='center')
ax.set_xlabel('Gender');ax.set_title("Gender Wise Distribution in "+ str(year))
if not(0.5 <= p.get_x() < 1.5):
p.set_facecolor('lightgrey')

[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
59

fig, ax = [Link](1,2, figsize=(20,8))


[Link](0.1, 0.95, "Age Distribution of Kaggle Users - 2022", fontsize=15, fontweight='bold')
[Link](x=df_current['Q2'].value_counts().index, y=df_current['Q2'].value_counts().values, ax=ax[0],
edgecolor='black', linewidth=1.5, saturation=1.5)
ax[0].yaxis.set_major_locator(MaxNLocator(nbins=20));ax[0].grid(axis='y', linestyle='-', alpha=0.4)
ax[0].set_ylabel('Count', weight='semibold')
ax[0].set_xlabel('Age Group 2022', weight='semibold')
ax[1].set_xlabel('Pie Chart for Age Group 2022', weight='semibold')
for p in ax[0].patches:
percentage = f'{100 * p.get_height() / t:.1f}%\n'
ax[0].annotate(percentage, (p.get_x() + p.get_width() / 2,p.get_height()), ha='center', va='center')

ax[1].pie(df_current['Q2'].value_counts(), labels = df_current['Q2'].value_counts().index, autopct='%1.1f%%',
explode=[0.03 for i in df_current['Q2'].value_counts().index])
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
60

fig, ax = [Link](1, 1)

[Link](-1,26)
[Link](0,1)
x = [Link]([Link](0.0000000001, dfn, dfd),[Link](0.9999999999, dfn, dfd), 100)
[Link](x, [Link](x, dfn, dfd), 'r-')
[Link]([Link](0.95, dfn, dfd), ls = "--", color = "navy")
print('upper 5%:', [Link](0.95, dfn, dfd))

import plotly.graph_objects as go

labels = confirmed_bookings['meal'].unique()
values = confirmed_bookings['meal'].value_counts()
palette = ["#f6bd60", "#f5cac3", "#84a59d", "#f28482"]

fig = [Link](data=[[Link](labels = labels,


values = values,
hole=.5,
title = 'Meal plans',
legendgroup = True,
pull = [0.1, 0.1, 0.1, 0.1]
)
]
)

fig.update_traces(marker = dict(colors = palette))


[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
61

x = rent_df["Rent"]
y = rent_df["Size"]
colors = rent_df["Size"]
sizes = rent_df["Size"]

[Link](figsize = (25, 8))


plt.ticklabel_format(style = 'plain')
[Link](x, y, c = colors, s = sizes, alpha = 0.3, cmap = 'viridis')
[Link]();

# Free or Paid Courses - Countplot


fig, ax = [Link](figsize=(7,5), dpi=100)
ax = [Link](data=courses, x='is_paid', palette='magma_r')
ax.set_xticklabels(labels=['Free', 'Paid'])
ax.set_xlabel("Free/Paid courses")
ax.set_ylabel("Number of courses")
ax.set_title("Share of Free and Paid Courses on Udemy")
percentage = round(courses['is_paid'].value_counts() * 100 /len(courses), 2)
patches = [Link]
for i in range(len(patches)):
x = patches[i].get_x() + patches[i].get_width()/2
y = patches[i].get_height()+.05
[Link]('{:.2f}%'.format(percentage[i]), (x, y), ha='center')

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
62

df_cpy = [Link](['profile_id'], axis=1)

flierprops = dict(markerfacecolor='g', color='g', alpha=0.5)

n_cols = 4
n_rows = int([Link](df_cpy.shape[-1]*2 / n_cols))
fig, axes = [Link](n_rows, n_cols, figsize=(4 * n_cols, 3 * n_rows))
for i, (col) in enumerate(list(df_cpy.columns)):
mean = df_cpy[col].mean()
median = df_cpy[col].median()
[Link](df_cpy[col], ax=[Link]()[2*i], kde=True)
[Link](x=df_cpy[col], orient='h', ax=[Link]()[2*i+1], color='g')
[Link]()[2*i+1].vlines(mean, ymin = -1, ymax = 1, color='r',
label=f"For [{col}]\nMean: {mean:.2}\nMedian: {median:.2}")
[Link]()[2*i+1].legend()

if i % n_cols == 0:
ax.set_ylabel('Frequency')
else:
ax.set_ylabel('')
plt.tight_layout()

[Link](rc={'[Link]':(10,7)})
sns.set_style("white")
[Link](data=df, x="horsepower", y="mpg", size="acceleration",
hue='origin',legend=True, sizes=(10, 500))

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
63

import altair as alt


plot=[Link](df).mark_bar(size=40).encode(
alt.X('cylinders'),
alt.Y('mpg'),
[Link]('origin')
)
[Link](title='cylinders vs mpg')

import altair as alt


select = [Link](type='interval')
values = [Link](df).mark_point().encode(
x='horsepower:Q',
y='mpg:Q',
color=[Link](select, 'origin:N', [Link]('lightgray'))
).add_selection(
select
)
bars = [Link](df).mark_bar().encode(
y='origin:N',
color='origin:N',
x='count(origin):Q'
).transform_filter(
select
)
values & bars

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
64

import pandas as pd
import [Link] as plt
from mpl_toolkits.mplot3d import Axes3D
iris = pd.read_csv("[Link]")
# Create a figure and axes for the 3D plot
fig = [Link]()
ax = fig.add_subplot(111, projection='3d')
# Scatter plot the data
[Link](iris["sepal_length"], iris["sepal_width"], iris["petal_length"],
c=iris["petal_length"], cmap='viridis')
# Add labels to the axes
ax.set_xlabel("sepal_length")
ax.set_ylabel("sepal_width")
ax.set_zlabel("petal_length")
# Show the plot
[Link]()

fig = [Link]()
ax = [Link](projection='3d')
ax.plot_trisurf(data['temp'], data['co'], data['smoke'], cmap =
[Link].twilight_shifted)
[Link]('Relation between Carbon di oxide levels, Smoke and Temperature.')
[Link]('co')
[Link]('smoke')
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
65

import pandas as pd
import [Link] as plt
from mpl_toolkits.mplot3d import Axes3D
df = pd.read_csv("[Link]")
# Create a figure and axes for the 3D plot
fig = [Link]()
ax = fig.add_subplot(111, projection='3d')
# Scatter plot the data
[Link](df["Age"], df["Fare"], df["Survived"], c=df["Survived"],
cmap='viridis')
# Add labels to the axes
ax.set_xlabel("Age")
ax.set_ylabel("Fare")
ax.set_zlabel("Survived")
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
66

from mpl_toolkits.mplot3d import Axes3D


x = [Link](-5, 5, 100)
y = [Link](-5, 5, 100)
X, Y = [Link](x, y)
Z = [Link]([Link](X**2 + Y**2))
fig = [Link]()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, cmap='viridis')
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
[Link]()
# Create a figure and axes for the 3D plot
fig = [Link]()
ax = fig.add_subplot(111, projection='3d')
hist, xedges, yedges = np.histogram2d(df["Age"], df["Fare"],
bins=10)
X, Y = [Link](xedges[:-1], yedges[:-1])
ax.plot_surface(X, Y, hist, cmap='viridis')
# Add labels to the axes
ax.set_xlabel("Age")
ax.set_ylabel("Fare")
ax.set_zlabel("Count")

# Show the plot


[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
67

# Create a figure and axes for the 3D plot


fig = [Link]()
ax = fig.add_subplot(111, projection='3d')

# Create a histogram of the data


hist1, xedges1, yedges1 = np.histogram2d(df[df["Survived"] ==
1]["Age"], df[df["Survived"] == 1]["Fare"], bins=10)
hist2, xedges2, yedges2 = np.histogram2d(df[df["Survived"] ==
0]["Age"], df[df["Survived"] == 0]["Fare"], bins=10)

# Create a mesh grid of the binned data


X1, Y1 = [Link](xedges1[:-1], yedges1[:-1])
X2, Y2 = [Link](xedges2[:-1], yedges2[:-1])

# Plot the Tri-Surface plot


ax.plot_surface(X1, Y1, hist1, color='r', alpha=0.3)
ax.plot_surface(X2, Y2, hist2, color='b', alpha=0.3)

# Add labels to the axes


ax.set_xlabel("Age")
ax.set_ylabel("Fare")
ax.set_zlabel("Count")

# Show the plot


[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
68

# 3D Scatter Plot
import [Link] as px

fig = px.scatter_3d(titanic, x='Embarked', y='Fare', z='Age',


color='Sex')
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
69

# 3D Scatter Plot
import [Link] as px
fig = px.scatter_3d(wine, x='fixed acidity', y='volatile acidity',
z='total sulfur dioxide', color='quality')
[Link]()

fig = [Link](figsize=(20,20))
ax = [Link](projection="3d")
ax.scatter3D(normalized_i_q, normalized_u_d, normalized_torque, s=0.5,
c=normalized_torque, cmap=plt.get_cmap("jet"))
[Link]()

u_q = electric_motor_temprature_data['u_q']
u_d = electric_motor_temprature_data['u_d']
i_q = electric_motor_temprature_data['i_q']
i_d = electric_motor_temprature_data['i_d']
torque = electric_motor_temprature_data['torque']

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)
70

normalized_u_q = (u_q - u_q.min())/(u_q.max()-u_q.min())


normalized_u_d = (u_d - u_d.min())/(u_d.max()-u_d.min())
normalized_i_q = (i_q - i_q.min())/(i_q.max()-i_q.min())
normalized_i_d = (i_d - i_d.min())/(i_d.max()-i_d.min())
normalized_torque = (torque - [Link]())/([Link]()-[Link]())
fig = [Link](figsize=(20,20))
ax = [Link](projection="3d")
ax.scatter3D(normalized_u_q, normalized_u_d, normalized_torque, s=0.5,
c=normalized_torque, cmap=plt.get_cmap("jet"))
[Link]()

fig = [Link]()
ax = fig.add_subplot(111, projection='3d')
hist, xedges, yedges = np.histogram2d(df["Age"], df["Fare"],
bins=10)
X, Y = [Link](xedges[:-1], yedges[:-1])
ax.plot_wireframe(X, Y, hist)
ax.set_xlabel("Age")
ax.set_ylabel("Fare")
ax.set_zlabel("Count")
[Link]()

Syed Afroz Ali


Data Scientist (Kaggle Grandmaster)

You might also like