%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
import seaborn as sns
import pandas as pd
from seaborn.clustering import clusterplot
import numpy as np
import string
# Build a 10 x 20 demo matrix of standard-normal noise labelled with rows
# 'a'..'j' and columns 'A'..'T'. The seed is fixed so the example is
# reproducible.
shape = (10, 20)
np.random.seed(2013)
# string.ascii_lowercase / ascii_uppercase are locale-independent and exist on
# both Python 2 and 3 (string.lowercase / string.uppercase are Python 2 only).
df = pd.DataFrame(np.random.randn(*shape),
                  index=list(string.ascii_lowercase[0:shape[0]]),
                  columns=list(string.ascii_uppercase[0:shape[1]]))
# -- Add some structure in the matrix so we can try to be correct -- #
# The slices below are positional, hence .iloc (the labels are letters).
# Add 5 to rows a,b,c,d,e and columns K,L,M,N,O,P,Q,R,S,T
df.iloc[0:5, 10:20] += 5
# Subtract 5 from rows f,g,h,i,j and columns A,B,C,D,E
df.iloc[5:10, 0:5] -= 5
# Plot `df` with clusterplot using only its default options; the three values
# it returns are bound to fig, row_dendrogram and col_dendrogram.
fig, row_dendrogram, col_dendrogram = clusterplot(df)
import pandas as pd
import numpy as np
import string
import matplotlib as mpl
import brewer2mpl
# Build a second 10 x 20 demo matrix (rows 'a'..'j', columns 'A'..'T') whose
# values span several orders of magnitude and which contains one missing cell.
shape = (10, 20)
np.random.seed(2013)
# string.ascii_lowercase / ascii_uppercase are locale-independent and exist on
# both Python 2 and 3 (string.lowercase / string.uppercase are Python 2 only).
df = pd.DataFrame(np.random.randn(*shape),
                  index=list(string.ascii_lowercase[0:shape[0]]),
                  columns=list(string.ascii_uppercase[0:shape[1]]))
# -- Add some structure in the matrix so we can try to be correct -- #
# The slices below are positional, hence .iloc (the labels are letters).
# Add 100 to rows a,b,c,d,e and columns K,L,M,N,O,P,Q,R,S,T
df.iloc[0:5, 10:20] += 100
# Overwrite rows f,g,h,i,j and columns A,B,C,D,E with tiny uniform values
# drawn from [0, 0.001)
df.iloc[5:10, 0:5] = np.random.uniform(high=0.001, size=(5, 5))
# --- Crazy stuff starts here!!! ---
# Set the df to absolute so we can show log scaling
df = df.abs()
# Add some NAs (label-based lookup, hence .loc)
df.loc['c', 'C'] = np.nan
# Copy of df in which every NaN is replaced by the mean of its column
df_na_mean = df.fillna(df.mean())
# Lower-case vowels followed by their upper-case counterparts.
vowels = ['a', 'e', 'i', 'o', 'u']
# str.upper() works on Python 2 and 3; string.upper() was removed in Python 3.
vowels += [letter.upper() for letter in vowels]
# Fetch the qualitative 'Set1' palette (9 colors requested) from brewer2mpl
# as matplotlib colors.
set1 = brewer2mpl.get_map('Set1', 'qualitative', 9).mpl_colors
# Entries 7 and 8 of the palette are used as the highlight / default colors.
pink, grey = set1[7], set1[8]
# One color per label: labels found in `vowels` get pink, all others grey.
col_side_colors = [grey if letter not in vowels else pink
                   for letter in df.columns]
row_side_colors = [grey if letter not in vowels else pink
                   for letter in df.index]
import copy

# Work on a private copy: mpl.cm.YlGnBu is a shared module-level colormap, so
# calling set_under() on it directly would alter it for every later plot in
# this session (and newer matplotlib releases object to such in-place edits).
cmap = copy.deepcopy(mpl.cm.YlGnBu)
# highlight the NA with white
# NOTE(review): set_under() sets the color used for values below vmin; confirm
# that clusterplot actually maps NA cells there.
cmap.set_under('white')
# Customised heatmap: the same keyword arguments as before, grouped by purpose.
# `df` receives the NA-filled frame and `plot_df` the frame that still holds
# the NaN (presumably one is clustered and the other drawn -- confirm against
# clusterplot's documentation).
fig, row_dendrogram, col_dendrogram = clusterplot(
    # -- data --
    df=df_na_mean,
    plot_df=df,
    # -- clustering --
    cluster_rows=False,
    cluster_cols=True,
    linkage_method='single',
    # -- color mapping --
    cmap=cmap,
    color_scale='log',
    vmin=1e-4,
    vmax=1e2,
    colorbar_label='powers of 10',
    # -- side color strips --
    col_side_colors=col_side_colors,
    row_side_colors=row_side_colors,
    # -- title and tick labels --
    title='Awesome heatmap example',
    title_fontsize=32,
    label_rows=[letter + '++' for letter in df.index],
    label_cols=False,
    xlabel_fontsize=8,
    ylabel_fontsize=20,
    # -- figure and cell styling --
    figsize=(20, 10),
    edgecolor='white',
    linewidth=0.01)