0% found this document useful (0 votes)
212 views30 pages

Altair Gallery

This document provides examples of various chart types that can be created using the Altair visualization library in Python. It includes examples of simple bar charts, heatmaps, histograms, line charts, area charts, and more. For each chart type, it shows the Python code used to generate the chart and a brief description. The goal is to demonstrate the capabilities of Altair through these simple examples.

Uploaded by

Ramón Ovalle
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
212 views30 pages

Altair Gallery

This document provides examples of various chart types that can be created using the Altair visualization library in Python. It includes examples of simple bar charts, heatmaps, histograms, line charts, area charts, and more. For each chart type, it shows the Python code used to generate the chart and a brief description. The goal is to demonstrate the capabilities of Altair through these simple examples.

Uploaded by

Ramón Ovalle
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 30

altair_gallery

June 23, 2021

[1]: import altair as alt


import numpy as np
import pandas as pd
import geopandas as gpd
import gpdvega
from vega_datasets import data
import seaborn as sns
# Use the inline ("default") data transformer with the row cap raised to
# 10000. A preceding enable('json') call was dropped: it was overridden by
# this call on the very next line and never took effect.
alt.data_transformers.enable('default', max_rows=10000)

[1]: DataTransformerRegistry.enable('default')

1 Altair Gallery
1.1 Simple Charts
1.1.1 Simple Bar Chart

[2]: source = pd.DataFrame({


'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
'b': [28, 55, 43, 91, 81, 53, 19, 87, 52]
})
alt.Chart(source).mark_bar().encode(
x='a',
y='b'
).properties(width=400, height=250).display(renderer='svg')

alt.Chart(…)

1.1.2 Simple Heatmap


# In [3]:
# Evaluate z = x^2 + y^2 on the integer grid [-5, 5) x [-5, 5).
x, y = np.meshgrid(range(-5, 5), range(-5, 5))
z = x ** 2 + y ** 2

# Convert this grid to columnar data expected by Altair
source = pd.DataFrame({'x': x.ravel(), 'y': y.ravel(), 'z': z.ravel()})

alt.Chart(source).mark_rect().encode(
    x='x:O',
    y='y:O',
    color='z:Q'
).properties(width=400, height=400).display(renderer='svg')

alt.Chart(…)

1.1.3 Simple Histogram

[4]: source = data.movies.url

alt.Chart(source).mark_bar().encode(
alt.X("IMDB_Rating:Q", bin=True),
y='count()',
).properties(width=500, height=300).display(renderer='svg')

alt.Chart(…)

1.1.4 Simple Line Chart

[5]: x = np.arange(100)
source = pd.DataFrame({
'x': x,
'f(x)': np.sin(x / 5)
})

alt.Chart(source).mark_line().encode(
x='x',
y='f(x)'
).properties(width=500, height=300).display(renderer='svg')

alt.Chart(…)

1.2 Bar Charts


1.2.1 Bar Chart with Highlighted Bar
# In [6]:
source = data.wheat()

alt.Chart(source).mark_bar().encode(
    x='year:O',
    y="wheat:Q",
    # The highlight will be set on the result of a conditional statement
    color=alt.condition(
        alt.datum.year == 1810,  # If the year is 1810 this test returns True,
        alt.value('orange'),     # which sets the bar orange.
        alt.value('steelblue')   # And if it's not true it sets the bar steelblue.
    )
).properties(width=600).display(renderer='svg')

alt.Chart(…)

1.2.2 Bar Chart with Labels


[7]: source = data.wheat()

bars = alt.Chart(source).mark_bar().encode(
x='wheat:Q',
y="year:O"
)
text = bars.mark_text(
align='left',
baseline='middle',
dx=5 # Nudges text to right so it doesn't appear on top of the bar
).encode(
text='wheat:Q'
)
(bars + text).properties(height=900).display(renderer='svg')

alt.LayerChart(…)

1.2.3 Bar Chart with Negative Values

[8]: source = data.us_employment()

alt.Chart(source).mark_bar().encode(
x="month:T",
y="nonfarm_change:Q",
color=alt.condition(
alt.datum.nonfarm_change > 0,
alt.value("steelblue"), # The positive color
alt.value("orange") # The negative color
)
).properties(width=600).display(renderer='svg')

alt.Chart(…)

1.2.4 Bar Chart with rounded edges


# In [9]:
source = data.seattle_weather()

# Stacked count of records per month, coloured by weather type; only the
# top corners of each bar segment are rounded.
alt.Chart(source).mark_bar(
    cornerRadiusTopLeft=3,
    cornerRadiusTopRight=3
).encode(
    x='month(date):O',
    y='count():Q',
    color='weather:N'
).properties(width=400).display(renderer='svg')

alt.Chart(…)

1.2.5 Calculating Percentage of Total

# In [10]:
source = pd.DataFrame({
    'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'],
    'Time': [8, 2, 4, 8, 2],
})

# joinaggregate attaches the grand total to every row so that each row's
# share can then be computed with a per-row calculate expression.
alt.Chart(source).transform_joinaggregate(
    TotalTime='sum(Time)',
).transform_calculate(
    PercentOfTotal="datum.Time / datum.TotalTime"
).mark_bar().encode(
    alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')),
    y='Activity:N'
).properties(width=400, height=200).display(renderer='svg')

alt.Chart(…)

1.2.6 Grouped Bar Chart

[11]: source = data.barley()

alt.Chart(source).mark_bar().encode(
x='year:O',
y='sum(yield):Q',
color='year:N',
column='site:N',
# row='site:N'
).properties(height=200).display(renderer='svg')

alt.Chart(…)

1.2.7 Horizontal Stacked Bar Chart


[12]: source = data.barley()

alt.Chart(source).mark_bar().encode(
x='sum(yield)',
y='variety',
color='site'
).properties(height=200).display(renderer='svg')

alt.Chart(…)

4
1.2.8 Layered Bar Chart

[13]: source = data.iowa_electricity()

alt.Chart(source).mark_bar(opacity=0.7).encode(
x='year(year):O',
y=alt.Y('net_generation:Q', stack=None),
color="source",
).properties(height=300, width=400).display(renderer='svg')

alt.Chart(…)

1.2.9 Normalized Stacked Bar Chart


[14]: source = data.barley()

alt.Chart(source).mark_bar().encode(
x=alt.X('sum(yield)', stack="normalize"),
y='variety',
color='site'
).properties(height=250, width=400).display(renderer='svg')

alt.Chart(…)

1.2.10 Sorted Bar Chart


[15]: source = data.barley()

alt.Chart(source).mark_bar().encode(
x='sum(yield):Q',
y=alt.Y('site:N', sort='-x')
).properties(height=250, width=400).display(renderer='svg')

alt.Chart(…)

1.2.11 Stacked Bar Chart with Text Overlay


# In [16]:
source = data.barley()

bars = alt.Chart(source).mark_bar().encode(
    x=alt.X('sum(yield):Q', stack='zero'),
    y=alt.Y('variety:N'),
    color=alt.Color('site')
)

# Text layer uses the same stacked x/y encodings as the bars; `detail`
# groups the sum by site so there is one label per bar segment.
text = alt.Chart(source).mark_text(dx=-15, dy=3, color='white').encode(
    x=alt.X('sum(yield):Q', stack='zero'),
    y=alt.Y('variety:N'),
    detail='site:N',
    text=alt.Text('sum(yield):Q', format='.1f')
)

(bars + text).properties(height=350, width=400).display(renderer='svg')

alt.LayerChart(…)

1.3 Line Charts


1.3.1 Filled Step Chart

[17]: source = data.stocks()

alt.Chart(source).mark_area(
color="lightblue",
interpolate='step-after',
line=True
).encode(
x='date',
y='price'
).transform_filter(alt.datum.symbol == 'GOOG').display(renderer='svg')

alt.Chart(…)

1.3.2 Line Chart with Confidence Interval Band


[18]: source = data.cars()

line = alt.Chart(source).mark_line().encode(
x='Year',
y='mean(Miles_per_Gallon)'
)

band = alt.Chart(source).mark_errorband(extent='ci').encode(
x='Year',
y=alt.Y('Miles_per_Gallon', title='Miles/Gallon'),
)

(band + line).display(renderer='svg')

alt.LayerChart(…)

1.3.3 Line Chart with Layered Aggregates


# In [19]:
source = data.stocks()

base = alt.Chart(source).properties(width=550)

# One price line per stock symbol
line = base.mark_line().encode(
    x='date',
    y='price',
    color='symbol'
)

# Horizontal rule at each symbol's average price
rule = base.mark_rule().encode(
    y='average(price)',
    color='symbol',
    size=alt.value(2)
)

(line + rule).display(renderer='svg')

alt.LayerChart(…)

1.3.4 Line Chart with Percent axis


[20]: source = data.jobs.url
(alt.Chart(source).mark_area().encode(
alt.X('year:O'),
alt.Y('perc:Q', axis=alt.Axis(format='%')),
color='sex:N'
)
.transform_filter(alt.datum.job == 'Welder')
.properties(width=400)
.display(renderer='svg')
)

alt.Chart(…)

1.3.5 Line Chart with Points


[21]: x = np.arange(100)
source = pd.DataFrame({
'x': x,
'f(x)': np.sin(x / 5)
})

alt.Chart(source).mark_line(point=True).encode(
x='x',
y='f(x)'
).display(renderer='svg')

alt.Chart(…)

7
1.3.6 Line Chart with Sequence Generator

[22]: source = alt.sequence(start=0, stop=12.7, step=0.1, as_='x')

alt.Chart(source).mark_line().transform_calculate(
sin='sin(datum.x)',
cos='cos(datum.x)'
).transform_fold(
['sin', 'cos']
).encode(
x='x:Q',
y='value:Q',
color='key:N'
).display(renderer='svg')

alt.Chart(…)

1.3.7 Multi Series Line Chart


[23]: source = data.stocks()

alt.Chart(source).mark_line().encode(
x='date',
y='price',
color='symbol',
strokeDash='symbol',
).display(renderer='svg')

alt.Chart(…)

1.4 Area Charts


1.4.1 Area Chart with Gradient
[24]: source = data.stocks()
source.head()

[24]: symbol date price


0 MSFT 2000-01-01 39.81
1 MSFT 2000-02-01 36.35
2 MSFT 2000-03-01 43.22
3 MSFT 2000-04-01 28.37
4 MSFT 2000-05-01 25.45

# In [25]:
source = data.stocks()

# GOOG price as an area filled with a linear white -> dark green gradient.
alt.Chart(source).transform_filter(
    'datum.symbol==="GOOG"'
).mark_area(
    line={'color': 'darkgreen'},
    color=alt.Gradient(
        gradient='linear',
        stops=[alt.GradientStop(color='white', offset=0),
               alt.GradientStop(color='darkgreen', offset=1)],
        x1=1, x2=1, y1=1, y2=0
    )
).encode(
    alt.X('date:T'),
    alt.Y('price:Q')
).display(renderer='svg')

alt.Chart(…)

1.4.2 Cumulative Count Chart


[26]: source = data.movies.url
alt.Chart(source).transform_window(
cumulative_count="count()",
# cumulative_count = "sum(IMDB_Rating)",
sort=[{"field": "IMDB_Rating"}],
).mark_area().encode(
x="IMDB_Rating:Q",
y="cumulative_count:Q"
).display(renderer='svg')

alt.Chart(…)

1.4.3 Faceted Density Estimates

[27]: source = data.iris()


source.reset_index().head()

[27]: index sepalLength sepalWidth petalLength petalWidth species


0 0 5.1 3.5 1.4 0.2 setosa
1 1 4.9 3.0 1.4 0.2 setosa
2 2 4.7 3.2 1.3 0.2 setosa
3 3 4.6 3.1 1.5 0.2 setosa
4 4 5.0 3.6 1.4 0.2 setosa

# In [28]:
# `source` is the iris DataFrame loaded in the previous cell.
# Fold the four measurement columns into (Measurement_type, value) pairs,
# then estimate one density per measurement type and facet by row.
(alt.Chart(source).transform_fold(
    ['petalWidth',
     'petalLength',
     'sepalWidth',
     'sepalLength'],
    as_=['Measurement_type', 'value']
).transform_density(
    density='value',
    bandwidth=0.3,
    groupby=['Measurement_type'],
    extent=[0, 8]
).mark_area().encode(
    alt.X('value:Q'),
    alt.Y('density:Q'),
    alt.Row('Measurement_type:N')
).properties(width=300, height=50)
).display(renderer='svg')

alt.Chart(…)

[29]: # use pd.melt to reshape the data from wide to long format


df = pd.melt(source.reset_index(),
id_vars=['index','species'],
value_vars=['petalWidth','petalLength','sepalWidth','sepalLength'],
var_name='Measurement_type', value_name='value'
)
df.head()
(alt.Chart(df)
.transform_density(
density='value',
bandwidth=0.3,
groupby=['Measurement_type'],
extent= [0, 8]
).mark_area().encode(
alt.X('value:Q'),
alt.Y('density:Q'),
alt.Row('Measurement_type:N')
).properties(width=300, height=50)
).display(renderer='svg')

alt.Chart(…)

1.4.4 Horizon Graph


# In [30]:
source = pd.DataFrame([
    {"x": 1, "y": 28}, {"x": 2, "y": 55},
    {"x": 3, "y": 43}, {"x": 4, "y": 91},
    {"x": 5, "y": 81}, {"x": 6, "y": 53},
    {"x": 7, "y": 19}, {"x": 8, "y": 87},
    {"x": 9, "y": 52}, {"x": 10, "y": 48},
    {"x": 11, "y": 24}, {"x": 12, "y": 49},
    {"x": 13, "y": 87}, {"x": 14, "y": 66},
    {"x": 15, "y": 17}, {"x": 16, "y": 27},
    {"x": 17, "y": 68}, {"x": 18, "y": 16},
    {"x": 19, "y": 49}, {"x": 20, "y": 15}
])

# First band: y clipped to the [0, 50] domain
area1 = alt.Chart(source).mark_area(
    clip=True,
    interpolate='monotone'
).encode(
    alt.X('x', scale=alt.Scale(zero=False, nice=False)),
    alt.Y('y', scale=alt.Scale(domain=[0, 50]), title='y'),
    opacity=alt.value(0.6)
).properties(
    width=500,
    height=75
)

# Second band: the part of y above 50, shifted down by 50 (ny = y - 50)
# and drawn over the first band on the same [0, 50] scale.
area2 = area1.encode(
    alt.Y('ny:Q', scale=alt.Scale(domain=[0, 50]))
).transform_calculate(
    "ny", alt.datum.y - 50
)

(area1 + area2).display(renderer='svg')

alt.LayerChart(…)

1.4.5 Interval selection


[31]: # source = data.sp500.url
source = data.seattle_temps()
source['temp_C'] = (source.temp - 32)*5/9

brush = alt.selection(type='interval', encodings=['x'])


base = alt.Chart(source).mark_line(color='black', opacity=0.8).encode(
x = 'date:T',
y = 'temp_C:Q'
).properties(
width=600,
height=200
)
upper = base.encode(
alt.X('date:T', scale=alt.Scale(domain=brush))
)

lower = base.properties(
height=60
).add_selection(brush)

(upper & lower).display(renderer='svg')

alt.VConcatChart(…)

11
1.4.6 Layered Area Chart

[32]: source = data.iowa_electricity()

alt.Chart(source).mark_area(opacity=0.3).encode(
x="year:T",
y=alt.Y("net_generation:Q", stack=None),
color="source:N"
).properties(height=250).display(renderer='svg')

alt.Chart(…)

1.4.7 Normalized Stacked Area Chart


[33]: source = data.iowa_electricity()

alt.Chart(source).mark_area().encode(
x="year:T",
y=alt.Y("net_generation:Q", stack="normalize"),
color="source:N"
).properties(height=250).display(renderer='svg')

alt.Chart(…)

1.4.8 Streamgraph

[34]: source = data.unemployment_across_industries.url

alt.Chart(source).mark_area().encode(
alt.X('yearmonth(date):T',
axis=alt.Axis(format='%Y', domain=False, tickSize=0)
),
alt.Y('sum(count):Q', stack='center', axis=None),
alt.Color('series:N',
scale=alt.Scale(scheme='category20b')
)
).display(renderer='svg')

alt.Chart(…)

1.4.9 Trellis Area Sort Chart


# In [35]:
source = data.iowa_electricity()

# Explicit ordering applied to both the colour legend and the facet rows
slist = ['Nuclear Energy', 'Fossil Fuels', 'Renewables']

alt.Chart(source).mark_area().encode(
    x="year:T",
    y="net_generation:Q",
    color=alt.Color("source:N", sort=slist),
    row=alt.Row("source:N", sort=slist)
).properties(
    height=100
).display(renderer='svg')

alt.Chart(…)

1.5 Scatter Plots


1.5.1 Binned Scatterplot

[36]: source = data.movies.url

alt.Chart(source).mark_circle().encode(
alt.X('IMDB_Rating:Q', bin=True),
alt.Y('Rotten_Tomatoes_Rating:Q', bin=True),
size='count()'
).display(renderer='svg')

alt.Chart(…)

1.5.2 Brushing Scatter Plot to show data on a table


# In [37]:
source = data.cars()

# Brush for selection
brush = alt.selection(type='interval')

# Scatter Plot
# Fixed: the colour field was misspelled 'Cilinders'; the cars dataset
# column is 'Cylinders'.
points = alt.Chart(source).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q',
    color=alt.condition(brush, 'Cylinders:O', alt.value('grey'))
).add_selection(brush)

# Base chart for data tables: number the rows, keep only the brushed ones,
# rank them, and show the first 19 (rank < 20).
ranked_text = alt.Chart(source).mark_text().encode(
    y=alt.Y('row_number:O', axis=None)
).transform_window(
    row_number='row_number()'
).transform_filter(
    brush
).transform_window(
    rank='rank(row_number)'
).transform_filter(
    alt.datum.rank < 20
)

# Data Tables
horsepower = ranked_text.encode(text='Horsepower:N').properties(title='Horsepower')
mpg = ranked_text.encode(text='Miles_per_Gallon:N').properties(title='MPG')
origin = ranked_text.encode(text='Origin:N').properties(title='Origin')
text = alt.hconcat(horsepower, mpg, origin)  # Combine data tables

# Build chart
alt.hconcat(
    points,
    text
).resolve_legend(
    color="independent"
).display(renderer='svg')

alt.HConcatChart(…)

1.5.3 Dot Dash Plot


# In [38]:
source = data.cars()

# Configure the options common to all layers
brush = alt.selection(type='interval')
base = alt.Chart(source).add_selection(brush)

# Configure the points
points = base.mark_point().encode(
    x=alt.X('Miles_per_Gallon', title=''),
    y=alt.Y('Horsepower', title=''),
    color=alt.condition(brush, 'Origin', alt.value('grey'))
)

# Configure the ticks
tick_axis = alt.Axis(labels=False, domain=False, ticks=False)

x_ticks = base.mark_tick().encode(
    alt.X('Miles_per_Gallon', axis=tick_axis),
    alt.Y('Origin', title='', axis=tick_axis),
    color=alt.condition(brush, 'Origin', alt.value('lightgrey'))
)

y_ticks = base.mark_tick().encode(
    alt.X('Origin', title='', axis=tick_axis),
    alt.Y('Horsepower', axis=tick_axis),
    color=alt.condition(brush, 'Origin', alt.value('lightgrey'))
)

# Build the chart
(y_ticks | (points & x_ticks)).display(renderer='svg')

alt.HConcatChart(…)

1.5.4 Multifeature Scatter Plot


[39]: source = data.iris()

alt.Chart(source).mark_circle().encode(
alt.X('sepalLength', scale=alt.Scale(zero=False)),
alt.Y('sepalWidth', scale=alt.Scale(zero=False, padding=1)),
color='species',
size='petalWidth'
).display(renderer='svg')

alt.Chart(…)

1.5.5 Quantile-Quantile Plot

[40]: source = data.normal_2d.url

base = alt.Chart(source).transform_quantile(
'u',
step=0.01,
as_ = ['p', 'v']
).transform_calculate(
uniform = 'quantileUniform(datum.p)',
normal = 'quantileNormal(datum.p)'
).mark_point().encode(
alt.Y('v:Q')
)

(base.encode(x='uniform:Q') | base.encode(x='normal:Q')).display(renderer='svg')

alt.HConcatChart(…)

1.5.6 Scatter Matrix


# In [41]:
source = data.cars()

# One scatter panel per (row field, column field) pair via repeat()
alt.Chart(source).mark_circle().encode(
    alt.X(alt.repeat("column"), type='quantitative'),
    alt.Y(alt.repeat("row"), type='quantitative'),
    color='Origin:N'
).properties(
    width=150,
    height=150
).repeat(
    row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'],
    column=['Miles_per_Gallon', 'Acceleration', 'Horsepower']
).display(renderer='svg')

alt.RepeatChart(…)

1.5.7 Scatter Plot with Href


# In [42]:
source = data.cars()

# Each point links to a Google search for the car's name.
# Fixed: the URL literal had a scraping-proxy prefix fused into it
# ('https://www.google.com/...'); restored the plain URL.
alt.Chart(source).transform_calculate(
    url='https://www.google.com/search?q=' + alt.datum.Name
).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q',
    color='Origin:N',
    href='url:N',
    tooltip=['Name:N'],
).display(renderer='svg')

alt.Chart(…)

1.5.8 Scatter Plot with LOESS Lines


[43]: np.random.seed(1)

source = pd.DataFrame({
'x': np.arange(100),
'A': np.random.randn(100).cumsum(),
'B': np.random.randn(100).cumsum(),
'C': np.random.randn(100).cumsum(),
})

base = alt.Chart(source).mark_circle(opacity=0.5).transform_fold(
fold=['A', 'B', 'C'],
as_=['category', 'y']
).encode(
alt.X('x:Q'),
alt.Y('y:Q'),
alt.Color('category:N')
)

(base
+ base.transform_loess('x', 'y', groupby=['category']).mark_line(size=4)
).display(renderer='svg')

alt.LayerChart(…)

16
1.5.9 Scatter Plot with Rolling Mean

[44]: source = data.seattle_weather()

line = alt.Chart(source).mark_line(
color='red',
size=2
).transform_window(
rolling_mean='mean(temp_max)',
frame=[-30, 30]
).encode(
x='date:T',
y='rolling_mean:Q'
)

points = alt.Chart(source).mark_point(color='gray', filled=True).encode(


x='date:T',
y=alt.Y('temp_max:Q',
axis=alt.Axis(title='Max Temp'))
)

(points + line).display(renderer='svg')

alt.LayerChart(…)

1.5.10 Stripplot
# In [45]:
source = data.movies.url

# One narrow column per genre; points are spread horizontally by a random
# jitter value so overlapping ratings remain visible.
stripplot = alt.Chart(source, width=40).mark_circle(size=8).encode(
    x=alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    ),
    y=alt.Y('IMDB_Rating:Q'),
    color=alt.Color('Major_Genre:N', legend=None),
    column=alt.Column(
        'Major_Genre:N',
        header=alt.Header(
            labelAngle=-90,
            titleOrient='top',
            labelOrient='bottom',
            labelAlign='right',
            labelPadding=3,
        ),
    ),
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
)

(stripplot).display(renderer='svg')

alt.Chart(…)

1.6 Histograms
1.6.1 Histogram with Responsive Bins

[46]: source = data.flights_5k.url

brush = alt.selection_interval(encodings=['x'])

base = alt.Chart(source).transform_calculate(
time="hours(datum.date) + minutes(datum.date) / 60"
).mark_bar().encode(
y='count():Q'
).properties(
width=600,
height=100
)

alt.vconcat(
base.encode(
alt.X('time:Q',
bin=alt.Bin(maxbins=30, extent=brush),
scale=alt.Scale(domain=brush)
)
),
base.encode(
alt.X('time:Q', bin=alt.Bin(maxbins=30)),
).add_selection(brush)
).display(renderer='svg')

alt.VConcatChart(…)

18
1.6.2 Layered Histogram

[47]: # Generating Data


source = pd.DataFrame({
'Trial A': np.random.normal(0, 0.8, 1000),
'Trial B': np.random.normal(-2, 1, 1000),
'Trial C': np.random.normal(3, 2, 1000)
})

alt.Chart(source).transform_fold(
['Trial A', 'Trial B', 'Trial C'],
as_=['Experiment', 'Measurement']
).mark_area(
opacity=0.3,
interpolate='step'
).encode(
alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)),
alt.Y('count()', stack=None),
alt.Color('Experiment:N')
).display(renderer='svg')

alt.Chart(…)

1.7 Maps
1.7.1 Choropleth Map

[48]: counties = alt.topo_feature(data.us_10m.url, 'counties')


source = data.unemployment.url

alt.Chart(counties).mark_geoshape().encode(
color='rate:Q'
).transform_lookup(
lookup='id',
from_=alt.LookupData(source, 'id', ['rate'])
).project(
type='albersUsa'
).properties(
width=500,
height=300
).display(renderer='svg')

alt.Chart(…)

19
1.7.2 World Map

# In [49]:
# Data generators for the background
sphere = alt.sphere()
graticule = alt.graticule()

# Source of land data
source = alt.topo_feature(data.world_110m.url, 'countries')

# Layering and configuring the components
alt.layer(
    alt.Chart(sphere).mark_geoshape(fill='lightblue'),
    alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),
    alt.Chart(source).mark_geoshape(fill='ForestGreen', stroke='black')
).project(
    'naturalEarth1'
).properties(
    width=600, height=400
).configure_view(
    stroke=None
).display(renderer='svg')

alt.LayerChart(…)

1.7.3 World Projections

[50]: source = alt.topo_feature(data.world_110m.url, 'countries')

base = alt.Chart(source).mark_geoshape(
fill='#666666',
stroke='white'
).properties(
width=300,
height=180
)

projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic']


charts = [base.project(proj).properties(title=proj)
for proj in projections]

alt.concat(*charts, columns=2)

[50]: alt.ConcatChart(…)

from https://github.com/altair-viz/altair/issues/588

# In [51]:
# geopandas is already imported as `gpd` in the first cell, so the
# redundant re-import was dropped.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world = world[world.continent != 'Antarctica']  # do not display Antarctica

df = alt.InlineData(
    values=world.to_json(),  # geopandas to geojson string
    # root object type is "FeatureCollection" but we need its features
    format=alt.DataFormat(property='features', type='json'),
)

# The original chained two .properties() calls; the second one overrode
# width/height (500x300 -> 600x400), so only the effective values are kept.
alt.Chart(df).mark_geoshape(
).encode(
    # DataFrame fields are accessible through a "properties" object
    color='properties.pop_est:Q',
    tooltip='properties.name:N'
).properties(
    projection={"type": 'mercator'},
    width=600,
    height=400
).display(renderer='svg')

alt.Chart(…)

1.8 Interactive Charts


from https://iliatimofeev.github.io/gpdvega/gallery/map_with_data.html

# In [52]:
continent = 'South America'

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
df = world[world.continent == continent]

# Hovering a bar selects its y value; with empty='none' nothing is
# highlighted until the pointer is over a bar.
brush = alt.selection_single(encodings=["y"], on="mouseover", empty='none')
color = alt.Color('pop_est', scale=alt.Scale(type='pow', exponent=0.4))

bars = (alt.Chart(df).mark_bar().encode(
    x=alt.X('pop_est', scale=alt.Scale(nice=False)),
    y=alt.Y('name', sort=alt.EncodingSortField(field='pop_est', op='sum',
                                               order='descending')),
    tooltip=['name', 'pop_est', 'gdp_md_est'],
    color=alt.condition(brush, alt.value('red'), color)
).add_selection(
    brush
).properties(
    width=200,
    height=450
)
)

# Map layer shares the same selection, so the hovered country turns red too
countries = (alt.Chart(df).mark_geoshape().project().encode(
    color=alt.condition(
        brush,
        alt.value('red'),
        color,
    ),
    tooltip=['name', 'pop_est', 'gdp_md_est'],
).properties(
    width=300,
    height=450,
    title=f'{continent} population'
)
)

(bars | countries).display(renderer='svg')

alt.HConcatChart(…)

1.9 Time Series Plots


TimeUnit Transform
“year”, “yearquarter”, “yearquartermonth”, “yearmonth”, “yearmonthdate”, “yearmonthdatehours”,
“yearmonthdatehoursminutes”, “yearmonthdatehoursminutesseconds”, “quarter”, “quartermonth”,
“month”, “monthdate”, “date” (Day of month, i.e., 1 - 31), “day” (Day of week, i.e.,
Monday - Friday), “hours”, “hoursminutes”, “hoursminutesseconds”, “minutes”, “minutesseconds”,
“seconds”, “secondsmilliseconds”, “milliseconds”

[53]: temps = data.seattle_temps()


temps.head()

[53]: date temp


0 2010-01-01 00:00:00 39.4
1 2010-01-01 01:00:00 39.2
2 2010-01-01 02:00:00 39.0
3 2010-01-01 03:00:00 38.9
4 2010-01-01 04:00:00 38.8

# In [54]:
# `temps` is the Seattle temperature DataFrame loaded in the previous cell.
# transform_sample(7000) plots a random sample of 7000 rows.
points = alt.Chart(temps, width=600).mark_point(
    size=5, color='black', filled=True
).encode(
    x='date:T',
    y='temp:Q'
).transform_sample(7000)
points.display(renderer='svg')

alt.Chart(…)

# In [55]:
# Uses `temps` and the `points` layer defined in the preceding cells.
mean = alt.Chart(temps).mark_line(color='red').encode(
    x='yearmonthdate(date):T',
    y='mean(temp):Q'
)
# Renamed from `max` so the builtin max() is not shadowed for later cells.
max_line = alt.Chart(temps).mark_line(color='red').encode(
    x='yearmonthdate(date):T',
    y='max(temp):Q'
)

# Band between the ci0 and ci1 aggregates of temp for each day
bands = alt.Chart(temps).mark_area(opacity=0.7).encode(
    alt.X('yearmonthdate(date):T'),
    alt.Y('ci0(temp)', scale=alt.Scale(domain=[30, 80])),
    alt.Y2('ci1(temp)')
)
(bands + mean + max_line + points).properties(
    width=700, height=400
).configure_view(
    fill='lightgray', opacity=0.4
).display(renderer='svg')

alt.LayerChart(…)

# In [56]:
# Add a day-of-year column derived from the date column
temps['doy'] = pd.DatetimeIndex(temps.date).dayofyear

mean = alt.Chart(temps).mark_line(color='red').encode(
    x='doy:O',
    y='mean(temp):Q'
)
# Band between the ci0 and ci1 aggregates of temp for each day of year
bands = alt.Chart(temps).mark_area(opacity=0.7).encode(
    alt.X('doy:O'),
    alt.Y('ci0(temp)', scale=alt.Scale(domain=[30, 70])),
    alt.Y2('ci1(temp)')
)
(bands + mean).properties(
    width=700, height=250
).configure_view(
    fill='lightgray', opacity=0.4
).display(renderer='svg')

alt.LayerChart(…)

[57]: temps = data.seattle_temps()


alt.Chart(temps).mark_bar().encode(
# x='date:T',
x='month(date):O',
y='temp:Q'
).properties(width=400, height=200).display(renderer='svg')

alt.Chart(…)

# In [58]:
# Mean temperature per month as an area; y axis limited to [20, 70].
alt.Chart(temps).mark_area().encode(
    x='month(date):T',
    y=alt.Y('mean(temp):Q', title='Temperatura media °F',
            scale=alt.Scale(domain=[20, 70]))
).properties(width=400).display(renderer='svg')

alt.Chart(…)

# In [59]:
temps = data.seattle_temps()

# Index by timestamp so a whole day can be selected with a date string
temps.index = pd.to_datetime(temps.date)
df = temps.loc['2010-01-02']
(alt.Chart(df).mark_line().encode(
    x='date:T',
    y='temp:Q'
).configure_view(fill='lightgray')
).display(renderer='svg')

alt.Chart(…)

# In [60]:
temps = data.seattle_temps()

temps['temp_C'] = (temps.temp - 32) * 5 / 9  # Fahrenheit -> Celsius
temps['hora'] = pd.DatetimeIndex(temps.date).hour

# Hour axis starts at 06:00 and wraps around midnight. The original domain
# used range(0, 7), which listed hour 6 a second time; range(0, 6) gives
# each of the 24 hours exactly once.
hour_order = list(range(6, 24)) + list(range(0, 6))

rect = alt.Chart(temps).mark_rect().encode(
    alt.X('month(date):O', title='mes'),
    alt.Y('hora:O', title='hora del día',
          scale=alt.Scale(domain=hour_order)),
    alt.Color('mean(temp_C):Q',
              title='Temperatura (°C)',
              scale=alt.Scale(domain=(5, 25),
                              range=sns.color_palette('RdYlBu_r').as_hex())),
)
# Numeric label of the mean temperature on top of each cell
texts = alt.Chart(temps).mark_text(align='center', size=9, color='black').encode(
    alt.X('month(date):O', title='mes'),
    alt.Y('hora:O',
          scale=alt.Scale(domain=hour_order)),
    alt.Text('mean(temp_C):Q', format=",.1f"),
)
(rect + texts).properties(width=600, height=600).display(renderer='svg')

alt.LayerChart(…)

1.10 Interactive Maps


1.10.1 Interactive Chart with Cross-Highlight
# In [61]:
source = data.movies.url

# Clicking a genre bar selects its x value
pts = alt.selection(type="single", encodings=['x'])

# Binned heatmap of all records (uses `source` rather than repeating the URL)
rect = alt.Chart(source).mark_rect().encode(
    alt.X('IMDB_Rating:Q', bin=True),
    alt.Y('Rotten_Tomatoes_Rating:Q', bin=True),
    alt.Color('count()',
              scale=alt.Scale(scheme='greenblue'),
              legend=alt.Legend(title='Total Records')
              )
)

# Grey circles sized by the number of records in the selected genre
circ = rect.mark_point().encode(
    alt.ColorValue('grey'),
    alt.Size('count()',
             legend=alt.Legend(title='Records in Selection')
             )
).transform_filter(
    pts
)

bar = alt.Chart(source).mark_bar().encode(
    x=alt.X('Major_Genre:N', sort='y'),
    y='count()',
    color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey"))
).properties(
    width=550,
    height=200
).add_selection(pts)

alt.vconcat(
    rect + circ,
    bar
).resolve_legend(
    color="independent",
    size="independent"
).display(renderer='svg')

alt.VConcatChart(…)

1.10.2 Interval selection


[62]: source = data.cars()

brush = alt.selection(type='interval', encodings=['x'])

points = alt.Chart(source).mark_point().encode(
x='Horsepower',
y='Miles_per_Gallon',
color=alt.condition(brush, 'Origin', alt.value('lightgray'))
).add_selection(brush)

bars = alt.Chart(source).mark_bar().encode(
y='Origin',
color='Origin',
x='count()'
).transform_filter(brush)

(points & bars).display(renderer='svg')

alt.VConcatChart(…)

25
1.10.3 Multi-Line Highlight (single selection)

[63]: source = data.stocks()

highlight = alt.selection(type='single', on='mouseover',


fields=['symbol'], nearest=True)

base = alt.Chart(source).encode(
x='date:T',
y='price:Q',
color='symbol:N'
)

points = base.mark_circle().encode(
opacity=alt.value(0)
).add_selection(
highlight
).properties(
width=600
)

lines = base.mark_line().encode(
size=alt.condition(~highlight, alt.value(1), alt.value(3))
)

(points + lines).display(renderer='svg')

alt.LayerChart(…)

1.10.4 Clickable Legend


# In [64]:
source = data.cars()

# Clicking a legend point selects its colour value; nothing is selected
# initially (empty='none').
brush = alt.selection(type='single', encodings=['color'], empty='none')

# Background layer: every car in light grey
base = alt.Chart(source).mark_point(color='lightgray', filled=True).encode(
    x='Horsepower',
    y='Miles_per_Gallon'
)
# Foreground layer: only the cars matching the selection, coloured by Origin
points = alt.Chart(source).mark_point(filled=True, size=70).encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color=alt.condition(brush, 'Origin', alt.value('lightgray'), legend=None)
).transform_filter(brush)

# A small chart of Origin values that acts as the clickable legend
legend = alt.Chart(source).mark_point(filled=True, size=70).encode(
    y='Origin',
    color='Origin',
).add_selection(brush)

((base + points) | legend).display(renderer='svg')

alt.HConcatChart(…)

1.11 Other Charts


1.11.1 Violin Plot
[65]: alt.Chart(data.cars()).transform_density(
'Miles_per_Gallon',
as_=['Miles_per_Gallon', 'density'],
extent=[5, 50],
groupby=['Origin']
).mark_area(orient='horizontal').encode(
y='Miles_per_Gallon:Q',
color='Origin:N',
x=alt.X(
'density:Q',
stack='center',
impute=None,
title=None,
axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True),
),
column=alt.Column(
'Origin:N',
header=alt.Header(
titleOrient='bottom',
labelOrient='bottom',
labelPadding=0,
),
)
).properties(
width=100
).configure_facet(
spacing=0
).configure_view(
stroke=None
).display(renderer='svg')

alt.Chart(…)

27
1.11.2 Gantt Chart
[66]: source = pd.DataFrame([
{"task": "A", "start": 1, "end": 3},
{"task": "B", "start": 3, "end": 8},
{"task": "C", "start": 8, "end": 10}
])

alt.Chart(source).mark_bar().encode(
x='start',
x2='end',
y='task'
).display(renderer='svg')

alt.Chart(…)

1.11.3 Layered chart with Dual-Axis

# In [67]:
source = data.seattle_weather()

base = alt.Chart(source).encode(
    alt.X('month(date):T', axis=alt.Axis(title=None))
)

# Band between the average minimum and average maximum temperature
area = base.mark_area(opacity=0.5, color='#57A44C', line={'color': 'green'}).encode(
    alt.Y('average(temp_max)',
          axis=alt.Axis(title='Avg. Temperature (°C)', titleColor='#57A44C')),
    alt.Y2('average(temp_min)')
)

# Average precipitation, drawn as an area with an outline
line = base.mark_area(stroke='#5276A7', interpolate='monotone', opacity=0.5,
                      line=True).encode(
    alt.Y('average(precipitation)',
          axis=alt.Axis(title='Precipitation (inches)', titleColor='#5276A7'))
)

# Independent y scales give each layer its own axis
(area + line).resolve_scale(
    y='independent'
).display(renderer='svg')

alt.LayerChart(…)

1.11.4 Ridgeline plot Example


# In [68]:
source = data.seattle_weather.url

step = 20     # height of each facet row
overlap = 1   # how far each ridge extends above its own row, in rows

# Pipeline: derive Month -> attach the monthly mean of temp_max -> bin
# temp_max -> count per (Month, bin) -> impute 0 for missing bins.
alt.Chart(source, height=step).transform_timeunit(
    Month='month(date)'
).transform_joinaggregate(
    mean_temp='mean(temp_max)', groupby=['Month']
).transform_bin(
    ['bin_max', 'bin_min'], 'temp_max'
).transform_aggregate(
    value='count()', groupby=['Month', 'mean_temp', 'bin_min', 'bin_max']
).transform_impute(
    impute='value', groupby=['Month', 'mean_temp'], key='bin_min', value=0
).mark_area(
    interpolate='monotone',
    fillOpacity=0.8,
    stroke='lightgray',
    strokeWidth=0.5
).encode(
    alt.X('bin_min:Q', bin='binned', title='Maximum Daily Temperature (C)'),
    alt.Y(
        'value:Q',
        # Range runs from the row's bottom to above its top, so ridges overlap
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None
    ),
    alt.Fill(
        'mean_temp:Q',
        legend=None,
        scale=alt.Scale(domain=[30, 5], scheme='redyellowblue')
    )
).facet(
    row=alt.Row(
        'Month:T',
        title=None,
        header=alt.Header(labelAngle=0, labelAlign='right', format='%B')
    )
).properties(
    title='Seattle Weather',
    bounds='flush'
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
).configure_title(
    anchor='end'
).display(renderer='svg')

alt.FacetChart(…)

29
1.11.5 Boxplot

[69]: source = data.population.url

alt.Chart(source).mark_boxplot().encode(
x='age:O',
y='people:Q'
).properties(width=500).display(renderer='svg')

alt.Chart(…)

30

You might also like