HW210 1
HW210 1
October 1, 2024
# Preview the first rows of df with a background colour gradient applied
# across each row (axis=1), using the colormap held in custom_cmap.
df.head().style.background_gradient(cmap=custom_cmap, axis=1)
[9]: df  # display the frame; only its last rows appear in the output kept below
1
12650 World Projection-STEPS EV stock share Cars
12651 World Projection-APS EV charging points EV
12652 World Projection-APS EV charging points EV
12653 World Projection-STEPS EV stock share Trucks
[11]: df.isna().sum()  # number of missing entries in each column
[11]: region 0
category 0
parameter 0
mode 0
powertrain 0
year 0
unit 0
value 0
dtype: int64
[13]: df.duplicated().sum()  # number of rows flagged as duplicates of another row
[13]: 0
[15]: df.info()  # print index range, per-column non-null counts, dtypes and memory usage
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12654 entries, 0 to 12653
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 region 12654 non-null object
1 category 12654 non-null object
2 parameter 12654 non-null object
3 mode 12654 non-null object
2
4 powertrain 12654 non-null object
5 year 12654 non-null int64
6 unit 12654 non-null object
7 value 12654 non-null float64
dtypes: float64(1), int64(1), object(6)
memory usage: 791.0+ KB
[17]: # Convert 'year' to int and 'value' to float if they are not already
# Drive both casts from one (column, dtype) table so the intended types
# are listed in a single place; each column is reassigned on df itself.
for column_name, target_dtype in (('year', int), ('value', float)):
    df[column_name] = df[column_name].astype(target_dtype)
[19]: df['value'].describe()  # summary statistics for the 'value' column
3
[67]: fig = px.bar(df.groupby('region').agg({'value': 'sum'}).reset_index(),␣
↪x='region', y='value', title='Value Distribution by Region')
fig.show()
# Plotting heatmap
import seaborn as sns
import matplotlib.pyplot as plt
year value
year 1.000000 0.120782
value 0.120782 1.000000
4
[69]: # Distribution of EV Sales and Stock Over the Years
# px.line joins the rows of each colour group in the order they appear, so
# feeding it the raw filtered rows (many records share a year and parameter)
# draws a zig-zag instead of a trend. Collapse to one summed value per
# (year, parameter) first, the same aggregation the later `ev_data` cell
# applies to this filter.
# NOTE(review): rows with region 'World' are visible in the data; if per-country
# rows are present as well, the sum counts those vehicles twice. Confirm
# whether the plot should be restricted to a single region.
ev_sales_stock_by_year = (
    df[df['parameter'].isin(['EV stock', 'EV sales'])]
    .groupby(['year', 'parameter'], as_index=False)['value']
    .sum()
)
fig = px.line(
    ev_sales_stock_by_year,
    x='year',
    y='value',
    color='parameter',
    title='Distribution of EV Sales and Stock Over the Years',
)
fig.show()
5
[31]: from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()
show(p)
fig.add_trace(go.Scatter(x=ev_stock_sales[ev_stock_sales['parameter'] == 'EV␣
↪stock']['year'],
y=ev_stock_sales[ev_stock_sales['parameter'] == 'EV␣
↪stock']['value'],
mode='lines+markers',
6
name='EV Stock'))
fig.add_trace(go.Scatter(x=ev_stock_sales[ev_stock_sales['parameter'] == 'EV␣
↪sales']['year'],
y=ev_stock_sales[ev_stock_sales['parameter'] == 'EV␣
↪sales']['value'],
mode='lines+markers',
name='EV Sales'))
fig.show()
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.show()
7
[39]: # Aggregate data by powertrain
powertrain_dist = df.groupby('powertrain')['value'].sum().reset_index()
fig.show()
fig.show()
8
[43]: import plotly.express as px
fig.update_geos(projection_type="natural earth")
fig.update_layout(geo=dict(showframe=False, showcoastlines=False))
fig.show()
9
[77]: # Filter data for selected parameters
parameters_of_interest = ['EV stock', 'EV sales', 'EV stock share']
time_series_data = df.loc[df['parameter'].isin(parameters_of_interest)]
# Stacked area chart of the selected parameters over the years, one band per parameter.
# NOTE(review): the rows are plotted without aggregation, and 'EV stock share'
# may not be in the same unit as the two count parameters. Confirm that
# stacking them on one axis is intended.
fig = px.area(
    time_series_data,
    x='year',
    y='value',
    color='parameter',
    title='Share of EV Parameters Over Time',
    labels={'value': 'Value', 'year': 'Year'},
)
fig.show()
fig.show()
10
[81]: # Bubble chart for EV stock over time with region as bubble size
bubble_data = df[df['parameter'] == 'EV stock']
fig.show()
[83]: # Sunburst chart for hierarchical view of categories, parameters, and regions
# Ring order follows `path`: category innermost, then parameter, then region;
# wedge sizes come from the 'value' column.
fig = px.sunburst(
    df,
    path=['category', 'parameter', 'region'],
    values='value',
    title='Hierarchical View of EV Data by Category, Parameter, and Region',
    labels={'value': 'Value'},
)
fig.show()
11
[55]: import plotly.express as px
fig.show()
12
[87]: ev_data = df[df['parameter'].isin(['EV stock', 'EV sales'])].groupby(['year',␣
↪'parameter'])['value'].sum().reset_index()
13
[ ]:
14