0% found this document useful (0 votes)
34 views14 pages

HW210 1

The document contains a Python script that analyzes a dataset on global electric vehicle (EV) data for 2024 using libraries such as pandas, seaborn, and plotly. It includes data loading, cleaning, and various visualizations to explore trends in EV stock, sales, and other parameters across different regions and years. The script also generates statistical summaries and correlation analyses to provide insights into the EV market.

Uploaded by

thaophuong51006
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
34 views14 pages

HW210 1

The document contains a Python script that analyzes a dataset on global electric vehicle (EV) data for 2024 using libraries such as pandas, seaborn, and plotly. It includes data loading, cleaning, and various visualizations to explore trends in EV stock, sales, and other parameters across different regions and years. The script also generates statistical summaries and correlation analyses to provide insights into the EV market.

Uploaded by

thaophuong51006
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 14

st8lgwlg3

October 1, 2024

[1]: import numpy as np # linear algebra


import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

[3]: # importing useful libraries


import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import bokeh.plotting as bkp
from bokeh.models import ColumnDataSource
from bokeh.io import show
warnings.filterwarnings("ignore")

[5]: df = pd.read_csv("Global EV Data 2024.csv")


# Data description: preview the first rows with a colour gradient so that
# relative magnitudes are easy to spot.
custom_cmap = sns.color_palette("BrBG", as_cmap=True)  # Or use any other colormap

# axis=1 computes the gradient across each row rather than down each column.
df.head().style.background_gradient(cmap=custom_cmap, axis=1)

[5]: <pandas.io.formats.style.Styler at 0x200b2b16390>

[7]: df.describe(include='all').style.background_gradient(cmap=custom_cmap, axis=1)

[7]: <pandas.io.formats.style.Styler at 0x200b1790f80>

[9]: df

[9]: region category parameter mode \


0 Australia Historical EV stock share Cars
1 Australia Historical EV sales share Cars
2 Australia Historical EV sales Cars
3 Australia Historical EV stock Cars
4 Australia Historical EV stock Cars
… … … … …
12649 World Projection-STEPS EV sales share Cars

1
12650 World Projection-STEPS EV stock share Cars
12651 World Projection-APS EV charging points EV
12652 World Projection-APS EV charging points EV
12653 World Projection-STEPS EV stock share Trucks

powertrain year unit value


0 EV 2011 percent 3.900000e-04
1 EV 2011 percent 6.500000e-03
2 BEV 2011 Vehicles 4.900000e+01
3 BEV 2011 Vehicles 4.900000e+01
4 BEV 2012 Vehicles 2.200000e+02
… … … … …
12649 EV 2035 percent 5.500000e+01
12650 EV 2035 percent 3.100000e+01
12651 Publicly available fast 2035 charging points 9.400000e+06
12652 Publicly available slow 2035 charging points 1.500000e+07
12653 EV 2035 percent 9.000000e+00

[12654 rows x 8 columns]

[11]: df.isna().sum()

[11]: region 0
category 0
parameter 0
mode 0
powertrain 0
year 0
unit 0
value 0
dtype: int64

[13]: df.duplicated().sum()

[13]: 0

[15]: df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12654 entries, 0 to 12653
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 region 12654 non-null object
1 category 12654 non-null object
2 parameter 12654 non-null object
3 mode 12654 non-null object

2
4 powertrain 12654 non-null object
5 year 12654 non-null int64
6 unit 12654 non-null object
7 value 12654 non-null float64
dtypes: float64(1), int64(1), object(6)
memory usage: 791.0+ KB

[17]: # Convert 'year' to int and 'value' to float if they are not already
df['year'] = df['year'].astype(int)
df['value'] = df['value'].astype(float)

[19]: df['value'].describe()

[19]: count 1.265400e+04


mean 4.273742e+05
std 6.860498e+06
min 1.200000e-06
25% 2.000000e+00
50% 1.300000e+02
75% 5.500000e+03
max 4.400000e+08
Name: value, dtype: float64

[21]: fig = px.histogram(df, x='value', title='Distribution of Values')


fig.show()

[65]: fig = px.box(df, x='category', y='value', title='Comparison of Categories')


fig.show()

3
[67]: fig = px.bar(df.groupby('region').agg({'value': 'sum'}).reset_index(),␣
↪x='region', y='value', title='Value Distribution by Region')

fig.show()

[27]: correlation_matrix = df[['year', 'value']].corr()


print(correlation_matrix)

# Plotting heatmap
import seaborn as sns
import matplotlib.pyplot as plt

sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')


plt.title('Correlation Heatmap')
plt.show()

year value
year 1.000000 0.120782
value 0.120782 1.000000

4
[69]: # Distribution of EV Sales and Stock Over the Years
# Collapse to one total per (year, parameter) before drawing. The raw frame
# holds many rows per year (one per region / mode / powertrain), and a line
# trace connects its points in row order, so plotting the rows directly gives
# a tangled zig-zag instead of a trend line.
# NOTE(review): the sum spans every region (including aggregates such as
# 'World') and every category, so absolute levels overcount -- filter first if
# the totals themselves matter.
stock_and_sales = df[df['parameter'].isin(['EV stock', 'EV sales'])]
yearly_totals = (
    stock_and_sales
    .groupby(['year', 'parameter'], as_index=False)['value']
    .sum()
)

fig = px.line(yearly_totals, x='year', y='value', color='parameter',
              title='Distribution of EV Sales and Stock Over the Years')
fig.show()

5
[31]: from bokeh.plotting import figure, show
from bokeh.io import output_notebook

output_notebook()

# Scatter plot of EV Stock Share vs. Year
p = figure(title="EV Stock Share vs. Year", x_axis_label='Year',
           y_axis_label='EV Stock Share')

# Select the stock-share rows once and reuse them for both axes instead of
# evaluating the same boolean filter twice.
stock_share = df[df['parameter'] == 'EV stock share']
p.scatter(stock_share['year'], stock_share['value'],
          size=10, color="navy", alpha=0.5)

show(p)

[71]: # Pie chart of Powertrain distribution


powertrain_dist = df.groupby('powertrain')['value'].sum()
fig = px.pie(values=powertrain_dist.values,names=powertrain_dist.index)
fig.show()

[73]: import plotly.graph_objects as go

# Filter data for EV stock and sales
ev_stock_sales = df[df['parameter'].isin(['EV stock', 'EV sales'])]

# Reduce to one total per (parameter, year). The filtered frame has many rows
# per year, and a 'lines+markers' trace joins points in row order, so the raw
# rows would be drawn as a scribble rather than a trend.
ev_yearly_totals = (
    ev_stock_sales
    .groupby(['parameter', 'year'], as_index=False)['value']
    .sum()
)

# Create line plots for both EV stock and EV sales
fig = go.Figure()

# (value in the 'parameter' column, legend label) for each trace.
trace_specs = (
    ('EV stock', 'EV Stock'),
    ('EV sales', 'EV Sales'),
)
for parameter, trace_name in trace_specs:
    series = ev_yearly_totals[ev_yearly_totals['parameter'] == parameter]
    fig.add_trace(go.Scatter(x=series['year'],
                             y=series['value'],
                             mode='lines+markers',
                             name=trace_name))

fig.update_layout(title='Trend Analysis of EV Stock and Sales Over the Years',
                  xaxis_title='Year',
                  yaxis_title='Value',
                  legend_title='Parameter')

fig.show()

[75]: # Aggregate EV stock by region


# Total EV stock per region, summed over every year and every other column.
ev_stock_by_region = (
    df[df['parameter'] == 'EV stock']
    .groupby('region')['value']
    .sum()
    .reset_index()
)

# Bar plot for EV stock by region
fig = px.bar(ev_stock_by_region, x='region', y='value',
             title='EV Stock by Region',
             labels={'value': 'Total EV Stock', 'region': 'Region'},
             text='value')

# Print each bar's value above it; '.2s' formats it with an SI prefix and two
# significant digits.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.show()

7
[39]: # Aggregate data by powertrain
powertrain_dist = df.groupby('powertrain')['value'].sum().reset_index()

# Pie chart for powertrain distribution


fig = px.pie(powertrain_dist, values='value', names='powertrain',
title='Distribution of Powertrains',
labels={'value': 'Total Value'})

fig.show()

[41]: # Calculate yearly growth rate of EV stock


# Sum EV stock per year, then take the relative change from the previous row.
# pct_change compares consecutive rows, so the first year is NaN and any gap
# in the year sequence is measured across the whole gap rather than per year.
ev_stock = (
    df[df['parameter'] == 'EV stock']
    .groupby('year')['value']
    .sum()
    .pct_change()
    .reset_index()
)

# Line plot for yearly growth rate of EV stock
fig = px.line(ev_stock, x='year', y='value',
              title='Yearly Growth Rate of EV Stock',
              labels={'value': 'Growth Rate', 'year': 'Year'})

fig.show()

8
[43]: import plotly.express as px

# Filter data for a specific parameter, such as EV stock


map_data = df[df['parameter'] == 'EV stock']

# Aggregate the data by region and year: one summed value per (region, year)
# pair, returned as a flat frame with 'region', 'year' and 'value' columns.
map_data_aggregated = (
    map_data
    .groupby(['region', 'year'])['value']
    .sum()
    .reset_index()
)

[45]: # Create a choropleth map


# Animated map of EV stock: one animation frame per year, coloured by value.
# NOTE(review): locationmode='country names' matches on country names, so
# aggregate regions present in the data (e.g. 'World') presumably are not
# drawn -- confirm against the rendered map.
choropleth_settings = {
    'locations': "region",
    'locationmode': 'country names',
    'color': "value",
    'hover_name': "region",
    'animation_frame': "year",
    'color_continuous_scale': px.colors.sequential.Plasma,
    'title': "EV Stock by Region Over Time",
    'labels': {'value': 'EV Stock'},
}
fig = px.choropleth(map_data_aggregated, **choropleth_settings)

# Natural-earth projection with the map frame and coastlines hidden.
fig.update_geos(projection_type="natural earth")
fig.update_layout(geo={'showframe': False, 'showcoastlines': False})

fig.show()

9
[77]: # Filter data for selected parameters
parameters_of_interest = ['EV stock', 'EV sales', 'EV stock share']
time_series_data = df[df['parameter'].isin(parameters_of_interest)]
# Stacked area plot to show the share of each parameter over time
fig = px.area(time_series_data, x='year', y='value', color='parameter',
title='Share of EV Parameters Over Time',
labels={'value': 'Value', 'year': 'Year'})

fig.show()

[79]: # Box plot for distribution of EV stock across regions


fig = px.box(df[df['parameter'] == 'EV stock'],
x='region', y='value',
title='Distribution of EV Stock Across Regions',
labels={'value': 'EV Stock', 'region': 'Region'})

fig.show()

10
[81]: # Bubble chart for EV stock over time: bubble size is the stock value, color is the region
# Keep only the EV stock rows; each remaining row becomes one bubble.
bubble_data = df.loc[df['parameter'] == 'EV stock']

bubble_labels = {'value': 'EV Stock', 'year': 'Year'}
fig = px.scatter(
    bubble_data,
    x='year',
    y='value',
    size='value',        # bubble size follows the stock value
    color='region',      # one colour per region
    hover_name='region',
    size_max=60,
    title='EV Stock Over Time by Region',
    labels=bubble_labels,
)

fig.show()

[83]: # Sunburst chart for hierarchical view of categories, parameters, and regions
# Rings run from the centre outwards in the order given by `path`
# (category, then parameter, then region); sector sizes come from 'value'.
fig = px.sunburst(
    df,
    path=['category', 'parameter', 'region'],
    values='value',
    title='Hierarchical View of EV Data by Category, Parameter, and Region',
    labels={'value': 'Value'},
)

fig.show()

11
[55]: import plotly.express as px

# 3D scatter of every row: year on x, value on y, and region on z, with the
# region also used as the colour key.
fig = px.scatter_3d(
    df,
    x='year', y='value', z='region',
    color='region',
    title='3D Scatter Plot of EV Data',
    labels={'value': 'Value', 'year': 'Year', 'region': 'Region'},
)

fig.show()

[85]: ev_stock_total = df[df['parameter'] == 'EV stock'].groupby('year')['value'].


↪sum().reset_index()

fig = px.line(ev_stock_total, x='year', y='value',


title='Total EV Stock Over Years',
labels={'value': 'Total EV Stock', 'year': 'Year'})
fig.show()

12
[87]: ev_data = df[df['parameter'].isin(['EV stock', 'EV sales'])].groupby(['year',␣
↪'parameter'])['value'].sum().reset_index()

fig = px.bar(ev_data, x='year', y='value', color='parameter',


title='Total EV Stock and Sales Over Years',
labels={'value': 'Total', 'year': 'Year'})
fig.show()

[89]: fig = px.histogram(df[df['parameter'] == 'EV stock share'], x='value',


title='Distribution of EV Stock Share',
labels={'value': 'EV Stock Share'})
fig.show()

13
[ ]:

14

You might also like