This repository was archived by the owner on May 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 281
/
Copy pathgraph.py
56 lines (51 loc) · 2.05 KB
/
graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Use this to graph the benchmarking results (see benchmark.sh)
#
# To run this:
# - pip install pandas
# - pip install plotly
# - pip install kaleido  (static-image engine used by plotly's fig.write_image)
#
import pandas as pd
import plotly.graph_objects as go
from data_diff.utils import number_to_human
import glob
# Render one grouped bar chart (PNG) per benchmark file and per row count.
#
# Every "benchmark_<sha>.jsonl" file in the current working directory is read
# as JSON Lines (one record per line). For each distinct value of the "rows"
# column an image "benchmark_<sha>_<human-readable row count>.png" is written
# next to it.
for benchmark_file in glob.glob("benchmark_*.jsonl"):
    rows = pd.read_json(benchmark_file, lines=True)
    # Boolean flag: does the test name mention one of the cloud databases?
    # It is only used as the leading sort key, so non-cloud tests come first.
    rows["cloud"] = rows["test"].str.match(r".*(snowflake|redshift|presto|bigquery)")
    # "benchmark_<sha>.jsonl" -> "<sha>"
    sha = benchmark_file.split("_")[1].split(".")[0]
    print(f"Generating graphs from {benchmark_file}..")

    # Group by the bare column name rather than a one-element list: with
    # `groupby(["rows"])`, pandas >= 2.0 yields 1-tuples `(n_rows,)` as group
    # keys (pandas 1.5 warns about it), which would then leak into the file
    # name and the chart title instead of the plain row count.
    for n_rows, group in rows.groupby("rows"):
        human_rows = number_to_human(n_rows)
        image_path = f"benchmark_{sha}_{human_rows}.png"
        print(f"\t rows: {human_rows}, image: {image_path}")

        # Keep a single measurement per benchmark name, then order the bars.
        r = group.drop_duplicates(subset=["name_human"])
        r = r.sort_values(by=["cloud", "source_type", "target_type", "name_human"])

        # (legend label, timing column, label colour) for each bar series.
        series = [
            ("count(*)", "count_max_sec", "blue"),
            ("data-diff (checksum)", "checksum_sec", "red"),
            ("Download and compare †", "download_sec", "green"),
        ]
        fig = go.Figure(
            data=[
                go.Bar(
                    name=label,
                    x=r["name_human"],
                    y=r[column],
                    text=r[column],
                    textfont=dict(color=color),
                )
                for label, column, color in series
            ]
        )
        # Title the chart with the row count and the benchmarked revision.
        fig.update_layout(title=f"data-diff {human_rows} rows, sha: {sha}")
        # Print each bar's value (one decimal) above the bar.
        fig.update_traces(texttemplate="%{text:.1f}", textposition="outside")
        # Hide value labels that would have to shrink below the minimum size.
        fig.update_layout(uniformtext_minsize=2, uniformtext_mode="hide")
        fig.update_yaxes(title="Time")
        fig.write_image(image_path, scale=2)