-
Notifications
You must be signed in to change notification settings - Fork 7k
/
Copy pathencoding_decoding.py
99 lines (88 loc) · 3.83 KB
/
encoding_decoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import platform
import statistics
import torch
import torch.utils.benchmark as benchmark
import torchvision
def print_machine_specs():
    """Print a short description of the host CPU and, if usable, the CUDA device.

    The CPU section is always printed. The CUDA section reports the name and
    total memory of the *current* CUDA device; when no CUDA device is usable a
    single "not available" line is printed instead of raising.
    """
    print("Processor:", platform.processor())
    print("Platform:", platform.platform())
    print("Logical CPUs:", os.cpu_count())

    if not torch.cuda.is_available():
        # The device queries below raise without a usable CUDA runtime; report
        # that explicitly so the CPU information above is still useful.
        print("\nCUDA device: not available")
        return

    # Query one device for both lines so the name and the memory figure always
    # describe the same GPU (previously the name came from the current device
    # while the memory was read from device 0).
    device_index = torch.cuda.current_device()
    properties = torch.cuda.get_device_properties(device_index)
    print(f"\nCUDA device: {torch.cuda.get_device_name(device_index)}")
    print(f"Total Memory: {properties.total_memory / 1e9:.2f} GB")
def _keep_first_field(batch):
    """Collate a batch by keeping only the first field of every sample.

    Defined at module level (rather than as a lambda) so that it can be
    pickled when DataLoader worker processes are started with ``spawn``.
    """
    return [sample[0] for sample in batch]


def get_data():
    """Return the first batch of up to 1000 samples from the Places365 "val" split.

    The dataset is rooted at ``./data`` and is downloaded only when a ``data``
    directory does not already exist in the current working directory. Each
    sample is passed through ``PILToTensor``, and the batch is returned as a
    plain Python list holding the first field of every sample (presumably the
    image of an (image, target) pair -- confirm against the dataset).
    """
    transform = torchvision.transforms.Compose(
        [
            torchvision.transforms.PILToTensor(),
        ]
    )
    # Existence of the directory is the only download check; a partially
    # downloaded dataset is not detected here.
    path = os.path.join(os.getcwd(), "data")
    testset = torchvision.datasets.Places365(
        root="./data", download=not os.path.exists(path), transform=transform, split="val"
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=1000, shuffle=False, num_workers=1, collate_fn=_keep_first_field
    )
    # Only the first batch is needed; the loader is not iterated further.
    return next(iter(testloader))
def run_encoding_benchmark(
    decoded_images,
    *,
    devices=("cpu", "cuda"),
    sizes=(1, 100, 1000),
    thread_counts=(1, 12, 24),
):
    """Time JPEG encoding over a grid of devices, batch sizes and thread counts.

    For every combination, two statements are timed with
    ``torch.utils.benchmark.Timer``: "unfused" calls
    ``torchvision.io.encode_jpeg`` once per image, "fused" passes the whole
    list in a single call. All measurements are then printed as one
    ``benchmark.Compare`` table.

    Args:
        decoded_images: Sequence of tensors; each is moved with ``.to(device=...)``.
        devices: Device strings to benchmark on, in order.
        sizes: Batch sizes; each batch is the first ``size`` images. If fewer
            images are supplied the slice is shorter, but the table column is
            still labelled with the requested size.
        thread_counts: Values forwarded as the Timer's ``num_threads``.

    The defaults reproduce the original fixed grid.
    """
    # (strategy label, timed statement). The statements are strings evaluated
    # by the Timer and refer to the name injected through ``globals`` below.
    strategies = (
        ("unfused", "[torchvision.io.encode_jpeg(img) for img in decoded_images_device_trunc]"),
        ("fused", "torchvision.io.encode_jpeg(decoded_images_device_trunc)"),
    )

    results = []
    for device in devices:
        images_on_device = [t.to(device=device) for t in decoded_images]
        for size in sizes:
            # The batch depends only on device and size, so build it once
            # instead of once per (thread count, strategy) pair.
            batch = images_on_device[:size]
            for num_threads in thread_counts:
                for strat, stmt in strategies:
                    timer = benchmark.Timer(
                        stmt=stmt,
                        setup="import torchvision",
                        globals={"decoded_images_device_trunc": batch},
                        label="Image Encoding",
                        sub_label=f"{device.upper()} ({strat}): {stmt}",
                        description=f"{size} images",
                        num_threads=num_threads,
                    )
                    results.append(timer.blocked_autorange())

    compare = benchmark.Compare(results)
    compare.print()
def run_decoding_benchmark(
    encoded_images,
    *,
    devices=("cpu", "cuda"),
    sizes=(1, 100, 1000),
    thread_counts=(1, 12, 24),
):
    """Time JPEG decoding over a grid of devices, batch sizes and thread counts.

    For every combination, two statements are timed with
    ``torch.utils.benchmark.Timer``: "unfused" calls
    ``torchvision.io.decode_jpeg`` once per image, "fused" passes the whole
    list in a single call. The target device is passed to ``decode_jpeg`` via
    its ``device`` argument; the encoded inputs themselves are used as given.
    All measurements are then printed as one ``benchmark.Compare`` table.

    Args:
        encoded_images: Sequence of encoded JPEG tensors.
        devices: Device strings to decode onto, in order.
        sizes: Batch sizes; each batch is the first ``size`` inputs. If fewer
            inputs are supplied the slice is shorter, but the table column is
            still labelled with the requested size.
        thread_counts: Values forwarded as the Timer's ``num_threads``.

    The defaults reproduce the original fixed grid.
    """
    results = []
    for device in devices:
        # (strategy label, timed statement). The device is baked into the
        # statement text, so the table is rebuilt once per device only.
        strategies = (
            ("unfused", f"[torchvision.io.decode_jpeg(img, device='{device}') for img in encoded_images_trunc]"),
            ("fused", f"torchvision.io.decode_jpeg(encoded_images_trunc, device='{device}')"),
        )
        for size in sizes:
            # The batch depends only on size, so build it once instead of once
            # per (thread count, strategy) pair.
            batch = encoded_images[:size]
            for num_threads in thread_counts:
                for strat, stmt in strategies:
                    timer = benchmark.Timer(
                        stmt=stmt,
                        setup="import torchvision",
                        globals={"encoded_images_trunc": batch},
                        label="Image Decoding",
                        sub_label=f"{device.upper()} ({strat}): {stmt}",
                        description=f"{size} images",
                        num_threads=num_threads,
                    )
                    results.append(timer.blocked_autorange())

    compare = benchmark.Compare(results)
    compare.print()
if __name__ == "__main__":
    # Describe the machine first so the numbers below can be put in context.
    print_machine_specs()

    decoded_images = get_data()

    # Average height and width, read from the last two dimensions of each tensor.
    mean_h = statistics.mean(image.shape[-2] for image in decoded_images)
    mean_w = statistics.mean(image.shape[-1] for image in decoded_images)
    print(f"\nMean image size: {int(mean_h)}x{int(mean_w)}")

    run_encoding_benchmark(decoded_images)

    # Produce the decoding inputs by encoding on CUDA in one call, then move
    # the encoded tensors to the CPU before handing them to the benchmark.
    encoded_images_cuda = torchvision.io.encode_jpeg([image.cuda() for image in decoded_images])
    encoded_images_cpu = [encoded.cpu() for encoded in encoded_images_cuda]
    run_decoding_benchmark(encoded_images_cpu)