Lab7 TPU
CPU version
import time
import torch

def benchmark(func, A, B, label, runs=3):
    times = []
    for _ in range(runs):
        torch.cuda.empty_cache()  # no-op on a CPU-only build; frees cached GPU memory otherwise
        start = time.time()
        result = func(A, B)
        if torch.cuda.is_available():
            torch.cuda.synchronize()  # wait for the kernel to finish before stopping the clock
        times.append(time.time() - start)
    avg_time = sum(times) / len(times)
    print(f"{label}: {avg_time:.4f} seconds")
    return avg_time
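The lab never defines MAT, so the benchmark calls below will not run as written. A minimal sketch of what it presumably does, assuming it builds a random N x N float32 matrix on a target device (the name MAT and the device argument are assumptions, not part of the lab):

def MAT(N, device=torch.device("cpu")):
    # Hypothetical helper (not defined anywhere in the lab): assumed to
    # return a random N x N float32 matrix. For the TPU and GPU sections
    # the device argument would need to be the tpu / gpu device instead.
    return torch.randn(N, N, dtype=torch.float32, device=device)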
import torch

cpu = torch.device("cpu")  # target device for this section (presumably consumed by MAT)

results = []
for N in range(1024, 8192 + 1024, 1024):
    results.append(benchmark(torch.matmul, MAT(N), MAT(N), f"N = {N}"))
TPU version
import time
import torch

def benchmark(func, A, B, label, runs=3):
    times = []
    size = A.shape[0] - 1
    mid = size // 2
    for _ in range(runs):
        torch.cuda.empty_cache()  # kept from the GPU version; a no-op on the TPU runtime
        start = time.time()
        result = func(A, B)
        # XLA executes lazily: printing a value that depends on the product
        # forces the matmul to actually run before the timer stops. The
        # expression is multiplied by 0, so each run just prints 0.0 or -0.0.
        print(f"{result[size][size] * result[mid][mid] * result[size][mid] * result[mid][size] * 0}")
        if torch.cuda.is_available():
            torch.cuda.synchronize()  # also a no-op on TPU; kept from the GPU version
        times.append(time.time() - start)
    avg_time = sum(times) / len(times)
    print(f"{label}: {avg_time:.4f} seconds")
    return avg_time
import torch
import torch_xla.core.xla_model as xm

tpu = xm.xla_device()  # target device for this section (presumably consumed by MAT)

results = []
for N in range(1024, 8192 + 1024, 1024):
    results.append(benchmark(torch.matmul, MAT(N), MAT(N), f"N = {N}"))
-0.0
-0.0
-0.0
N = 1024: 0.2381 seconds
-0.0
-0.0
-0.0
N = 2048: 0.2768 seconds
0.0
0.0
0.0
N = 3072: 0.3290 seconds
0.0
0.0
0.0
N = 4096: 0.3554 seconds
-0.0
-0.0
-0.0
N = 5120: 0.4916 seconds
-0.0
-0.0
-0.0
N = 6144: 0.5138 seconds
0.0
0.0
0.0
N = 7168: 0.5576 seconds
-0.0
-0.0
-0.0
N = 8192: 0.5849 seconds
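The print trick above works, but torch_xla also exposes explicit synchronization. A sketch of an alternative loop using xm.mark_step() and xm.wait_device_ops() (both are real torch_xla calls; this variant of the benchmark is an assumption, not part of the lab):

import time
import torch
import torch_xla.core.xla_model as xm

def benchmark_xla(func, A, B, label, runs=3):
    times = []
    for _ in range(runs):
        start = time.time()
        result = func(A, B)
        xm.mark_step()        # cut the lazy XLA graph and dispatch it to the TPU
        xm.wait_device_ops()  # block until the TPU has finished executing
        times.append(time.time() - start)
    avg_time = sum(times) / len(times)
    print(f"{label}: {avg_time:.4f} seconds")
    return avg_time

This avoids printing throwaway 0.0 values just to force evaluation.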
import matplotlib.pyplot as plt
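The matplotlib import suggests the timings were meant to be plotted, but the plotting cell is empty. A minimal sketch, assuming results holds the per-size averages in loop order:

sizes = list(range(1024, 8192 + 1024, 1024))
plt.plot(sizes, results, marker="o")
plt.xlabel("Matrix size N")
plt.ylabel("Average matmul time (s)")
plt.title("N x N matmul time vs. size")
plt.show()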
GPU version
import time
import torch

def benchmark(func, A, B, label, runs=3):
    times = []
    for _ in range(runs):
        torch.cuda.empty_cache()  # release cached GPU memory between runs
        start = time.time()
        result = func(A, B)
        if torch.cuda.is_available():
            torch.cuda.synchronize()  # CUDA kernels launch asynchronously; wait before stopping the clock
        times.append(time.time() - start)
    avg_time = sum(times) / len(times)
    print(f"{label}: {avg_time:.4f} seconds")
    return avg_time
import torch

gpu = torch.device("cuda")  # target device for this section (presumably consumed by MAT)

results = []
for N in range(1024, 8192 + 1024, 1024):
    results.append(benchmark(torch.matmul, MAT(N), MAT(N), f"N = {N}"))
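Wall-clock timing around asynchronous CUDA launches is noisy, and the first call also pays one-time initialization costs. A sketch of a tighter variant using CUDA events (torch.cuda.Event is a real PyTorch API; the warm-up run and this rewrite of the loop are assumptions, not part of the lab):

def benchmark_cuda_events(func, A, B, label, runs=3):
    func(A, B)                    # warm-up run so lazy init isn't timed
    torch.cuda.synchronize()
    times = []
    for _ in range(runs):
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        func(A, B)
        end.record()
        torch.cuda.synchronize()  # ensure both events have been recorded
        times.append(start.elapsed_time(end) / 1000)  # elapsed_time is in ms
    avg_time = sum(times) / len(times)
    print(f"{label}: {avg_time:.4f} seconds")
    return avg_time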