Skip to content

Commit 2e2e9d5

Browse files
author
qiyu8
committed
improve float64 performance by sse2
1 parent 95289be commit 2e2e9d5

File tree

1 file changed

+45
-1
lines changed

1 file changed

+45
-1
lines changed

benchmarks/benchmarks/bench_linalg.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import numpy as np
44

5-
65
class Eindot(Benchmark):
76
def setup(self):
87
self.a = np.arange(60000.0).reshape(150, 400)
@@ -105,3 +104,48 @@ def setup(self):
105104

106105
def time_numpy_linalg_lstsq_a__b_float64(self):
    """Time ``np.linalg.lstsq(self.a, self.b, rcond=-1)``.

    The result is discarded; only the call itself is being measured.
    """
    lhs, rhs = self.a, self.b
    np.linalg.lstsq(lhs, rhs, rcond=-1)
107+
108+
class Einsum(Benchmark):
    """Time ``np.einsum`` over a set of subscript patterns.

    Each benchmark is parametrized over ``dtype`` (``np.float32`` and
    ``np.float64``) and always calls ``np.einsum`` with ``optimize=True``.
    """

    param_names = ['dtype']
    params = [[np.float32, np.float64]]

    def setup(self, dtype):
        """Build the ``arange``-filled operands in the requested dtype."""
        # 1-D operands.
        self.a = np.arange(3000, dtype=dtype)
        self.b = np.arange(2990, dtype=dtype)
        self.c2 = np.arange(40, dtype=dtype)

        # 2-D operands.
        self.c1 = np.arange(30 * 40, dtype=dtype).reshape(30, 40)
        self.d = np.arange(2 * 1000, dtype=dtype).reshape(2, 1000)
        self.e = np.arange(100 * 100, dtype=dtype).reshape(100, 100)

        # 3-D operands.
        self.c = np.arange(20 * 30 * 40, dtype=dtype).reshape(20, 30, 40)
        self.c3 = np.arange(30 * 20 * 50, dtype=dtype).reshape(30, 20, 50)

    def time_einsum_outer(self, dtype):
        """outer(a, b) of the two 1-D operands."""
        np.einsum("i,j", self.a, self.b, optimize=True)

    def time_einsum_inner(self, dtype):
        """inner(a, b) along the last axis of ``c`` and ``c2``."""
        np.einsum("...i, ...i", self.c, self.c2, optimize=True)

    def time_einsum_swap(self, dtype):
        """Swap the first two axes of ``c``."""
        np.einsum("ijk->jik", self.c, optimize=True)

    def time_einsum_sum(self, dtype):
        """sum(a, axis=0) applied to ``d``."""
        np.einsum("i...->...", self.d, optimize=True)

    def time_einsum_trace(self, dtype):
        """trace(a) of the square operand ``e``."""
        np.einsum("ii", self.e, optimize=True)

    def time_einsum_multiply(self, dtype):
        """multiply(a, b) of ``c1`` and ``c`` via broadcasting."""
        np.einsum("..., ...", self.c1, self.c, optimize=True)

    def time_einsum_tensordot(self, dtype):
        """tensordot(a, b) contracting ``i`` and ``j`` of ``c`` and ``c3``."""
        np.einsum("ijk, jil -> kl", self.c, self.c3, optimize=True)

    def time_einsum_matmat(self, dtype):
        """a.dot(b) of ``e`` with itself."""
        np.einsum("ij,jk", self.e, self.e, optimize=True)

0 commit comments

Comments
 (0)