Skip to content

Commit 87381b2

Browse files
author
qiyu8
committed
Simplify the benchmark case by focusing only on float64 input and SSE2 optimization
1 parent 2e2e9d5 commit 87381b2

File tree

2 files changed

+12
-34
lines changed

2 files changed

+12
-34
lines changed

benchmarks/benchmarks/bench_linalg.py

+12-31
Original file line numberDiff line numberDiff line change
@@ -107,45 +107,26 @@ def time_numpy_linalg_lstsq_a__b_float64(self):
107107

108108
class Einsum(Benchmark):
109109
param_names = ['dtype']
110-
params = [[np.float32, np.float64]]
110+
params = [[np.float64]]
111111
def setup(self, dtype):
112-
self.a = np.arange(3000, dtype=dtype)
113-
self.b = np.arange(2990, dtype=dtype)
112+
self.a = np.arange(2900, dtype=dtype)
113+
self.b = np.arange(3000, dtype=dtype)
114114
self.c = np.arange(24000, dtype=dtype).reshape(20, 30, 40)
115115
self.c1 = np.arange(1200, dtype=dtype).reshape(30, 40)
116-
self.c2 = np.arange(40, dtype=dtype)
117-
self.c3 = np.arange(30000, dtype=dtype).reshape(30, 20, 50)
118-
self.d = np.arange(2*1000, dtype=dtype).reshape(2, 1000)
119-
self.e = np.arange(100*100, dtype=dtype).reshape(100, 100)
116+
self.d = np.arange(10000, dtype=dtype).reshape(10,100,10)
120117

121-
#outer(a,b)
118+
# outer(a, b): trigger sum_of_products_contig_stride0_outcontig_two
122119
def time_einsum_outer(self, dtype):
123120
np.einsum("i,j", self.a, self.b, optimize=True)
124121

125-
#inner(a,b)
126-
def time_einsum_inner(self, dtype):
127-
np.einsum("...i, ...i", self.c, self.c2, optimize=True)
128-
129-
# swap axes
130-
def time_einsum_swap(self, dtype):
131-
np.einsum("ijk->jik", self.c, optimize=True)
132-
133-
# sum(a, axis=0)
134-
def time_einsum_sum(self, dtype):
135-
np.einsum("i...->...", self.d, optimize=True)
136-
137-
# trace(a)
138-
def time_einsum_trace(self, dtype):
139-
np.einsum("ii", self.e, optimize=True)
140-
141-
# multiply(a, b)
122+
# multiply(a, b): trigger sum_of_products_contig_two
142123
def time_einsum_multiply(self, dtype):
143124
np.einsum("..., ...", self.c1, self.c , optimize=True)
144125

145-
# tensordot(a, b)
146-
def time_einsum_tensordot(self, dtype):
147-
np.einsum("ijk, jil -> kl", self.c, self.c3 , optimize=True)
126+
# sum and multiply: trigger sum_of_products_contig_stride0_outstride0_two
127+
def time_einsum_sum_mul(self, dtype):
128+
np.einsum(",i...->", 300, self.d, optimize=True)
148129

149-
# a.dot(b)
150-
def time_einsum_matmat(self, dtype):
151-
np.einsum("ij,jk", self.e, self.e , optimize=True)
130+
# sum and multiply: trigger sum_of_products_stride0_contig_outstride0_two
131+
def time_einsum_sum_mul2(self, dtype):
132+
np.einsum("i...,->", self.d, 300, optimize=True)

numpy/core/src/multiarray/einsum.c.src

-3
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@
3131
#define EINSUM_USE_SSE1 0
3232
#endif
3333

34-
/*
35-
* TODO: Only some SSE2 for float64 is implemented.
36-
*/
3734
#ifdef NPY_HAVE_SSE2_INTRINSICS
3835
#define EINSUM_USE_SSE2 1
3936
#else

0 commit comments

Comments
 (0)