Skip to content

Commit c457a75

Browse files
committed
MAINT: AVX512 intrinsics implementation for float64 input np.log
1 parent a89f3eb commit c457a75

File tree

7 files changed

+573
-3
lines changed

7 files changed

+573
-3
lines changed

benchmarks/benchmarks/bench_avx.py

+13
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,19 @@ def setup(self, ufuncname, stride, dtype):
3939
def time_ufunc(self, ufuncname, stride, dtype):
4040
self.f(self.arr[::stride])
4141

42+
class AVX_UFunc_log(Benchmark):
    """Benchmark ``np.log`` on strided views of randomly generated data.

    The benchmark is parametrized over ``stride`` and ``dtype`` (both taken
    from module-level lists defined elsewhere in this file).
    """

    params = [stride, dtype]
    param_names = ['stride', 'dtype']
    timeout = 10

    def setup(self, stride, dtype):
        """Create ``self.arr`` so that ``self.arr[::stride]`` has 10000 items.

        Floating-point warnings are silenced for the run via ``np.seterr``.
        """
        np.seterr(all='ignore')
        element_count = 10000
        # Allocate stride * element_count values so the strided view that is
        # timed always holds exactly element_count elements.
        samples = np.random.random_sample(stride * element_count)
        self.arr = np.array(samples, dtype=dtype)

    def time_log(self, stride, dtype):
        """Time ``np.log`` over every ``stride``-th element of ``self.arr``."""
        np.log(self.arr[::stride])
54+
4255
avx_bfuncs = ['maximum',
4356
'minimum']
4457

numpy/core/code_generators/generate_umath.py

+1
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ def english_upper(s):
726726
None,
727727
TD('e', f='log', astype={'e':'f'}),
728728
TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]),
729+
TD('d', simd=[('avx512f', 'd')]),
729730
TD('fdg' + cmplx, f='log'),
730731
TD(P, f='log'),
731732
),

numpy/core/src/umath/loops.c.src

+18
Original file line numberDiff line numberDiff line change
@@ -1558,6 +1558,14 @@ DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void
15581558
*(npy_double *)op1 = npy_exp(in1);
15591559
}
15601560
}
1561+
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1562+
DOUBLE_log(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1563+
{
1564+
UNARY_LOOP {
1565+
const npy_double in1 = *(npy_double *)ip1;
1566+
*(npy_double *)op1 = npy_log(in1);
1567+
}
1568+
}
15611569

15621570
/**begin repeat
15631571
* #isa = avx512f, fma#
@@ -1700,6 +1708,16 @@ DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *step
17001708
}
17011709
}
17021710

1711+
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1712+
DOUBLE_log_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1713+
{
1714+
if (!run_unary_avx512f_log_DOUBLE(args, dimensions, steps)) {
1715+
UNARY_LOOP {
1716+
const npy_double in1 = *(npy_double *)ip1;
1717+
*(npy_double *)op1 = npy_log(in1);
1718+
}
1719+
}
1720+
}
17031721

17041722
/**begin repeat
17051723
* Float types

numpy/core/src/umath/loops.h.src

+6
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,12 @@ DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void
202202
NPY_NO_EXPORT void
203203
DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
204204

205+
/*
 * Inner loop applying npy_log to npy_double elements; the definition lives
 * in loops.c.src. The last argument is unused.
 */
NPY_NO_EXPORT void
206+
DOUBLE_log(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
207+
208+
/*
 * AVX512F variant of the npy_double log inner loop; the definition in
 * loops.c.src tries run_unary_avx512f_log_DOUBLE first and falls back to a
 * scalar npy_log loop. The last argument is unused.
 */
NPY_NO_EXPORT void
209+
DOUBLE_log_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
210+
205211
/**begin repeat
206212
* #func = sin, cos, exp, log#
207213
*/

numpy/core/src/umath/npy_simd_data.h

+145
Original file line numberDiff line numberDiff line change
@@ -134,4 +134,149 @@ static npy_uint64 EXP_Table_tail[32] = {
134134
#define NPY_COEFF_INVF7_SINEf -0x1.a06bbap-13f
135135
#define NPY_COEFF_INVF9_SINEf 0x1.7d3bbcp-19f
136136

137+
/*
138+
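* Lookup table of log(c_k), where c_k = 1 + k/64 for k = 0, ..., 63.
* Entries are stored as IEEE-754 binary64 bit patterns. LOG_TABLE_TOP holds
* the leading part of each value (low mantissa bits are zero) and
* LOG_TABLE_TAIL a small correction term; the two are presumably added
* together to recover log(c_k) with extra precision.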
139+
*/
140+
static npy_uint64 LOG_TABLE_TOP[64] = {
141+
0x0000000000000000,
142+
0x3F8FC0A8B1000000,
143+
0x3F9F829B0E780000,
144+
0x3FA77458F6340000,
145+
0x3FAF0A30C0100000,
146+
0x3FB341D7961C0000,
147+
0x3FB6F0D28AE60000,
148+
0x3FBA926D3A4A0000,
149+
0x3FBE27076E2A0000,
150+
0x3FC0D77E7CD10000,
151+
0x3FC29552F8200000,
152+
0x3FC44D2B6CCB0000,
153+
0x3FC5FF3070A80000,
154+
0x3FC7AB8902110000,
155+
0x3FC9525A9CF40000,
156+
0x3FCAF3C94E810000,
157+
0x3FCC8FF7C79B0000,
158+
0x3FCE27076E2B0000,
159+
0x3FCFB9186D5E0000,
160+
0x3FD0A324E2738000,
161+
0x3FD1675CABAB8000,
162+
0x3FD22941FBCF8000,
163+
0x3FD2E8E2BAE10000,
164+
0x3FD3A64C55698000,
165+
0x3FD4618BC21C8000,
166+
0x3FD51AAD872E0000,
167+
0x3FD5D1BDBF580000,
168+
0x3FD686C81E9B0000,
169+
0x3FD739D7F6BC0000,
170+
0x3FD7EAF83B828000,
171+
0x3FD89A3386C18000,
172+
0x3FD947941C210000,
173+
0x3FD9F323ECBF8000,
174+
0x3FDA9CEC9A9A0000,
175+
0x3FDB44F77BCC8000,
176+
0x3FDBEB4D9DA70000,
177+
0x3FDC8FF7C79A8000,
178+
0x3FDD32FE7E010000,
179+
0x3FDDD46A04C20000,
180+
0x3FDE744261D68000,
181+
0x3FDF128F5FAF0000,
182+
0x3FDFAF588F790000,
183+
0x3FE02552A5A5C000,
184+
0x3FE0723E5C1CC000,
185+
0x3FE0BE72E4254000,
186+
0x3FE109F39E2D4000,
187+
0x3FE154C3D2F4C000,
188+
0x3FE19EE6B467C000,
189+
0x3FE1E85F5E704000,
190+
0x3FE23130D7BEC000,
191+
0x3FE2795E1289C000,
192+
0x3FE2C0E9ED448000,
193+
0x3FE307D7334F0000,
194+
0x3FE34E289D9D0000,
195+
0x3FE393E0D3564000,
196+
0x3FE3D9026A714000,
197+
0x3FE41D8FE8468000,
198+
0x3FE4618BC21C4000,
199+
0x3FE4A4F85DB04000,
200+
0x3FE4E7D811B74000,
201+
0x3FE52A2D265BC000,
202+
0x3FE56BF9D5B40000,
203+
0x3FE5AD404C358000,
204+
0x3FE5EE02A9240000,
205+
};
206+
207+
static npy_uint64 LOG_TABLE_TAIL[64] = {
208+
0x0000000000000000,
209+
0xBD5FE0E183092C59,
210+
0x3D2980267C7E09E4,
211+
0xBD62303B9CB0D5E1,
212+
0x3D662A6617CC9717,
213+
0xBD4717B6B33E44F8,
214+
0xBD62968C836CC8C2,
215+
0x3D6AAC6CA17A4554,
216+
0x3D6E5CBD3D50FFFC,
217+
0xBD6C69A65A23A170,
218+
0xBD35B967F4471DFC,
219+
0x3D6F4799F4F6543E,
220+
0xBD6B0B0DE3077D7E,
221+
0xBD537B720E4A694B,
222+
0x3D65AD1D904C1D4E,
223+
0xBD600349CC67F9B2,
224+
0xBD697794F689F843,
225+
0xBD3A342C2AF0003C,
226+
0x3D5F1546AAA3361C,
227+
0x3D50E35F73F7A018,
228+
0x3D630701CE63EAB9,
229+
0xBD3A6976F5EB0963,
230+
0x3D5D309C2CC91A85,
231+
0xBD6D0B1C68651946,
232+
0xBD609EC17A426426,
233+
0xBD3F4BD8DB0A7CC1,
234+
0x3D4394A11B1C1EE4,
235+
0x3D54AEC442BE1015,
236+
0xBD67FCB18ED9D603,
237+
0x3D67E1B259D2F3DA,
238+
0xBD6ED2A52C73BF78,
239+
0x3D56FABA4CDD147D,
240+
0x3D584BF2B68D766F,
241+
0x3D40931A909FEA5E,
242+
0x3D4EC5197DDB55D3,
243+
0x3D5B7BF7861D37AC,
244+
0x3D5A21AC25DB1EF3,
245+
0xBD542A9E21373414,
246+
0xBD6DAFA08CECADB1,
247+
0x3D3E1F8DF68DBCF3,
248+
0x3D3BB2CD720EC44C,
249+
0xBD49C24CA098362B,
250+
0x3D60FEC69C695D7F,
251+
0x3D6F404E57963891,
252+
0xBD657D49676844CC,
253+
0x3D592DFBC7D93617,
254+
0x3D65E9A98F33A396,
255+
0x3D52DD98B97BAEF0,
256+
0x3D1A07BD8B34BE7C,
257+
0xBD17AFA4392F1BA7,
258+
0xBD5DCA290F818480,
259+
0x3D5D1772F5386374,
260+
0x3D60BE1FB590A1F5,
261+
0xBD6E2CE9146D271A,
262+
0xBD65E6563BBD9FC9,
263+
0x3D66FAA404263D0B,
264+
0xBD5AA33736867A17,
265+
0x3D6EC27D0B7B37B3,
266+
0xBD244FDD840B8591,
267+
0x3D6BB09CB0985646,
268+
0x3D46ABB9DF22BC57,
269+
0xBD58CD7DC73BD194,
270+
0x3D6F2CFB29AAA5F0,
271+
0x3D66757006095FD2,
272+
};
273+
274+
/*
 * Polynomial coefficients for the float64 log approximation. Their values
 * are close to 1/12, 1/80, 1/448 and 1/2304, the coefficients of
 * u^3, u^5, u^7 and u^9 in the series
 *     log(1 + f) = u + u^3/12 + u^5/80 + u^7/448 + ...,  u = 2f / (2 + f).
 * NOTE(review): the kernel that consumes them is not in this header --
 * confirm the exact polynomial form there.
 */
#define NPY_TANG_LOG_A1 0x1.55555555554e6p-4
275+
#define NPY_TANG_LOG_A2 0x1.9999999bac6d4p-7
276+
#define NPY_TANG_LOG_A3 0x1.2492307f1519fp-9
277+
#define NPY_TANG_LOG_A4 0x1.c8034c85dfffp-12
278+
/*
 * ln(2) split into two doubles: LN2HI carries the leading bits (its low
 * mantissa bits are zero) and LN2LO is the small negative remainder, so
 * that LN2HI + LN2LO approximates ln(2) beyond double precision.
 */
279+
#define NPY_TANG_LOG_LN2HI 0x1.62e42fefa4p-1
280+
#define NPY_TANG_LOG_LN2LO -0x1.8432a1b0e2634p-43
281+
137282
#endif

0 commit comments

Comments
 (0)