|
1 | 1 | #ifndef __NPY_SIMD_DATA_H_
|
2 | 2 | #define __NPY_SIMD_DATA_H_
|
3 | 3 | #if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
|
| 4 | +#if !(defined(__clang__) && (__clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 1))) |
4 | 5 | /*
|
5 | 6 | * Constants used in vector implementation of float64 exp(x)
|
6 | 7 | */
|
@@ -85,6 +86,7 @@ static npy_uint64 EXP_Table_tail[32] = {
|
85 | 86 | 0x3C99D3E12DD8A18B,
|
86 | 87 | };
|
87 | 88 | #endif
|
| 89 | +#endif |
88 | 90 |
|
89 | 91 | /*
|
90 | 92 | * Constants used in vector implementation of exp(x)
|
@@ -134,4 +136,156 @@ static npy_uint64 EXP_Table_tail[32] = {
|
134 | 136 | #define NPY_COEFF_INVF7_SINEf -0x1.a06bbap-13f
|
135 | 137 | #define NPY_COEFF_INVF9_SINEf 0x1.7d3bbcp-19f
|
136 | 138 |
|
| 139 | +/* |
| 140 | + * Lookup table of log(c_k) |
| 141 | + * Reference from: Tang, Ping-Tak Peter. "Table-driven implementation of the |
| 142 | + * logarithm function in IEEE floating-point arithmetic." ACM Transactions |
| 143 | + * on Mathematical Software (TOMS) 16.4 (1990): 378-400. |
| 144 | + */ |
| 145 | +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS |
| 146 | +#if !(defined(__clang__) && (__clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 1))) |
| 147 | +static npy_uint64 LOG_TABLE_TOP[64] = { |
| 148 | + 0x0000000000000000, |
| 149 | + 0x3F8FC0A8B1000000, |
| 150 | + 0x3F9F829B0E780000, |
| 151 | + 0x3FA77458F6340000, |
| 152 | + 0x3FAF0A30C0100000, |
| 153 | + 0x3FB341D7961C0000, |
| 154 | + 0x3FB6F0D28AE60000, |
| 155 | + 0x3FBA926D3A4A0000, |
| 156 | + 0x3FBE27076E2A0000, |
| 157 | + 0x3FC0D77E7CD10000, |
| 158 | + 0x3FC29552F8200000, |
| 159 | + 0x3FC44D2B6CCB0000, |
| 160 | + 0x3FC5FF3070A80000, |
| 161 | + 0x3FC7AB8902110000, |
| 162 | + 0x3FC9525A9CF40000, |
| 163 | + 0x3FCAF3C94E810000, |
| 164 | + 0x3FCC8FF7C79B0000, |
| 165 | + 0x3FCE27076E2B0000, |
| 166 | + 0x3FCFB9186D5E0000, |
| 167 | + 0x3FD0A324E2738000, |
| 168 | + 0x3FD1675CABAB8000, |
| 169 | + 0x3FD22941FBCF8000, |
| 170 | + 0x3FD2E8E2BAE10000, |
| 171 | + 0x3FD3A64C55698000, |
| 172 | + 0x3FD4618BC21C8000, |
| 173 | + 0x3FD51AAD872E0000, |
| 174 | + 0x3FD5D1BDBF580000, |
| 175 | + 0x3FD686C81E9B0000, |
| 176 | + 0x3FD739D7F6BC0000, |
| 177 | + 0x3FD7EAF83B828000, |
| 178 | + 0x3FD89A3386C18000, |
| 179 | + 0x3FD947941C210000, |
| 180 | + 0x3FD9F323ECBF8000, |
| 181 | + 0x3FDA9CEC9A9A0000, |
| 182 | + 0x3FDB44F77BCC8000, |
| 183 | + 0x3FDBEB4D9DA70000, |
| 184 | + 0x3FDC8FF7C79A8000, |
| 185 | + 0x3FDD32FE7E010000, |
| 186 | + 0x3FDDD46A04C20000, |
| 187 | + 0x3FDE744261D68000, |
| 188 | + 0x3FDF128F5FAF0000, |
| 189 | + 0x3FDFAF588F790000, |
| 190 | + 0x3FE02552A5A5C000, |
| 191 | + 0x3FE0723E5C1CC000, |
| 192 | + 0x3FE0BE72E4254000, |
| 193 | + 0x3FE109F39E2D4000, |
| 194 | + 0x3FE154C3D2F4C000, |
| 195 | + 0x3FE19EE6B467C000, |
| 196 | + 0x3FE1E85F5E704000, |
| 197 | + 0x3FE23130D7BEC000, |
| 198 | + 0x3FE2795E1289C000, |
| 199 | + 0x3FE2C0E9ED448000, |
| 200 | + 0x3FE307D7334F0000, |
| 201 | + 0x3FE34E289D9D0000, |
| 202 | + 0x3FE393E0D3564000, |
| 203 | + 0x3FE3D9026A714000, |
| 204 | + 0x3FE41D8FE8468000, |
| 205 | + 0x3FE4618BC21C4000, |
| 206 | + 0x3FE4A4F85DB04000, |
| 207 | + 0x3FE4E7D811B74000, |
| 208 | + 0x3FE52A2D265BC000, |
| 209 | + 0x3FE56BF9D5B40000, |
| 210 | + 0x3FE5AD404C358000, |
| 211 | + 0x3FE5EE02A9240000, |
| 212 | +}; |
| 213 | + |
| 214 | +static npy_uint64 LOG_TABLE_TAIL[64] = { |
| 215 | + 0x0000000000000000, |
| 216 | + 0xBD5FE0E183092C59, |
| 217 | + 0x3D2980267C7E09E4, |
| 218 | + 0xBD62303B9CB0D5E1, |
| 219 | + 0x3D662A6617CC9717, |
| 220 | + 0xBD4717B6B33E44F8, |
| 221 | + 0xBD62968C836CC8C2, |
| 222 | + 0x3D6AAC6CA17A4554, |
| 223 | + 0x3D6E5CBD3D50FFFC, |
| 224 | + 0xBD6C69A65A23A170, |
| 225 | + 0xBD35B967F4471DFC, |
| 226 | + 0x3D6F4799F4F6543E, |
| 227 | + 0xBD6B0B0DE3077D7E, |
| 228 | + 0xBD537B720E4A694B, |
| 229 | + 0x3D65AD1D904C1D4E, |
| 230 | + 0xBD600349CC67F9B2, |
| 231 | + 0xBD697794F689F843, |
| 232 | + 0xBD3A342C2AF0003C, |
| 233 | + 0x3D5F1546AAA3361C, |
| 234 | + 0x3D50E35F73F7A018, |
| 235 | + 0x3D630701CE63EAB9, |
| 236 | + 0xBD3A6976F5EB0963, |
| 237 | + 0x3D5D309C2CC91A85, |
| 238 | + 0xBD6D0B1C68651946, |
| 239 | + 0xBD609EC17A426426, |
| 240 | + 0xBD3F4BD8DB0A7CC1, |
| 241 | + 0x3D4394A11B1C1EE4, |
| 242 | + 0x3D54AEC442BE1015, |
| 243 | + 0xBD67FCB18ED9D603, |
| 244 | + 0x3D67E1B259D2F3DA, |
| 245 | + 0xBD6ED2A52C73BF78, |
| 246 | + 0x3D56FABA4CDD147D, |
| 247 | + 0x3D584BF2B68D766F, |
| 248 | + 0x3D40931A909FEA5E, |
| 249 | + 0x3D4EC5197DDB55D3, |
| 250 | + 0x3D5B7BF7861D37AC, |
| 251 | + 0x3D5A21AC25DB1EF3, |
| 252 | + 0xBD542A9E21373414, |
| 253 | + 0xBD6DAFA08CECADB1, |
| 254 | + 0x3D3E1F8DF68DBCF3, |
| 255 | + 0x3D3BB2CD720EC44C, |
| 256 | + 0xBD49C24CA098362B, |
| 257 | + 0x3D60FEC69C695D7F, |
| 258 | + 0x3D6F404E57963891, |
| 259 | + 0xBD657D49676844CC, |
| 260 | + 0x3D592DFBC7D93617, |
| 261 | + 0x3D65E9A98F33A396, |
| 262 | + 0x3D52DD98B97BAEF0, |
| 263 | + 0x3D1A07BD8B34BE7C, |
| 264 | + 0xBD17AFA4392F1BA7, |
| 265 | + 0xBD5DCA290F818480, |
| 266 | + 0x3D5D1772F5386374, |
| 267 | + 0x3D60BE1FB590A1F5, |
| 268 | + 0xBD6E2CE9146D271A, |
| 269 | + 0xBD65E6563BBD9FC9, |
| 270 | + 0x3D66FAA404263D0B, |
| 271 | + 0xBD5AA33736867A17, |
| 272 | + 0x3D6EC27D0B7B37B3, |
| 273 | + 0xBD244FDD840B8591, |
| 274 | + 0x3D6BB09CB0985646, |
| 275 | + 0x3D46ABB9DF22BC57, |
| 276 | + 0xBD58CD7DC73BD194, |
| 277 | + 0x3D6F2CFB29AAA5F0, |
| 278 | + 0x3D66757006095FD2, |
| 279 | +}; |
| 280 | + |
| 281 | +#define NPY_TANG_LOG_A1 0x1.55555555554e6p-4 |
| 282 | +#define NPY_TANG_LOG_A2 0x1.9999999bac6d4p-7 |
| 283 | +#define NPY_TANG_LOG_A3 0x1.2492307f1519fp-9 |
| 284 | +#define NPY_TANG_LOG_A4 0x1.c8034c85dfffp-12 |
| 285 | + |
| 286 | +#define NPY_TANG_LOG_LN2HI 0x1.62e42fefa4p-1 |
| 287 | +#define NPY_TANG_LOG_LN2LO -0x1.8432a1b0e2634p-43 |
| 288 | +#endif |
| 289 | +#endif |
| 290 | + |
137 | 291 | #endif
|
0 commit comments