Skip to content

Commit f094768

Browse files
ENH: Implement SIMD versions of isnan, isinf, isfinite and signbit
NumPy has SIMD versions of the float / double `isnan`, `isinf`, `isfinite`, and `signbit` loops for SSE2 and AVX-512. The changes here replace the SSE2 version with one that uses NumPy's universal intrinsics, which allows other architectures to have SIMD versions of these functions too.
1 parent d8c09c5 commit f094768

File tree

5 files changed

+417
-277
lines changed

5 files changed

+417
-277
lines changed

numpy/core/code_generators/generate_umath.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -909,7 +909,7 @@ def english_upper(s):
909909
Ufunc(1, 1, None,
910910
docstrings.get('numpy.core.umath.isnan'),
911911
'PyUFunc_IsFiniteTypeResolver',
912-
TD(noobj, simd=[('avx512_skx', 'fd')], out='?'),
912+
TD(noobj, out='?', dispatch=[('loops_unary_fp', inexactvec)]),
913913
),
914914
'isnat':
915915
Ufunc(1, 1, None,
@@ -921,19 +921,19 @@ def english_upper(s):
921921
Ufunc(1, 1, None,
922922
docstrings.get('numpy.core.umath.isinf'),
923923
'PyUFunc_IsFiniteTypeResolver',
924-
TD(noobj, simd=[('avx512_skx', 'fd')], out='?'),
924+
TD(noobj, out='?', dispatch=[('loops_unary_fp', inexactvec)]),
925925
),
926926
'isfinite':
927927
Ufunc(1, 1, None,
928928
docstrings.get('numpy.core.umath.isfinite'),
929929
'PyUFunc_IsFiniteTypeResolver',
930-
TD(noobj, simd=[('avx512_skx', 'fd')], out='?'),
930+
TD(noobj, out='?', dispatch=[('loops_unary_fp', inexactvec)]),
931931
),
932932
'signbit':
933933
Ufunc(1, 1, None,
934934
docstrings.get('numpy.core.umath.signbit'),
935935
None,
936-
TD(flts, simd=[('avx512_skx', 'fd')], out='?'),
936+
TD(flts, out='?', dispatch=[('loops_unary_fp', inexactvec)]),
937937
),
938938
'copysign':
939939
Ufunc(2, 1, None,

numpy/core/src/umath/loops.c.src

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,6 +1404,8 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const *
14041404
* #TYPE = FLOAT, DOUBLE, LONGDOUBLE#
14051405
* #c = f, , l#
14061406
* #C = F, , L#
1407+
* #fd = 1, 1, 0#
1408+
* #VCHK = 1, 1, 0#
14071409
*/
14081410
/**begin repeat1
14091411
* #kind = logical_and, logical_or#
@@ -1440,28 +1442,22 @@ NPY_NO_EXPORT void
14401442
}
14411443
}
14421444

1445+
#if !@fd@
14431446
/**begin repeat1
14441447
* #kind = isnan, isinf, isfinite, signbit#
14451448
* #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit#
14461449
**/
1447-
1448-
/**begin repeat2
1449-
* #ISA = , _avx512_skx#
1450-
* #isa = simd, avx512_skx#
1451-
**/
14521450
NPY_NO_EXPORT void
1453-
@TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1451+
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
14541452
{
1455-
if (!run_@kind@_@isa@_@TYPE@(args, dimensions, steps)) {
1456-
UNARY_LOOP {
1457-
const @type@ in1 = *(@type@ *)ip1;
1458-
*((npy_bool *)op1) = @func@(in1) != 0;
1459-
}
1453+
UNARY_LOOP {
1454+
const @type@ in1 = *(@type@ *)ip1;
1455+
*((npy_bool *)op1) = @func@(in1) != 0;
14601456
}
14611457
npy_clear_floatstatus_barrier((char*)dimensions);
14621458
}
1463-
/**end repeat2**/
14641459
/**end repeat1**/
1460+
#endif
14651461

14661462
NPY_NO_EXPORT void
14671463
@TYPE@_spacing(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))

numpy/core/src/umath/loops.h.src

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,8 @@ NPY_NO_EXPORT void
218218
* #TYPE = FLOAT, DOUBLE#
219219
*/
220220
/**begin repeat1
221-
* #kind = rint, floor, trunc, ceil, sqrt, absolute, square, reciprocal#
221+
* #kind = rint, floor, trunc, ceil, sqrt, absolute, square, reciprocal,
222+
* isnan, isinf, isfinite, signbit#
222223
*/
223224
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
224225
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)))
@@ -340,6 +341,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (
340341
* #TYPE = HALF, FLOAT, DOUBLE, LONGDOUBLE#
341342
* #c = f, f, , l#
342343
* #C = F, F, , L#
344+
* #fd = 0, 1, 1, 0#
343345
*/
344346

345347
/**begin repeat1
@@ -368,13 +370,13 @@ NPY_NO_EXPORT void
368370
/**begin repeat1
369371
* #kind = isnan, isinf, isfinite, signbit, copysign, nextafter, spacing#
370372
* #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit, npy_copysign, nextafter, spacing#
373+
* #dispatched = 1, 1, 1, 1, 0, 0, 0#
371374
**/
372375

373-
/**begin repeat2
374-
* #ISA = , _avx512_skx#
375-
**/
376+
#if !@fd@ || !@dispatched@
376377
NPY_NO_EXPORT void
377-
@TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
378+
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
379+
#endif
378380
/**end repeat2**/
379381
/**end repeat1**/
380382

0 commit comments

Comments
 (0)