3
3
** sse2 sse41
4
4
** vsx2
5
5
** neon asimd
6
- ** vx vxe
7
6
**/
7
+
8
+ /**
9
+ * We ran into lots of test failures trying to enable this file for
10
+ * VX and VXE on s390x (qemu), so we are avoiding these targets for now.
11
+ */
12
+
8
13
/**
9
14
* Force the use of SSE only on x86, even if AVX2 or AVX512F are enabled
10
15
* through the baseline, since scatter(AVX512F) and gather are very costly
11
16
* to handle non-contiguous memory access compared with SSE for
12
17
* such small operations that this file covers.
13
18
*/
14
19
#define NPY_SIMD_FORCE_128
20
+ #define _UMATHMODULE
21
+ #define _MULTIARRAYMODULE
15
22
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
16
23
#include <float.h>
17
24
#include "numpy/npy_math.h"
@@ -119,7 +126,12 @@ npyv_isinf_@sfx@(npyv_@sfx@ v)
119
126
// fabs via masking of sign bit
120
127
const npyv_@sfx@ signmask = npyv_setall_@sfx@(-0.@fd@);
121
128
npyv_u8 r_u8 = npyv_andc_u8(npyv_reinterpret_u8_@sfx@(v), npyv_reinterpret_u8_@sfx@(signmask));
129
+ #if defined(NPY_HAVE_SSE2) || defined (NPY_HAVE_SSE41)
130
+ // return cast already done in npyv_cmpgt_@sfx@
122
131
npyv_u@ssfx@ r = npyv_cmpgt_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax);
132
+ #else
133
+ npyv_u@ssfx@ r = npyv_reinterpret_u@ssfx@_@sfx@(npyv_cmpgt_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax));
134
+ #endif
123
135
#endif
124
136
return npyv_shri_u@ssfx@(r, (sizeof(npyv_lanetype_@sfx@)*8)-1);
125
137
}
@@ -135,7 +147,12 @@ npyv_isfinite_@sfx@(npyv_@sfx@ v)
135
147
// fabs via masking of sign bit
136
148
const npyv_@sfx@ signmask = npyv_setall_@sfx@(-0.@fd@);
137
149
npyv_u8 r_u8 = npyv_andc_u8(npyv_reinterpret_u8_@sfx@(v), npyv_reinterpret_u8_@sfx@(signmask));
150
+ #if defined(NPY_HAVE_SSE2) || defined (NPY_HAVE_SSE41)
151
+ // return cast already done in npyv_cmple_@sfx@
138
152
npyv_u@ssfx@ r = npyv_cmple_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax);
153
+ #else
154
+ npyv_u@ssfx@ r = npyv_reinterpret_u@ssfx@_@sfx@(npyv_cmple_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax));
155
+ #endif
139
156
#endif
140
157
return npyv_shri_u@ssfx@(r, (sizeof(npyv_lanetype_@sfx@)*8)-1);
141
158
}
@@ -149,7 +166,8 @@ npyv_signbit_@sfx@(npyv_@sfx@ v)
149
166
#endif // @VCHK@
150
167
/**end repeat**/
151
168
152
- #if defined(NPY_HAVE_NEON)
169
+ // In these functions we use vqtbl4q_u8 which is only available on aarch64
170
+ #if defined(NPY_HAVE_NEON) && defined(__aarch64__)
153
171
#define PREPACK_ISFINITE 1
154
172
#define PREPACK_SIGNBIT 1
155
173
@@ -257,7 +275,7 @@ npyv_signbit_@sfx@(npyv_@sfx@ v)
257
275
#else
258
276
#define PREPACK_ISFINITE 0
259
277
#define PREPACK_SIGNBIT 0
260
- #endif // defined(NPY_HAVE_NEON)
278
+ #endif // defined(NPY_HAVE_NEON) && defined(__aarch64__)
261
279
262
280
#endif // NPY_SIMD
263
281
@@ -503,15 +521,15 @@ static void simd_unary_@kind@_@TYPE@_@STYPE@_@DTYPE@
503
521
v4_@N@, v5_@N@, v6_@N@, v7_@N@);
504
522
#endif
505
523
#else
506
- npyv_u @ssfx@ r0_@N@ = npyv_@kind@_@sfx@(v0_@N@);
507
- npyv_u @ssfx@ r1_@N@ = npyv_@kind@_@sfx@(v1_@N@);
508
- npyv_u @ssfx@ r2_@N@ = npyv_@kind@_@sfx@(v2_@N@);
509
- npyv_u @ssfx@ r3_@N@ = npyv_@kind@_@sfx@(v3_@N@);
524
+ npyv_b @ssfx@ r0_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v0_@N@) );
525
+ npyv_b @ssfx@ r1_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v1_@N@) );
526
+ npyv_b @ssfx@ r2_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v2_@N@) );
527
+ npyv_b @ssfx@ r3_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v3_@N@) );
510
528
#if PACK_FACTOR == 8
511
- npyv_u @ssfx@ r4_@N@ = npyv_@kind@_@sfx@(v4_@N@);
512
- npyv_u @ssfx@ r5_@N@ = npyv_@kind@_@sfx@(v5_@N@);
513
- npyv_u @ssfx@ r6_@N@ = npyv_@kind@_@sfx@(v6_@N@);
514
- npyv_u @ssfx@ r7_@N@ = npyv_@kind@_@sfx@(v7_@N@);
529
+ npyv_b @ssfx@ r4_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v4_@N@) );
530
+ npyv_b @ssfx@ r5_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v5_@N@) );
531
+ npyv_b @ssfx@ r6_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v6_@N@) );
532
+ npyv_b @ssfx@ r7_@N@ = npyv_cvt_b@ssfx@_u@ssfx@( npyv_@kind@_@sfx@(v7_@N@) );
515
533
#endif // PACK_FACTOR == 8
516
534
#endif // @PREPACK@ && (@ssfx@ == 32 || @ssfx@ == 64)
517
535
#endif // @unroll@ > @N@
0 commit comments