Skip to content

Commit 274af7d

Browse files
resolve additional platform test failures
Special-case SSE; fix the PPC64 build; only use vqtbl4q_u8 on A64 (aarch64); stop trying to use these optimizations on s390x.
1 parent 12960e8 commit 274af7d

File tree

2 files changed

+39
-11
lines changed

2 files changed

+39
-11
lines changed

numpy/core/src/umath/loops_unary_fp.dispatch.c.src

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,22 @@
33
** sse2 sse41
44
** vsx2
55
** neon asimd
6-
** vx vxe
76
**/
7+
8+
/**
9+
* We ran into lots of test failures trying to enable this file for
10+
* VX and VXE on s390x (qemu), so we are avoiding these targets for now.
11+
*/
12+
813
/**
914
* Force use SSE only on x86, even if AVX2 or AVX512F are enabled
1015
* through the baseline, since scatter(AVX512F) and gather very costly
1116
* to handle non-contiguous memory access comparing with SSE for
1217
* such small operations that this file covers.
1318
*/
1419
#define NPY_SIMD_FORCE_128
20+
#define _UMATHMODULE
21+
#define _MULTIARRAYMODULE
1522
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
1623
#include <float.h>
1724
#include "numpy/npy_math.h"
@@ -119,7 +126,12 @@ npyv_isinf_@sfx@(npyv_@sfx@ v)
119126
// fabs via masking of sign bit
120127
const npyv_@sfx@ signmask = npyv_setall_@sfx@(-0.@fd@);
121128
npyv_u8 r_u8 = npyv_andc_u8(npyv_reinterpret_u8_@sfx@(v), npyv_reinterpret_u8_@sfx@(signmask));
129+
#if defined(NPY_HAVE_SSE2) || defined (NPY_HAVE_SSE41)
130+
// return cast already done in npyv_cmpgt_@sfx@
122131
npyv_u@ssfx@ r = npyv_cmpgt_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax);
132+
#else
133+
npyv_u@ssfx@ r = npyv_reinterpret_u@ssfx@_@sfx@(npyv_cmpgt_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax));
134+
#endif
123135
#endif
124136
return npyv_shri_u@ssfx@(r, (sizeof(npyv_lanetype_@sfx@)*8)-1);
125137
}
@@ -135,7 +147,12 @@ npyv_isfinite_@sfx@(npyv_@sfx@ v)
135147
// fabs via masking of sign bit
136148
const npyv_@sfx@ signmask = npyv_setall_@sfx@(-0.@fd@);
137149
npyv_u8 r_u8 = npyv_andc_u8(npyv_reinterpret_u8_@sfx@(v), npyv_reinterpret_u8_@sfx@(signmask));
150+
#if defined(NPY_HAVE_SSE2) || defined (NPY_HAVE_SSE41)
151+
// return cast already done in npyv_cmpgt_@sfx@
138152
npyv_u@ssfx@ r = npyv_cmple_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax);
153+
#else
154+
npyv_u@ssfx@ r = npyv_reinterpret_u@ssfx@_@sfx@(npyv_cmple_@sfx@(npyv_reinterpret_@sfx@_u8(r_u8), fltmax));
155+
#endif
139156
#endif
140157
return npyv_shri_u@ssfx@(r, (sizeof(npyv_lanetype_@sfx@)*8)-1);
141158
}
@@ -149,7 +166,8 @@ npyv_signbit_@sfx@(npyv_@sfx@ v)
149166
#endif // @VCHK@
150167
/**end repeat**/
151168

152-
#if defined(NPY_HAVE_NEON)
169+
// In these functions we use vqtbl4q_u8, which is only available on aarch64
170+
#if defined(NPY_HAVE_NEON) && defined(__aarch64__)
153171
#define PREPACK_ISFINITE 1
154172
#define PREPACK_SIGNBIT 1
155173

@@ -257,7 +275,7 @@ npyv_signbit_@sfx@(npyv_@sfx@ v)
257275
#else
258276
#define PREPACK_ISFINITE 0
259277
#define PREPACK_SIGNBIT 0
260-
#endif // defined(NPY_HAVE_NEON)
278+
#endif // defined(NPY_HAVE_NEON) && defined(__aarch64__)
261279

262280
#endif // NPY_SIMD
263281

@@ -503,15 +521,15 @@ static void simd_unary_@kind@_@TYPE@_@STYPE@_@DTYPE@
503521
v4_@N@, v5_@N@, v6_@N@, v7_@N@);
504522
#endif
505523
#else
506-
npyv_u@ssfx@ r0_@N@ = npyv_@kind@_@sfx@(v0_@N@);
507-
npyv_u@ssfx@ r1_@N@ = npyv_@kind@_@sfx@(v1_@N@);
508-
npyv_u@ssfx@ r2_@N@ = npyv_@kind@_@sfx@(v2_@N@);
509-
npyv_u@ssfx@ r3_@N@ = npyv_@kind@_@sfx@(v3_@N@);
524+
npyv_b@ssfx@ r0_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v0_@N@));
525+
npyv_b@ssfx@ r1_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v1_@N@));
526+
npyv_b@ssfx@ r2_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v2_@N@));
527+
npyv_b@ssfx@ r3_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v3_@N@));
510528
#if PACK_FACTOR == 8
511-
npyv_u@ssfx@ r4_@N@ = npyv_@kind@_@sfx@(v4_@N@);
512-
npyv_u@ssfx@ r5_@N@ = npyv_@kind@_@sfx@(v5_@N@);
513-
npyv_u@ssfx@ r6_@N@ = npyv_@kind@_@sfx@(v6_@N@);
514-
npyv_u@ssfx@ r7_@N@ = npyv_@kind@_@sfx@(v7_@N@);
529+
npyv_b@ssfx@ r4_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v4_@N@));
530+
npyv_b@ssfx@ r5_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v5_@N@));
531+
npyv_b@ssfx@ r6_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v6_@N@));
532+
npyv_b@ssfx@ r7_@N@ = npyv_cvt_b@ssfx@_u@ssfx@(npyv_@kind@_@sfx@(v7_@N@));
515533
#endif // PACK_FACTOR == 8
516534
#endif // @PREPACK@ && (@ssfx@ == 32 || @ssfx@ == 64)
517535
#endif // @unroll@ > @N@

numpy/core/tests/test_api.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@ def test_array_array():
102102
assert_raises(ValueError, np.array, [nested], dtype=np.float64)
103103

104104
# Try with lists...
105+
# float32
106+
assert_equal(np.array([None] * 10, dtype=np.float32),
107+
np.full((10,), np.nan, dtype=np.float32))
108+
assert_equal(np.array([[None]] * 10, dtype=np.float32),
109+
np.full((10, 1), np.nan, dtype=np.float32))
110+
assert_equal(np.array([[None] * 10], dtype=np.float32),
111+
np.full((1, 10), np.nan, dtype=np.float32))
112+
assert_equal(np.array([[None] * 10] * 10, dtype=np.float32),
113+
np.full((10, 10), np.nan, dtype=np.float32))
114+
# float64
105115
assert_equal(np.array([None] * 10, dtype=np.float64),
106116
np.full((10,), np.nan, dtype=np.float64))
107117
assert_equal(np.array([[None]] * 10, dtype=np.float64),

0 commit comments

Comments
 (0)