Skip to content

Commit 3a843f9

Browse files
dixyescmb69
authored andcommitted
Windows arm64 zend and standard extension support
* Port zend_cpuid for windows arm64 * Fix zend_atomic windows arm64 build * Fix windows arm64 multiply * Enable arm64 neon for windows in standard extension * Enable arm64 neon for windows in zend_hash.c * Workaround for msvc arm64 optimization bug Closes GH-9115.
1 parent aeabb51 commit 3a843f9

File tree

8 files changed

+68
-28
lines changed

8 files changed

+68
-28
lines changed

Zend/zend_atomic.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ BEGIN_EXTERN_C()
5757

5858
#if ZEND_WIN32
5959

60+
#ifndef InterlockedExchange8
61+
#define InterlockedExchange8 _InterlockedExchange8
62+
#endif
63+
#ifndef InterlockedOr8
64+
#define InterlockedOr8 _InterlockedOr8
65+
#endif
66+
6067
#define ZEND_ATOMIC_BOOL_INIT(obj, desired) ((obj)->value = (desired))
6168

6269
static zend_always_inline bool zend_atomic_bool_exchange_ex(zend_atomic_bool *obj, bool desired) {

Zend/zend_cpuinfo.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ typedef struct _zend_cpu_info {
2828

2929
static zend_cpu_info cpuinfo = {0};
3030

31-
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
32-
# if defined(HAVE_CPUID_H) && defined(HAVE_CPUID_COUNT)
33-
# include <cpuid.h>
31+
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
32+
# if defined(HAVE_CPUID_H) && defined(HAVE_CPUID_COUNT) /* use cpuid.h functions */
33+
# include <cpuid.h>
3434
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
3535
__cpuid_count(func, subfunc, cpuinfo->eax, cpuinfo->ebx, cpuinfo->ecx, cpuinfo->edx);
3636
}
37-
# else
37+
# else /* use inline asm */
3838
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
39-
#if defined(__i386__) && (defined(__pic__) || defined(__PIC__))
39+
# if defined(__i386__) && (defined(__pic__) || defined(__PIC__))
4040
/* PIC on i386 uses %ebx, so preserve it. */
4141
__asm__ __volatile__ (
4242
"pushl %%ebx\n"
@@ -46,16 +46,16 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo
4646
: "=a"(cpuinfo->eax), "=r"(cpuinfo->ebx), "=c"(cpuinfo->ecx), "=d"(cpuinfo->edx)
4747
: "a"(func), "c"(subfunc)
4848
);
49-
#else
49+
# else
5050
__asm__ __volatile__ (
5151
"cpuid"
5252
: "=a"(cpuinfo->eax), "=b"(cpuinfo->ebx), "=c"(cpuinfo->ecx), "=d"(cpuinfo->edx)
5353
: "a"(func), "c"(subfunc)
5454
);
55-
#endif
55+
# endif
5656
}
5757
# endif
58-
#elif defined(ZEND_WIN32) && !defined(__clang__)
58+
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_X64) || defined(_M_IX86)) /* use MSVC __cpuidex intrin */
5959
# include <intrin.h>
6060
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
6161
int regs[4];
@@ -67,7 +67,7 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo
6767
cpuinfo->ecx = regs[2];
6868
cpuinfo->edx = regs[3];
6969
}
70-
#else
70+
#else /* fall back to zero */
7171
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
7272
cpuinfo->eax = 0;
7373
}

Zend/zend_hash.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#include "zend_globals.h"
2323
#include "zend_variables.h"
2424

25-
#if defined(__aarch64__)
25+
#if defined(__aarch64__) || defined(_M_ARM64)
2626
# include <arm_neon.h>
2727
#endif
2828

@@ -183,7 +183,7 @@ static zend_always_inline void zend_hash_real_init_mixed_ex(HashTable *ht)
183183
_mm_storeu_si128((__m128i*)&HT_HASH_EX(data, 8), xmm0);
184184
_mm_storeu_si128((__m128i*)&HT_HASH_EX(data, 12), xmm0);
185185
} while (0);
186-
#elif defined(__aarch64__)
186+
#elif defined(__aarch64__) || defined(_M_ARM64)
187187
do {
188188
int32x4_t t = vdupq_n_s32(-1);
189189
vst1q_s32((int32_t*)&HT_HASH_EX(data, 0), t);

Zend/zend_multiply.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,19 @@
9494
(dval) = (double)(a) * (double)(b); \
9595
} \
9696
} while (0)
97+
# elif defined(_M_ARM64)
98+
# pragma intrinsic(__mulh)
99+
# define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \
100+
__int64 __high = __mulh((a), (b)); \
101+
__int64 __low = (a) * (b); \
102+
if ((__low >> 63I64) == __high) { \
103+
(usedval) = 0; \
104+
(lval) = __low; \
105+
} else { \
106+
(usedval) = 1; \
107+
(dval) = (double)(a) * (double)(b); \
108+
} \
109+
} while (0)
97110
# else
98111
# define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \
99112
zend_long __lres = (a) * (b); \

ext/json/json_encoder.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,14 @@ static inline void php_json_pretty_print_indent(smart_str *buf, int options, php
7171

7272
/* }}} */
7373

74-
static inline bool php_json_is_valid_double(double d) /* {{{ */
74+
static
75+
#if defined(_MSC_VER) && defined(_M_ARM64)
76+
// MSVC bug: https://developercommunity.visualstudio.com/t/corrupt-optimization-on-arm64-with-Ox-/10102551
77+
zend_never_inline
78+
#else
79+
inline
80+
#endif
81+
bool php_json_is_valid_double(double d) /* {{{ */
7582
{
7683
return !zend_isinf(d) && !zend_isnan(d);
7784
}

ext/opcache/ZendAccelerator.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2190,7 +2190,7 @@ zend_op_array *persistent_compile_file(zend_file_handle *file_handle, int type)
21902190
ZCSG(hits)++; /* TBFixed: may lose one hit */
21912191
persistent_script->dynamic_members.hits++; /* see above */
21922192
#else
2193-
#ifdef _M_X64
2193+
#if ZEND_ENABLE_ZVAL_LONG64
21942194
InterlockedIncrement64(&ZCSG(hits));
21952195
#else
21962196
InterlockedIncrement(&ZCSG(hits));

ext/standard/base64.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ static const short base64_reverse_table[256] = {
5151
};
5252
/* }}} */
5353

54-
#ifdef __aarch64__
54+
#if defined(__aarch64__) || defined(_M_ARM64)
5555
#include <arm_neon.h>
5656

5757
static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
@@ -118,11 +118,11 @@ static zend_always_inline unsigned char *neon_base64_encode(const unsigned char
118118
*left = inl;
119119
return out;
120120
}
121-
#endif /* __aarch64__ */
121+
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
122122

123123
static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
124124
{
125-
#ifdef __aarch64__
125+
#if defined(__aarch64__) || defined(_M_ARM64)
126126
if (inl >= 16 * 3) {
127127
size_t left = 0;
128128
out = neon_base64_encode(in, inl, out, &left);
@@ -161,7 +161,7 @@ static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned c
161161
}
162162
/* }}} */
163163

164-
#ifdef __aarch64__
164+
#if defined(__aarch64__) || defined(_M_ARM64)
165165
static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
166166
const uint8x16_t higher_nibble = vshrq_n_u8(input, 4);
167167
const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f));
@@ -241,14 +241,14 @@ static zend_always_inline size_t neon_base64_decode(const unsigned char *in, siz
241241
*left = inl;
242242
return out - out_orig;
243243
}
244-
#endif /* __aarch64__ */
244+
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
245245

246246
static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */
247247
{
248248
int ch;
249249
size_t i = 0, padding = 0, j = *outl;
250250

251-
#ifdef __aarch64__
251+
#if defined(__aarch64__) || defined(_M_ARM64)
252252
if (inl >= 16 * 4) {
253253
size_t left = 0;
254254
j += neon_base64_decode(in, inl, out, &left);

ext/standard/string.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3338,7 +3338,7 @@ PHP_FUNCTION(strtr)
33383338
/* {{{ Reverse a string */
33393339
#ifdef ZEND_INTRIN_SSSE3_NATIVE
33403340
#include <tmmintrin.h>
3341-
#elif defined(__aarch64__)
3341+
#elif defined(__aarch64__) || defined(_M_ARM64)
33423342
#include <arm_neon.h>
33433343
#endif
33443344
PHP_FUNCTION(strrev)
@@ -3385,6 +3385,19 @@ PHP_FUNCTION(strrev)
33853385
e -= 16;
33863386
} while (e - s > 15);
33873387
}
3388+
#elif defined(_M_ARM64)
3389+
if (e - s > 15) {
3390+
do {
3391+
const __n128 str = vld1q_u8((uint8_t *)(e - 15));
3392+
/* Synthesize rev128 with a rev64 + ext. */
3393+
/* strange force cast limit on windows: you cannot convert anything */
3394+
const __n128 rev = vrev64q_u8(str);
3395+
const __n128 ext = vextq_u64(rev, rev, 1);
3396+
vst1q_u8((uint8_t *)p, ext);
3397+
p += 16;
3398+
e -= 16;
3399+
} while (e - s > 15);
3400+
}
33883401
#endif
33893402
while (e >= s) {
33903403
*p++ = *e--;
@@ -3864,7 +3877,7 @@ zend_string *php_addslashes_sse42(zend_string *str)
38643877
/* }}} */
38653878
#endif
38663879

3867-
#ifdef __aarch64__
3880+
#if defined(__aarch64__) || defined(_M_ARM64)
38683881
typedef union {
38693882
uint8_t mem[16];
38703883
uint64_t dw[2];
@@ -3899,7 +3912,7 @@ static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *s
38993912
}
39003913
return target;
39013914
}
3902-
#endif /* __aarch64__ */
3915+
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
39033916

39043917
#ifndef ZEND_INTRIN_SSE4_2_NATIVE
39053918
# ifdef ZEND_INTRIN_SSE4_2_RESOLVER
@@ -3921,7 +3934,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
39213934
source = ZSTR_VAL(str);
39223935
end = source + ZSTR_LEN(str);
39233936

3924-
# ifdef __aarch64__
3937+
# if defined(__aarch64__) || defined(_M_ARM64)
39253938
quad_word res = {0};
39263939
if (ZSTR_LEN(str) > 15) {
39273940
do {
@@ -3932,7 +3945,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
39323945
} while ((end - source) > 15);
39333946
}
39343947
/* Finish the last 15 bytes or less with the scalar loop. */
3935-
# endif /* __aarch64__ */
3948+
# endif /* defined(__aarch64__) || defined(_M_ARM64) */
39363949

39373950
while (source < end) {
39383951
switch (*source) {
@@ -3955,7 +3968,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
39553968
memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
39563969
target = ZSTR_VAL(new_str) + offset;
39573970

3958-
# ifdef __aarch64__
3971+
# if defined(__aarch64__) || defined(_M_ARM64)
39593972
if (res.dw[0] | res.dw[1]) {
39603973
target = aarch64_add_slashes(res, source, target);
39613974
source += 16;
@@ -3971,7 +3984,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
39713984
}
39723985
}
39733986
/* Finish the last 15 bytes or less with the scalar loop. */
3974-
# endif /* __aarch64__ */
3987+
# endif /* defined(__aarch64__) || defined(_M_ARM64) */
39753988

39763989
while (source < end) {
39773990
switch (*source) {
@@ -4010,7 +4023,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
40104023
* be careful, this edits the string in-place */
40114024
static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
40124025
{
4013-
#ifdef __aarch64__
4026+
#if defined(__aarch64__) || defined(_M_ARM64)
40144027
while (len > 15) {
40154028
uint8x16_t x = vld1q_u8((uint8_t *)str);
40164029
quad_word q;
@@ -4040,7 +4053,7 @@ static zend_always_inline char *php_stripslashes_impl(const char *str, char *out
40404053
}
40414054
}
40424055
/* Finish the last 15 bytes or less with the scalar loop. */
4043-
#endif /* __aarch64__ */
4056+
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
40444057
while (len > 0) {
40454058
if (*str == '\\') {
40464059
str++; /* skip the slash */

0 commit comments

Comments
 (0)