Skip to content

ext/bcmath: use XSSE #18770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions ext/bcmath/libbcmath/src/convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,22 @@
#include "bcmath.h"
#include "convert.h"
#include "private.h"
#include "simd.h"
#include "xsse.h"

char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end)
{
const size_t bulk_shift = SWAR_REPEAT('0');

#ifdef HAVE_BC_SIMD_128
#ifdef XSSE2
/* SIMD SSE2 or NEON bulk shift + copy */
bc_simd_128_t shift_vector = bc_simd_set_8x16('0');
while (source + sizeof(bc_simd_128_t) <= source_end) {
bc_simd_128_t bytes = bc_simd_load_8x16((const bc_simd_128_t *) source);
bytes = bc_simd_xor_8x16(bytes, shift_vector);
bc_simd_store_8x16((bc_simd_128_t *) dest, bytes);

source += sizeof(bc_simd_128_t);
dest += sizeof(bc_simd_128_t);
__m128i shift_vector = _mm_set1_epi8('0');
while (source + sizeof(__m128i) <= source_end) {
__m128i bytes = _mm_loadu_si128((const __m128i *) source);
bytes = _mm_xor_si128(bytes, shift_vector);
_mm_storeu_si128((__m128i *) dest, bytes);

source += sizeof(__m128i);
dest += sizeof(__m128i);
}
#endif

Expand Down
59 changes: 0 additions & 59 deletions ext/bcmath/libbcmath/src/simd.h

This file was deleted.

36 changes: 18 additions & 18 deletions ext/bcmath/libbcmath/src/str2num.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,28 @@
#include "bcmath.h"
#include "convert.h"
#include "private.h"
#include "simd.h"
#include "xsse.h"
#include <stdbool.h>
#include <stddef.h>

/* Convert strings to bc numbers. Base 10 only.*/
static inline const char *bc_count_digits(const char *str, const char *end)
{
/* Process in bulk */
#ifdef HAVE_BC_SIMD_128
const bc_simd_128_t offset = bc_simd_set_8x16((signed char) (SCHAR_MIN - '0'));
#ifdef XSSE2
const __m128i offset = _mm_set1_epi8((signed char) (SCHAR_MIN - '0'));
/* we use the less than comparator, so add 1 */
const bc_simd_128_t threshold = bc_simd_set_8x16(SCHAR_MIN + ('9' + 1 - '0'));
const __m128i threshold = _mm_set1_epi8(SCHAR_MIN + ('9' + 1 - '0'));

while (str + sizeof(bc_simd_128_t) <= end) {
bc_simd_128_t bytes = bc_simd_load_8x16((const bc_simd_128_t *) str);
while (str + sizeof(__m128i) <= end) {
__m128i bytes = _mm_loadu_si128((const __m128i *) str);
/* Wrapping-add the offset to the bytes, such that all bytes below '0' are positive and others are negative.
* More specifically, '0' will be -128 and '9' will be -119. */
bytes = bc_simd_add_8x16(bytes, offset);
bytes = _mm_add_epi8(bytes, offset);
/* Now mark all bytes that are <= '9', i.e. <= -119, i.e. < -118, i.e. the threshold. */
bytes = bc_simd_cmplt_8x16(bytes, threshold);
bytes = _mm_cmplt_epi8(bytes, threshold);

int mask = bc_simd_movemask_8x16(bytes);
int mask = _mm_movemask_epi8(bytes);
if (mask != 0xffff) {
/* At least one of the bytes is not within range. Move to the first offending byte. */
#ifdef PHP_HAVE_BUILTIN_CTZL
Expand All @@ -63,7 +63,7 @@ static inline const char *bc_count_digits(const char *str, const char *end)
#endif
}

str += sizeof(bc_simd_128_t);
str += sizeof(__m128i);
}
#endif

Expand All @@ -77,19 +77,19 @@ static inline const char *bc_count_digits(const char *str, const char *end)
static inline const char *bc_skip_zero_reverse(const char *scanner, const char *stop)
{
/* Check in bulk */
#ifdef HAVE_BC_SIMD_128
const bc_simd_128_t c_zero_repeat = bc_simd_set_8x16('0');
while (scanner - sizeof(bc_simd_128_t) >= stop) {
scanner -= sizeof(bc_simd_128_t);
bc_simd_128_t bytes = bc_simd_load_8x16((const bc_simd_128_t *) scanner);
#ifdef XSSE2
const __m128i c_zero_repeat = _mm_set1_epi8('0');
while (scanner - sizeof(__m128i) >= stop) {
scanner -= sizeof(__m128i);
__m128i bytes = _mm_loadu_si128((const __m128i *) scanner);
/* Check whether all 16 loaded bytes are equal to '0'. */
bytes = bc_simd_cmpeq_8x16(bytes, c_zero_repeat);
bytes = _mm_cmpeq_epi8(bytes, c_zero_repeat);

int mask = bc_simd_movemask_8x16(bytes);
int mask = _mm_movemask_epi8(bytes);
/* The probability of having 16 trailing 0s in a row is very low, so we use EXPECTED. */
if (EXPECTED(mask != 0xffff)) {
/* Move the pointer back and check each character in loop. */
scanner += sizeof(bc_simd_128_t);
scanner += sizeof(__m128i);
break;
}
}
Expand Down
Loading