Skip to content

Commit a604d1d

Browse files
committed
Use SSE2 in bc_copy_and_shift_numbers() if possible
1 parent 4964c5c commit a604d1d

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

ext/bcmath/libbcmath/src/convert.c

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,9 @@
1616

1717
#include "bcmath.h"
1818
#include "convert.h"
19+
#ifdef __SSE2__
20+
# include <emmintrin.h>
21+
#endif
1922

2023
/* This will be 0x01010101 for 32-bit and 0x0101010101010101 for 64-bit */
2124
#define SWAR_ONES (~((size_t) 0) / 0xFF)
@@ -31,6 +34,19 @@ static char *bc_copy_and_shift_numbers(char *restrict dest, const char *source,
3134
shift = -shift;
3235
}
3336

37+
#ifdef __SSE2__
38+
/* SSE2 fast path: add the shift to 16 bytes at a time while copying source to dest */
39+
__m128i shift_vector = _mm_set1_epi8(shift);
40+
while (source + sizeof(__m128i) <= source_end) {
41+
__m128i bytes = _mm_loadu_si128((const __m128i *) source);
42+
bytes = _mm_add_epi8(bytes, shift_vector);
43+
_mm_storeu_si128((__m128i *) dest, bytes);
44+
45+
source += sizeof(__m128i);
46+
dest += sizeof(__m128i);
47+
}
48+
#endif
49+
3450
/* Handle sizeof(size_t) (i.e. 4/8) bytes at once.
3551
* We know that adding/subtracting an individual byte cannot overflow,
3652
* so it is possible to add/subtract an entire word of bytes at once

0 commit comments

Comments
 (0)