@@ -5090,52 +5090,98 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
5090
5090
return p - start ;
5091
5091
}
5092
5092
5093
- static Py_ssize_t
5094
- find_first_nonascii (const char * start , const char * end )
5095
- {
5096
- const char * p = start ;
5097
-
5098
- while (p < end ) {
5099
- /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
5100
- for an explanation. */
5101
- if (_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5102
- const char * e = end - SIZEOF_SIZE_T ;
5103
- while (p <= e ) {
5104
- size_t value = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5105
- if (value ) {
5106
- #if PY_LITTLE_ENDIAN && (defined(__clang__ ) || defined(__GNUC__ ))
5107
- #if SIZEOF_SIZE_T == 4
5108
- // __builtin_ctz(0x8000) == 15.
5109
- // (15-7) / 8 == 1.
5110
- // p+1 is first non-ASCII char.
5111
- return p - start + (__builtin_ctz (value ) - 7 ) / 8 ;
5112
- #else
5113
- return p - start + (__builtin_ctzll (value ) - 7 ) / 8 ;
5114
- #endif
5115
- #elif PY_LITTLE_ENDIAN && defined(_MSC_VER )
5116
- unsigned long bitpos ;
5093
+ #if (defined(__clang__ ) || defined(__GNUC__ ))
5094
+ #define HAS_CTZ 1
5095
+ static inline unsigned int ctz (size_t v ) {
5096
+ return __builtin_ctzll ((unsigned long long )v );
5097
+ }
5098
+ #elif defined(_MSC_VER )
5099
+ #define HAS_CTZ 1
5100
+ static inline unsigned int ctz (size_t v ) {
5101
+ unsigned long pos ;
5117
5102
#if SIZEOF_SIZE_T == 4
5118
- _BitScanForward (& bitpos , value );
5103
+ _BitScanForward (& pos , v );
5119
5104
#else
5120
- _BitScanForward64 (& bitpos , value );
5105
+ _BitScanForward64 (& pos , v );
5106
+ #endif /* SIZEOF_SIZE_T */
5107
+ return pos ;
5108
+ }
5121
5109
#endif
5122
- return p - start + (bitpos - 7 ) / 8 ;
5110
+
5111
+ static Py_ssize_t
5112
+ find_first_nonascii (const unsigned char * start , const unsigned char * end )
5113
+ {
5114
+ const unsigned char * p = start ;
5115
+
5116
+ if (end - start > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
5117
+ while (!_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5118
+ if ((unsigned char )* p & 0x80 ) {
5119
+ return p - start ;
5120
+ }
5121
+ p ++ ;
5122
+ }
5123
+ const unsigned char * e = end - SIZEOF_SIZE_T ;
5124
+ while (p <= e ) {
5125
+ size_t value = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5126
+ if (value ) {
5127
+ #if PY_LITTLE_ENDIAN && HAS_CTZ
5128
+ return p - start + (ctz (value ) - 7 ) / 8 ;
5123
5129
#else
5124
- // big endian and minor compilers are difficult to test.
5125
- // fallback to per byte check.
5126
- break ;
5130
+ // big endian and minor compilers are difficult to test.
5131
+ // fallback to per byte check.
5132
+ break ;
5127
5133
#endif
5128
- }
5129
- p += SIZEOF_SIZE_T ;
5130
5134
}
5131
- if (p == end )
5135
+ p += SIZEOF_SIZE_T ;
5136
+ }
5137
+ }
5138
+ #if HAS_CTZ
5139
+ // This part looks bit tricky, but decoding short ASCII is super important.
5140
+ // Since we copy from p to size_t manually, this part works fine with big endian.
5141
+ while (p < end ) {
5142
+ size_t u = (size_t )(p [0 ]);
5143
+ switch (end - p ) {
5144
+ default :
5145
+ #if SIZEOF_SIZE_T == 8
5146
+ u |= (size_t )(p [7 ]) << 56ull ;
5147
+ // fall through
5148
+ case 7 :
5149
+ u |= (size_t )(p [6 ]) << 48ull ;
5150
+ // fall through
5151
+ case 6 :
5152
+ u |= (size_t )(p [5 ]) << 40ull ;
5153
+ // fall through
5154
+ case 5 :
5155
+ u |= (size_t )(p [4 ]) << 32ull ;
5156
+ // fall through
5157
+ case 4 :
5158
+ #endif
5159
+ u |= (size_t )(p [3 ]) << 24 ;
5160
+ // fall through
5161
+ case 3 :
5162
+ u |= (size_t )(p [2 ]) << 16 ;
5163
+ // fall through
5164
+ case 2 :
5165
+ u |= (size_t )(p [1 ]) << 8 ;
5166
+ break ;
5167
+ case 1 :
5132
5168
break ;
5133
5169
}
5134
- if ((unsigned char )* p & 0x80 )
5170
+ if (u & ASCII_CHAR_MASK ) {
5171
+ return p - start + (ctz (u & ASCII_CHAR_MASK ) - 7 ) / 8 ;
5172
+ }
5173
+ p += SIZEOF_SIZE_T ;
5174
+ }
5175
+ return end - start ;
5176
+ #else
5177
+ while (p < end ) {
5178
+ if ((unsigned char )* p & 0x80 ) {
5135
5179
break ;
5136
- ++ p ;
5180
+ }
5181
+ p ++ ;
5137
5182
}
5138
5183
return p - start ;
5184
+ #endif
5139
5185
}
5140
5186
5141
5187
static inline int scalar_utf8_start_char (unsigned int ch )
@@ -5153,7 +5199,7 @@ static Py_ssize_t utf8_count_codepoints(const unsigned char *s, const unsigned c
5153
5199
{
5154
5200
Py_ssize_t len = 0 ;
5155
5201
5156
- if (end - s > SIZEOF_SIZE_T * 2 ) {
5202
+ if (end - s > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
5157
5203
while (!_Py_IS_ALIGNED (s , ALIGNOF_SIZE_T )) {
5158
5204
len += scalar_utf8_start_char (* s ++ );
5159
5205
}
@@ -5337,7 +5383,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
5337
5383
const char * starts = s ;
5338
5384
const char * end = s + size ;
5339
5385
5340
- Py_ssize_t pos = find_first_nonascii (starts , end );
5386
+ Py_ssize_t pos = find_first_nonascii (( const unsigned char * ) starts , ( const unsigned char * ) end );
5341
5387
if (pos == size ) { // fast path: ASCII string.
5342
5388
PyObject * u = ascii_new (size );
5343
5389
if (u == NULL ) {
@@ -5355,7 +5401,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
5355
5401
int maxchr = 127 ;
5356
5402
Py_ssize_t maxsize = size ;
5357
5403
5358
- unsigned char ch = (unsigned char )s [pos ];
5404
+ unsigned char ch = (unsigned char )( s [pos ]) ;
5359
5405
// error handler other than strict may remove/replace the invalid byte.
5360
5406
// consumed != NULL allows 1~3 bytes remainings.
5361
5407
// 0x80 <= ch < 0xc2 is invalid start byte that cause UnicodeDecodeError.
0 commit comments