@@ -4788,6 +4788,7 @@ bool utf8_range(const unsigned char *data, size_t len)
4788
4788
const uint8x16_t const_1 = vdupq_n_u8 (1 );
4789
4789
const uint8x16_t const_2 = vdupq_n_u8 (2 );
4790
4790
const uint8x16_t const_e0 = vdupq_n_u8 (0xE0 );
4791
+ const uint8x16_t const_7f = vdupq_n_u8 (0x7F );
4791
4792
4792
4793
/* We use two error registers to remove a dependency. */
4793
4794
uint8x16_t error1 = vdupq_n_u8 (0 );
@@ -4799,6 +4800,29 @@ bool utf8_range(const unsigned char *data, size_t len)
4799
4800
const uint8x16_t input_3 = vld1q_u8 (data + 32 );
4800
4801
const uint8x16_t input_4 = vld1q_u8 (data + 48 );
4801
4802
4803
+ uint64_t ascii_paired = vgetq_lane_u64 (vreinterpretq_u64_u8 (prev_first_len ), 0 );
4804
+ if (ascii_paired == 0 ) {
4805
+ uint8x16_t is_ascii_0 = vorrq_u8 (input_1 , input_2 );
4806
+ is_ascii_0 = vorrq_u8 (is_ascii_0 , input_3 );
4807
+ is_ascii_0 = vorrq_u8 (is_ascii_0 , input_4 );
4808
+
4809
+ uint8x16_t is_ascii = vqsubq_u8 (is_ascii_0 , const_7f );
4810
+ uint64_t is_ascii_paired = vgetq_lane_u64 (vreinterpretq_u64_u8 (is_ascii ), 0 );
4811
+
4812
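+ /* ASCII fast path: taken when the low 64 bits of is_ascii are zero. NOTE(review): vgetq_lane_u64(..., 0) reads only lane 0, i.e. 8 of the 16 bytes of each OR-ed input vector - confirm the other 8 bytes are also checked. */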
4813
+ if (is_ascii_paired == 0 ) {
4814
+ const uint8x16_t high_nibbles_4 = vshrq_n_u8 (input_4 , 4 );
4815
+ const uint8x16_t first_len_4 = vqtbl1q_u8 (first_len_tbl , high_nibbles_4 );
4816
+
4817
+ prev_input = input_4 ;
4818
+ prev_first_len = first_len_4 ;
4819
+
4820
+ data += 64 ;
4821
+ len -= 64 ;
4822
+ continue ;
4823
+ }
4824
+ }
4825
+
4802
4826
/* high_nibbles = input >> 4 */
4803
4827
const uint8x16_t high_nibbles_1 = vshrq_n_u8 (input_1 , 4 );
4804
4828
const uint8x16_t high_nibbles_2 = vshrq_n_u8 (input_2 , 4 );
@@ -4941,14 +4965,33 @@ bool utf8_range(const unsigned char *data, size_t len)
4941
4965
while (len >= 16 ) {
4942
4966
const uint8x16_t input = vld1q_u8 (data );
4943
4967
4968
+ /* ASCII fast path: attempted only when the low 64 bits of prev_first_len are zero */
4969
+ uint64_t ascii_paired = vgetq_lane_u64 (vreinterpretq_u64_u8 (prev_first_len ), 0 );
4970
+ if (ascii_paired == 0 ) {
4971
+ uint8x16_t is_ascii = vqsubq_u8 (input , const_7f );
4972
+
4973
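+ /* ASCII fast path: taken when the low 64 bits of is_ascii are zero. NOTE(review): vgetq_lane_u64(..., 0) reads only lane 0, i.e. 8 of the 16 input bytes - confirm the other 8 bytes are also checked. */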
4974
+ uint64_t is_ascii_paired = vgetq_lane_u64 (vreinterpretq_u64_u8 (is_ascii ), 0 );
4975
+ if (is_ascii_paired == 0 ) {
4976
+ const uint8x16_t high_nibbles = vshrq_n_u8 (input , 4 );
4977
+ const uint8x16_t first_len = vqtbl1q_u8 (first_len_tbl , high_nibbles );
4978
+
4979
+ prev_input = input ;
4980
+ prev_first_len = first_len ;
4981
+
4982
+ data += 16 ;
4983
+ len -= 16 ;
4984
+ continue ;
4985
+ }
4986
+ }
4987
+
4944
4988
/* high_nibbles = input >> 4 */
4945
4989
const uint8x16_t high_nibbles = vshrq_n_u8 (input , 4 );
4946
4990
4947
4991
/* first_len = legal character length minus 1 */
4948
4992
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
4949
4993
/* first_len = first_len_tbl[high_nibbles] */
4950
- const uint8x16_t first_len =
4951
- vqtbl1q_u8 (first_len_tbl , high_nibbles );
4994
+ const uint8x16_t first_len = vqtbl1q_u8 (first_len_tbl , high_nibbles );
4952
4995
4953
4996
/* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
4954
4997
/* range = first_range_tbl[high_nibbles] */
@@ -4957,8 +5000,7 @@ bool utf8_range(const unsigned char *data, size_t len)
4957
5000
/* Second Byte: set range index to first_len */
4958
5001
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
4959
5002
/* range |= (first_len, prev_first_len) << 1 byte */
4960
- range =
4961
- vorrq_u8 (range , vextq_u8 (prev_first_len , first_len , 15 ));
5003
+ range = vorrq_u8 (range , vextq_u8 (prev_first_len , first_len , 15 ));
4962
5004
4963
5005
/* Third Byte: set range index to saturate_sub(first_len, 1) */
4964
5006
/* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
@@ -5033,8 +5075,9 @@ bool utf8_range(const unsigned char *data, size_t len)
5033
5075
/* Merge our error counters together */
5034
5076
error1 = vorrq_u8 (error1 , error2 );
5035
5077
5078
+ uint64_t error_raw_last = vgetq_lane_u64 (vreinterpretq_u64_u8 (error1 ), 0 );
5036
5079
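/* Delay error check till loop ends. NOTE(review): the test below reads only lane 0 (64 of the 128 bits) of error1, whereas vmaxvq_u8 reduced all 16 lanes - confirm errors flagged in the other 8 bytes cannot be missed. */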
5037
- if (vmaxvq_u8 ( error1 ) ) {
5080
+ if (error_raw_last != 0 ) {
5038
5081
return false;
5039
5082
}
5040
5083
0 commit comments