|
47 | 47 | #include <tuple>
|
48 | 48 | #include <utility>
|
49 | 49 |
|
| 50 | +#ifdef __SSE4_2__ |
| 51 | +#include <nmmintrin.h> |
| 52 | +#endif |
| 53 | + |
50 | 54 | using namespace clang;
|
51 | 55 |
|
52 | 56 | //===----------------------------------------------------------------------===//
|
@@ -1847,19 +1851,47 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,
|
1847 | 1851 | return true;
|
1848 | 1852 | }
|
1849 | 1853 |
|
| 1854 | +static const char * |
| 1855 | +fastParseASCIIIdentifier(const char *CurPtr, |
| 1856 | + [[maybe_unused]] const char *BufferEnd) { |
| 1857 | +#ifdef __SSE4_2__ |
| 1858 | + alignas(16) static constexpr char AsciiIdentifierRange[16] = { |
| 1859 | + '_', '_', 'A', 'Z', 'a', 'z', '0', '9', |
| 1860 | + }; |
| 1861 | + constexpr ssize_t BytesPerRegister = 16; |
| 1862 | + |
| 1863 | + __m128i AsciiIdentifierRangeV = |
| 1864 | + _mm_load_si128((const __m128i *)AsciiIdentifierRange); |
| 1865 | + |
| 1866 | + while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) { |
| 1867 | + __m128i Cv = _mm_loadu_si128((const __m128i *)(CurPtr)); |
| 1868 | + |
| 1869 | + int Consumed = _mm_cmpistri(AsciiIdentifierRangeV, Cv, |
| 1870 | + _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | |
| 1871 | + _SIDD_UBYTE_OPS | _SIDD_NEGATIVE_POLARITY); |
| 1872 | + CurPtr += Consumed; |
| 1873 | + if (Consumed == BytesPerRegister) |
| 1874 | + continue; |
| 1875 | + return CurPtr; |
| 1876 | + } |
| 1877 | +#endif |
| 1878 | + |
| 1879 | + unsigned char C = *CurPtr; |
| 1880 | + while (isAsciiIdentifierContinue(C)) |
| 1881 | + C = *++CurPtr; |
| 1882 | + return CurPtr; |
| 1883 | +} |
| 1884 | + |
1850 | 1885 | bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
|
1851 | 1886 | // Match [_A-Za-z0-9]*, we have already matched an identifier start.
|
| 1887 | + |
1852 | 1888 | while (true) {
|
1853 |
| - unsigned char C = *CurPtr; |
1854 |
| - // Fast path. |
1855 |
| - if (isAsciiIdentifierContinue(C)) { |
1856 |
| - ++CurPtr; |
1857 |
| - continue; |
1858 |
| - } |
| 1889 | + |
| 1890 | + CurPtr = fastParseASCIIIdentifier(CurPtr, BufferEnd); |
1859 | 1891 |
|
1860 | 1892 | unsigned Size;
|
1861 | 1893 | // Slow path: handle trigraph, unicode codepoints, UCNs.
|
1862 |
| - C = getCharAndSize(CurPtr, Size); |
| 1894 | + unsigned char C = getCharAndSize(CurPtr, Size); |
1863 | 1895 | if (isAsciiIdentifierContinue(C)) {
|
1864 | 1896 | CurPtr = ConsumeChar(CurPtr, Size, Result);
|
1865 | 1897 | continue;
|
|
0 commit comments