Skip to content

[Clang] Update Unicode version to 15.1 #77147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,8 @@ Non-comprehensive list of changes in this release

* Added ``#pragma clang fp reciprocal``.

* The version of Unicode used by Clang (primarily to parse identifiers) has been updated to 15.1.

New Compiler Flags
------------------

Expand Down
47 changes: 25 additions & 22 deletions clang/lib/Lex/UnicodeCharSets.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#include "llvm/Support/UnicodeCharRanges.h"

// Unicode 15.0 XID_Start
// Unicode 15.1 XID_Start
static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {
{0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA},
{0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6},
Expand Down Expand Up @@ -233,9 +233,10 @@ static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {
{0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},
{0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DF}, {0x2A700, 0x2B739},
{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0},
{0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, {0x31350, 0x323AF}};
{0x2EBF0, 0x2EE5D}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
{0x31350, 0x323AF}};

// Unicode 15.0 XID_Continue, excluding XID_Start
// Unicode 15.1 XID_Continue, excluding XID_Start
// The Unicode Property XID_Continue is a superset of XID_Start.
// To save space, the table below only contains the codepoints
// that are not also in XID_Start.
Expand Down Expand Up @@ -299,24 +300,25 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {
{0x1C24, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59},
{0x1CD0, 0x1CD2}, {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED},
{0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF},
{0x203F, 0x2040}, {0x2054, 0x2054}, {0x20D0, 0x20DC},
{0x20E1, 0x20E1}, {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1},
{0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},
{0x3099, 0x309A}, {0xA620, 0xA629}, {0xA66F, 0xA66F},
{0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1},
{0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B},
{0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},
{0xA8B4, 0xA8C5}, {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1},
{0xA8FF, 0xA909}, {0xA926, 0xA92D}, {0xA947, 0xA953},
{0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9},
{0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36},
{0xAA43, 0xAA43}, {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59},
{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4},
{0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1},
{0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA},
{0xABEC, 0xABED}, {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E},
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0xFE33, 0xFE34},
{0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F},
{0x200C, 0x200D}, {0x203F, 0x2040}, {0x2054, 0x2054},
{0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x20E5, 0x20F0},
{0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF},
{0x302A, 0x302F}, {0x3099, 0x309A}, {0x30FB, 0x30FB},
{0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D},
{0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802},
{0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827},
{0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C5},
{0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA909},
{0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983},
{0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5},
{0xA9F0, 0xA9F9}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43},
{0xAA4C, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7D},
{0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},
{0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF},
{0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED},
{0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
{0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F},
{0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, {0xFF65, 0xFF65},
{0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0},
{0x10376, 0x1037A}, {0x104A0, 0x104A9}, {0x10A01, 0x10A03},
{0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},
Expand Down Expand Up @@ -364,7 +366,8 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {
{0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F}, {0x1E130, 0x1E136},
{0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2F9},
{0x1E4EC, 0x1E4F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
{0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF}};
{0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF},
};

// Clang supports the "Mathematical notation profile" as an extension,
// as described in https://www.unicode.org/L2/L2022/22230-math-profile.pdf
Expand Down
9 changes: 5 additions & 4 deletions clang/test/Lexer/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ extern int ༀ;
extern int 𑩐;
extern int 𐠈;
extern int ꙮ;
extern int \u1B4C; // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
extern int \u1B4C; // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
extern int \u{2EBF0}; // CJK UNIFIED IDEOGRAPH-2EBF0 - Added in Unicode 15.1
extern int a\N{TANGSA LETTER GA};
extern int a\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a valid Unicode character name}} \
// expected-error {{expected ';' after top level declarator}} \
Expand Down Expand Up @@ -74,7 +75,7 @@ extern int 👷; // expected-error {{unexpected character <U+1F477>}} \

extern int 👷‍♀; // expected-warning {{declaration does not declare anything}} \
expected-error {{unexpected character <U+1F477>}} \
expected-error {{unexpected character <U+200D>}} \
expected-error {{character <U+200D> not allowed at the start of an identifier}} \
expected-error {{unexpected character <U+2640>}}
#else

Expand Down
Loading