Skip to content

Commit 03e43cf

Browse files
authored
[Clang] Update Unicode version to 15.1 (#77147)
This updates all of our Unicode tables to Unicode 15.1. This is a minor version, so only a relatively small number of characters are added, mainly ideographs: https://www.unicode.org/versions/Unicode15.1.0/#Appendices_nb
1 parent b26bfcc commit 03e43cf

File tree

9 files changed

+20038
-20015
lines changed

9 files changed

+20038
-20015
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,8 @@ Non-comprehensive list of changes in this release
281281

282282
* Added ``#pragma clang fp reciprocal``.
283283

284+
* The version of Unicode used by Clang (primarily to parse identifiers) has been updated to 15.1.
285+
284286
New Compiler Flags
285287
------------------
286288

clang/lib/Lex/UnicodeCharSets.h

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
#include "llvm/Support/UnicodeCharRanges.h"
1212

13-
// Unicode 15.0 XID_Start
13+
// Unicode 15.1 XID_Start
1414
static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {
1515
{0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA},
1616
{0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6},
@@ -233,9 +233,10 @@ static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {
233233
{0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},
234234
{0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DF}, {0x2A700, 0x2B739},
235235
{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0},
236-
{0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, {0x31350, 0x323AF}};
236+
{0x2EBF0, 0x2EE5D}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
237+
{0x31350, 0x323AF}};
237238

238-
// Unicode 15.0 XID_Continue, excluding XID_Start
239+
// Unicode 15.1 XID_Continue, excluding XID_Start
239240
// The Unicode property XID_Continue is a superset of XID_Start.
240241
// To save space, the table below only contains the code points
241242
// that are not also in XID_Start.
@@ -299,24 +300,25 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {
299300
{0x1C24, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59},
300301
{0x1CD0, 0x1CD2}, {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED},
301302
{0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF},
302-
{0x203F, 0x2040}, {0x2054, 0x2054}, {0x20D0, 0x20DC},
303-
{0x20E1, 0x20E1}, {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1},
304-
{0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},
305-
{0x3099, 0x309A}, {0xA620, 0xA629}, {0xA66F, 0xA66F},
306-
{0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1},
307-
{0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B},
308-
{0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},
309-
{0xA8B4, 0xA8C5}, {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1},
310-
{0xA8FF, 0xA909}, {0xA926, 0xA92D}, {0xA947, 0xA953},
311-
{0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9},
312-
{0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36},
313-
{0xAA43, 0xAA43}, {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59},
314-
{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4},
315-
{0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1},
316-
{0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA},
317-
{0xABEC, 0xABED}, {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E},
318-
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0xFE33, 0xFE34},
319-
{0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F},
303+
{0x200C, 0x200D}, {0x203F, 0x2040}, {0x2054, 0x2054},
304+
{0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x20E5, 0x20F0},
305+
{0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF},
306+
{0x302A, 0x302F}, {0x3099, 0x309A}, {0x30FB, 0x30FB},
307+
{0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D},
308+
{0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802},
309+
{0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827},
310+
{0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C5},
311+
{0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA909},
312+
{0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983},
313+
{0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5},
314+
{0xA9F0, 0xA9F9}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43},
315+
{0xAA4C, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7D},
316+
{0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},
317+
{0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF},
318+
{0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED},
319+
{0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
320+
{0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F},
321+
{0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, {0xFF65, 0xFF65},
320322
{0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0},
321323
{0x10376, 0x1037A}, {0x104A0, 0x104A9}, {0x10A01, 0x10A03},
322324
{0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},
@@ -364,7 +366,8 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {
364366
{0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F}, {0x1E130, 0x1E136},
365367
{0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2F9},
366368
{0x1E4EC, 0x1E4F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
367-
{0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF}};
369+
{0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF},
370+
};
368371

369372
// Clang supports the "Mathematical notation profile" as an extension,
370373
// as described in https://www.unicode.org/L2/L2022/22230-math-profile.pdf

clang/test/Lexer/unicode.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ extern int ༀ;
3838
extern int 𑩐;
3939
extern int 𐠈;
4040
extern int ;
41-
extern int \u1B4C; // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
42-
extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
43-
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
41+
extern int \u1B4C; // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
42+
extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
43+
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
44+
extern int \u{2EBF0}; // CJK UNIFIED IDEOGRAPH-2EBF0 - Added in Unicode 15.1
4445
extern int a\N{TANGSA LETTER GA};
4546
extern int a\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a valid Unicode character name}} \
4647
// expected-error {{expected ';' after top level declarator}} \
@@ -74,7 +75,7 @@ extern int 👷; // expected-error {{unexpected character <U+1F477>}} \
7475

7576
extern int 👷‍♀; // expected-warning {{declaration does not declare anything}} \
7677
expected-error {{unexpected character <U+1F477>}} \
77-
expected-error {{unexpected character <U+200D>}} \
78+
expected-error {{character <U+200D> not allowed at the start of an identifier}} \
7879
expected-error {{unexpected character <U+2640>}}
7980
#else
8081

0 commit comments

Comments
 (0)