Skip to content

Commit a68039c

Browse files
committed
[Clang] Add tests and mark as implemented WG14-N2728
This change expands testing of UTF-8, UTF-16, and UTF-32 character and string literals as validation that WG14 N2728 (char16_t & char32_t string literals shall be UTF-16 & UTF-32) has been implemented.
Reviewed By: cor3ntin, aaron.ballman
Differential Revision: https://reviews.llvm.org/D149098
1 parent 484e64f commit a68039c

File tree

2 files changed

+84
-3
lines changed

2 files changed

+84
-3
lines changed

clang/test/Lexer/char-literal.cpp

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -Wfour-char-constants -fsyntax-only -verify %s
2-
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -Wfour-char-constants -fsyntax-only -verify %s
1+
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -Wfour-char-constants -fsyntax-only -verify=cxx,expected %s
2+
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++17 -Wfour-char-constants -fsyntax-only -verify=cxx,expected %s
3+
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++20 -Wfour-char-constants -fsyntax-only -verify=cxx,expected %s
4+
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -Wfour-char-constants -fsyntax-only -verify=c,expected %s
5+
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c2x -x c -Wfour-char-constants -fsyntax-only -verify=c,expected %s
36

47
#ifndef __cplusplus
58
typedef __WCHAR_TYPE__ wchar_t;
@@ -38,3 +41,81 @@ char16_t q[2] = u"\U00010000";
3841
#ifdef __cplusplus
3942
// expected-error@-2 {{too long}}
4043
#endif
44+
45+
// UTF-8 character literal code point ranges.
46+
#if __cplusplus >= 201703L || __STDC_VERSION__ >= 201710L
47+
_Static_assert(u8'\U00000000' == 0x00, ""); // c-error {{universal character name refers to a control character}}
48+
_Static_assert(u8'\U0000007F' == 0x7F, ""); // c-error {{universal character name refers to a control character}}
49+
_Static_assert(u8'\U00000080', ""); // c-error {{universal character name refers to a control character}}
50+
// cxx-error@-1 {{character too large for enclosing character literal type}}
51+
_Static_assert((unsigned char)u8'\xFF' == (unsigned char)0xFF, "");
52+
#endif
53+
54+
// UTF-8 string literal code point ranges.
55+
_Static_assert(u8"\U00000000"[0] == 0x00, ""); // c-error {{universal character name refers to a control character}}
56+
_Static_assert(u8"\U0000007F"[0] == 0x7F, ""); // c-error {{universal character name refers to a control character}}
57+
_Static_assert((unsigned char)u8"\U00000080"[0] == (unsigned char)0xC2, ""); // c-error {{universal character name refers to a control character}}
58+
_Static_assert((unsigned char)u8"\U00000080"[1] == (unsigned char)0x80, ""); // c-error {{universal character name refers to a control character}}
59+
_Static_assert((unsigned char)u8"\U000007FF"[0] == (unsigned char)0xDF, "");
60+
_Static_assert((unsigned char)u8"\U000007FF"[1] == (unsigned char)0xBF, "");
61+
_Static_assert((unsigned char)u8"\U00000800"[0] == (unsigned char)0xE0, "");
62+
_Static_assert((unsigned char)u8"\U00000800"[1] == (unsigned char)0xA0, "");
63+
_Static_assert((unsigned char)u8"\U00000800"[2] == (unsigned char)0x80, "");
64+
_Static_assert(u8"\U0000D800"[0], ""); // expected-error {{invalid universal character}}
65+
_Static_assert(u8"\U0000DFFF"[0], ""); // expected-error {{invalid universal character}}
66+
_Static_assert((unsigned char)u8"\U0000FFFF"[0] == (unsigned char)0xEF, "");
67+
_Static_assert((unsigned char)u8"\U0000FFFF"[1] == (unsigned char)0xBF, "");
68+
_Static_assert((unsigned char)u8"\U0000FFFF"[2] == (unsigned char)0xBF, "");
69+
_Static_assert((unsigned char)u8"\U00010000"[0] == (unsigned char)0xF0, "");
70+
_Static_assert((unsigned char)u8"\U00010000"[1] == (unsigned char)0x90, "");
71+
_Static_assert((unsigned char)u8"\U00010000"[2] == (unsigned char)0x80, "");
72+
_Static_assert((unsigned char)u8"\U00010000"[3] == (unsigned char)0x80, "");
73+
_Static_assert((unsigned char)u8"\U0010FFFF"[0] == (unsigned char)0xF4, "");
74+
_Static_assert((unsigned char)u8"\U0010FFFF"[1] == (unsigned char)0x8F, "");
75+
_Static_assert((unsigned char)u8"\U0010FFFF"[2] == (unsigned char)0xBF, "");
76+
_Static_assert((unsigned char)u8"\U0010FFFF"[3] == (unsigned char)0xBF, "");
77+
_Static_assert(u8"\U00110000"[0], ""); // expected-error {{invalid universal character}}
78+
79+
#if !defined(__STDC_UTF_16__)
80+
#error __STDC_UTF_16__ is not defined.
81+
#endif
82+
#if __STDC_UTF_16__ != 1
83+
#error __STDC_UTF_16__ has the wrong value.
84+
#endif
85+
86+
// UTF-16 character literal code point ranges.
87+
_Static_assert(u'\U00000000' == 0x0000, ""); // c-error {{universal character name refers to a control character}}
88+
_Static_assert(u'\U0000D800', ""); // expected-error {{invalid universal character}}
89+
_Static_assert(u'\U0000DFFF', ""); // expected-error {{invalid universal character}}
90+
_Static_assert(u'\U0000FFFF' == 0xFFFF, "");
91+
_Static_assert(u'\U00010000', ""); // expected-error {{character too large for enclosing character literal type}}
92+
93+
// UTF-16 string literal code point ranges.
94+
_Static_assert(u"\U00000000"[0] == 0x0000, ""); // c-error {{universal character name refers to a control character}}
95+
_Static_assert(u"\U0000D800"[0], ""); // expected-error {{invalid universal character}}
96+
_Static_assert(u"\U0000DFFF"[0], ""); // expected-error {{invalid universal character}}
97+
_Static_assert(u"\U0000FFFF"[0] == 0xFFFF, "");
98+
_Static_assert(u"\U00010000"[0] == 0xD800, "");
99+
_Static_assert(u"\U00010000"[1] == 0xDC00, "");
100+
_Static_assert(u"\U0010FFFF"[0] == 0xDBFF, "");
101+
_Static_assert(u"\U0010FFFF"[1] == 0xDFFF, "");
102+
_Static_assert(u"\U00110000"[0], ""); // expected-error {{invalid universal character}}
103+
104+
#if !defined(__STDC_UTF_32__)
105+
#error __STDC_UTF_32__ is not defined.
106+
#endif
107+
#if __STDC_UTF_32__ != 1
108+
#error __STDC_UTF_32__ has the wrong value.
109+
#endif
110+
111+
// UTF-32 character literal code point ranges.
112+
_Static_assert(U'\U00000000' == 0x00000000, ""); // c-error {{universal character name refers to a control character}}
113+
_Static_assert(U'\U0010FFFF' == 0x0010FFFF, "");
114+
_Static_assert(U'\U00110000', ""); // expected-error {{invalid universal character}}
115+
116+
// UTF-32 string literal code point ranges.
117+
_Static_assert(U"\U00000000"[0] == 0x00000000, ""); // c-error {{universal character name refers to a control character}}
118+
_Static_assert(U"\U0000D800"[0], ""); // expected-error {{invalid universal character}}
119+
_Static_assert(U"\U0000DFFF"[0], ""); // expected-error {{invalid universal character}}
120+
_Static_assert(U"\U0010FFFF"[0] == 0x0010FFFF, "");
121+
_Static_assert(U"\U00110000"[0], ""); // expected-error {{invalid universal character}}

clang/www/c_status.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -929,7 +929,7 @@ <h2 id="c2x">C2x implementation status</h2>
929929
<tr>
930930
<td>char16_t &amp; char32_t string literals shall be UTF-16 &amp; UTF-32</td>
931931
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2728.htm">N2728</a></td>
932-
<td class="unknown" align="center">Unknown</td>
932+
<td class="full" align="center">Yes</td>
933933
</tr>
934934
<tr>
935935
<td>IEC 60559 binding</td>

0 commit comments

Comments
 (0)