Skip to content

Commit 2176c5e

Browse files
committed
[Clang][Sema] Fix display of characters on static assertion failure
This patch fixes the display of characters appearing in LHS or RHS of == expression in notes to static assertion failure. This applies C-style escape if the printed character is a special character. This also adds a numerical value displayed next to the character representation. This also tries to print multi-byte characters if the user-provided expression is multi-byte char type. Reviewed By: cor3ntin Differential Revision: https://reviews.llvm.org/D155610
1 parent 8641cdf commit 2176c5e

File tree

7 files changed

+176
-16
lines changed

7 files changed

+176
-16
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,41 @@ Improvements to Clang's diagnostics
222222
- ``-Wfixed-enum-extension`` and ``-Wmicrosoft-fixed-enum`` diagnostics are no longer
223223
emitted when building as C23, since C23 standardizes support for enums with a
224224
fixed underlying type.
225+
- When describing the failure of a static assertion on an ``==`` expression, clang prints the integer
226+
representation of the value as well as its character representation when
227+
the user-provided expression is of character type. If the character is
228+
non-printable, clang now shows the escaped character.
229+
Clang also prints multi-byte characters if the user-provided expression
230+
is of multi-byte character type.
231+
232+
*Example Code*:
233+
234+
.. code-block:: c++
235+
236+
static_assert("A\n"[1] == U'🌍');
237+
238+
*BEFORE*:
239+
240+
.. code-block:: text
241+
242+
source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d''
243+
1 | static_assert("A\n"[1] == U'🌍');
244+
| ^~~~~~~~~~~~~~~~~
245+
source:1:24: note: expression evaluates to ''
246+
' == 127757'
247+
1 | static_assert("A\n"[1] == U'🌍');
248+
| ~~~~~~~~~^~~~~~~~
249+
250+
*AFTER*:
251+
252+
.. code-block:: text
253+
254+
source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d''
255+
1 | static_assert("A\n"[1] == U'🌍');
256+
| ^~~~~~~~~~~~~~~~~
257+
source:1:24: note: expression evaluates to ''\n' (0x0A, 10) == U'🌍' (0x1F30D, 127757)'
258+
1 | static_assert("A\n"[1] == U'🌍');
259+
| ~~~~~~~~~^~~~~~~~
225260
226261
Bug Fixes in This Version
227262
-------------------------

clang/include/clang/Basic/Diagnostic.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1840,7 +1840,7 @@ const char ToggleHighlight = 127;
18401840
void ProcessWarningOptions(DiagnosticsEngine &Diags,
18411841
const DiagnosticOptions &Opts,
18421842
bool ReportDiags = true);
1843-
1843+
void EscapeStringForDiagnostic(StringRef Str, SmallVectorImpl<char> &OutStr);
18441844
} // namespace clang
18451845

18461846
#endif // LLVM_CLANG_BASIC_DIAGNOSTIC_H

clang/lib/Basic/Diagnostic.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -800,9 +800,10 @@ FormatDiagnostic(SmallVectorImpl<char> &OutStr) const {
800800
FormatDiagnostic(Diag.begin(), Diag.end(), OutStr);
801801
}
802802

803-
/// pushEscapedString - Append Str to the diagnostic buffer,
803+
/// EscapeStringForDiagnostic - Append Str to the diagnostic buffer,
804804
/// escaping non-printable characters and ill-formed code unit sequences.
805-
static void pushEscapedString(StringRef Str, SmallVectorImpl<char> &OutStr) {
805+
void clang::EscapeStringForDiagnostic(StringRef Str,
806+
SmallVectorImpl<char> &OutStr) {
806807
OutStr.reserve(OutStr.size() + Str.size());
807808
auto *Begin = reinterpret_cast<const unsigned char *>(Str.data());
808809
llvm::raw_svector_ostream OutStream(OutStr);
@@ -854,7 +855,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
854855
StringRef(DiagStr, DiagEnd - DiagStr).equals("%0") &&
855856
getArgKind(0) == DiagnosticsEngine::ak_std_string) {
856857
const std::string &S = getArgStdStr(0);
857-
pushEscapedString(S, OutStr);
858+
EscapeStringForDiagnostic(S, OutStr);
858859
return;
859860
}
860861

@@ -961,7 +962,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
961962
case DiagnosticsEngine::ak_std_string: {
962963
const std::string &S = getArgStdStr(ArgNo);
963964
assert(ModifierLen == 0 && "No modifiers for strings yet");
964-
pushEscapedString(S, OutStr);
965+
EscapeStringForDiagnostic(S, OutStr);
965966
break;
966967
}
967968
case DiagnosticsEngine::ak_c_string: {
@@ -971,7 +972,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
971972
// Don't crash if get passed a null pointer by accident.
972973
if (!S)
973974
S = "(null)";
974-
pushEscapedString(S, OutStr);
975+
EscapeStringForDiagnostic(S, OutStr);
975976
break;
976977
}
977978
// ---- INTEGERS ----

clang/lib/Sema/SemaDeclCXX.cpp

Lines changed: 99 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "llvm/ADT/ScopeExit.h"
5050
#include "llvm/ADT/SmallString.h"
5151
#include "llvm/ADT/StringExtras.h"
52+
#include "llvm/Support/ConvertUTF.h"
5253
#include "llvm/Support/SaveAndRestore.h"
5354
#include <map>
5455
#include <optional>
@@ -17026,10 +17027,74 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
1702617027
AssertMessageExpr, RParenLoc, false);
1702717028
}
1702817029

17030+
/// Write the character-literal prefix that corresponds to the builtin
/// character kind \p BTK to \p OS: nothing for Char_S/Char_U, "u8" for Char8,
/// 'u' for Char16, 'U' for Char32 and 'L' for WChar_S/WChar_U.
/// Any other kind reaches llvm_unreachable.
static void WriteCharTypePrefix(BuiltinType::Kind BTK, llvm::raw_ostream &OS) {
17031+
switch (BTK) {
17032+
case BuiltinType::Char_S:
17033+
case BuiltinType::Char_U:
17034+
// These two kinds get no prefix.
break;
17035+
case BuiltinType::Char8:
17036+
OS << "u8";
17037+
break;
17038+
case BuiltinType::Char16:
17039+
OS << 'u';
17040+
break;
17041+
case BuiltinType::Char32:
17042+
OS << 'U';
17043+
break;
17044+
case BuiltinType::WChar_S:
17045+
case BuiltinType::WChar_U:
17046+
OS << 'L';
17047+
break;
17048+
default:
17049+
// Callers are expected to pass only the character kinds handled above.
llvm_unreachable("Non-character type");
17050+
}
17051+
}
17052+
17053+
/// Convert character's value, interpreted as a code unit, to a string.
17054+
/// The value needs to be zero-extended to 32-bits.
17055+
/// The text is appended to \p Str. \p BTy supplies the builtin character kind
/// that selects the formatting path, and \p TyWidth determines the digit
/// count (TyWidth / 4) used by the hexadecimal fallback.
/// FIXME: This assumes Unicode literal encodings
17056+
static void WriteCharValueForDiagnostic(uint32_t Value, const BuiltinType *BTy,
17057+
unsigned TyWidth,
17058+
SmallVectorImpl<char> &Str) {
17059+
// Scratch buffer that receives the output of ConvertCodePointToUTF8 below.
char Arr[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
17060+
char *Ptr = Arr;
17061+
BuiltinType::Kind K = BTy->getKind();
17062+
// OS is a stream over Str, so writes through either one land in the same
// buffer.
llvm::raw_svector_ostream OS(Str);
17063+
17064+
// This should catch Char_S, Char_U, Char8, and use of escaped characters in
17065+
// other types.
17066+
if (K == BuiltinType::Char_S || K == BuiltinType::Char_U ||
17067+
K == BuiltinType::Char8 || Value <= 0x7F) {
17068+
// A non-empty result from escapeCStyle is forwarded through
// EscapeStringForDiagnostic; otherwise the value is written as one char.
StringRef Escaped = escapeCStyle<EscapeChar::Single>(Value);
17069+
if (!Escaped.empty())
17070+
EscapeStringForDiagnostic(Escaped, Str);
17071+
else
17072+
OS << static_cast<char>(Value);
17073+
return;
17074+
}
17075+
17076+
switch (K) {
17077+
case BuiltinType::Char16:
17078+
case BuiltinType::Char32:
17079+
case BuiltinType::WChar_S:
17080+
case BuiltinType::WChar_U: {
17081+
// When ConvertCodePointToUTF8 succeeds, the bytes written between Arr and
// Ptr are emitted; when it fails, the value is printed as an upper-case
// "\x" hex escape instead.
if (llvm::ConvertCodePointToUTF8(Value, Ptr))
17082+
EscapeStringForDiagnostic(StringRef(Arr, Ptr - Arr), Str);
17083+
else
17084+
OS << "\\x"
17085+
<< llvm::format_hex_no_prefix(Value, TyWidth / 4, /*Upper=*/true);
17086+
break;
17087+
}
17088+
default:
17089+
// Char_S, Char_U and Char8 returned early above, so any other kind here is
// not one of the handled character kinds.
llvm_unreachable("Non-character type is passed");
17090+
}
17091+
}
17092+
1702917093
/// Convert \V to a string we can present to the user in a diagnostic
1703017094
/// \T is the type of the expression that has been evaluated into \V
1703117095
static bool ConvertAPValueToString(const APValue &V, QualType T,
17032-
SmallVectorImpl<char> &Str) {
17096+
SmallVectorImpl<char> &Str,
17097+
ASTContext &Context) {
1703317098
if (!V.hasValue())
1703417099
return false;
1703517100

@@ -17044,13 +17109,38 @@ static bool ConvertAPValueToString(const APValue &V, QualType T,
1704417109
"Bool type, but value is not 0 or 1");
1704517110
llvm::raw_svector_ostream OS(Str);
1704617111
OS << (BoolValue ? "true" : "false");
17047-
} else if (T->isCharType()) {
17112+
} else {
17113+
llvm::raw_svector_ostream OS(Str);
1704817114
// Same is true for chars.
17049-
Str.push_back('\'');
17050-
Str.push_back(V.getInt().getExtValue());
17051-
Str.push_back('\'');
17052-
} else
17115+
// We want to print the character representation for textual types
17116+
const auto *BTy = T->getAs<BuiltinType>();
17117+
if (BTy) {
17118+
switch (BTy->getKind()) {
17119+
case BuiltinType::Char_S:
17120+
case BuiltinType::Char_U:
17121+
case BuiltinType::Char8:
17122+
case BuiltinType::Char16:
17123+
case BuiltinType::Char32:
17124+
case BuiltinType::WChar_S:
17125+
case BuiltinType::WChar_U: {
17126+
unsigned TyWidth = Context.getIntWidth(T);
17127+
assert(8 <= TyWidth && TyWidth <= 32 && "Unexpected integer width");
17128+
uint32_t CodeUnit = static_cast<uint32_t>(V.getInt().getZExtValue());
17129+
WriteCharTypePrefix(BTy->getKind(), OS);
17130+
OS << '\'';
17131+
WriteCharValueForDiagnostic(CodeUnit, BTy, TyWidth, Str);
17132+
OS << "' (0x"
17133+
<< llvm::format_hex_no_prefix(CodeUnit, /*Width=*/2,
17134+
/*Upper=*/true)
17135+
<< ", " << V.getInt() << ')';
17136+
return true;
17137+
}
17138+
default:
17139+
break;
17140+
}
17141+
}
1705317142
V.getInt().toString(Str);
17143+
}
1705417144

1705517145
break;
1705617146

@@ -17147,8 +17237,9 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) {
1714717237

1714817238
Side->EvaluateAsRValue(DiagSide[I].Result, Context, true);
1714917239

17150-
DiagSide[I].Print = ConvertAPValueToString(
17151-
DiagSide[I].Result.Val, Side->getType(), DiagSide[I].ValueString);
17240+
DiagSide[I].Print =
17241+
ConvertAPValueToString(DiagSide[I].Result.Val, Side->getType(),
17242+
DiagSide[I].ValueString, Context);
1715217243
}
1715317244
if (DiagSide[0].Print && DiagSide[1].Print) {
1715417245
Diag(Op->getExprLoc(), diag::note_expr_evaluates_to)

clang/test/Lexer/cxx1z-trigraphs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ error here;
2121

2222
#if !ENABLED_TRIGRAPHS
2323
// expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
24-
// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
24+
// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (0x3F, 63) == '#' (0x23, 35)'}}
2525
// expected-error@16 {{}}
2626
// expected-error@20 {{}}
2727
#else

clang/test/SemaCXX/static-assert-cxx26.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,12 @@ Good<Frobble> a; // expected-note {{in instantiation}}
298298
Bad<int> b; // expected-note {{in instantiation}}
299299

300300
}
301+
302+
namespace EscapeInDiagnostic {
303+
static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
304+
// expected-note {{evaluates to ''\t' (0x09, 9) == '<U+0001>' (0x01, 1)'}}
305+
static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
306+
// expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
307+
static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
308+
// expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
309+
}

clang/test/SemaCXX/static-assert.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,31 @@ namespace Diagnostics {
268268
return 'c';
269269
}
270270
static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
271-
// expected-note {{evaluates to ''c' == 'a''}}
271+
// expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
272+
static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
273+
// expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
274+
static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
275+
// expected-note {{n' (0x0A, 10) == '<U+0000>' (0x00, 0)'}}
276+
// The note above is intended to match "evaluates to '\n' (0x0A, 10) == '<U+0000>' (0x00, 0)'", but if we write it as it is,
277+
// the "\n" cannot be consumed by the diagnostic consumer.
278+
static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
279+
// expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
280+
static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
281+
// expected-note {{evaluates to ''<FC>' (0xFC, -4) == 248'}}
282+
static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
283+
// expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
284+
static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
285+
// expected-note {{evaluates to ''<A0>' (0xA0, -96) == ' ' (0x20, 32)'}}
286+
static_assert((char16_t)L'' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
287+
// expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
288+
static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
289+
// expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}}
290+
static_assert(L""[0] == U'🌍', ""); // expected-error {{failed}} \
291+
// expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}}
292+
static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
293+
// expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}}
294+
static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
295+
// expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}}
272296

273297
/// Bools are printed as bools.
274298
constexpr bool invert(bool b) {

0 commit comments

Comments
 (0)