Skip to content

[Clang] Allow raw string literals in C as an extension #88265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 10, 2024
Merged
4 changes: 4 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ code bases.
C/C++ Language Potentially Breaking Changes
-------------------------------------------

- Clang now supports raw string literals in ``-std=gnuXY`` mode as an extension in
C99 and later. This behaviour can be overridden using ``-f[no-]raw-string-literals``,
which is also accepted in C++ standards before C++11. Support of raw string literals
in C++11 and later is not affected. Fixes (#GH85703).

C++ Specific Potentially Breaking Changes
-----------------------------------------
- Clang now diagnoses function/variable templates that shadow their own template parameters, e.g. ``template<class T> void T();``.
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,9 @@ def err_drv_negative_columns : Error<
"invalid value '%1' in '%0', value must be 'none' or a positive integer">;
def err_drv_small_columns : Error<
"invalid value '%1' in '%0', value must be '%2' or greater">;
def warn_drv_fraw_string_literals_in_cxx11 : Warning<
"ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">,
InGroup<UnusedCommandLineArgument>;

def err_drv_invalid_malign_branch_EQ : Error<
"invalid argument '%0' to -malign-branch=; each element must be one of: %1">;
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type")

LANGOPT(CXXAssumptions, 1, 1, "Enable or disable codegen and compile-time checks for C++23's [[assume]] attribute")

LANGOPT(RawStringLiterals, 1, 1, "Enable or disable raw string literals")

ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
StrictFlexArraysLevelKind::Default,
"Rely on strict definition of flexible arrays")
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/LangStandard.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ struct LangStandard {
/// hasDigraphs - Language supports digraphs.
bool hasDigraphs() const { return Flags & Digraphs; }

/// hasRawStringLiterals - Whether R"()" raw string literals are enabled by
/// default for this language standard.
bool hasRawStringLiterals() const {
// C++11 and later always have raw string literals.
if (isCPlusPlus11())
return true;

// Earlier C++ standards do not get them by default.
if (isCPlusPlus())
return false;

// In C, follow GCC: raw string literals are available in C99 and later,
// but only when GNU extensions are enabled.
return isC99() && isGNUMode();
}

/// isGNUMode - Language includes GNU extensions.
bool isGNUMode() const { return Flags & GNUMode; }

Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4235,6 +4235,12 @@ def fenable_matrix : Flag<["-"], "fenable-matrix">, Group<f_Group>,
HelpText<"Enable matrix data type and related builtin functions">,
MarshallingInfoFlag<LangOpts<"MatrixTypes">>;

defm raw_string_literals : BoolFOption<"raw-string-literals",
LangOpts<"RawStringLiterals">, Default<std#".hasRawStringLiterals()">,
PosFlag<SetTrue, [], [], "Enable">,
NegFlag<SetFalse, [], [], "Disable">,
BothFlags<[], [ClangOption, CC1Option], " raw string literals">>;

def fzero_call_used_regs_EQ
: Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>,
Visibility<[ClangOption, CC1Option]>,
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/LangOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
Opts.HexFloats = Std.hasHexFloats();
Opts.WChar = Std.isCPlusPlus();
Opts.Digraphs = Std.hasDigraphs();
Opts.RawStringLiterals = Std.hasRawStringLiterals();

Opts.HLSL = Lang == Language::HLSL;
if (Opts.HLSL && Opts.IncludeDefaultHeader)
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6516,6 +6516,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs);
Args.AddLastArg(CmdArgs, options::OPT_fzero_call_used_regs_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fraw_string_literals,
options::OPT_fno_raw_string_literals);

if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls,
Triple.hasDefaultEmulatedTLS()))
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,19 @@ static bool FixupInvocation(CompilerInvocation &Invocation,
LangOpts.NewAlignOverride = 0;
}

// The -f[no-]raw-string-literals option is only valid in C and in C++
// standards before C++11.
if (LangOpts.CPlusPlus11) {
if (Args.hasArg(OPT_fraw_string_literals, OPT_fno_raw_string_literals)) {
Args.claimAllArgs(OPT_fraw_string_literals, OPT_fno_raw_string_literals);
Diags.Report(diag::warn_drv_fraw_string_literals_in_cxx11)
<< bool(LangOpts.RawStringLiterals);
}

// Do not allow disabling raw string literals in C++11 or later.
LangOpts.RawStringLiterals = true;
}

// Prevent the user from specifying both -fsycl-is-device and -fsycl-is-host.
if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsHost)
Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fsycl-is-device"
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Lex/DependencyDirectivesScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ struct Scanner {
// Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
LangOpts.ObjC = true;
LangOpts.LineComment = true;
// FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and
// R"()" literals.
LangOpts.RawStringLiterals = true;
// FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"".
return LangOpts;
}

Expand Down
10 changes: 5 additions & 5 deletions clang/lib/Lex/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3876,7 +3876,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
tok::utf16_char_constant);

// UTF-16 raw string literal
if (Char == 'R' && LangOpts.CPlusPlus11 &&
if (Char == 'R' && LangOpts.RawStringLiterals &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
Expand All @@ -3898,7 +3898,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
SizeTmp2, Result),
tok::utf8_char_constant);

if (Char2 == 'R' && LangOpts.CPlusPlus11) {
if (Char2 == 'R' && LangOpts.RawStringLiterals) {
unsigned SizeTmp3;
char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
// UTF-8 raw string literal
Expand Down Expand Up @@ -3934,7 +3934,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
tok::utf32_char_constant);

// UTF-32 raw string literal
if (Char == 'R' && LangOpts.CPlusPlus11 &&
if (Char == 'R' && LangOpts.RawStringLiterals &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
Expand All @@ -3949,7 +3949,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();

if (LangOpts.CPlusPlus11) {
if (LangOpts.RawStringLiterals) {
Char = getCharAndSize(CurPtr, SizeTmp);

if (Char == '"')
Expand All @@ -3972,7 +3972,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
tok::wide_string_literal);

// Wide raw string literal.
if (LangOpts.CPlusPlus11 && Char == 'R' &&
if (LangOpts.RawStringLiterals && Char == 'R' &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
Expand Down
17 changes: 17 additions & 0 deletions clang/test/Driver/fraw-string-literals-cxx.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++11 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++11 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++20 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++20 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s

// CHECK-PRE-CXX11-NOT: ignoring '-fraw-string-literals'
// CHECK-PRE-CXX11-NOT: ignoring '-fno-raw-string-literals'
// CHECK-POS: ignoring '-fraw-string-literals', which is only valid for C and C++ standards before C++11
// CHECK-NEG: ignoring '-fno-raw-string-literals', which is only valid for C and C++ standards before C++11
44 changes: 44 additions & 0 deletions clang/test/Lexer/raw-string-ext.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -verify=supported %s
// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -fraw-string-literals -verify=supported %s
// RUN: %clang_cc1 -fsyntax-only -std=gnu89 -verify=unsupported %s
// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -verify=unsupported %s
// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -DUNICODE -fno-raw-string-literals -verify=unsupported %s

// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -verify=unsupported,cxx-unsupported %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -verify=unsupported,cxx-unsupported %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -fraw-string-literals -verify=supported %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -fraw-string-literals -verify=supported %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -verify=supported,cxx %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -verify=supported,cxx %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s

// GCC supports raw string literals in C99 and later in '-std=gnuXY' mode; we
// additionally provide '-f[no-]raw-string-literals' to enable/disable them
// explicitly in C.
//
// We do not allow disabling raw string literals in C++ mode if they are enabled
// by the language standard, i.e. in C++11 or later.

// Driver warnings.
// yes-warning@* {{ignoring '-fraw-string-literals'}}
// no-warning@* {{ignoring '-fno-raw-string-literals'}}

// Uses a raw string literal with the plain and wide prefixes unconditionally,
// and with the u, u8 and U prefixes when UNICODE is defined. In the
// configurations where raw string literals are disabled, the expected
// diagnostics below show the prefix being treated as an ordinary (undeclared)
// identifier instead.
void f() {
(void) R"foo()foo"; // unsupported-error {{use of undeclared identifier 'R'}} cxx-unsupported-error {{expected ';' after expression}}
(void) LR"foo()foo"; // unsupported-error {{use of undeclared identifier 'LR'}} cxx-unsupported-error {{expected ';' after expression}}

#ifdef UNICODE
(void) uR"foo()foo"; // unsupported-error {{use of undeclared identifier 'uR'}} cxx-unsupported-error {{expected ';' after expression}}
(void) u8R"foo()foo"; // unsupported-error {{use of undeclared identifier 'u8R'}} cxx-unsupported-error {{expected ';' after expression}}
(void) UR"foo()foo"; // unsupported-error {{use of undeclared identifier 'UR'}} cxx-unsupported-error {{expected ';' after expression}}
#endif
}

// supported-error@* {{missing terminating delimiter}}
// supported-error@* {{expected expression}}
// supported-error@* {{expected ';' after top level declarator}}
#define R "bar"
const char* s = R"foo(";
6 changes: 4 additions & 2 deletions clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,10 +583,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest, UnderscorePragma) {
R"(_Pragma(u"clang module import"))", Out));
EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());

// FIXME: R"()" strings depend on using C++11 language mode
// R"()" strings are enabled by default.
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
R"(_Pragma(R"abc(clang module import)abc"))", Out));
EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
EXPECT_STREQ(R"(_Pragma(R"abc(clang module import)abc"))"
"\n",
Out.data());
}

TEST(MinimizeSourceToDependencyDirectivesTest, Include) {
Expand Down
Loading