Skip to content

Commit 73b1eb3

Browse files
zygoloidtbaederrAaronBallman
authored andcommitted
[clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation (llvm#109208)
Track the identity of each string literal object produced by evaluation with a global version number. Accept comparisons between literals of the same version, and between literals of different versions that cannot possibly be placed in overlapping storage. Treat the remaining comparisons as non-constant. --------- Co-authored-by: Timm Baeder <[email protected]> Co-authored-by: Aaron Ballman <[email protected]>
1 parent 87d09d4 commit 73b1eb3

File tree

11 files changed

+282
-41
lines changed

11 files changed

+282
-41
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,24 @@ C++ Specific Potentially Breaking Changes
8181
template <typename T>
8282
void f();
8383

84+
- During constant evaluation, comparisons between different evaluations of the
85+
same string literal are now correctly treated as non-constant, and comparisons
86+
between string literals that cannot possibly overlap in memory are now treated
87+
as constant. This updates Clang to match the anticipated direction of open core
88+
issue `CWG2765 <http://wg21.link/CWG2765>`, but is subject to change once that
89+
issue is resolved.
90+
91+
.. code-block:: c++
92+
93+
constexpr const char *f() { return "hello"; }
94+
constexpr const char *g() { return "world"; }
95+
// Used to evaluate to false, now error: non-constant comparison.
96+
constexpr bool a = f() == f();
97+
// Might evaluate to true or false, as before.
98+
bool at_runtime() { return f() == f(); }
99+
// Was error, now evaluates to false.
100+
constexpr bool b = f() == g();
101+
84102
ABI Changes in This Version
85103
---------------------------
86104

clang/include/clang/AST/ASTContext.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,14 @@ class ASTContext : public RefCountedBase<ASTContext> {
324324
/// This is lazily created. This is intentionally not serialized.
325325
mutable llvm::StringMap<StringLiteral *> StringLiteralCache;
326326

327+
/// The next string literal "version" to allocate during constant evaluation.
328+
/// This is used to distinguish between repeated evaluations of the same
329+
/// string literal.
330+
///
331+
/// We don't need to serialize this because constants get re-evaluated in the
332+
/// current file before they are compared locally.
333+
unsigned NextStringLiteralVersion = 0;
334+
327335
/// MD5 hash of CUID. It is calculated when first used and cached by this
328336
/// data member.
329337
mutable std::string CUIDHash;
@@ -3300,6 +3308,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
33003308
/// PredefinedExpr to cache evaluated results.
33013309
StringLiteral *getPredefinedStringLiteralFromCache(StringRef Key) const;
33023310

3311+
/// Return the next version number to be used for a string literal evaluated
3312+
/// as part of constant evaluation.
3313+
unsigned getNextStringLiteralVersion() { return NextStringLiteralVersion++; }
3314+
33033315
/// Return a declaration for the global GUID object representing the given
33043316
/// GUID value.
33053317
MSGuidDecl *getMSGuidDecl(MSGuidDeclParts Parts) const;

clang/include/clang/Basic/DiagnosticASTKinds.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ def note_constexpr_pointer_constant_comparison : Note<
9696
"at runtime">;
9797
def note_constexpr_literal_comparison : Note<
9898
"comparison of addresses of literals has unspecified value">;
99+
def note_constexpr_opaque_call_comparison : Note<
100+
"comparison against opaque constant address '%0' can only be performed at "
101+
"runtime">;
99102
def note_constexpr_pointer_weak_comparison : Note<
100103
"comparison against address of weak declaration '%0' can only be performed "
101104
"at runtime">;

clang/lib/AST/ExprConstant.cpp

Lines changed: 115 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,10 @@
5454
#include "clang/Basic/DiagnosticSema.h"
5555
#include "clang/Basic/TargetInfo.h"
5656
#include "llvm/ADT/APFixedPoint.h"
57+
#include "llvm/ADT/Sequence.h"
5758
#include "llvm/ADT/SmallBitVector.h"
5859
#include "llvm/ADT/StringExtras.h"
60+
#include "llvm/Support/Casting.h"
5961
#include "llvm/Support/Debug.h"
6062
#include "llvm/Support/SaveAndRestore.h"
6163
#include "llvm/Support/SipHash.h"
@@ -2061,15 +2063,21 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) {
20612063
return true;
20622064
}
20632065

2064-
/// Should this call expression be treated as a no-op?
2065-
static bool IsNoOpCall(const CallExpr *E) {
2066+
/// Should this call expression be treated as forming an opaque constant?
2067+
static bool IsOpaqueConstantCall(const CallExpr *E) {
20662068
unsigned Builtin = E->getBuiltinCallee();
20672069
return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
20682070
Builtin == Builtin::BI__builtin___NSStringMakeConstantString ||
20692071
Builtin == Builtin::BI__builtin_ptrauth_sign_constant ||
20702072
Builtin == Builtin::BI__builtin_function_start);
20712073
}
20722074

2075+
static bool IsOpaqueConstantCall(const LValue &LVal) {
2076+
const auto *BaseExpr =
2077+
llvm::dyn_cast_if_present<CallExpr>(LVal.Base.dyn_cast<const Expr *>());
2078+
return BaseExpr && IsOpaqueConstantCall(BaseExpr);
2079+
}
2080+
20732081
static bool IsGlobalLValue(APValue::LValueBase B) {
20742082
// C++11 [expr.const]p3 An address constant expression is a prvalue core
20752083
// constant expression of pointer type that evaluates to...
@@ -2115,7 +2123,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) {
21152123
case Expr::ObjCBoxedExprClass:
21162124
return cast<ObjCBoxedExpr>(E)->isExpressibleAsConstantInitializer();
21172125
case Expr::CallExprClass:
2118-
return IsNoOpCall(cast<CallExpr>(E));
2126+
return IsOpaqueConstantCall(cast<CallExpr>(E));
21192127
// For GCC compatibility, &&label has static storage duration.
21202128
case Expr::AddrLabelExprClass:
21212129
return true;
@@ -2142,11 +2150,91 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
21422150
return LVal.Base.dyn_cast<const ValueDecl*>();
21432151
}
21442152

2145-
static bool IsLiteralLValue(const LValue &Value) {
2146-
if (Value.getLValueCallIndex())
2153+
// Information about an LValueBase that is some kind of string.
2154+
struct LValueBaseString {
2155+
std::string ObjCEncodeStorage;
2156+
StringRef Bytes;
2157+
int CharWidth;
2158+
};
2159+
2160+
// Gets the lvalue base of LVal as a string.
2161+
static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal,
2162+
LValueBaseString &AsString) {
2163+
const auto *BaseExpr = LVal.Base.dyn_cast<const Expr *>();
2164+
if (!BaseExpr)
2165+
return false;
2166+
2167+
// For ObjCEncodeExpr, we need to compute and store the string.
2168+
if (const auto *EE = dyn_cast<ObjCEncodeExpr>(BaseExpr)) {
2169+
Info.Ctx.getObjCEncodingForType(EE->getEncodedType(),
2170+
AsString.ObjCEncodeStorage);
2171+
AsString.Bytes = AsString.ObjCEncodeStorage;
2172+
AsString.CharWidth = 1;
2173+
return true;
2174+
}
2175+
2176+
// Otherwise, we have a StringLiteral.
2177+
const auto *Lit = dyn_cast<StringLiteral>(BaseExpr);
2178+
if (const auto *PE = dyn_cast<PredefinedExpr>(BaseExpr))
2179+
Lit = PE->getFunctionName();
2180+
2181+
if (!Lit)
21472182
return false;
2148-
const Expr *E = Value.Base.dyn_cast<const Expr*>();
2149-
return E && !isa<MaterializeTemporaryExpr>(E);
2183+
2184+
AsString.Bytes = Lit->getBytes();
2185+
AsString.CharWidth = Lit->getCharByteWidth();
2186+
return true;
2187+
}
2188+
2189+
// Determine whether two string literals potentially overlap. This will be the
2190+
// case if they agree on the values of all the bytes on the overlapping region
2191+
// between them.
2192+
//
2193+
// The overlapping region is the portion of the two string literals that must
2194+
// overlap in memory if the pointers actually point to the same address at
2195+
// runtime. For example, if LHS is "abcdef" + 3 and RHS is "cdef\0gh" + 1 then
2196+
// the overlapping region is "cdef\0", which in this case does agree, so the
2197+
// strings are potentially overlapping. Conversely, for "foobar" + 3 versus
2198+
// "bazbar" + 3, the overlapping region contains all of both strings, so they
2199+
// are not potentially overlapping, even though they agree from the given
2200+
// addresses onwards.
2201+
//
2202+
// See open core issue CWG2765 which is discussing the desired rule here.
2203+
static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
2204+
const LValue &LHS,
2205+
const LValue &RHS) {
2206+
LValueBaseString LHSString, RHSString;
2207+
if (!GetLValueBaseAsString(Info, LHS, LHSString) ||
2208+
!GetLValueBaseAsString(Info, RHS, RHSString))
2209+
return false;
2210+
2211+
// This is the byte offset to the location of the first character of LHS
2212+
// within RHS. We don't need to look at the characters of one string that
2213+
// would appear before the start of the other string if they were merged.
2214+
CharUnits Offset = RHS.Offset - LHS.Offset;
2215+
if (Offset.isNegative())
2216+
LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity());
2217+
else
2218+
RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity());
2219+
2220+
bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size();
2221+
StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes;
2222+
StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes;
2223+
int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth;
2224+
2225+
// The null terminator isn't included in the string data, so check for it
2226+
// manually. If the longer string doesn't have a null terminator where the
2227+
// shorter string ends, they aren't potentially overlapping.
2228+
for (int NullByte : llvm::seq(ShorterCharWidth)) {
2229+
if (Shorter.size() + NullByte >= Longer.size())
2230+
break;
2231+
if (Longer[Shorter.size() + NullByte])
2232+
return false;
2233+
}
2234+
2235+
// Otherwise, they're potentially overlapping if and only if the overlapping
2236+
// region is the same.
2237+
return Shorter == Longer.take_front(Shorter.size());
21502238
}
21512239

21522240
static bool IsWeakLValue(const LValue &Value) {
@@ -8573,7 +8661,10 @@ class LValueExprEvaluator
85738661
bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
85748662
bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
85758663
bool VisitMemberExpr(const MemberExpr *E);
8576-
bool VisitStringLiteral(const StringLiteral *E) { return Success(E); }
8664+
bool VisitStringLiteral(const StringLiteral *E) {
8665+
return Success(APValue::LValueBase(
8666+
E, 0, Info.getASTContext().getNextStringLiteralVersion()));
8667+
}
85778668
bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); }
85788669
bool VisitCXXTypeidExpr(const CXXTypeidExpr *E);
85798670
bool VisitCXXUuidofExpr(const CXXUuidofExpr *E);
@@ -9639,7 +9730,7 @@ static bool isOneByteCharacterType(QualType T) {
96399730

96409731
bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
96419732
unsigned BuiltinOp) {
9642-
if (IsNoOpCall(E))
9733+
if (IsOpaqueConstantCall(E))
96439734
return Success(E);
96449735

96459736
switch (BuiltinOp) {
@@ -13889,13 +13980,22 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
1388913980
(!RHSValue.Base && !RHSValue.Offset.isZero()))
1389013981
return DiagComparison(diag::note_constexpr_pointer_constant_comparison,
1389113982
!RHSValue.Base);
13892-
// It's implementation-defined whether distinct literals will have
13893-
// distinct addresses. In clang, the result of such a comparison is
13894-
// unspecified, so it is not a constant expression. However, we do know
13895-
// that the address of a literal will be non-null.
13896-
if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) &&
13897-
LHSValue.Base && RHSValue.Base)
13983+
// C++2c [intro.object]/10:
13984+
// Two objects [...] may have the same address if [...] they are both
13985+
// potentially non-unique objects.
13986+
// C++2c [intro.object]/9:
13987+
// An object is potentially non-unique if it is a string literal object,
13988+
// the backing array of an initializer list, or a subobject thereof.
13989+
//
13990+
// This makes the comparison result unspecified, so it's not a constant
13991+
// expression.
13992+
//
13993+
// TODO: Do we need to handle the initializer list case here?
13994+
if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue))
1389813995
return DiagComparison(diag::note_constexpr_literal_comparison);
13996+
if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue))
13997+
return DiagComparison(diag::note_constexpr_opaque_call_comparison,
13998+
!IsOpaqueConstantCall(LHSValue));
1389913999
// We can't tell whether weak symbols will end up pointing to the same
1390014000
// object.
1390114001
if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue))

clang/test/AST/ByteCode/builtin-functions.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -966,7 +966,8 @@ namespace shufflevector {
966966
namespace FunctionStart {
967967
void a(void) {}
968968
static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \
969-
// both-note {{comparison of addresses of literals has unspecified value}}
969+
// ref-note {{comparison against opaque constant address '&__builtin_function_start(a)'}} \
970+
// expected-note {{comparison of addresses of literals has unspecified value}}
970971
}
971972

972973
namespace BuiltinInImplicitCtor {

clang/test/AST/ByteCode/cxx20.cpp

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ constexpr int f() {
9999
static_assert(f());
100100
#endif
101101

102-
/// Distinct literals have disctinct addresses.
102+
/// Distinct literals have distinct addresses.
103103
/// see https://github.com/llvm/llvm-project/issues/58754
104104
constexpr auto foo(const char *p) { return p; }
105105
constexpr auto p1 = "test1";
@@ -108,22 +108,16 @@ constexpr auto p2 = "test2";
108108
constexpr bool b1 = foo(p1) == foo(p1);
109109
static_assert(b1);
110110

111-
constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \
112-
// ref-note {{comparison of addresses of literals}} \
113-
// ref-note {{declared here}}
114-
static_assert(!b2); // ref-error {{not an integral constant expression}} \
115-
// ref-note {{not a constant expression}}
111+
constexpr bool b2 = foo(p1) == foo(p2);
112+
static_assert(!b2);
116113

117114
constexpr auto name1() { return "name1"; }
118115
constexpr auto name2() { return "name2"; }
119116

120-
constexpr auto b3 = name1() == name1();
121-
static_assert(b3);
122-
constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \
123-
// ref-note {{has unspecified value}} \
124-
// ref-note {{declared here}}
125-
static_assert(!b4); // ref-error {{not an integral constant expression}} \
126-
// ref-note {{not a constant expression}}
117+
constexpr auto b3 = name1() == name1(); // ref-error {{must be initialized by a constant expression}} \
118+
// ref-note {{comparison of addresses of literals}}
119+
constexpr auto b4 = name1() == name2();
120+
static_assert(!b4);
127121

128122
namespace UninitializedFields {
129123
class A {
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// RUN: rm -rf %t
2+
// RUN: mkdir -p %t
3+
// RUN: split-file %s %t
4+
5+
// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cpp \
6+
// RUN: -o %t/A.pcm
7+
8+
// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/b.cpp \
9+
// RUN: -fmodule-file=A=%t/A.pcm -o %t/B.pcm
10+
11+
// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/c.cpp \
12+
// RUN: -fmodule-file=A=%t/A.pcm -o %t/C.pcm
13+
14+
// RUN: %clang_cc1 -std=c++20 -verify %t/main.cpp \
15+
// RUN: -fmodule-file=A=%t/A.pcm \
16+
// RUN: -fmodule-file=B=%t/B.pcm \
17+
// RUN: -fmodule-file=C=%t/C.pcm
18+
19+
// expected-no-diagnostics
20+
21+
//--- a.cpp
22+
23+
export module A;
24+
export consteval const char *hello() { return "hello"; }
25+
export constexpr const char *helloA0 = hello();
26+
export constexpr const char *helloA1 = helloA0;
27+
export constexpr const char *helloA2 = hello();
28+
29+
//--- b.cpp
30+
31+
export module B;
32+
import A;
33+
export constexpr const char *helloB1 = helloA0;
34+
export constexpr const char *helloB2 = hello();
35+
36+
//--- c.cpp
37+
38+
export module C;
39+
import A;
40+
export constexpr const char *helloC1 = helloA1;
41+
export constexpr const char *helloC2 = hello();
42+
43+
//--- main.cpp
44+
45+
import A;
46+
import B;
47+
import C;
48+
49+
// These are valid: they refer to the same evaluation of the same constant.
50+
static_assert(helloA0 == helloA1);
51+
static_assert(helloA0 == helloB1);
52+
static_assert(helloA0 == helloC1);
53+
54+
// These refer to distinct evaluations, and so may or may not be equal.
55+
static_assert(helloA1 == helloA2); // expected-error {{}} expected-note {{unspecified value}}
56+
static_assert(helloA1 == helloB2); // expected-error {{}} expected-note {{unspecified value}}
57+
static_assert(helloA1 == helloC2); // expected-error {{}} expected-note {{unspecified value}}
58+
static_assert(helloA2 == helloB2); // expected-error {{}} expected-note {{unspecified value}}
59+
static_assert(helloA2 == helloC2); // expected-error {{}} expected-note {{unspecified value}}
60+
static_assert(helloB2 == helloC2); // expected-error {{}} expected-note {{unspecified value}}

clang/test/SemaCXX/builtins.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
1-
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions
2-
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions
1+
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions -fptrauth-intrinsics
2+
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions -fptrauth-intrinsics
33
typedef const struct __CFString * CFStringRef;
44
#define CFSTR __builtin___CFStringMakeConstantString
5+
#define NSSTR __builtin___NSStringMakeConstantString
56

67
void f() {
78
#if !defined(__MVS__) && !defined(_AIX)
89
// Builtin function __builtin___CFStringMakeConstantString is currently
910
// unsupported on z/OS and AIX.
1011
(void)CFStringRef(CFSTR("Hello"));
12+
13+
constexpr bool a = CFSTR("Hello") == CFSTR("Hello");
14+
// expected-error@-1 {{constant expression}}
15+
// expected-note@-2 {{comparison against opaque constant address '&__builtin___CFStringMakeConstantString("Hello")'}}
16+
constexpr bool b = NSSTR("Hello") == NSSTR("Hello");
17+
// expected-error@-1 {{constant expression}}
18+
// expected-note@-2 {{comparison against opaque constant address '&__builtin___NSStringMakeConstantString("Hello")'}}
1119
#endif
1220
}
1321

@@ -47,7 +55,7 @@ void a(void) {}
4755
int n;
4856
void *p = __builtin_function_start(n); // expected-error {{argument must be a function}}
4957
static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static assertion expression is not an integral constant expression}}
50-
// expected-note@-1 {{comparison of addresses of literals has unspecified value}}
58+
// expected-note@-1 {{comparison against opaque constant address '&__builtin_function_start(a)'}}
5159
} // namespace function_start
5260

5361
void no_ms_builtins() {

0 commit comments

Comments
 (0)