Skip to content

Commit 02d90ef

Browse files
committed
[clang][bytecode] Partially address string literal uniqueness
This still leaves the `constexpr auto b3 = name1() == name1();` test case from cxx20.cpp broken.
1 parent 843e362 commit 02d90ef

File tree

6 files changed

+102
-2
lines changed

6 files changed

+102
-2
lines changed

clang/lib/AST/ByteCode/Interp.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,6 +1979,51 @@ bool DiagTypeid(InterpState &S, CodePtr OpPC) {
19791979
return false;
19801980
}
19811981

1982+
bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
1983+
const Pointer &RHS) {
1984+
unsigned LHSOffset = LHS.getIndex();
1985+
unsigned RHSOffset = RHS.getIndex();
1986+
unsigned LHSLength = (LHS.getNumElems() - 1) * LHS.elemSize();
1987+
unsigned RHSLength = (RHS.getNumElems() - 1) * RHS.elemSize();
1988+
1989+
StringRef LHSStr(LHS.atIndex(0).getRawAddress(), LHSLength);
1990+
StringRef RHSStr(RHS.atIndex(0).getRawAddress(), RHSLength);
1991+
int32_t IndexDiff = RHSOffset - LHSOffset;
1992+
if (IndexDiff < 0) {
1993+
if (static_cast<int32_t>(LHSLength) < -IndexDiff)
1994+
return false;
1995+
LHSStr = LHSStr.drop_front(-IndexDiff);
1996+
} else {
1997+
if (static_cast<int32_t>(RHSLength) < IndexDiff)
1998+
return false;
1999+
RHSStr = RHSStr.drop_front(IndexDiff);
2000+
}
2001+
2002+
unsigned ShorterCharWidth;
2003+
StringRef Shorter;
2004+
StringRef Longer;
2005+
if (LHSLength < RHSLength) {
2006+
ShorterCharWidth = LHS.elemSize();
2007+
Shorter = LHSStr;
2008+
Longer = RHSStr;
2009+
} else {
2010+
ShorterCharWidth = RHS.elemSize();
2011+
Shorter = RHSStr;
2012+
Longer = LHSStr;
2013+
}
2014+
2015+
// The null terminator isn't included in the string data, so check for it
2016+
// manually. If the longer string doesn't have a null terminator where the
2017+
// shorter string ends, they aren't potentially overlapping.
2018+
for (unsigned NullByte : llvm::seq(ShorterCharWidth)) {
2019+
if (Shorter.size() + NullByte >= Longer.size())
2020+
break;
2021+
if (Longer[Shorter.size() + NullByte])
2022+
return false;
2023+
}
2024+
return Shorter == Longer.take_front(Shorter.size());
2025+
}
2026+
19822027
// https://github.com/llvm/llvm-project/issues/102513
19832028
#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
19842029
#pragma optimize("", off)

clang/lib/AST/ByteCode/Interp.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,9 @@ static inline bool IsOpaqueConstantCall(const CallExpr *E) {
10341034
Builtin == Builtin::BI__builtin_function_start);
10351035
}
10361036

1037+
/// Returns true if the string literals that \p LHS and \p RHS point into hold
/// matching bytes once the two pointed-to positions are lined up, i.e. if the
/// two literals could potentially share storage. The pointer equality
/// comparison uses this to diagnose comparisons between different string
/// literals.
bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
1038+
const Pointer &RHS);
1039+
10371040
template <>
10381041
inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
10391042
using BoolT = PrimConv<PT_Bool>::T;
@@ -1068,6 +1071,18 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
10681071
return true;
10691072
}
10701073

1074+
// FIXME: The source check here isn't entirely correct.
1075+
if (LHS.pointsToStringLiteral() && RHS.pointsToStringLiteral() &&
1076+
LHS.getFieldDesc()->asExpr() != RHS.getFieldDesc()->asExpr()) {
1077+
if (arePotentiallyOverlappingStringLiterals(LHS, RHS)) {
1078+
const SourceInfo &Loc = S.Current->getSource(OpPC);
1079+
S.FFDiag(Loc, diag::note_constexpr_literal_comparison)
1080+
<< LHS.toDiagnosticString(S.getASTContext())
1081+
<< RHS.toDiagnosticString(S.getASTContext());
1082+
return false;
1083+
}
1084+
}
1085+
10711086
if (Pointer::hasSameBase(LHS, RHS)) {
10721087
if (LHS.inUnion() && RHS.inUnion()) {
10731088
// If the pointers point into a union, things are a little more

clang/lib/AST/ByteCode/Pointer.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,17 @@ bool Pointer::pointsToLiteral() const {
571571
return E && !isa<MaterializeTemporaryExpr, StringLiteral>(E);
572572
}
573573

574+
bool Pointer::pointsToStringLiteral() const {
575+
if (isZero() || !isBlockPointer())
576+
return false;
577+
578+
if (block()->isDynamic())
579+
return false;
580+
581+
const Expr *E = block()->getDescriptor()->asExpr();
582+
return E && isa<StringLiteral>(E);
583+
}
584+
574585
std::optional<std::pair<Pointer, Pointer>>
575586
Pointer::computeSplitPoint(const Pointer &A, const Pointer &B) {
576587
if (!A.isBlockPointer() || !B.isBlockPointer())

clang/lib/AST/ByteCode/Pointer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,7 @@ class Pointer {
756756
/// Whether this points to a block that's been created for a "literal lvalue",
757757
/// i.e. a non-MaterializeTemporaryExpr Expr.
758758
bool pointsToLiteral() const;
759+
/// Whether this points to a non-dynamic block that's been created for a
/// StringLiteral expression. Always false for null and non-block pointers.
bool pointsToStringLiteral() const;
759760

760761
/// Prints the pointer.
761762
void print(llvm::raw_ostream &OS) const;

clang/test/AST/ByteCode/cxx11.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,3 +260,31 @@ namespace ZeroSizeCmp {
260260
static_assert(&start != &end, ""); // both-error {{constant expression}} \
261261
// both-note {{comparison of pointers '&start' and '&end' to unrelated zero-sized objects}}
262262
}
263+
264+
// Comparisons between pointers into string literals. Whether two distinct
// literals share storage is up to the implementation, so such a comparison is
// only usable in a constant expression when the literals cannot overlap.
namespace OverlappingStrings {
265+
// The contents differ where the literals would have to coincide, so no
// diagnostics are expected for these comparisons.
static_assert(+"foo" != +"bar", "");
266+
static_assert(&"xfoo"[1] != &"yfoo"[1], "");
267+
static_assert(+"foot" != +"foo", "");
268+
static_assert(+"foo\0bar" != +"foo\0baz", "");
269+
270+
271+
// NOTE(review): fold() presumably exists so that the pointer casts below are
// accepted during constant evaluation -- confirm.
#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
272+
// Literals with different character widths, compared through const char *.
static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), "");
273+
static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), "");
274+
static_assert(fold((const char*)u"AAA" != (const char*)"AAA\0\0x"), "");
275+
276+
// Both sides originate from the same literal here, so the comparisons are
// expected to evaluate to true without diagnostics.
constexpr const char *string = "hello";
277+
constexpr const char *also_string = string;
278+
static_assert(string == string, "");
279+
static_assert(string == also_string, "");
280+
281+
282+
// These strings may overlap, and so the result of the comparison is unknown.
283+
constexpr bool may_overlap_1 = +"foo" == +"foo"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
284+
constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
285+
constexpr bool may_overlap_3 = +"foo" == &"bar\0foo"[4]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
286+
constexpr bool may_overlap_4 = &"xfoo"[1] == &"xfoo"[1]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
287+
288+
289+
290+
}

clang/test/AST/ByteCode/cxx20.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ static_assert(!b4);
122122
constexpr auto bar(const char *p) { return p + __builtin_strlen(p); }
123123
constexpr auto b5 = bar(p1) == p1;
124124
static_assert(!b5);
125-
constexpr auto b6 = bar(p1) == ""; // ref-error {{must be initialized by a constant expression}} \
126-
// ref-note {{comparison of addresses of potentially overlapping literals}}
125+
constexpr auto b6 = bar(p1) == ""; // both-error {{must be initialized by a constant expression}} \
126+
// both-note {{comparison of addresses of potentially overlapping literals}}
127127
constexpr auto b7 = bar(p1) + 1 == ""; // both-error {{must be initialized by a constant expression}} \
128128
// both-note {{comparison against pointer '&"test1"[6]' that points past the end of a complete object has unspecified value}}
129129

0 commit comments

Comments
 (0)