Skip to content

[clang][bytecode] Partially address string literal uniqueness #142555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions clang/lib/AST/ByteCode/Interp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1979,6 +1979,51 @@ bool DiagTypeid(InterpState &S, CodePtr OpPC) {
return false;
}

bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
const Pointer &RHS) {
unsigned LHSOffset = LHS.getIndex();
unsigned RHSOffset = RHS.getIndex();
unsigned LHSLength = (LHS.getNumElems() - 1) * LHS.elemSize();
unsigned RHSLength = (RHS.getNumElems() - 1) * RHS.elemSize();

StringRef LHSStr((const char *)LHS.atIndex(0).getRawAddress(), LHSLength);
StringRef RHSStr((const char *)RHS.atIndex(0).getRawAddress(), RHSLength);
int32_t IndexDiff = RHSOffset - LHSOffset;
if (IndexDiff < 0) {
if (static_cast<int32_t>(LHSLength) < -IndexDiff)
return false;
LHSStr = LHSStr.drop_front(-IndexDiff);
} else {
if (static_cast<int32_t>(RHSLength) < IndexDiff)
return false;
RHSStr = RHSStr.drop_front(IndexDiff);
}

unsigned ShorterCharWidth;
StringRef Shorter;
StringRef Longer;
if (LHSLength < RHSLength) {
ShorterCharWidth = LHS.elemSize();
Shorter = LHSStr;
Longer = RHSStr;
} else {
ShorterCharWidth = RHS.elemSize();
Shorter = RHSStr;
Longer = LHSStr;
}

// The null terminator isn't included in the string data, so check for it
// manually. If the longer string doesn't have a null terminator where the
// shorter string ends, they aren't potentially overlapping.
for (unsigned NullByte : llvm::seq(ShorterCharWidth)) {
if (Shorter.size() + NullByte >= Longer.size())
break;
if (Longer[Shorter.size() + NullByte])
return false;
}
return Shorter == Longer.take_front(Shorter.size());
}

// https://github.com/llvm/llvm-project/issues/102513
#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
#pragma optimize("", off)
Expand Down
15 changes: 15 additions & 0 deletions clang/lib/AST/ByteCode/Interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,9 @@ static inline bool IsOpaqueConstantCall(const CallExpr *E) {
Builtin == Builtin::BI__builtin_function_start);
}

bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
const Pointer &RHS);

template <>
inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
using BoolT = PrimConv<PT_Bool>::T;
Expand Down Expand Up @@ -1068,6 +1071,18 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
return true;
}

// FIXME: The source check here isn't entirely correct.
if (LHS.pointsToStringLiteral() && RHS.pointsToStringLiteral() &&
LHS.getFieldDesc()->asExpr() != RHS.getFieldDesc()->asExpr()) {
if (arePotentiallyOverlappingStringLiterals(LHS, RHS)) {
const SourceInfo &Loc = S.Current->getSource(OpPC);
S.FFDiag(Loc, diag::note_constexpr_literal_comparison)
<< LHS.toDiagnosticString(S.getASTContext())
<< RHS.toDiagnosticString(S.getASTContext());
return false;
}
}

if (Pointer::hasSameBase(LHS, RHS)) {
if (LHS.inUnion() && RHS.inUnion()) {
// If the pointers point into a union, things are a little more
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/AST/ByteCode/Pointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,17 @@ bool Pointer::pointsToLiteral() const {
return E && !isa<MaterializeTemporaryExpr, StringLiteral>(E);
}

bool Pointer::pointsToStringLiteral() const {
if (isZero() || !isBlockPointer())
return false;

if (block()->isDynamic())
return false;

const Expr *E = block()->getDescriptor()->asExpr();
return E && isa<StringLiteral>(E);
}

std::optional<std::pair<Pointer, Pointer>>
Pointer::computeSplitPoint(const Pointer &A, const Pointer &B) {
if (!A.isBlockPointer() || !B.isBlockPointer())
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/ByteCode/Pointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ class Pointer {
/// Whether this points to a block that's been created for a "literal lvalue",
/// i.e. a non-MaterializeTemporaryExpr Expr.
bool pointsToLiteral() const;
bool pointsToStringLiteral() const;

/// Prints the pointer.
void print(llvm::raw_ostream &OS) const;
Expand Down
28 changes: 28 additions & 0 deletions clang/test/AST/ByteCode/cxx11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,31 @@ namespace ZeroSizeCmp {
static_assert(&start != &end, ""); // both-error {{constant expression}} \
// both-note {{comparison of pointers '&start' and '&end' to unrelated zero-sized objects}}
}

namespace OverlappingStrings {
static_assert(+"foo" != +"bar", "");
static_assert(&"xfoo"[1] != &"yfoo"[1], "");
static_assert(+"foot" != +"foo", "");
static_assert(+"foo\0bar" != +"foo\0baz", "");


#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), "");
static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), "");
static_assert(fold((const char*)u"AAA" != (const char*)"AAA\0\0x"), "");

constexpr const char *string = "hello";
constexpr const char *also_string = string;
static_assert(string == string, "");
static_assert(string == also_string, "");


// These strings may overlap, and so the result of the comparison is unknown.
constexpr bool may_overlap_1 = +"foo" == +"foo"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
constexpr bool may_overlap_3 = +"foo" == &"bar\0foo"[4]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
constexpr bool may_overlap_4 = &"xfoo"[1] == &"xfoo"[1]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}



}
4 changes: 2 additions & 2 deletions clang/test/AST/ByteCode/cxx20.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ static_assert(!b4);
constexpr auto bar(const char *p) { return p + __builtin_strlen(p); }
constexpr auto b5 = bar(p1) == p1;
static_assert(!b5);
constexpr auto b6 = bar(p1) == ""; // ref-error {{must be initialized by a constant expression}} \
// ref-note {{comparison of addresses of potentially overlapping literals}}
constexpr auto b6 = bar(p1) == ""; // both-error {{must be initialized by a constant expression}} \
// both-note {{comparison of addresses of potentially overlapping literals}}
constexpr auto b7 = bar(p1) + 1 == ""; // both-error {{must be initialized by a constant expression}} \
// both-note {{comparison against pointer '&"test1"[6]' that points past the end of a complete object has unspecified value}}

Expand Down
Loading