Skip to content

Commit 458b1e9

Browse files
authored
[TBAA] Refine pointer-tbaa for void pointers by pointer depth (#126047)
Commit 77d3f8a avoids distinct tags for any pointers where the ultimate pointee type is `void`, to solve breakage in real-world code that uses (indirections to) `void*` for polymorphism over different pointer types. While this matches the TBAA implementation in GCC, this patch implements a refinement that distinguishes void pointers by pointer depth, as described in the "strict aliasing" documentation included in the aforementioned commit:

> `void*` is permitted to alias any pointer type, `void**` is permitted to alias any pointer to pointer type, and so on.

For example, `void**` is no longer considered to alias `int*` in this refinement, but it remains possible to use `void**` for polymorphism over pointers to pointers.
1 parent 9855d76 commit 458b1e9

File tree

4 files changed

+62
-15
lines changed

4 files changed

+62
-15
lines changed

clang/lib/CodeGen/CodeGenTBAA.cpp

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,42 @@ llvm::MDNode *CodeGenTBAA::getChar() {
8080
return Char;
8181
}
8282

83+
llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) {
  assert(PtrDepth >= 1 && "Pointer must have some depth");

  // AnyPtrs caches the "any" pointer type nodes, one per pointer depth, and
  // is grown lazily. Because no node exists for depth 0, the node for depth D
  // is stored at index D - 1. The nodes form a chain in which each depth is
  // parented to the previous one:
  //
  //   any pointer <- any p2 pointer <- any p3 pointer <- ...
  //
  // Pointer TBAA uses these nodes in two ways. First, the type node of a
  // concrete pointer type is parented to the "any" node of matching depth.
  // Second, the "any" nodes are used directly for accesses to void pointers
  // and to pointers that we conservatively do not distinguish (e.g. pointers
  // to members). The effect is that e.g. void** can alias with any type that
  // unifies with T** (ignoring things like qualifiers), where T stands for an
  // arbitrary type, pointer types included. Each depth is therefore a subtype
  // of the previous depth, and transitively of every shallower depth.

  // Fast path: the requested depth has already been materialized.
  if (PtrDepth <= AnyPtrs.size())
    return AnyPtrs[PtrDepth - 1];

  AnyPtrs.reserve(PtrDepth);
  const auto PtrSize = Module.getDataLayout().getPointerSize();

  // The depth-1 node is the root of the chain and hangs off the char node.
  if (AnyPtrs.empty())
    AnyPtrs.push_back(createScalarTypeNode("any pointer", getChar(), PtrSize));

  // Append one node per missing depth, each parented to the node before it.
  while (AnyPtrs.size() < PtrDepth) {
    const size_t Depth = AnyPtrs.size() + 1;
    const auto NodeName = ("any p" + llvm::Twine(Depth) + " pointer").str();
    AnyPtrs.push_back(createScalarTypeNode(NodeName, AnyPtrs.back(), PtrSize));
  }

  return AnyPtrs[PtrDepth - 1];
}
118+
83119
static bool TypeHasMayAlias(QualType QTy) {
84120
// Tagged types have declarations, and therefore may have attributes.
85121
if (auto *TD = QTy->getAsTagDecl())
@@ -202,9 +238,8 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
202238
// they involve a significant representation difference. We don't
203239
// currently do so, however.
204240
if (Ty->isPointerType() || Ty->isReferenceType()) {
205-
llvm::MDNode *AnyPtr = createScalarTypeNode("any pointer", getChar(), Size);
206241
if (!CodeGenOpts.PointerTBAA)
207-
return AnyPtr;
242+
return getAnyPtr();
208243
// C++ [basic.lval]p11 permits objects to be accessed through an l-value of
209244
// similar type. Two types are similar under C++ [conv.qual]p2 if the
210245
// decomposition of the types into pointers, member pointers, and arrays has
@@ -232,7 +267,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
232267
// common idioms and there is no good alternative to re-write the code
233268
// without strict-aliasing violations.
234269
if (Ty->isVoidType())
235-
return AnyPtr;
270+
return getAnyPtr(PtrDepth);
236271

237272
assert(!isa<VariableArrayType>(Ty));
238273
// When the underlying type is a builtin type, we compute the pointee type
@@ -256,7 +291,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
256291
// similar-types rule.
257292
const auto *RT = Ty->getAs<RecordType>();
258293
if (!RT)
259-
return AnyPtr;
294+
return getAnyPtr(PtrDepth);
260295

261296
// For unnamed structs or unions C's compatible types rule applies. Two
262297
// compatible types in different compilation units can have different
@@ -270,7 +305,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
270305
// compatibility rule, but it doesn't matter because you can never have a
271306
// pointer to an anonymous struct or union.
272307
if (!RT->getDecl()->getDeclName())
273-
return AnyPtr;
308+
return getAnyPtr(PtrDepth);
274309

275310
// For non-builtin types use the mangled name of the canonical type.
276311
llvm::raw_svector_ostream TyOut(TyName);
@@ -281,7 +316,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
281316
OutName += std::to_string(PtrDepth);
282317
OutName += " ";
283318
OutName += TyName;
284-
return createScalarTypeNode(OutName, AnyPtr, Size);
319+
return createScalarTypeNode(OutName, getAnyPtr(PtrDepth), Size);
285320
}
286321

287322
// Accesses to arrays are accesses to objects of their element types.

clang/lib/CodeGen/CodeGenTBAA.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ class CodeGenTBAA {
139139

140140
llvm::MDNode *Root;
141141
llvm::MDNode *Char;
142+
llvm::SmallVector<llvm::MDNode *, 4> AnyPtrs;
142143

143144
/// getRoot - This is the mdnode for the root of the metadata type graph
144145
/// for this translation unit.
@@ -148,6 +149,10 @@ class CodeGenTBAA {
148149
/// considered to be equivalent to it.
149150
llvm::MDNode *getChar();
150151

152+
/// getAnyPtr - This is the mdnode for any pointer type of (at least) the
153+
/// given pointer depth.
154+
llvm::MDNode *getAnyPtr(unsigned PtrDepth = 1);
155+
151156
/// CollectFields - Collect information about the fields of a type for
152157
/// !tbaa.struct metadata formation. Return false for an unsupported type.
153158
bool CollectFields(uint64_t BaseOffset,

clang/test/CXX/drs/cwg158.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,6 @@ const int * h(const int * (*p)[10], int *(*q)[9]) {
4242
}
4343

4444
// POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0}
45-
// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYPTR:!.+]], i64 0}
45+
// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYP2PTR:!.+]], i64 0}
46+
// POINTER-TBAA: [[ANYP2PTR]] = !{!"any p2 pointer", [[ANYPTR:!.+]],
4647
// POINTER-TBAA: [[ANYPTR]] = !{!"any pointer"

clang/test/CodeGen/tbaa-pointers.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,10 @@ int void_ptrs(void **ptr) {
208208
// COMMON-LABEL: define i32 @void_ptrs(
209209
// COMMON-SAME: ptr noundef [[PTRA:%.+]])
210210
// COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8
211-
// COMMON-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]]
212-
// COMMON-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]]
211+
// DISABLE-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]]
212+
// DEFAULT-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2:!.+]]
213+
// DISABLE-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]]
214+
// DEFAULT-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2]]
213215
// COMMON-NEXT: [[L1:%.+]] = load ptr, ptr [[L0]], align 8, !tbaa [[ANYPTR]]
214216
// COMMON-NEXT: [[BOOL:%.+]] = icmp ne ptr [[L1]], null
215217
// COMMON-NEXT: [[BOOL_EXT:%.+]] = zext i1 [[BOOL]] to i64
@@ -220,25 +222,28 @@ int void_ptrs(void **ptr) {
220222
}
221223

222224
// DEFAULT: [[P2INT_0]] = !{[[P2INT:!.+]], [[P2INT]], i64 0}
223-
// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_POINTER:!.+]], i64 0}
225+
// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_P2_POINTER:!.+]], i64 0}
226+
// DEFAULT: [[ANY_P2_POINTER]] = !{!"any p2 pointer", [[ANY_POINTER:!.+]], i64 0}
224227
// DISABLE: [[ANYPTR]] = !{[[ANY_POINTER:!.+]], [[ANY_POINTER]], i64 0}
225228
// COMMON: [[ANY_POINTER]] = !{!"any pointer", [[CHAR:!.+]], i64 0}
226229
// COMMON: [[CHAR]] = !{!"omnipotent char", [[TBAA_ROOT:!.+]], i64 0}
227230
// COMMON: [[TBAA_ROOT]] = !{!"Simple C/C++ TBAA"}
228231
// DEFAULT: [[P1INT_0]] = !{[[P1INT:!.+]], [[P1INT]], i64 0}
229232
// DEFAULT: [[P1INT]] = !{!"p1 int", [[ANY_POINTER]], i64 0}
230233
// DEFAULT: [[P3INT_0]] = !{[[P3INT:!.+]], [[P3INT]], i64 0}
231-
// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_POINTER]], i64 0}
234+
// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_P3_POINTER:!.+]], i64 0}
235+
// DEFAULT: [[ANY_P3_POINTER]] = !{!"any p3 pointer", [[ANY_P2_POINTER]], i64 0}
232236
// DEFAULT: [[P4CHAR_0]] = !{[[P4CHAR:!.+]], [[P4CHAR]], i64 0}
233-
// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_POINTER]], i64 0}
237+
// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_P4_POINTER:!.*]], i64 0}
238+
// DEFAULT: [[ANY_P4_POINTER]] = !{!"any p4 pointer", [[ANY_P3_POINTER]], i64 0}
234239
// DEFAULT: [[P3CHAR_0]] = !{[[P3CHAR:!.+]], [[P3CHAR]], i64 0}
235-
// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_POINTER]], i64 0}
240+
// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_P3_POINTER]], i64 0}
236241
// DEFAULT: [[P2CHAR_0]] = !{[[P2CHAR:!.+]], [[P2CHAR]], i64 0}
237-
// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_POINTER]], i64 0}
242+
// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_P2_POINTER]], i64 0}
238243
// DEFAULT: [[P1CHAR_0]] = !{[[P1CHAR:!.+]], [[P1CHAR]], i64 0}
239244
// DEFAULT: [[P1CHAR]] = !{!"p1 omnipotent char", [[ANY_POINTER]], i64 0}
240245
// DEFAULT: [[P2S1_TAG]] = !{[[P2S1:!.+]], [[P2S1]], i64 0}
241-
// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_POINTER]], i64 0}
246+
// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_P2_POINTER]], i64 0}
242247
// DEFAULT: [[P1S1_TAG:!.+]] = !{[[P1S1:!.+]], [[P1S1]], i64 0}
243248
// DEFAULT: [[P1S1]] = !{!"p1 _ZTS2S1", [[ANY_POINTER]], i64 0}
244249
// DEFAULT: [[P1S2_TAG]] = !{[[P1S2:!.+]], [[P1S2]], i64 0}
@@ -251,3 +256,4 @@ int void_ptrs(void **ptr) {
251256
// COMMON: [[INT_TAG]] = !{[[INT_TY:!.+]], [[INT_TY]], i64 0}
252257
// COMMON: [[INT_TY]] = !{!"int", [[CHAR]], i64 0}
253258
// DEFAULT: [[ANYPTR]] = !{[[ANY_POINTER]], [[ANY_POINTER]], i64 0}
259+
// DEFAULT: [[ANYP2]] = !{[[ANY_P2_POINTER]], [[ANY_P2_POINTER]], i64 0}

0 commit comments

Comments
 (0)