Skip to content

Commit 66bcb25

Browse files
committed
[IR][TBAA] Allow multiple fields with same offset in TBAA struct-path
Add support for multiple fields sharing the same offset in TBAA struct-path metadata nodes. The primary goal is to allow union-like structures to participate in TBAA struct-path resolution.
1 parent e79ad7b commit 66bcb25

File tree

8 files changed

+602
-82
lines changed

8 files changed

+602
-82
lines changed

llvm/docs/LangRef.rst

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6434,9 +6434,10 @@ tuples this way:
64346434
undefined if ``Offset`` is non-zero.
64356435

64366436
* If ``BaseTy`` is a struct type then ``ImmediateParent(BaseTy, Offset)``
6437-
is ``(NewTy, NewOffset)`` where ``NewTy`` is the type contained in
6438-
``BaseTy`` at offset ``Offset`` and ``NewOffset`` is ``Offset`` adjusted
6439-
to be relative within that inner type.
6437+
is an array of ``(NewTy[N], NewOffset)`` where ``NewTy[N]`` is the Nth type
6438+
contained in ``BaseTy`` at offset ``Offset`` and ``NewOffset`` is
6439+
``Offset`` adjusted to be relative within that inner type. Multiple types
6440+
occupying the same offset make it possible to describe union-like structures.
64406441

64416442
A memory access with an access tag ``(BaseTy1, AccessTy1, Offset1)``
64426443
aliases a memory access with an access tag ``(BaseTy2, AccessTy2,
@@ -6447,9 +6448,9 @@ As a concrete example, the type descriptor graph for the following program
64476448

64486449
.. code-block:: c
64496450

6450-
struct Inner {
6451+
union Inner {
64516452
int i; // offset 0
6452-
float f; // offset 4
6453+
float f; // offset 0
64536454
};
64546455

64556456
struct Outer {
@@ -6461,7 +6462,7 @@ As a concrete example, the type descriptor graph for the following program
64616462
void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
64626463
outer->f = 0; // tag0: (OuterStructTy, FloatScalarTy, 0)
64636464
outer->inner_a.i = 0; // tag1: (OuterStructTy, IntScalarTy, 12)
6464-
outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 16)
6465+
outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 12)
64656466
*f = 0.0; // tag3: (FloatScalarTy, FloatScalarTy, 0)
64666467
}
64676468

@@ -6475,13 +6476,13 @@ type):
64756476
FloatScalarTy = ("float", CharScalarTy, 0)
64766477
DoubleScalarTy = ("double", CharScalarTy, 0)
64776478
IntScalarTy = ("int", CharScalarTy, 0)
6478-
InnerStructTy = {"Inner" (IntScalarTy, 0), (FloatScalarTy, 4)}
6479+
InnerStructTy = {"Inner", (IntScalarTy, 0), (FloatScalarTy, 0)}
64796480
OuterStructTy = {"Outer", (FloatScalarTy, 0), (DoubleScalarTy, 4),
64806481
(InnerStructTy, 12)}
64816482

64826483

64836484
with (e.g.) ``ImmediateParent(OuterStructTy, 12)`` = ``(InnerStructTy,
6484-
0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0)``, and
6485+
0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0), (FloatScalarTy, 0)``, and
64856486
``ImmediateParent(IntScalarTy, 0)`` = ``(CharScalarTy, 0)``.
64866487

64876488
.. _tbaa_node_representation:

llvm/include/llvm/IR/Verifier.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,15 @@ class TBAAVerifier {
5959

6060
/// \name Helper functions used by \c visitTBAAMetadata.
6161
/// @{
62-
MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode,
63-
APInt &Offset, bool IsNewFormat);
62+
std::vector<MDNode *> getFieldNodeFromTBAABaseNode(Instruction &I,
63+
const MDNode *BaseNode,
64+
APInt &Offset,
65+
bool IsNewFormat);
66+
bool findAccessTypeNode(Instruction &I,
67+
SmallPtrSetImpl<const MDNode *> &StructPath,
68+
APInt Offset, bool IsNewFormat,
69+
const MDNode *AccessType, const MDNode *BaseNode,
70+
const MDNode *MD);
6471
TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I,
6572
const MDNode *BaseNode,
6673
bool IsNewFormat);

llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp

Lines changed: 85 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
#include "llvm/Support/ErrorHandling.h"
122122
#include <cassert>
123123
#include <cstdint>
124+
#include <stack>
124125

125126
using namespace llvm;
126127

@@ -299,21 +300,22 @@ class TBAAStructTypeNode {
299300
return TBAAStructTypeNode(TypeNode);
300301
}
301302

302-
/// Get this TBAAStructTypeNode's field in the type DAG with
303+
/// Get this TBAAStructTypeNode's fields in the type DAG with
303304
/// given offset. Update the offset to be relative to the field type.
304-
TBAAStructTypeNode getField(uint64_t &Offset) const {
305+
/// There may be multiple fields with the same offset.
306+
std::vector<TBAAStructTypeNode> getField(uint64_t &Offset) const {
305307
bool NewFormat = isNewFormat();
306308
const ArrayRef<MDOperand> Operands = Node->operands();
307309
const unsigned NumOperands = Operands.size();
308310

309311
if (NewFormat) {
310312
// New-format root and scalar type nodes have no fields.
311313
if (NumOperands < 6)
312-
return TBAAStructTypeNode();
314+
return {TBAAStructTypeNode()};
313315
} else {
314316
// Parent can be omitted for the root node.
315317
if (NumOperands < 2)
316-
return TBAAStructTypeNode();
318+
return {TBAAStructTypeNode()};
317319

318320
// Fast path for a scalar type node and a struct type node with a single
319321
// field.
@@ -325,8 +327,8 @@ class TBAAStructTypeNode {
325327
Offset -= Cur;
326328
MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
327329
if (!P)
328-
return TBAAStructTypeNode();
329-
return TBAAStructTypeNode(P);
330+
return {TBAAStructTypeNode()};
331+
return {TBAAStructTypeNode(P)};
330332
}
331333
}
332334

@@ -336,6 +338,8 @@ class TBAAStructTypeNode {
336338
unsigned NumOpsPerField = NewFormat ? 3 : 2;
337339
unsigned TheIdx = 0;
338340

341+
std::vector<TBAAStructTypeNode> Ret;
342+
339343
for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
340344
Idx += NumOpsPerField) {
341345
uint64_t Cur =
@@ -353,10 +357,20 @@ class TBAAStructTypeNode {
353357
uint64_t Cur =
354358
mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
355359
Offset -= Cur;
360+
361+
// Collect all fields that have the matching offset.
356362
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
357-
if (!P)
358-
return TBAAStructTypeNode();
359-
return TBAAStructTypeNode(P);
363+
Ret.emplace_back(P ? TBAAStructTypeNode(P) : TBAAStructTypeNode());
364+
365+
while (TheIdx > FirstFieldOpNo) {
366+
TheIdx -= NumOpsPerField;
367+
auto Val = mdconst::extract<ConstantInt>(Operands[TheIdx + 1]);
368+
if (Cur != Val->getZExtValue())
369+
break;
370+
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
371+
P ? Ret.emplace_back(P) : Ret.emplace_back();
372+
}
373+
return Ret;
360374
}
361375
};
362376

@@ -572,6 +586,39 @@ static bool hasField(TBAAStructTypeNode BaseType,
572586
return false;
573587
}
574588

589+
/// Report whether two ranges, each given as a (start, length) pair, overlap.
/// The result is true when \p Range1 starts before the end of \p Range2 and
/// the end of \p Range1 lies past the start of \p Range2.
static bool rangeOverlap(std::pair<size_t, size_t> Range1,
590+
std::pair<size_t, size_t> Range2) {
591+
return Range1.first < Range2.first + Range2.second &&
592+
Range1.first + Range1.second > Range2.first;
593+
}
594+
595+
/// Return true if two accesses to the given \p BaseType at \p Offset1 and
596+
/// at \p Offset2 may alias. This check does not take NewStructType
597+
/// parameters such as size into account, so it may be more conservative.
598+
static bool mayFieldAccessesAlias(TBAAStructTypeNode BaseType, uint64_t Offset1,
599+
uint64_t Offset2) {
600+
// Without a type node there is nothing to descend into; answer "may alias".
if (!BaseType.getNode())
601+
return true;
602+
603+
// getField() rebases the offset it is given so that it becomes relative to
// the selected field; compare the distance between the two offsets before
// and after that adjustment.
auto PrevDiff = (long long)(Offset1) - (long long)(Offset2);
604+
auto Fields1 = BaseType.getField(Offset1);
605+
// Fields2 itself is not inspected below; the call is made so that Offset2
// is rebased the same way Offset1 was.
auto Fields2 = BaseType.getField(Offset2);
606+
auto CurrentDiff = (long long)(Offset1) - (long long)(Offset2);
607+
608+
// If the distance between the offsets has changed, the two accesses were
609+
// resolved to different fields.
610+
if (PrevDiff != CurrentDiff)
611+
return false;
612+
613+
// Fields that share the same offset may have different internal structure:
614+
// for some of them the same sub-field is accessed, for others different
615+
// ones. To be conservative, report MayAlias if any of the fields reports
616+
// MayAlias.
617+
return llvm::any_of(Fields1, [&](auto &FieldType) {
618+
return mayFieldAccessesAlias(FieldType, Offset1, Offset2);
619+
});
620+
}
621+
575622
/// Return true if for two given accesses, one of the accessed objects may be a
576623
/// subobject of the other. The \p BaseTag and \p SubobjectTag parameters
577624
/// describe the accesses to the base object and the subobject respectively.
@@ -599,20 +646,38 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
599646
// from the base type, follow the edge with the correct offset in the type DAG
600647
// and adjust the offset until we reach the field type or until we reach the
601648
// access type.
649+
// If multiple fields have same offset in some base type, then scan each such
650+
// field.
602651
bool NewFormat = BaseTag.isNewFormat();
603652
TBAAStructTypeNode BaseType(BaseTag.getBaseType());
604653
uint64_t OffsetInBase = BaseTag.getOffset();
605654

606-
for (;;) {
607-
// In the old format there is no distinction between fields and parent
608-
// types, so in this case we consider all nodes up to the root.
609-
if (!BaseType.getNode()) {
610-
assert(!NewFormat && "Did not see access type in access path!");
611-
break;
612-
}
655+
SmallVector<std::pair<TBAAStructTypeNode, uint64_t>, 4> ToCheck;
656+
ToCheck.emplace_back(BaseType, OffsetInBase);
657+
while (!ToCheck.empty()) {
658+
std::tie(BaseType, OffsetInBase) = ToCheck.back();
659+
ToCheck.pop_back();
660+
661+
// If the root is reached, still check the remaining candidates.
662+
// For new format it is always expected for access type to be found.
663+
// For old format all nodes up to the root are considered from all
664+
// candidates.
665+
if (!BaseType.getNode())
666+
continue;
613667

614668
if (BaseType.getNode() == SubobjectTag.getBaseType()) {
615-
bool SameMemberAccess = OffsetInBase == SubobjectTag.getOffset();
669+
bool SameMemberAccess;
670+
uint64_t SubobjectOffset = SubobjectTag.getOffset();
671+
if (NewFormat)
672+
// If size information is available, check if their access locations
673+
// overlap.
674+
SameMemberAccess = rangeOverlap(
675+
std::make_pair(OffsetInBase, BaseTag.getSize()),
676+
std::make_pair(SubobjectOffset, SubobjectTag.getSize()));
677+
else
678+
// Else do a more conservative check.
679+
SameMemberAccess =
680+
mayFieldAccessesAlias(BaseType, OffsetInBase, SubobjectOffset);
616681
if (GenericTag) {
617682
*GenericTag = SameMemberAccess ? SubobjectTag.getNode() :
618683
createAccessTag(CommonType);
@@ -627,13 +692,15 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
627692

628693
// Follow the edge with the correct offset. Offset will be adjusted to
629694
// be relative to the field type.
630-
BaseType = BaseType.getField(OffsetInBase);
695+
for (auto &&F : BaseType.getField(OffsetInBase))
696+
ToCheck.emplace_back(F, OffsetInBase);
631697
}
632698

633699
// If the base object has a direct or indirect field of the subobject's type,
634700
// then this may be an access to that field. We need this to check now that
635701
// we support aggregates as access types.
636702
if (NewFormat) {
703+
assert(BaseType.getNode() && "Did not see access type in access path!");
637704
// TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType());
638705
TBAAStructTypeNode FieldType(SubobjectTag.getBaseType());
639706
if (hasField(BaseType, FieldType)) {

0 commit comments

Comments
 (0)