Skip to content

Commit a0a4787

Browse files
committed
[IR][TBAA] Allow multiple fields with same offset in TBAA struct-path
Add support for multiple fields having the same offset in TBAA struct-path metadata nodes. The primary goal is to allow union-like structures to participate in TBAA struct-path resolution.
1 parent c3eb297 commit a0a4787

File tree

6 files changed

+177
-81
lines changed

6 files changed

+177
-81
lines changed

llvm/docs/LangRef.rst

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6424,9 +6424,10 @@ tuples this way:
64246424
undefined if ``Offset`` is non-zero.
64256425

64266426
* If ``BaseTy`` is a struct type then ``ImmediateParent(BaseTy, Offset)``
6427-
is ``(NewTy, NewOffset)`` where ``NewTy`` is the type contained in
6428-
``BaseTy`` at offset ``Offset`` and ``NewOffset`` is ``Offset`` adjusted
6429-
to be relative within that inner type.
6427+
is an array of ``(NewTy[N], NewOffset)`` where ``NewTy[N]`` is the Nth type
6428+
contained in ``BaseTy`` at offset ``Offset`` and ``NewOffset`` is
6429+
``Offset`` adjusted to be relative within that inner type. Multiple types
6430+
occupying the same offset allow union-like structures to be described.
64306431

64316432
A memory access with an access tag ``(BaseTy1, AccessTy1, Offset1)``
64326433
aliases a memory access with an access tag ``(BaseTy2, AccessTy2,
@@ -6437,9 +6438,9 @@ As a concrete example, the type descriptor graph for the following program
64376438

64386439
.. code-block:: c
64396440

6440-
struct Inner {
6441+
union Inner {
64416442
int i; // offset 0
6442-
float f; // offset 4
6443+
float f; // offset 0
64436444
};
64446445

64456446
struct Outer {
@@ -6451,7 +6452,7 @@ As a concrete example, the type descriptor graph for the following program
64516452
void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
64526453
outer->f = 0; // tag0: (OuterStructTy, FloatScalarTy, 0)
64536454
outer->inner_a.i = 0; // tag1: (OuterStructTy, IntScalarTy, 12)
6454-
outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 16)
6455+
outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 12)
64556456
*f = 0.0; // tag3: (FloatScalarTy, FloatScalarTy, 0)
64566457
}
64576458

@@ -6465,13 +6466,13 @@ type):
64656466
FloatScalarTy = ("float", CharScalarTy, 0)
64666467
DoubleScalarTy = ("double", CharScalarTy, 0)
64676468
IntScalarTy = ("int", CharScalarTy, 0)
6468-
InnerStructTy = {"Inner" (IntScalarTy, 0), (FloatScalarTy, 4)}
6469+
InnerStructTy = {"Inner", (IntScalarTy, 0), (FloatScalarTy, 0)}
64696470
OuterStructTy = {"Outer", (FloatScalarTy, 0), (DoubleScalarTy, 4),
64706471
(InnerStructTy, 12)}
64716472

64726473

64736474
with (e.g.) ``ImmediateParent(OuterStructTy, 12)`` = ``(InnerStructTy,
6474-
0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0)``, and
6475+
0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0), (FloatScalarTy, 0)``, and
64756476
``ImmediateParent(IntScalarTy, 0)`` = ``(CharScalarTy, 0)``.
64766477

64776478
.. _tbaa_node_representation:

llvm/include/llvm/IR/Verifier.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,15 @@ class TBAAVerifier {
5959

6060
/// \name Helper functions used by \c visitTBAAMetadata.
6161
/// @{
62-
MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode,
63-
APInt &Offset, bool IsNewFormat);
62+
std::vector<MDNode *> getFieldNodeFromTBAABaseNode(Instruction &I,
63+
const MDNode *BaseNode,
64+
APInt &Offset,
65+
bool IsNewFormat);
66+
bool findAccessTypeNode(Instruction &I,
67+
SmallPtrSetImpl<const MDNode *> &StructPath,
68+
APInt Offset, bool IsNewFormat,
69+
const MDNode *AccessType, const MDNode *BaseNode,
70+
const MDNode *MD);
6471
TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I,
6572
const MDNode *BaseNode,
6673
bool IsNewFormat);

llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
#include "llvm/Support/ErrorHandling.h"
122122
#include <cassert>
123123
#include <cstdint>
124+
#include <stack>
124125

125126
using namespace llvm;
126127

@@ -299,21 +300,22 @@ class TBAAStructTypeNode {
299300
return TBAAStructTypeNode(TypeNode);
300301
}
301302

302-
/// Get this TBAAStructTypeNode's field in the type DAG with
303+
/// Get this TBAAStructTypeNode's fields in the type DAG with
303304
/// given offset. Update the offset to be relative to the field type.
304-
TBAAStructTypeNode getField(uint64_t &Offset) const {
305+
/// There could be multiple fields with same offset.
306+
std::vector<TBAAStructTypeNode> getField(uint64_t &Offset) const {
305307
bool NewFormat = isNewFormat();
306308
const ArrayRef<MDOperand> Operands = Node->operands();
307309
const unsigned NumOperands = Operands.size();
308310

309311
if (NewFormat) {
310312
// New-format root and scalar type nodes have no fields.
311313
if (NumOperands < 6)
312-
return TBAAStructTypeNode();
314+
return {TBAAStructTypeNode()};
313315
} else {
314316
// Parent can be omitted for the root node.
315317
if (NumOperands < 2)
316-
return TBAAStructTypeNode();
318+
return {TBAAStructTypeNode()};
317319

318320
// Fast path for a scalar type node and a struct type node with a single
319321
// field.
@@ -325,8 +327,8 @@ class TBAAStructTypeNode {
325327
Offset -= Cur;
326328
MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
327329
if (!P)
328-
return TBAAStructTypeNode();
329-
return TBAAStructTypeNode(P);
330+
return {TBAAStructTypeNode()};
331+
return {TBAAStructTypeNode(P)};
330332
}
331333
}
332334

@@ -336,6 +338,8 @@ class TBAAStructTypeNode {
336338
unsigned NumOpsPerField = NewFormat ? 3 : 2;
337339
unsigned TheIdx = 0;
338340

341+
std::vector<TBAAStructTypeNode> Ret;
342+
339343
for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
340344
Idx += NumOpsPerField) {
341345
uint64_t Cur =
@@ -353,10 +357,20 @@ class TBAAStructTypeNode {
353357
uint64_t Cur =
354358
mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
355359
Offset -= Cur;
360+
361+
// Collect all fields that have right offset.
356362
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
357-
if (!P)
358-
return TBAAStructTypeNode();
359-
return TBAAStructTypeNode(P);
363+
Ret.emplace_back(P ? TBAAStructTypeNode(P) : TBAAStructTypeNode());
364+
365+
while (TheIdx > FirstFieldOpNo) {
366+
TheIdx -= NumOpsPerField;
367+
auto Val = mdconst::extract<ConstantInt>(Operands[TheIdx + 1]);
368+
if (Cur != Val->getZExtValue())
369+
break;
370+
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
371+
P ? Ret.emplace_back(P) : Ret.emplace_back();
372+
}
373+
return Ret;
360374
}
361375
};
362376

@@ -599,17 +613,24 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
599613
// from the base type, follow the edge with the correct offset in the type DAG
600614
// and adjust the offset until we reach the field type or until we reach the
601615
// access type.
616+
// If multiple fields have same offset in some base type, then scan each such
617+
// field.
602618
bool NewFormat = BaseTag.isNewFormat();
603619
TBAAStructTypeNode BaseType(BaseTag.getBaseType());
604620
uint64_t OffsetInBase = BaseTag.getOffset();
605621

606-
for (;;) {
607-
// In the old format there is no distinction between fields and parent
608-
// types, so in this case we consider all nodes up to the root.
609-
if (!BaseType.getNode()) {
610-
assert(!NewFormat && "Did not see access type in access path!");
611-
break;
612-
}
622+
SmallVector<std::pair<TBAAStructTypeNode, uint64_t>, 4> ToCheck;
623+
ToCheck.emplace_back(BaseType, OffsetInBase);
624+
while (!ToCheck.empty()) {
625+
std::tie(BaseType, OffsetInBase) = ToCheck.back();
626+
ToCheck.pop_back();
627+
628+
// In case if root is reached, still check the remaining candidates.
629+
// For new format it is always expected for access type to be found.
630+
// For old format all nodes up to the root are considered from all
631+
// candidates.
632+
if (!BaseType.getNode())
633+
continue;
613634

614635
if (BaseType.getNode() == SubobjectTag.getBaseType()) {
615636
bool SameMemberAccess = OffsetInBase == SubobjectTag.getOffset();
@@ -627,13 +648,15 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
627648

628649
// Follow the edge with the correct offset. Offset will be adjusted to
629650
// be relative to the field type.
630-
BaseType = BaseType.getField(OffsetInBase);
651+
for (auto &&F : BaseType.getField(OffsetInBase))
652+
ToCheck.emplace_back(F, OffsetInBase);
631653
}
632654

633655
// If the base object has a direct or indirect field of the subobject's type,
634656
// then this may be an access to that field. We need this to check now that
635657
// we support aggregates as access types.
636658
if (NewFormat) {
659+
assert(BaseType.getNode() && "Did not see access type in access path!");
637660
// TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType());
638661
TBAAStructTypeNode FieldType(SubobjectTag.getBaseType());
639662
if (hasField(BaseType, FieldType)) {

llvm/lib/IR/Verifier.cpp

Lines changed: 94 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6968,48 +6968,56 @@ bool TBAAVerifier::isValidScalarTBAANode(const MDNode *MD) {
69686968
return Result;
69696969
}
69706970

6971-
/// Returns the field node at the offset \p Offset in \p BaseNode. Update \p
6972-
/// Offset in place to be the offset within the field node returned.
6971+
/// Returns one or several field nodes at the offset \p Offset in \p BaseNode.
6972+
/// Returns empty vector if \p BaseNode has no fields with specified offset.
6973+
/// Update \p Offset in place to be the offset within the field node returned.
69736974
///
69746975
/// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode.
6975-
MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I,
6976-
const MDNode *BaseNode,
6977-
APInt &Offset,
6978-
bool IsNewFormat) {
6976+
std::vector<MDNode *> TBAAVerifier::getFieldNodeFromTBAABaseNode(
6977+
Instruction &I, const MDNode *BaseNode, APInt &Offset, bool IsNewFormat) {
69796978
assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!");
69806979

69816980
// Scalar nodes have only one possible "field" -- their parent in the access
69826981
// hierarchy. Offset must be zero at this point, but our caller is supposed
69836982
// to check that.
69846983
if (BaseNode->getNumOperands() == 2)
6985-
return cast<MDNode>(BaseNode->getOperand(1));
6984+
return {cast<MDNode>(BaseNode->getOperand(1))};
69866985

69876986
unsigned FirstFieldOpNo = IsNewFormat ? 3 : 1;
69886987
unsigned NumOpsPerField = IsNewFormat ? 3 : 2;
6988+
6989+
unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
69896990
for (unsigned Idx = FirstFieldOpNo; Idx < BaseNode->getNumOperands();
69906991
Idx += NumOpsPerField) {
69916992
auto *OffsetEntryCI =
69926993
mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1));
69936994
if (OffsetEntryCI->getValue().ugt(Offset)) {
69946995
if (Idx == FirstFieldOpNo) {
6995-
CheckFailed("Could not find TBAA parent in struct type node", &I,
6996-
BaseNode, &Offset);
6997-
return nullptr;
6996+
return {};
69986997
}
69996998

7000-
unsigned PrevIdx = Idx - NumOpsPerField;
7001-
auto *PrevOffsetEntryCI =
7002-
mdconst::extract<ConstantInt>(BaseNode->getOperand(PrevIdx + 1));
7003-
Offset -= PrevOffsetEntryCI->getValue();
7004-
return cast<MDNode>(BaseNode->getOperand(PrevIdx));
6999+
LastIdx = Idx - NumOpsPerField;
7000+
break;
70057001
}
70067002
}
70077003

7008-
unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
70097004
auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>(
70107005
BaseNode->getOperand(LastIdx + 1));
7011-
Offset -= LastOffsetEntryCI->getValue();
7012-
return cast<MDNode>(BaseNode->getOperand(LastIdx));
7006+
auto LastOffsetVal = LastOffsetEntryCI->getValue();
7007+
Offset -= LastOffsetVal;
7008+
7009+
std::vector<MDNode *> Ret;
7010+
Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
7011+
while (LastIdx > FirstFieldOpNo) {
7012+
LastIdx -= NumOpsPerField;
7013+
LastOffsetEntryCI =
7014+
mdconst::extract<ConstantInt>(BaseNode->getOperand(LastIdx + 1));
7015+
if (LastOffsetEntryCI->getValue() != LastOffsetVal)
7016+
break;
7017+
Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
7018+
}
7019+
7020+
return Ret;
70137021
}
70147022

70157023
static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
@@ -7086,47 +7094,84 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
70867094
CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
70877095

70887096
APInt Offset = OffsetCI->getValue();
7089-
bool SeenAccessTypeInPath = false;
70907097

7091-
SmallPtrSet<MDNode *, 4> StructPath;
7098+
SmallPtrSet<const MDNode *, 4> StructPath;
70927099

7093-
for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
7094-
BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset,
7095-
IsNewFormat)) {
7096-
if (!StructPath.insert(BaseNode).second) {
7097-
CheckFailed("Cycle detected in struct path", &I, MD);
7098-
return false;
7099-
}
7100+
auto &&[Invalid, BaseNodeBitWidth] =
7101+
verifyTBAABaseNode(I, BaseNode, IsNewFormat);
71007102

7101-
bool Invalid;
7102-
unsigned BaseNodeBitWidth;
7103-
std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode,
7104-
IsNewFormat);
7103+
// If the base node is invalid in itself, then we've already printed all the
7104+
// errors we wanted to print.
7105+
if (Invalid)
7106+
return false;
71057107

7106-
// If the base node is invalid in itself, then we've already printed all the
7107-
// errors we wanted to print.
7108-
if (Invalid)
7109-
return false;
7108+
bool SeenAccessTypeInPath = BaseNode == AccessType;
7109+
if (SeenAccessTypeInPath) {
7110+
CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access", &I,
7111+
MD, &Offset);
7112+
if (IsNewFormat)
7113+
return true;
7114+
}
71107115

7111-
SeenAccessTypeInPath |= BaseNode == AccessType;
7116+
CheckTBAA(findAccessTypeNode(I, StructPath, Offset, IsNewFormat, AccessType,
7117+
BaseNode, MD) ||
7118+
SeenAccessTypeInPath,
7119+
"Did not see access type in access path!", &I, MD);
7120+
return true;
7121+
}
71127122

7113-
if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
7114-
CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access",
7115-
&I, MD, &Offset);
7123+
bool TBAAVerifier::findAccessTypeNode(
7124+
Instruction &I, SmallPtrSetImpl<const MDNode *> &StructPath, APInt Offset,
7125+
bool IsNewFormat, const MDNode *AccessType, const MDNode *BaseNode,
7126+
const MDNode *MD) {
7127+
if (!BaseNode || IsRootTBAANode(BaseNode))
7128+
return false;
71167129

7117-
CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
7118-
(BaseNodeBitWidth == 0 && Offset == 0) ||
7119-
(IsNewFormat && BaseNodeBitWidth == ~0u),
7120-
"Access bit-width not the same as description bit-width", &I, MD,
7121-
BaseNodeBitWidth, Offset.getBitWidth());
7130+
auto &&[Invalid, BaseNodeBitWidth] =
7131+
verifyTBAABaseNode(I, BaseNode, IsNewFormat);
71227132

7123-
if (IsNewFormat && SeenAccessTypeInPath)
7124-
break;
7133+
// If the base node is invalid in itself, then we've already printed all the
7134+
// errors we wanted to print.
7135+
if (Invalid)
7136+
return false;
7137+
7138+
// Offset at point of scalar access must be zero. Skip mismatched nodes.
7139+
if ((isValidScalarTBAANode(BaseNode) || BaseNode == AccessType) &&
7140+
Offset != 0)
7141+
return false;
7142+
7143+
CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
7144+
(BaseNodeBitWidth == 0 && Offset == 0) ||
7145+
(IsNewFormat && BaseNodeBitWidth == ~0u),
7146+
"Access bit-width not the same as description bit-width", &I, MD,
7147+
BaseNodeBitWidth, Offset.getBitWidth());
7148+
7149+
bool SeenAccessTypeInPath = (BaseNode == AccessType && Offset == 0);
7150+
7151+
if (IsNewFormat && SeenAccessTypeInPath)
7152+
return true;
7153+
7154+
auto ProbableNodes =
7155+
getFieldNodeFromTBAABaseNode(I, BaseNode, Offset, IsNewFormat);
7156+
7157+
if (!StructPath.insert(BaseNode).second) {
7158+
CheckFailed("Cycle detected in struct path", &I, MD);
7159+
return false;
71257160
}
71267161

7127-
CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
7128-
MD);
7129-
return true;
7162+
for (auto *PN : ProbableNodes) {
7163+
if (!PN || IsRootTBAANode(PN))
7164+
continue;
7165+
7166+
SmallPtrSet<const MDNode *, 4> StructPathCopy;
7167+
StructPathCopy.insert(StructPath.begin(), StructPath.end());
7168+
7169+
if (findAccessTypeNode(I, StructPathCopy, Offset, IsNewFormat, AccessType,
7170+
PN, MD))
7171+
return true;
7172+
}
7173+
7174+
return SeenAccessTypeInPath;
71307175
}
71317176

71327177
char VerifierLegacyPass::ID = 0;

0 commit comments

Comments
 (0)