Skip to content

Commit ae3c905

Browse files
committed
[IR][TBAA] Allow multiple fields with the same offset in TBAA struct-path
Add support for multiple fields having the same offset in TBAA struct-path metadata nodes. The primary goal is to allow union-like structures to participate in TBAA struct-path resolution.
1 parent 08c0eb1 commit ae3c905

File tree

6 files changed

+177
-81
lines changed

6 files changed

+177
-81
lines changed

llvm/docs/LangRef.rst

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6424,9 +6424,10 @@ tuples this way:
64246424
undefined if ``Offset`` is non-zero.
64256425

64266426
* If ``BaseTy`` is a struct type then ``ImmediateParent(BaseTy, Offset)``
6427-
is ``(NewTy, NewOffset)`` where ``NewTy`` is the type contained in
6428-
``BaseTy`` at offset ``Offset`` and ``NewOffset`` is ``Offset`` adjusted
6429-
to be relative within that inner type.
6427+
is an array of ``(NewTy[N], NewOffset)`` where ``NewTy[N]`` is the Nth type
6428+
contained in ``BaseTy`` at offset ``Offset`` and ``NewOffset`` is
6429+
``Offset`` adjusted to be relative within that inner type. Multiple types
6430+
occupying the same offset make it possible to describe union-like structures.
64306431

64316432
A memory access with an access tag ``(BaseTy1, AccessTy1, Offset1)``
64326433
aliases a memory access with an access tag ``(BaseTy2, AccessTy2,
@@ -6437,9 +6438,9 @@ As a concrete example, the type descriptor graph for the following program
64376438

64386439
.. code-block:: c
64396440

6440-
struct Inner {
6441+
union Inner {
64416442
int i; // offset 0
6442-
float f; // offset 4
6443+
float f; // offset 0
64436444
};
64446445

64456446
struct Outer {
@@ -6451,7 +6452,7 @@ As a concrete example, the type descriptor graph for the following program
64516452
void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
64526453
outer->f = 0; // tag0: (OuterStructTy, FloatScalarTy, 0)
64536454
outer->inner_a.i = 0; // tag1: (OuterStructTy, IntScalarTy, 12)
6454-
outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 16)
6455+
outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 12)
64556456
*f = 0.0; // tag3: (FloatScalarTy, FloatScalarTy, 0)
64566457
}
64576458

@@ -6465,13 +6466,13 @@ type):
64656466
FloatScalarTy = ("float", CharScalarTy, 0)
64666467
DoubleScalarTy = ("double", CharScalarTy, 0)
64676468
IntScalarTy = ("int", CharScalarTy, 0)
6468-
InnerStructTy = {"Inner" (IntScalarTy, 0), (FloatScalarTy, 4)}
6469+
InnerStructTy = {"Inner", (IntScalarTy, 0), (FloatScalarTy, 0)}
64696470
OuterStructTy = {"Outer", (FloatScalarTy, 0), (DoubleScalarTy, 4),
64706471
(InnerStructTy, 12)}
64716472

64726473

64736474
with (e.g.) ``ImmediateParent(OuterStructTy, 12)`` = ``(InnerStructTy,
6474-
0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0)``, and
6475+
0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0), (FloatScalarTy, 0)``, and
64756476
``ImmediateParent(IntScalarTy, 0)`` = ``(CharScalarTy, 0)``.
64766477

64776478
.. _tbaa_node_representation:

llvm/include/llvm/IR/Verifier.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,15 @@ class TBAAVerifier {
5959

6060
/// \name Helper functions used by \c visitTBAAMetadata.
6161
/// @{
62-
MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode,
63-
APInt &Offset, bool IsNewFormat);
62+
std::vector<MDNode *> getFieldNodeFromTBAABaseNode(Instruction &I,
63+
const MDNode *BaseNode,
64+
APInt &Offset,
65+
bool IsNewFormat);
66+
bool findAccessTypeNode(Instruction &I,
67+
SmallPtrSetImpl<const MDNode *> &StructPath,
68+
APInt Offset, bool IsNewFormat,
69+
const MDNode *AccessType, const MDNode *BaseNode,
70+
const MDNode *MD);
6471
TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I,
6572
const MDNode *BaseNode,
6673
bool IsNewFormat);

llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
#include "llvm/Support/ErrorHandling.h"
122122
#include <cassert>
123123
#include <cstdint>
124+
#include <stack>
124125

125126
using namespace llvm;
126127

@@ -299,21 +300,22 @@ class TBAAStructTypeNode {
299300
return TBAAStructTypeNode(TypeNode);
300301
}
301302

302-
/// Get this TBAAStructTypeNode's field in the type DAG with
303+
/// Get this TBAAStructTypeNode's fields in the type DAG with
303304
/// given offset. Update the offset to be relative to the field type.
304-
TBAAStructTypeNode getField(uint64_t &Offset) const {
305+
/// There could be multiple fields with same offset.
306+
std::vector<TBAAStructTypeNode> getField(uint64_t &Offset) const {
305307
bool NewFormat = isNewFormat();
306308
const ArrayRef<MDOperand> Operands = Node->operands();
307309
const unsigned NumOperands = Operands.size();
308310

309311
if (NewFormat) {
310312
// New-format root and scalar type nodes have no fields.
311313
if (NumOperands < 6)
312-
return TBAAStructTypeNode();
314+
return {TBAAStructTypeNode()};
313315
} else {
314316
// Parent can be omitted for the root node.
315317
if (NumOperands < 2)
316-
return TBAAStructTypeNode();
318+
return {TBAAStructTypeNode()};
317319

318320
// Fast path for a scalar type node and a struct type node with a single
319321
// field.
@@ -325,8 +327,8 @@ class TBAAStructTypeNode {
325327
Offset -= Cur;
326328
MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
327329
if (!P)
328-
return TBAAStructTypeNode();
329-
return TBAAStructTypeNode(P);
330+
return {TBAAStructTypeNode()};
331+
return {TBAAStructTypeNode(P)};
330332
}
331333
}
332334

@@ -336,6 +338,8 @@ class TBAAStructTypeNode {
336338
unsigned NumOpsPerField = NewFormat ? 3 : 2;
337339
unsigned TheIdx = 0;
338340

341+
std::vector<TBAAStructTypeNode> Ret;
342+
339343
for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
340344
Idx += NumOpsPerField) {
341345
uint64_t Cur =
@@ -353,10 +357,20 @@ class TBAAStructTypeNode {
353357
uint64_t Cur =
354358
mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
355359
Offset -= Cur;
360+
361+
// Collect all fields that have right offset.
356362
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
357-
if (!P)
358-
return TBAAStructTypeNode();
359-
return TBAAStructTypeNode(P);
363+
Ret.emplace_back(P ? TBAAStructTypeNode(P) : TBAAStructTypeNode());
364+
365+
while (TheIdx > FirstFieldOpNo) {
366+
TheIdx -= NumOpsPerField;
367+
auto Val = mdconst::extract<ConstantInt>(Operands[TheIdx + 1]);
368+
if (Cur != Val->getZExtValue())
369+
break;
370+
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
371+
P ? Ret.emplace_back(P) : Ret.emplace_back();
372+
}
373+
return Ret;
360374
}
361375
};
362376

@@ -599,17 +613,24 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
599613
// from the base type, follow the edge with the correct offset in the type DAG
600614
// and adjust the offset until we reach the field type or until we reach the
601615
// access type.
616+
// If multiple fields have same offset in some base type, then scan each such
617+
// field.
602618
bool NewFormat = BaseTag.isNewFormat();
603619
TBAAStructTypeNode BaseType(BaseTag.getBaseType());
604620
uint64_t OffsetInBase = BaseTag.getOffset();
605621

606-
for (;;) {
607-
// In the old format there is no distinction between fields and parent
608-
// types, so in this case we consider all nodes up to the root.
609-
if (!BaseType.getNode()) {
610-
assert(!NewFormat && "Did not see access type in access path!");
611-
break;
612-
}
622+
SmallVector<std::pair<TBAAStructTypeNode, uint64_t>, 4> ToCheck;
623+
ToCheck.emplace_back(BaseType, OffsetInBase);
624+
while (!ToCheck.empty()) {
625+
std::tie(BaseType, OffsetInBase) = ToCheck.back();
626+
ToCheck.pop_back();
627+
628+
// In case if root is reached, still check the remaining candidates.
629+
// For new format it is always expected for access type to be found.
630+
// For old format all nodes up to the root are considered from all
631+
// candidates.
632+
if (!BaseType.getNode())
633+
continue;
613634

614635
if (BaseType.getNode() == SubobjectTag.getBaseType()) {
615636
bool SameMemberAccess = OffsetInBase == SubobjectTag.getOffset();
@@ -627,13 +648,15 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
627648

628649
// Follow the edge with the correct offset. Offset will be adjusted to
629650
// be relative to the field type.
630-
BaseType = BaseType.getField(OffsetInBase);
651+
for (auto &&F : BaseType.getField(OffsetInBase))
652+
ToCheck.emplace_back(F, OffsetInBase);
631653
}
632654

633655
// If the base object has a direct or indirect field of the subobject's type,
634656
// then this may be an access to that field. We need this to check now that
635657
// we support aggregates as access types.
636658
if (NewFormat) {
659+
assert(!NewFormat && "Did not see access type in access path!");
637660
// TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType());
638661
TBAAStructTypeNode FieldType(SubobjectTag.getBaseType());
639662
if (hasField(BaseType, FieldType)) {

llvm/lib/IR/Verifier.cpp

Lines changed: 94 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6973,48 +6973,56 @@ bool TBAAVerifier::isValidScalarTBAANode(const MDNode *MD) {
69736973
return Result;
69746974
}
69756975

6976-
/// Returns the field node at the offset \p Offset in \p BaseNode. Update \p
6977-
/// Offset in place to be the offset within the field node returned.
6976+
/// Returns one or several field nodes at the offset \p Offset in \p BaseNode.
6977+
/// Returns empty vector if \p BaseNode has no fields with specified offset.
6978+
/// Update \p Offset in place to be the offset within the field node returned.
69786979
///
69796980
/// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode.
6980-
MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I,
6981-
const MDNode *BaseNode,
6982-
APInt &Offset,
6983-
bool IsNewFormat) {
6981+
std::vector<MDNode *> TBAAVerifier::getFieldNodeFromTBAABaseNode(
6982+
Instruction &I, const MDNode *BaseNode, APInt &Offset, bool IsNewFormat) {
69846983
assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!");
69856984

69866985
// Scalar nodes have only one possible "field" -- their parent in the access
69876986
// hierarchy. Offset must be zero at this point, but our caller is supposed
69886987
// to check that.
69896988
if (BaseNode->getNumOperands() == 2)
6990-
return cast<MDNode>(BaseNode->getOperand(1));
6989+
return {cast<MDNode>(BaseNode->getOperand(1))};
69916990

69926991
unsigned FirstFieldOpNo = IsNewFormat ? 3 : 1;
69936992
unsigned NumOpsPerField = IsNewFormat ? 3 : 2;
6993+
6994+
unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
69946995
for (unsigned Idx = FirstFieldOpNo; Idx < BaseNode->getNumOperands();
69956996
Idx += NumOpsPerField) {
69966997
auto *OffsetEntryCI =
69976998
mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1));
69986999
if (OffsetEntryCI->getValue().ugt(Offset)) {
69997000
if (Idx == FirstFieldOpNo) {
7000-
CheckFailed("Could not find TBAA parent in struct type node", &I,
7001-
BaseNode, &Offset);
7002-
return nullptr;
7001+
return {};
70037002
}
70047003

7005-
unsigned PrevIdx = Idx - NumOpsPerField;
7006-
auto *PrevOffsetEntryCI =
7007-
mdconst::extract<ConstantInt>(BaseNode->getOperand(PrevIdx + 1));
7008-
Offset -= PrevOffsetEntryCI->getValue();
7009-
return cast<MDNode>(BaseNode->getOperand(PrevIdx));
7004+
LastIdx = Idx - NumOpsPerField;
7005+
break;
70107006
}
70117007
}
70127008

7013-
unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
70147009
auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>(
70157010
BaseNode->getOperand(LastIdx + 1));
7016-
Offset -= LastOffsetEntryCI->getValue();
7017-
return cast<MDNode>(BaseNode->getOperand(LastIdx));
7011+
auto LastOffsetVal = LastOffsetEntryCI->getValue();
7012+
Offset -= LastOffsetVal;
7013+
7014+
std::vector<MDNode *> Ret;
7015+
Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
7016+
while (LastIdx > FirstFieldOpNo) {
7017+
LastIdx -= NumOpsPerField;
7018+
LastOffsetEntryCI =
7019+
mdconst::extract<ConstantInt>(BaseNode->getOperand(LastIdx + 1));
7020+
if (LastOffsetEntryCI->getValue() != LastOffsetVal)
7021+
break;
7022+
Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
7023+
}
7024+
7025+
return Ret;
70187026
}
70197027

70207028
static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
@@ -7091,47 +7099,84 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
70917099
CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
70927100

70937101
APInt Offset = OffsetCI->getValue();
7094-
bool SeenAccessTypeInPath = false;
70957102

7096-
SmallPtrSet<MDNode *, 4> StructPath;
7103+
SmallPtrSet<const MDNode *, 4> StructPath;
70977104

7098-
for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
7099-
BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset,
7100-
IsNewFormat)) {
7101-
if (!StructPath.insert(BaseNode).second) {
7102-
CheckFailed("Cycle detected in struct path", &I, MD);
7103-
return false;
7104-
}
7105+
auto &&[Invalid, BaseNodeBitWidth] =
7106+
verifyTBAABaseNode(I, BaseNode, IsNewFormat);
71057107

7106-
bool Invalid;
7107-
unsigned BaseNodeBitWidth;
7108-
std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode,
7109-
IsNewFormat);
7108+
// If the base node is invalid in itself, then we've already printed all the
7109+
// errors we wanted to print.
7110+
if (Invalid)
7111+
return false;
71107112

7111-
// If the base node is invalid in itself, then we've already printed all the
7112-
// errors we wanted to print.
7113-
if (Invalid)
7114-
return false;
7113+
bool SeenAccessTypeInPath = BaseNode == AccessType;
7114+
if (SeenAccessTypeInPath) {
7115+
CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access", &I,
7116+
MD, &Offset);
7117+
if (IsNewFormat)
7118+
return true;
7119+
}
71157120

7116-
SeenAccessTypeInPath |= BaseNode == AccessType;
7121+
CheckTBAA(findAccessTypeNode(I, StructPath, Offset, IsNewFormat, AccessType,
7122+
BaseNode, MD) ||
7123+
SeenAccessTypeInPath,
7124+
"Did not see access type in access path!", &I, MD);
7125+
return true;
7126+
}
71177127

7118-
if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
7119-
CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access",
7120-
&I, MD, &Offset);
7128+
bool TBAAVerifier::findAccessTypeNode(
7129+
Instruction &I, SmallPtrSetImpl<const MDNode *> &StructPath, APInt Offset,
7130+
bool IsNewFormat, const MDNode *AccessType, const MDNode *BaseNode,
7131+
const MDNode *MD) {
7132+
if (!BaseNode || IsRootTBAANode(BaseNode))
7133+
return false;
71217134

7122-
CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
7123-
(BaseNodeBitWidth == 0 && Offset == 0) ||
7124-
(IsNewFormat && BaseNodeBitWidth == ~0u),
7125-
"Access bit-width not the same as description bit-width", &I, MD,
7126-
BaseNodeBitWidth, Offset.getBitWidth());
7135+
auto &&[Invalid, BaseNodeBitWidth] =
7136+
verifyTBAABaseNode(I, BaseNode, IsNewFormat);
71277137

7128-
if (IsNewFormat && SeenAccessTypeInPath)
7129-
break;
7138+
// If the base node is invalid in itself, then we've already printed all the
7139+
// errors we wanted to print.
7140+
if (Invalid)
7141+
return false;
7142+
7143+
// Offset at point of scalar access must be zero. Skip mismatched nodes.
7144+
if ((isValidScalarTBAANode(BaseNode) || BaseNode == AccessType) &&
7145+
Offset != 0)
7146+
return false;
7147+
7148+
CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
7149+
(BaseNodeBitWidth == 0 && Offset == 0) ||
7150+
(IsNewFormat && BaseNodeBitWidth == ~0u),
7151+
"Access bit-width not the same as description bit-width", &I, MD,
7152+
BaseNodeBitWidth, Offset.getBitWidth());
7153+
7154+
bool SeenAccessTypeInPath = (BaseNode == AccessType && Offset == 0);
7155+
7156+
if (IsNewFormat && SeenAccessTypeInPath)
7157+
return true;
7158+
7159+
auto ProbableNodes =
7160+
getFieldNodeFromTBAABaseNode(I, BaseNode, Offset, IsNewFormat);
7161+
7162+
if (!StructPath.insert(BaseNode).second) {
7163+
CheckFailed("Cycle detected in struct path", &I, MD);
7164+
return false;
71307165
}
71317166

7132-
CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
7133-
MD);
7134-
return true;
7167+
for (auto *PN : ProbableNodes) {
7168+
if (!PN || IsRootTBAANode(PN))
7169+
continue;
7170+
7171+
SmallPtrSet<const MDNode *, 4> StructPathCopy;
7172+
StructPathCopy.insert(StructPath.begin(), StructPath.end());
7173+
7174+
if (findAccessTypeNode(I, StructPathCopy, Offset, IsNewFormat, AccessType,
7175+
PN, MD))
7176+
return true;
7177+
}
7178+
7179+
return SeenAccessTypeInPath;
71357180
}
71367181

71377182
char VerifierLegacyPass::ID = 0;

0 commit comments

Comments
 (0)