Skip to content

Commit 9350860

Browse files
authored
[AsmParser] Add support for reading incomplete IR (part 1) (#78421)
Add an `-allow-incomplete-ir` flag to the IR parser, which allows reading IR with missing declarations. This is intended to produce a best-effort interpretation of the IR, along the same lines as what we would do manually when taking, for example, a function from `-print-after-all` output and fixing it up to be valid IR. This patch only supports dropping references to undeclared metadata, either by dropping metadata attachments from instructions, functions, and global variables, or by dropping calls to certain intrinsics (like debug intrinsics). I will implement support for inserting missing function/global declarations in a follow-up patch. We don't have real use lists for metadata, so the approach here is to iterate over the whole IR and identify metadata that needs to be dropped. This does not support all possible cases, but should handle anything that's relevant for the function-only IR use case.
1 parent 535b197 commit 9350860

File tree

9 files changed

+151
-15
lines changed

9 files changed

+151
-15
lines changed

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ namespace llvm {
331331

332332
// Top-Level Entities
333333
bool parseTopLevelEntities();
334+
void dropUnknownMetadataReferences();
334335
bool validateEndOfModule(bool UpgradeDebugInfo);
335336
bool validateEndOfIndex();
336337
bool parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback);

llvm/include/llvm/IR/GlobalObject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ class GlobalObject : public GlobalValue {
133133
using Value::addMetadata;
134134
using Value::clearMetadata;
135135
using Value::eraseMetadata;
136+
using Value::eraseMetadataIf;
136137
using Value::getAllMetadata;
137138
using Value::getMetadata;
138139
using Value::hasMetadata;

llvm/include/llvm/IR/Instruction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,9 @@ class Instruction : public User,
384384
void copyMetadata(const Instruction &SrcInst,
385385
ArrayRef<unsigned> WL = ArrayRef<unsigned>());
386386

387+
/// Erase all metadata that matches the predicate.
388+
void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
389+
387390
/// If the instruction has "branch_weights" MD_prof metadata and the MDNode
388391
/// has three operands (including name string), swap the order of the
389392
/// metadata.

llvm/include/llvm/IR/Metadata.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,8 @@ class ReplaceableMetadataImpl {
417417
/// is resolved.
418418
void resolveAllUses(bool ResolveUsers = true);
419419

420+
/// Return the number of entries currently recorded in UseMap.
unsigned getNumUses() const { return UseMap.size(); }
421+
420422
private:
421423
void addRef(void *Ref, OwnerTy Owner);
422424
void dropRef(void *Ref);
@@ -1243,6 +1245,11 @@ class MDNode : public Metadata {
12431245
bool isReplaceable() const { return isTemporary() || isAlwaysReplaceable(); }
12441246
bool isAlwaysReplaceable() const { return getMetadataID() == DIAssignIDKind; }
12451247

1248+
unsigned getNumTemporaryUses() const {
1249+
assert(isTemporary() && "Only for temporaries");
1250+
return Context.getReplaceableUses()->getNumUses();
1251+
}
1252+
12461253
/// RAUW a temporary.
12471254
///
12481255
/// \pre \a isTemporary() must be \c true.

llvm/include/llvm/IR/Value.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,9 @@ class Value {
618618
/// \returns true if any metadata was removed.
619619
bool eraseMetadata(unsigned KindID);
620620

621+
/// Erase all metadata attachments matching the given predicate.
622+
void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
623+
621624
/// Erase all metadata attached to this Value.
622625
void clearMetadata();
623626

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
#include "llvm/AsmParser/LLParser.h"
1414
#include "llvm/ADT/APSInt.h"
1515
#include "llvm/ADT/DenseMap.h"
16-
#include "llvm/ADT/ScopeExit.h"
1716
#include "llvm/ADT/STLExtras.h"
17+
#include "llvm/ADT/ScopeExit.h"
1818
#include "llvm/ADT/SmallPtrSet.h"
1919
#include "llvm/AsmParser/LLToken.h"
2020
#include "llvm/AsmParser/SlotMapping.h"
@@ -32,7 +32,9 @@
3232
#include "llvm/IR/GlobalIFunc.h"
3333
#include "llvm/IR/GlobalObject.h"
3434
#include "llvm/IR/InlineAsm.h"
35+
#include "llvm/IR/InstIterator.h"
3536
#include "llvm/IR/Instructions.h"
37+
#include "llvm/IR/IntrinsicInst.h"
3638
#include "llvm/IR/Intrinsics.h"
3739
#include "llvm/IR/LLVMContext.h"
3840
#include "llvm/IR/Metadata.h"
@@ -54,6 +56,12 @@
5456

5557
using namespace llvm;
5658

59+
static cl::opt<bool> AllowIncompleteIR(
60+
"allow-incomplete-ir", cl::init(false), cl::Hidden,
61+
cl::desc(
62+
"Allow incomplete IR on a best effort basis (references to unknown "
63+
"metadata will be dropped)"));
64+
5765
static std::string getTypeString(Type *T) {
5866
std::string Result;
5967
raw_string_ostream Tmp(Result);
@@ -123,6 +131,55 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
123131
std::make_pair(I.first, std::make_pair(I.second, LocTy())));
124132
}
125133

134+
/// Erase \p II if it is an intrinsic that is safe to drop and at least one of
/// its arguments wraps a metadata node that is still temporary.
///
/// Only debug-info intrinsics and llvm.experimental.noalias.scope.decl are
/// considered; any other intrinsic is left untouched. When the call is erased,
/// the MetadataAsValue wrappers of the temporary nodes are deleted as well if
/// nothing else uses them.
static void dropIntrinsicWithUnknownMetadataArgument(IntrinsicInst *II) {
  // Allow-list of intrinsics that are safe to drop.
  if (!isa<DbgInfoIntrinsic>(II) &&
      II->getIntrinsicID() != Intrinsic::experimental_noalias_scope_decl)
    return;

  // The same MetadataAsValue may be passed as more than one argument of the
  // call. Record each wrapper only once so that it is not inspected after
  // being freed, or deleted a second time, in the cleanup loop below.
  SmallVector<MetadataAsValue *> MVs;
  for (Value *V : II->args())
    if (auto *MV = dyn_cast<MetadataAsValue>(V))
      if (auto *MD = dyn_cast<MDNode>(MV->getMetadata()))
        if (MD->isTemporary() && !is_contained(MVs, MV))
          MVs.push_back(MV);

  // No argument refers to unknown metadata: keep the call.
  if (MVs.empty())
    return;

  assert(II->use_empty() && "Cannot have uses");
  II->eraseFromParent();

  // Also remove no longer used MetadataAsValue wrappers.
  for (MetadataAsValue *MV : MVs)
    if (MV->use_empty())
      delete MV;
}
157+
158+
void LLParser::dropUnknownMetadataReferences() {
159+
auto Pred = [](unsigned MDKind, MDNode *Node) { return Node->isTemporary(); };
160+
for (Function &F : *M) {
161+
F.eraseMetadataIf(Pred);
162+
for (Instruction &I : make_early_inc_range(instructions(F))) {
163+
I.eraseMetadataIf(Pred);
164+
165+
if (auto *II = dyn_cast<IntrinsicInst>(&I))
166+
dropIntrinsicWithUnknownMetadataArgument(II);
167+
}
168+
}
169+
170+
for (GlobalVariable &GV : M->globals())
171+
GV.eraseMetadataIf(Pred);
172+
173+
for (const auto &[ID, Info] : make_early_inc_range(ForwardRefMDNodes)) {
174+
// Check whether there is only a single use left, which would be in our
175+
// own NumberedMetadata.
176+
if (Info.first->getNumTemporaryUses() == 1) {
177+
NumberedMetadata.erase(ID);
178+
ForwardRefMDNodes.erase(ID);
179+
}
180+
}
181+
}
182+
126183
/// validateEndOfModule - Do final validity and basic correctness checks at the
127184
/// end of the module.
128185
bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
@@ -284,6 +341,9 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
284341
"use of undefined value '@" +
285342
Twine(ForwardRefValIDs.begin()->first) + "'");
286343

344+
if (AllowIncompleteIR && !ForwardRefMDNodes.empty())
345+
dropUnknownMetadataReferences();
346+
287347
if (!ForwardRefMDNodes.empty())
288348
return error(ForwardRefMDNodes.begin()->second.second,
289349
"use of undefined metadata '!" +
@@ -297,10 +357,14 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
297357

298358
for (auto *Inst : InstsWithTBAATag) {
299359
MDNode *MD = Inst->getMetadata(LLVMContext::MD_tbaa);
300-
assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
301-
auto *UpgradedMD = UpgradeTBAANode(*MD);
302-
if (MD != UpgradedMD)
303-
Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
360+
// With incomplete IR, the tbaa metadata may have been dropped.
361+
if (!AllowIncompleteIR)
362+
assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
363+
if (MD) {
364+
auto *UpgradedMD = UpgradeTBAANode(*MD);
365+
if (MD != UpgradedMD)
366+
Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
367+
}
304368
}
305369

306370
// Look for intrinsic functions and CallInst that need to be upgraded. We use

llvm/lib/IR/Metadata.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,21 @@ bool Value::eraseMetadata(unsigned KindID) {
15331533
return Changed;
15341534
}
15351535

1536+
/// Erase every metadata attachment of this value for which \p Pred returns
/// true. \p Pred receives the attachment's kind ID and its node. Does nothing
/// when the value has no metadata.
void Value::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
  if (!HasMetadata)
    return;

  MDAttachments &Attachments =
      getContext().pImpl->ValueMetadata.find(this)->second;
  assert(!Attachments.empty() && "bit out of sync with hash table");

  Attachments.remove_if([&Pred](const MDAttachments::Attachment &A) {
    return Pred(A.MDKind, A.Node);
  });

  // Every attachment was removed: drop our entry at the store.
  if (Attachments.empty())
    clearMetadata();
}
1550+
15361551
void Value::clearMetadata() {
15371552
if (!HasMetadata)
15381553
return;
@@ -1556,6 +1571,13 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
15561571
return Value::getMetadata(KindID);
15571572
}
15581573

1574+
/// Erase all metadata of this instruction that matches \p Pred, including
/// the debug location.
void Instruction::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
  // The debug location is kept in DbgLoc; present it to the predicate as an
  // MD_dbg attachment and reset it when the predicate matches.
  if (DbgLoc) {
    MDNode *Loc = DbgLoc.getAsMDNode();
    if (Pred(LLVMContext::MD_dbg, Loc))
      DbgLoc = {};
  }

  // The remaining attachments are handled by the Value implementation.
  Value::eraseMetadataIf(Pred);
}
1580+
15591581
void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
15601582
if (!Value::hasMetadata())
15611583
return; // Nothing to remove!
@@ -1566,17 +1588,9 @@ void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
15661588
// A DIAssignID attachment is debug metadata, don't drop it.
15671589
KnownSet.insert(LLVMContext::MD_DIAssignID);
15681590

1569-
auto &MetadataStore = getContext().pImpl->ValueMetadata;
1570-
MDAttachments &Info = MetadataStore.find(this)->second;
1571-
assert(!Info.empty() && "bit out of sync with hash table");
1572-
Info.remove_if([&KnownSet](const MDAttachments::Attachment &I) {
1573-
return !KnownSet.count(I.MDKind);
1591+
Value::eraseMetadataIf([&KnownSet](unsigned MDKind, MDNode *Node) {
1592+
return !KnownSet.count(MDKind);
15741593
});
1575-
1576-
if (Info.empty()) {
1577-
// Drop our entry at the store.
1578-
clearMetadata();
1579-
}
15801594
}
15811595

15821596
void Instruction::updateDIAssignIDMapping(DIAssignID *ID) {
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
; RUN: not llvm-as -allow-incomplete-ir < %s 2>&1 | FileCheck %s
2+
3+
; CHECK: error: use of undefined metadata '!1'
4+
define void @test(ptr %p) {
5+
%v = load i8, ptr %p, !noalias !0
6+
ret void
7+
}
8+
9+
!0 = !{!1}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -allow-incomplete-ir < %s | FileCheck %s
3+
4+
@g = global i8 0, !exclude !4
5+
6+
define void @test(ptr %p) !dbg !3 {
7+
; CHECK-LABEL: define void @test(
8+
; CHECK-SAME: ptr [[P:%.*]]) {
9+
; CHECK-NEXT: [[V1:%.*]] = load i8, ptr [[P]], align 1
10+
; CHECK-NEXT: [[V2:%.*]] = load i8, ptr [[P]], align 1
11+
; CHECK-NEXT: [[V3:%.*]] = load i8, ptr [[P]], align 1, !noalias [[META0:![0-9]+]]
12+
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
13+
; CHECK-NEXT: ret void
14+
;
15+
%v1 = load i8, ptr %p, !noalias !0
16+
%v2 = load i8, ptr %p, !tbaa !1
17+
%v3 = load i8, ptr %p, !dbg !2, !noalias !100
18+
call void @llvm.experimental.noalias.scope.decl(metadata !5)
19+
call void @llvm.dbg.value(metadata i32 0, metadata !7, metadata !8)
20+
call void @llvm.experimental.noalias.scope.decl(metadata !100)
21+
ret void
22+
}
23+
24+
declare void @llvm.experimental.noalias.scope.decl(metadata)
25+
declare void @llvm.dbg.value(metadata, metadata, metadata)
26+
27+
!100 = !{!101}
28+
!101 = !{!101, !102}
29+
!102 = !{!102}
30+
;.
31+
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
32+
; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
33+
; CHECK: [[META2]] = distinct !{[[META2]]}
34+
;.

0 commit comments

Comments
 (0)