Skip to content

Commit 6f37114

Browse files
authored
[AsmParser] Don't require value numbers to be consecutive (#78171)
Currently, the IR parser requires that %n style numbered values are consecutive. This means that the IR becomes invalid as soon as you remove an instruction, argument or block. This makes it very annoying to modify IR without running it through instnamer first. I don't think there is any good reason to impose this requirement. This PR relaxes it to allow value IDs to be non-consecutive, but it still keeps the requirement that they're increasing (i.e. you can't skip a value number and then assign it later). This only implements support for skipping numbers for local values. We should extend this to global values in the future as well.
1 parent ea9d75a commit 6f37114

File tree

10 files changed

+214
-73
lines changed

10 files changed

+214
-73
lines changed

llvm/docs/LangRef.rst

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,13 @@ lexical features of LLVM:
128128
#. Comments are delimited with a '``;``' and go until the end of line.
129129
#. Unnamed temporaries are created when the result of a computation is
130130
not assigned to a named value.
131-
#. Unnamed temporaries are numbered sequentially (using a per-function
132-
incrementing counter, starting with 0). Note that basic blocks and unnamed
133-
function parameters are included in this numbering. For example, if the
134-
entry basic block is not given a label name and all function parameters are
135-
named, then it will get number 0.
131+
#. By default, unnamed temporaries are numbered sequentially (using a
132+
per-function incrementing counter, starting with 0). However, when explicitly
133+
specifying temporary numbers, it is allowed to skip over numbers.
134+
135+
Note that basic blocks and unnamed function parameters are included in this
136+
numbering. For example, if the entry basic block is not given a label name
137+
and all function parameters are named, then it will get number 0.
136138

137139
It also shows a convention that we follow in this document. When
138140
demonstrating instructions, we will follow an instruction with a comment

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ namespace llvm {
203203
bool error(LocTy L, const Twine &Msg) const { return Lex.Error(L, Msg); }
204204
bool tokError(const Twine &Msg) const { return error(Lex.getLoc(), Msg); }
205205

206+
bool checkValueID(LocTy L, StringRef Kind, StringRef Prefix,
207+
unsigned NextID, unsigned ID) const;
208+
206209
/// Restore the internal name and slot mappings using the mappings that
207210
/// were created at an earlier parsing stage.
208211
void restoreParsingState(const SlotMapping *Slots);
@@ -448,19 +451,35 @@ namespace llvm {
448451
bool parseFunctionType(Type *&Result);
449452
bool parseTargetExtType(Type *&Result);
450453

454+
class NumberedValues {
455+
DenseMap<unsigned, Value *> Vals;
456+
unsigned NextUnusedID = 0;
457+
458+
public:
459+
unsigned getNext() const { return NextUnusedID; }
460+
Value *get(unsigned ID) const { return Vals.lookup(ID); }
461+
void add(unsigned ID, Value *V) {
462+
assert(ID >= NextUnusedID && "Invalid value ID");
463+
Vals.insert({ID, V});
464+
NextUnusedID = ID + 1;
465+
}
466+
};
467+
451468
// Function Semantic Analysis.
452469
class PerFunctionState {
453470
LLParser &P;
454471
Function &F;
455472
std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
456473
std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
457-
std::vector<Value*> NumberedVals;
474+
NumberedValues NumberedVals;
458475

459476
/// FunctionNumber - If this is an unnamed function, this is the slot
460477
/// number of it, otherwise it is -1.
461478
int FunctionNumber;
479+
462480
public:
463-
PerFunctionState(LLParser &p, Function &f, int functionNumber);
481+
PerFunctionState(LLParser &p, Function &f, int functionNumber,
482+
ArrayRef<unsigned> UnnamedArgNums);
464483
~PerFunctionState();
465484

466485
Function &getFunction() const { return F; }
@@ -590,9 +609,12 @@ namespace llvm {
590609
ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
591610
: Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
592611
};
593-
bool parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &IsVarArg);
594-
bool parseFunctionHeader(Function *&Fn, bool IsDefine);
595-
bool parseFunctionBody(Function &Fn);
612+
bool parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
613+
SmallVectorImpl<unsigned> &UnnamedArgNums,
614+
bool &IsVarArg);
615+
bool parseFunctionHeader(Function *&Fn, bool IsDefine,
616+
SmallVectorImpl<unsigned> &UnnamedArgNums);
617+
bool parseFunctionBody(Function &Fn, ArrayRef<unsigned> UnnamedArgNums);
596618
bool parseBasicBlock(PerFunctionState &PFS);
597619

598620
enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 69 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,8 @@ bool LLParser::parseDeclare() {
600600
}
601601

602602
Function *F;
603-
if (parseFunctionHeader(F, false))
603+
SmallVector<unsigned> UnnamedArgNums;
604+
if (parseFunctionHeader(F, false, UnnamedArgNums))
604605
return true;
605606
for (auto &MD : MDs)
606607
F->addMetadata(MD.first, *MD.second);
@@ -614,8 +615,10 @@ bool LLParser::parseDefine() {
614615
Lex.Lex();
615616

616617
Function *F;
617-
return parseFunctionHeader(F, true) || parseOptionalFunctionMetadata(*F) ||
618-
parseFunctionBody(*F);
618+
SmallVector<unsigned> UnnamedArgNums;
619+
return parseFunctionHeader(F, true, UnnamedArgNums) ||
620+
parseOptionalFunctionMetadata(*F) ||
621+
parseFunctionBody(*F, UnnamedArgNums);
619622
}
620623

621624
/// parseGlobalType
@@ -2953,6 +2956,15 @@ bool LLParser::parseOptionalOperandBundles(
29532956
return false;
29542957
}
29552958

2959+
bool LLParser::checkValueID(LocTy Loc, StringRef Kind, StringRef Prefix,
2960+
unsigned NextID, unsigned ID) const {
2961+
if (ID < NextID)
2962+
return error(Loc, Kind + " expected to be numbered '" + Prefix +
2963+
Twine(NextID) + "' or greater");
2964+
2965+
return false;
2966+
}
2967+
29562968
/// parseArgumentList - parse the argument list for a function type or function
29572969
/// prototype.
29582970
/// ::= '(' ArgTypeListI ')'
@@ -2963,6 +2975,7 @@ bool LLParser::parseOptionalOperandBundles(
29632975
/// ::= ArgType (',' ArgType)*
29642976
///
29652977
bool LLParser::parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
2978+
SmallVectorImpl<unsigned> &UnnamedArgNums,
29662979
bool &IsVarArg) {
29672980
unsigned CurValID = 0;
29682981
IsVarArg = false;
@@ -2989,12 +3002,19 @@ bool LLParser::parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
29893002
if (Lex.getKind() == lltok::LocalVar) {
29903003
Name = Lex.getStrVal();
29913004
Lex.Lex();
2992-
} else if (Lex.getKind() == lltok::LocalVarID) {
2993-
if (Lex.getUIntVal() != CurValID)
2994-
return error(TypeLoc, "argument expected to be numbered '%" +
2995-
Twine(CurValID) + "'");
2996-
++CurValID;
2997-
Lex.Lex();
3005+
} else {
3006+
unsigned ArgID;
3007+
if (Lex.getKind() == lltok::LocalVarID) {
3008+
ArgID = Lex.getUIntVal();
3009+
if (checkValueID(TypeLoc, "argument", "%", CurValID, ArgID))
3010+
return true;
3011+
Lex.Lex();
3012+
} else {
3013+
ArgID = CurValID;
3014+
}
3015+
3016+
UnnamedArgNums.push_back(ArgID);
3017+
CurValID = ArgID + 1;
29983018
}
29993019

30003020
if (!FunctionType::isValidArgumentType(ArgTy))
@@ -3023,13 +3043,17 @@ bool LLParser::parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
30233043
Name = Lex.getStrVal();
30243044
Lex.Lex();
30253045
} else {
3046+
unsigned ArgID;
30263047
if (Lex.getKind() == lltok::LocalVarID) {
3027-
if (Lex.getUIntVal() != CurValID)
3028-
return error(TypeLoc, "argument expected to be numbered '%" +
3029-
Twine(CurValID) + "'");
3048+
ArgID = Lex.getUIntVal();
3049+
if (checkValueID(TypeLoc, "argument", "%", CurValID, ArgID))
3050+
return true;
30303051
Lex.Lex();
3052+
} else {
3053+
ArgID = CurValID;
30313054
}
3032-
++CurValID;
3055+
UnnamedArgNums.push_back(ArgID);
3056+
CurValID = ArgID + 1;
30333057
Name = "";
30343058
}
30353059

@@ -3055,7 +3079,8 @@ bool LLParser::parseFunctionType(Type *&Result) {
30553079

30563080
SmallVector<ArgInfo, 8> ArgList;
30573081
bool IsVarArg;
3058-
if (parseArgumentList(ArgList, IsVarArg))
3082+
SmallVector<unsigned> UnnamedArgNums;
3083+
if (parseArgumentList(ArgList, UnnamedArgNums, IsVarArg))
30593084
return true;
30603085

30613086
// Reject names on the arguments lists.
@@ -3291,13 +3316,18 @@ bool LLParser::parseTargetExtType(Type *&Result) {
32913316
//===----------------------------------------------------------------------===//
32923317

32933318
LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
3294-
int functionNumber)
3319+
int functionNumber,
3320+
ArrayRef<unsigned> UnnamedArgNums)
32953321
: P(p), F(f), FunctionNumber(functionNumber) {
32963322

32973323
// Insert unnamed arguments into the NumberedVals list.
3298-
for (Argument &A : F.args())
3299-
if (!A.hasName())
3300-
NumberedVals.push_back(&A);
3324+
auto It = UnnamedArgNums.begin();
3325+
for (Argument &A : F.args()) {
3326+
if (!A.hasName()) {
3327+
unsigned ArgNum = *It++;
3328+
NumberedVals.add(ArgNum, &A);
3329+
}
3330+
}
33013331
}
33023332

33033333
LLParser::PerFunctionState::~PerFunctionState() {
@@ -3378,7 +3408,7 @@ Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
33783408

33793409
Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc) {
33803410
// Look this name up in the normal function symbol table.
3381-
Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr;
3411+
Value *Val = NumberedVals.get(ID);
33823412

33833413
// If this is a forward reference for the value, see if we already created a
33843414
// forward ref record.
@@ -3426,11 +3456,11 @@ bool LLParser::PerFunctionState::setInstName(int NameID,
34263456
if (NameStr.empty()) {
34273457
// If neither a name nor an ID was specified, just use the next ID.
34283458
if (NameID == -1)
3429-
NameID = NumberedVals.size();
3459+
NameID = NumberedVals.getNext();
34303460

3431-
if (unsigned(NameID) != NumberedVals.size())
3432-
return P.error(NameLoc, "instruction expected to be numbered '%" +
3433-
Twine(NumberedVals.size()) + "'");
3461+
if (P.checkValueID(NameLoc, "instruction", "%", NumberedVals.getNext(),
3462+
NameID))
3463+
return true;
34343464

34353465
auto FI = ForwardRefValIDs.find(NameID);
34363466
if (FI != ForwardRefValIDs.end()) {
@@ -3445,7 +3475,7 @@ bool LLParser::PerFunctionState::setInstName(int NameID,
34453475
ForwardRefValIDs.erase(FI);
34463476
}
34473477

3448-
NumberedVals.push_back(Inst);
3478+
NumberedVals.add(NameID, Inst);
34493479
return false;
34503480
}
34513481

@@ -3492,15 +3522,15 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name,
34923522
int NameID, LocTy Loc) {
34933523
BasicBlock *BB;
34943524
if (Name.empty()) {
3495-
if (NameID != -1 && unsigned(NameID) != NumberedVals.size()) {
3496-
P.error(Loc, "label expected to be numbered '" +
3497-
Twine(NumberedVals.size()) + "'");
3498-
return nullptr;
3525+
if (NameID != -1) {
3526+
if (P.checkValueID(Loc, "label", "", NumberedVals.getNext(), NameID))
3527+
return nullptr;
3528+
} else {
3529+
NameID = NumberedVals.getNext();
34993530
}
3500-
BB = getBB(NumberedVals.size(), Loc);
3531+
BB = getBB(NameID, Loc);
35013532
if (!BB) {
3502-
P.error(Loc, "unable to create block numbered '" +
3503-
Twine(NumberedVals.size()) + "'");
3533+
P.error(Loc, "unable to create block numbered '" + Twine(NameID) + "'");
35043534
return nullptr;
35053535
}
35063536
} else {
@@ -3517,8 +3547,8 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name,
35173547

35183548
// Remove the block from forward ref sets.
35193549
if (Name.empty()) {
3520-
ForwardRefValIDs.erase(NumberedVals.size());
3521-
NumberedVals.push_back(BB);
3550+
ForwardRefValIDs.erase(NameID);
3551+
NumberedVals.add(NameID, BB);
35223552
} else {
35233553
// BB forward references are already in the function symbol table.
35243554
ForwardRefVals.erase(Name);
@@ -5962,7 +5992,8 @@ bool LLParser::parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
59625992
/// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName
59635993
/// '(' ArgList ')' OptAddrSpace OptFuncAttrs OptSection OptionalAlign
59645994
/// OptGC OptionalPrefix OptionalPrologue OptPersonalityFn
5965-
bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
5995+
bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine,
5996+
SmallVectorImpl<unsigned> &UnnamedArgNums) {
59665997
// parse the linkage.
59675998
LocTy LinkageLoc = Lex.getLoc();
59685999
unsigned Linkage;
@@ -6050,7 +6081,7 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
60506081
Constant *PersonalityFn = nullptr;
60516082
Comdat *C;
60526083

6053-
if (parseArgumentList(ArgList, IsVarArg) ||
6084+
if (parseArgumentList(ArgList, UnnamedArgNums, IsVarArg) ||
60546085
parseOptionalUnnamedAddr(UnnamedAddr) ||
60556086
parseOptionalProgramAddrSpace(AddrSpace) ||
60566087
parseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
@@ -6245,15 +6276,16 @@ bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() {
62456276

62466277
/// parseFunctionBody
62476278
/// ::= '{' BasicBlock+ UseListOrderDirective* '}'
6248-
bool LLParser::parseFunctionBody(Function &Fn) {
6279+
bool LLParser::parseFunctionBody(Function &Fn,
6280+
ArrayRef<unsigned> UnnamedArgNums) {
62496281
if (Lex.getKind() != lltok::lbrace)
62506282
return tokError("expected '{' in function body");
62516283
Lex.Lex(); // eat the {.
62526284

62536285
int FunctionNumber = -1;
62546286
if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
62556287

6256-
PerFunctionState PFS(*this, Fn, FunctionNumber);
6288+
PerFunctionState PFS(*this, Fn, FunctionNumber, UnnamedArgNums);
62576289

62586290
// Resolve block addresses and allow basic blocks to be forward-declared
62596291
// within this function.

llvm/test/Assembler/call-nonzero-program-addrspace-2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
; Check that numbered variables in a nonzero program address space 200 can be used in a call instruction
55

6-
define i8 @test_unnamed(ptr, ptr addrspace(42) %0) {
6+
define i8 @test_unnamed(ptr, ptr addrspace(42) %1) {
77
; Calls with explicit address spaces are fine:
88
call addrspace(0) i8 %0(i32 0)
99
call addrspace(42) i8 %1(i32 0)

llvm/test/Assembler/invalid-arg-num-1.ll

Lines changed: 0 additions & 6 deletions
This file was deleted.

llvm/test/Assembler/invalid-arg-num-2.ll

Lines changed: 0 additions & 6 deletions
This file was deleted.

llvm/test/Assembler/invalid-arg-num-3.ll

Lines changed: 0 additions & 6 deletions
This file was deleted.

llvm/test/Assembler/invalid-block-label-num.ll

Lines changed: 0 additions & 7 deletions
This file was deleted.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: split-file %s %t
2+
; RUN: not llvm-as < %s %t/instr_smaller_id.ll 2>&1 | FileCheck %s --check-prefix=INSTR-SMALLER-ID
3+
; RUN: not llvm-as < %s %t/arg_smaller_id.ll 2>&1 | FileCheck %s --check-prefix=ARG-SMALLER-ID
4+
; RUN: not llvm-as < %s %t/block_smaller_id.ll 2>&1 | FileCheck %s --check-prefix=BLOCK-SMALLER-ID
5+
6+
;--- instr_smaller_id.ll
7+
8+
; INSTR-SMALLER-ID: error: instruction expected to be numbered '%11' or greater
9+
define i32 @test() {
10+
%10 = add i32 1, 2
11+
%5 = add i32 %10, 3
12+
ret i32 %5
13+
}
14+
15+
;--- arg_smaller_id.ll
16+
17+
; ARG-SMALLER-ID: error: argument expected to be numbered '%11' or greater
18+
define i32 @test(i32 %10, i32 %5) {
19+
ret i32 %5
20+
}
21+
22+
;--- block_smaller_id.ll
23+
24+
; BLOCK-SMALLER-ID: error: label expected to be numbered '11' or greater
25+
define i32 @test() {
26+
10:
27+
br label %5
28+
29+
5:
30+
ret i32 0
31+
}

0 commit comments

Comments
 (0)