Skip to content

Commit f21b4bd

Browse files
committed
[AsmParser] Don't require value numbers to be consecutive
Currently, the IR parser requires that %n style numbered values are consecutive. This means that the IR becomes invalid as soon as you remove an instruction, argument or block. This makes it very annoying to modify IR without running it through instnamer first. I don't think there is any good reason to impose this requirement. This PR relaxes it to allow value IDs to be non-consecutive, but it still keeps the requirement that they're increasing (i.e. you can't skip a value number and then assign it later). I've kept the underlying representation the same and added a guard against skipping large ranges to guard against degenerate cases where the NumberedVals vector would become huge. If people prefer, I could change NumberedVals from a vector to a map + a counter instead, in which case we could support arbitrary numbers without large increases in memory usage. This only implements support for skipping numbers for local values. We should extend this to global values in the future as well.
1 parent 480cc41 commit f21b4bd

File tree

9 files changed

+228
-60
lines changed

9 files changed

+228
-60
lines changed

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ namespace llvm {
203203
bool error(LocTy L, const Twine &Msg) const { return Lex.Error(L, Msg); }
204204
bool tokError(const Twine &Msg) const { return error(Lex.getLoc(), Msg); }
205205

206+
bool checkValueID(LocTy L, StringRef Kind, StringRef Prefix,
207+
unsigned NextID, unsigned ID) const;
208+
206209
/// Restore the internal name and slot mappings using the mappings that
207210
/// were created at an earlier parsing stage.
208211
void restoreParsingState(const SlotMapping *Slots);
@@ -459,8 +462,10 @@ namespace llvm {
459462
/// FunctionNumber - If this is an unnamed function, this is the slot
460463
/// number of it, otherwise it is -1.
461464
int FunctionNumber;
465+
462466
public:
463-
PerFunctionState(LLParser &p, Function &f, int functionNumber);
467+
PerFunctionState(LLParser &p, Function &f, int functionNumber,
468+
ArrayRef<unsigned> UnnamedArgNums);
464469
~PerFunctionState();
465470

466471
Function &getFunction() const { return F; }
@@ -590,9 +595,12 @@ namespace llvm {
590595
ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
591596
: Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
592597
};
593-
bool parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &IsVarArg);
594-
bool parseFunctionHeader(Function *&Fn, bool IsDefine);
595-
bool parseFunctionBody(Function &Fn);
598+
bool parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
599+
SmallVectorImpl<unsigned> &UnnamedArgNums,
600+
bool &IsVarArg);
601+
bool parseFunctionHeader(Function *&Fn, bool IsDefine,
602+
SmallVectorImpl<unsigned> &UnnamedArgNums);
603+
bool parseFunctionBody(Function &Fn, ArrayRef<unsigned> UnnamedArgNums);
596604
bool parseBasicBlock(PerFunctionState &PFS);
597605

598606
enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 75 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454

5555
using namespace llvm;
5656

57+
// Avoid large gaps in the NumberedVals vector.
58+
static const size_t MaxNumberedValSkip = 1000;
59+
5760
static std::string getTypeString(Type *T) {
5861
std::string Result;
5962
raw_string_ostream Tmp(Result);
@@ -572,7 +575,8 @@ bool LLParser::parseDeclare() {
572575
}
573576

574577
Function *F;
575-
if (parseFunctionHeader(F, false))
578+
SmallVector<unsigned> UnnamedArgNums;
579+
if (parseFunctionHeader(F, false, UnnamedArgNums))
576580
return true;
577581
for (auto &MD : MDs)
578582
F->addMetadata(MD.first, *MD.second);
@@ -586,8 +590,10 @@ bool LLParser::parseDefine() {
586590
Lex.Lex();
587591

588592
Function *F;
589-
return parseFunctionHeader(F, true) || parseOptionalFunctionMetadata(*F) ||
590-
parseFunctionBody(*F);
593+
SmallVector<unsigned> UnnamedArgNums;
594+
return parseFunctionHeader(F, true, UnnamedArgNums) ||
595+
parseOptionalFunctionMetadata(*F) ||
596+
parseFunctionBody(*F, UnnamedArgNums);
591597
}
592598

593599
/// parseGlobalType
@@ -2925,6 +2931,19 @@ bool LLParser::parseOptionalOperandBundles(
29252931
return false;
29262932
}
29272933

2934+
bool LLParser::checkValueID(LocTy Loc, StringRef Kind, StringRef Prefix,
2935+
unsigned NextID, unsigned ID) const {
2936+
if (ID < NextID)
2937+
return error(Loc, Kind + " expected to be numbered '" + Prefix +
2938+
Twine(NextID) + "' or greater");
2939+
2940+
if (ID > NextID + MaxNumberedValSkip)
2941+
return error(Loc, "value numbers can skip at most " +
2942+
Twine(MaxNumberedValSkip) + " values");
2943+
2944+
return false;
2945+
}
2946+
29282947
/// parseArgumentList - parse the argument list for a function type or function
29292948
/// prototype.
29302949
/// ::= '(' ArgTypeListI ')'
@@ -2935,6 +2954,7 @@ bool LLParser::parseOptionalOperandBundles(
29352954
/// ::= ArgType (',' ArgType)*
29362955
///
29372956
bool LLParser::parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
2957+
SmallVectorImpl<unsigned> &UnnamedArgNums,
29382958
bool &IsVarArg) {
29392959
unsigned CurValID = 0;
29402960
IsVarArg = false;
@@ -2961,12 +2981,19 @@ bool LLParser::parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
29612981
if (Lex.getKind() == lltok::LocalVar) {
29622982
Name = Lex.getStrVal();
29632983
Lex.Lex();
2964-
} else if (Lex.getKind() == lltok::LocalVarID) {
2965-
if (Lex.getUIntVal() != CurValID)
2966-
return error(TypeLoc, "argument expected to be numbered '%" +
2967-
Twine(CurValID) + "'");
2968-
++CurValID;
2969-
Lex.Lex();
2984+
} else {
2985+
unsigned ArgID;
2986+
if (Lex.getKind() == lltok::LocalVarID) {
2987+
ArgID = Lex.getUIntVal();
2988+
if (checkValueID(TypeLoc, "argument", "%", CurValID, ArgID))
2989+
return true;
2990+
Lex.Lex();
2991+
} else {
2992+
ArgID = CurValID;
2993+
}
2994+
2995+
UnnamedArgNums.push_back(ArgID);
2996+
CurValID = ArgID + 1;
29702997
}
29712998

29722999
if (!FunctionType::isValidArgumentType(ArgTy))
@@ -2995,13 +3022,17 @@ bool LLParser::parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
29953022
Name = Lex.getStrVal();
29963023
Lex.Lex();
29973024
} else {
3025+
unsigned ArgID;
29983026
if (Lex.getKind() == lltok::LocalVarID) {
2999-
if (Lex.getUIntVal() != CurValID)
3000-
return error(TypeLoc, "argument expected to be numbered '%" +
3001-
Twine(CurValID) + "'");
3027+
ArgID = Lex.getUIntVal();
3028+
if (checkValueID(TypeLoc, "argument", "%", CurValID, ArgID))
3029+
return true;
30023030
Lex.Lex();
3031+
} else {
3032+
ArgID = CurValID;
30033033
}
3004-
++CurValID;
3034+
UnnamedArgNums.push_back(ArgID);
3035+
CurValID = ArgID + 1;
30053036
Name = "";
30063037
}
30073038

@@ -3027,7 +3058,8 @@ bool LLParser::parseFunctionType(Type *&Result) {
30273058

30283059
SmallVector<ArgInfo, 8> ArgList;
30293060
bool IsVarArg;
3030-
if (parseArgumentList(ArgList, IsVarArg))
3061+
SmallVector<unsigned> UnnamedArgNums;
3062+
if (parseArgumentList(ArgList, UnnamedArgNums, IsVarArg))
30313063
return true;
30323064

30333065
// Reject names on the arguments lists.
@@ -3263,13 +3295,19 @@ bool LLParser::parseTargetExtType(Type *&Result) {
32633295
//===----------------------------------------------------------------------===//
32643296

32653297
LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
3266-
int functionNumber)
3298+
int functionNumber,
3299+
ArrayRef<unsigned> UnnamedArgNums)
32673300
: P(p), F(f), FunctionNumber(functionNumber) {
32683301

32693302
// Insert unnamed arguments into the NumberedVals list.
3270-
for (Argument &A : F.args())
3271-
if (!A.hasName())
3303+
auto It = UnnamedArgNums.begin();
3304+
for (Argument &A : F.args()) {
3305+
if (!A.hasName()) {
3306+
unsigned ArgNum = *It++;
3307+
NumberedVals.resize(ArgNum);
32723308
NumberedVals.push_back(&A);
3309+
}
3310+
}
32733311
}
32743312

32753313
LLParser::PerFunctionState::~PerFunctionState() {
@@ -3400,9 +3438,9 @@ bool LLParser::PerFunctionState::setInstName(int NameID,
34003438
if (NameID == -1)
34013439
NameID = NumberedVals.size();
34023440

3403-
if (unsigned(NameID) != NumberedVals.size())
3404-
return P.error(NameLoc, "instruction expected to be numbered '%" +
3405-
Twine(NumberedVals.size()) + "'");
3441+
if (P.checkValueID(NameLoc, "instruction", "%", NumberedVals.size(),
3442+
NameID))
3443+
return true;
34063444

34073445
auto FI = ForwardRefValIDs.find(NameID);
34083446
if (FI != ForwardRefValIDs.end()) {
@@ -3417,6 +3455,9 @@ bool LLParser::PerFunctionState::setInstName(int NameID,
34173455
ForwardRefValIDs.erase(FI);
34183456
}
34193457

3458+
// Fill in skipped IDs using nullptr.
3459+
NumberedVals.resize(unsigned(NameID));
3460+
34203461
NumberedVals.push_back(Inst);
34213462
return false;
34223463
}
@@ -3464,12 +3505,13 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name,
34643505
int NameID, LocTy Loc) {
34653506
BasicBlock *BB;
34663507
if (Name.empty()) {
3467-
if (NameID != -1 && unsigned(NameID) != NumberedVals.size()) {
3468-
P.error(Loc, "label expected to be numbered '" +
3469-
Twine(NumberedVals.size()) + "'");
3470-
return nullptr;
3508+
if (NameID != -1) {
3509+
if (P.checkValueID(Loc, "label", "", NumberedVals.size(), NameID))
3510+
return nullptr;
3511+
} else {
3512+
NameID = NumberedVals.size();
34713513
}
3472-
BB = getBB(NumberedVals.size(), Loc);
3514+
BB = getBB(NameID, Loc);
34733515
if (!BB) {
34743516
P.error(Loc, "unable to create block numbered '" +
34753517
Twine(NumberedVals.size()) + "'");
@@ -3489,7 +3531,8 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name,
34893531

34903532
// Remove the block from forward ref sets.
34913533
if (Name.empty()) {
3492-
ForwardRefValIDs.erase(NumberedVals.size());
3534+
ForwardRefValIDs.erase(NameID);
3535+
NumberedVals.resize(NameID);
34933536
NumberedVals.push_back(BB);
34943537
} else {
34953538
// BB forward references are already in the function symbol table.
@@ -5934,7 +5977,8 @@ bool LLParser::parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
59345977
/// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName
59355978
/// '(' ArgList ')' OptAddrSpace OptFuncAttrs OptSection OptionalAlign
59365979
/// OptGC OptionalPrefix OptionalPrologue OptPersonalityFn
5937-
bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
5980+
bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine,
5981+
SmallVectorImpl<unsigned> &UnnamedArgNums) {
59385982
// parse the linkage.
59395983
LocTy LinkageLoc = Lex.getLoc();
59405984
unsigned Linkage;
@@ -6022,7 +6066,7 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
60226066
Constant *PersonalityFn = nullptr;
60236067
Comdat *C;
60246068

6025-
if (parseArgumentList(ArgList, IsVarArg) ||
6069+
if (parseArgumentList(ArgList, UnnamedArgNums, IsVarArg) ||
60266070
parseOptionalUnnamedAddr(UnnamedAddr) ||
60276071
parseOptionalProgramAddrSpace(AddrSpace) ||
60286072
parseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
@@ -6217,15 +6261,16 @@ bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() {
62176261

62186262
/// parseFunctionBody
62196263
/// ::= '{' BasicBlock+ UseListOrderDirective* '}'
6220-
bool LLParser::parseFunctionBody(Function &Fn) {
6264+
bool LLParser::parseFunctionBody(Function &Fn,
6265+
ArrayRef<unsigned> UnnamedArgNums) {
62216266
if (Lex.getKind() != lltok::lbrace)
62226267
return tokError("expected '{' in function body");
62236268
Lex.Lex(); // eat the {.
62246269

62256270
int FunctionNumber = -1;
62266271
if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
62276272

6228-
PerFunctionState PFS(*this, Fn, FunctionNumber);
6273+
PerFunctionState PFS(*this, Fn, FunctionNumber, UnnamedArgNums);
62296274

62306275
// Resolve block addresses and allow basic blocks to be forward-declared
62316276
// within this function.

llvm/test/Assembler/call-nonzero-program-addrspace-2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
; Check that numbered variables in a nonzero program address space 200 can be used in a call instruction
55

6-
define i8 @test_unnamed(ptr, ptr addrspace(42) %0) {
6+
define i8 @test_unnamed(ptr, ptr addrspace(42) %1) {
77
; Calls with explicit address spaces are fine:
88
call addrspace(0) i8 %0(i32 0)
99
call addrspace(42) i8 %1(i32 0)

llvm/test/Assembler/invalid-arg-num-1.ll

Lines changed: 0 additions & 6 deletions
This file was deleted.

llvm/test/Assembler/invalid-arg-num-2.ll

Lines changed: 0 additions & 6 deletions
This file was deleted.

llvm/test/Assembler/invalid-arg-num-3.ll

Lines changed: 0 additions & 6 deletions
This file was deleted.

llvm/test/Assembler/invalid-block-label-num.ll

Lines changed: 0 additions & 7 deletions
This file was deleted.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; RUN: split-file %s %t
2+
; RUN: not llvm-as < %s %t/instr_smaller_id.ll 2>&1 | FileCheck %s --check-prefix=INSTR-SMALLER-ID
3+
; RUN: not llvm-as < %s %t/instr_too_large_skip.ll 2>&1 | FileCheck %s --check-prefix=INSTR-TOO-LARGE-SKIP
4+
; RUN: not llvm-as < %s %t/arg_smaller_id.ll 2>&1 | FileCheck %s --check-prefix=ARG-SMALLER-ID
5+
; RUN: not llvm-as < %s %t/arg_too_large_skip.ll 2>&1 | FileCheck %s --check-prefix=ARG-TOO-LARGE-SKIP
6+
; RUN: not llvm-as < %s %t/block_smaller_id.ll 2>&1 | FileCheck %s --check-prefix=BLOCK-SMALLER-ID
7+
; RUN: not llvm-as < %s %t/block_too_large_skip.ll 2>&1 | FileCheck %s --check-prefix=BLOCK-TOO-LARGE-SKIP
8+
9+
;--- instr_smaller_id.ll
10+
11+
; INSTR-SMALLER-ID: error: instruction expected to be numbered '%11' or greater
12+
define i32 @test() {
13+
%10 = add i32 1, 2
14+
%5 = add i32 %10, 3
15+
ret i32 %5
16+
}
17+
18+
;--- instr_too_large_skip.ll
19+
20+
; INSTR-TOO-LARGE-SKIP: error: value numbers can skip at most 1000 values
21+
define i32 @test() {
22+
%10 = add i32 1, 2
23+
%1000000 = add i32 %10, 3
24+
ret i32 %1000000
25+
}
26+
27+
;--- arg_smaller_id.ll
28+
29+
; ARG-SMALLER-ID: error: argument expected to be numbered '%11' or greater
30+
define i32 @test(i32 %10, i32 %5) {
31+
ret i32 %5
32+
}
33+
34+
;--- arg_too_large_skip.ll
35+
36+
; ARG-TOO-LARGE-SKIP: error: value numbers can skip at most 1000 values
37+
define i32 @test(i32 %10, i32 %1000000) {
38+
ret i32 %1000000
39+
}
40+
41+
;--- block_smaller_id.ll
42+
43+
; BLOCK-SMALLER-ID: error: label expected to be numbered '11' or greater
44+
define i32 @test() {
45+
10:
46+
br label %5
47+
48+
5:
49+
ret i32 0
50+
}
51+
52+
;--- block_too_large_skip.ll
53+
54+
; BLOCK-TOO-LARGE-SKIP: error: value numbers can skip at most 1000 values
55+
define i32 @test() {
56+
10:
57+
br label %1000000
58+
59+
1000000:
60+
ret i32 0
61+
}

0 commit comments

Comments
 (0)