Skip to content

Commit cba7055

Browse files
authored
[SPIR-V] Fix BB ordering & register lifetime (#111026)
The "topological" sorting was behaving incorrectly in some cases: the exit of a loop could have a lower rank than a node in the loop. This causes issues when structurizing some patterns, and also codegen issues as we could generate BBs in the incorrect order in regard to the SPIR-V spec. Fixing this ordering alone broke other parts of the structurizer, which by luck worked. Had to fix those. Added more test cases, especially to test basic patterns. I also needed to tweak/disable some tests for 2 reasons: - SPIR-V now required reg2mem/mem2reg to run. Meaning dead stores are optimized away. Some tests require tweaks to avoid having the whole function removed. - Mem2Reg will generate variable & load/stores. This generates G_BITCAST in several cases. And there is currently something wrong we do with G_BITCAST which causes MIR verifier to complain. Until this is resolved, I disabled -verify-machineinstrs flag on those tests. --------- Signed-off-by: Nathan Gauër <[email protected]>
1 parent 8d406d8 commit cba7055

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+1143
-733
lines changed

llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
130130
assert(false && "Unhandled terminator type.");
131131
}
132132

133+
AllocaInst *CreateVariable(Function &F, Type *Type,
134+
BasicBlock::iterator Position) {
135+
const DataLayout &DL = F.getDataLayout();
136+
return new AllocaInst(Type, DL.getAllocaAddrSpace(), nullptr, "reg",
137+
Position);
138+
}
139+
133140
// Run the pass on the given convergence region, ignoring the sub-regions.
134141
// Returns true if the CFG changed, false otherwise.
135142
bool runOnConvergenceRegionNoRecurse(LoopInfo &LI,
@@ -152,6 +159,9 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
152159
auto NewExitTarget = BasicBlock::Create(F->getContext(), "new.exit", F);
153160
IRBuilder<> Builder(NewExitTarget);
154161

162+
AllocaInst *Variable = CreateVariable(*F, Builder.getInt32Ty(),
163+
F->begin()->getFirstInsertionPt());
164+
155165
// CodeGen output needs to be stable. Using the set as-is would order
156166
// the targets differently depending on the allocation pattern.
157167
// Sorting per basic-block ordering in the function.
@@ -176,18 +186,16 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
176186
std::vector<std::pair<BasicBlock *, Value *>> ExitToVariable;
177187
for (auto Exit : SortedExits) {
178188
llvm::Value *Value = createExitVariable(Exit, TargetToValue);
189+
IRBuilder<> B2(Exit);
190+
B2.SetInsertPoint(Exit->getFirstInsertionPt());
191+
B2.CreateStore(Value, Variable);
179192
ExitToVariable.emplace_back(std::make_pair(Exit, Value));
180193
}
181194

182-
// Gather the correct value depending on the exit we came from.
183-
llvm::PHINode *node =
184-
Builder.CreatePHI(Builder.getInt32Ty(), ExitToVariable.size());
185-
for (auto [BB, Value] : ExitToVariable) {
186-
node->addIncoming(Value, BB);
187-
}
195+
llvm::Value *Load = Builder.CreateLoad(Builder.getInt32Ty(), Variable);
188196

189197
// Creating the switch to jump to the correct exit target.
190-
llvm::SwitchInst *Sw = Builder.CreateSwitch(node, SortedExitTargets[0],
198+
llvm::SwitchInst *Sw = Builder.CreateSwitch(Load, SortedExitTargets[0],
191199
SortedExitTargets.size() - 1);
192200
for (size_t i = 1; i < SortedExitTargets.size(); i++) {
193201
BasicBlock *BB = SortedExitTargets[i];

llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp

Lines changed: 56 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ BasicBlock *getExitFor(const ConvergenceRegion *CR) {
8787
// Returns the merge block designated by I if I is a merge instruction, nullptr
8888
// otherwise.
8989
BasicBlock *getDesignatedMergeBlock(Instruction *I) {
90-
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
90+
IntrinsicInst *II = dyn_cast_or_null<IntrinsicInst>(I);
9191
if (II == nullptr)
9292
return nullptr;
9393

@@ -102,7 +102,7 @@ BasicBlock *getDesignatedMergeBlock(Instruction *I) {
102102
// Returns the continue block designated by I if I is an OpLoopMerge, nullptr
103103
// otherwise.
104104
BasicBlock *getDesignatedContinueBlock(Instruction *I) {
105-
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
105+
IntrinsicInst *II = dyn_cast_or_null<IntrinsicInst>(I);
106106
if (II == nullptr)
107107
return nullptr;
108108

@@ -284,18 +284,6 @@ void replaceBranchTargets(BasicBlock *BB, BasicBlock *OldTarget,
284284
assert(false && "Unhandled terminator type.");
285285
}
286286

287-
// Replaces basic bloc operands |OldSrc| or OpPhi instructions in |BB| by
288-
// |NewSrc|. This function does not simplify the OpPhi instruction once
289-
// transformed.
290-
void replacePhiTargets(BasicBlock *BB, BasicBlock *OldSrc, BasicBlock *NewSrc) {
291-
for (PHINode &Phi : BB->phis()) {
292-
int index = Phi.getBasicBlockIndex(OldSrc);
293-
if (index == -1)
294-
continue;
295-
Phi.setIncomingBlock(index, NewSrc);
296-
}
297-
}
298-
299287
} // anonymous namespace
300288

301289
// Given a reducible CFG, produces a structurized CFG in the SPIR-V sense,
@@ -423,7 +411,7 @@ class SPIRVStructurizer : public FunctionPass {
423411
}
424412

425413
// Splits the given edges by recreating proxy nodes so that the destination
426-
// OpPhi instruction can still be viable.
414+
// has unique incoming edges from this region.
427415
//
428416
// clang-format off
429417
//
@@ -436,66 +424,58 @@ class SPIRVStructurizer : public FunctionPass {
436424
// A -> D -> C
437425
// B -> D -> C
438426
//
439-
// But if C had a phi node, adding such proxy-block breaks it. In such case, we must add 1 new block per
440-
// exit, and patchup the phi node:
427+
// This is fine (assuming C has no PHI nodes), but requires handling the merge instruction here.
428+
// By adding a proxy node, we create a regular divergent shape which can easily be regularized later on.
441429
// A -> D -> D1 -> C
442430
// B -> D -> D2 -> C
443431
//
444-
// A, B, D belongs to the construct. D is the exit. D1 and D2 are empty, just used as
445-
// source operands for C's phi node.
432+
// A, B, D belongs to the construct. D is the exit. D1 and D2 are empty.
446433
//
447434
// clang-format on
448435
std::vector<Edge>
449436
createAliasBlocksForComplexEdges(std::vector<Edge> Edges) {
450-
std::unordered_map<BasicBlock *, BasicBlock *> Seen;
437+
std::unordered_set<BasicBlock *> Seen;
451438
std::vector<Edge> Output;
452439
Output.reserve(Edges.size());
453440

454441
for (auto &[Src, Dst] : Edges) {
455-
auto [iterator, inserted] = Seen.insert({Src, Dst});
456-
if (inserted) {
457-
Output.emplace_back(Src, Dst);
458-
continue;
442+
auto [Iterator, Inserted] = Seen.insert(Src);
443+
if (!Inserted) {
444+
// Src already a source node. Cannot have 2 edges from A to B.
445+
// Creating alias source block.
446+
BasicBlock *NewSrc = BasicBlock::Create(
447+
F.getContext(), Src->getName() + ".new.src", &F);
448+
replaceBranchTargets(Src, Dst, NewSrc);
449+
IRBuilder<> Builder(NewSrc);
450+
Builder.CreateBr(Dst);
451+
Src = NewSrc;
459452
}
460453

461-
// The exact same edge was already seen. Ignoring.
462-
if (iterator->second == Dst)
463-
continue;
464-
465-
// The same Src block branches to 2 distinct blocks. This will be an
466-
// issue for the generated OpPhi. Creating alias block.
467-
BasicBlock *NewSrc =
468-
BasicBlock::Create(F.getContext(), "new.exit.src", &F);
469-
replaceBranchTargets(Src, Dst, NewSrc);
470-
replacePhiTargets(Dst, Src, NewSrc);
471-
472-
IRBuilder<> Builder(NewSrc);
473-
Builder.CreateBr(Dst);
474-
475-
Seen.emplace(NewSrc, Dst);
476-
Output.emplace_back(NewSrc, Dst);
454+
Output.emplace_back(Src, Dst);
477455
}
478456

479457
return Output;
480458
}
481459

460+
AllocaInst *CreateVariable(Function &F, Type *Type,
461+
BasicBlock::iterator Position) {
462+
const DataLayout &DL = F.getDataLayout();
463+
return new AllocaInst(Type, DL.getAllocaAddrSpace(), nullptr, "reg",
464+
Position);
465+
}
466+
482467
// Given a construct defined by |Header|, and a list of exiting edges
483468
// |Edges|, creates a new single exit node, fixing up those edges.
484469
BasicBlock *createSingleExitNode(BasicBlock *Header,
485470
std::vector<Edge> &Edges) {
486-
auto NewExit = BasicBlock::Create(F.getContext(), "new.exit", &F);
487-
IRBuilder<> ExitBuilder(NewExit);
488471

489-
std::vector<BasicBlock *> Dsts;
490-
std::unordered_map<BasicBlock *, ConstantInt *> DstToIndex;
491-
492-
// Given 2 edges: Src1 -> Dst, Src2 -> Dst:
493-
// If Dst has an PHI node, and Src1 and Src2 are both operands, both Src1
494-
// and Src2 cannot be hidden by NewExit. Create 2 new nodes: Alias1,
495-
// Alias2 to which NewExit will branch before going to Dst. Then, patchup
496-
// Dst PHI node to look for Alias1 and Alias2.
497472
std::vector<Edge> FixedEdges = createAliasBlocksForComplexEdges(Edges);
498473

474+
std::vector<BasicBlock *> Dsts;
475+
std::unordered_map<BasicBlock *, ConstantInt *> DstToIndex;
476+
auto NewExit = BasicBlock::Create(F.getContext(),
477+
Header->getName() + ".new.exit", &F);
478+
IRBuilder<> ExitBuilder(NewExit);
499479
for (auto &[Src, Dst] : FixedEdges) {
500480
if (DstToIndex.count(Dst) != 0)
501481
continue;
@@ -506,33 +486,34 @@ class SPIRVStructurizer : public FunctionPass {
506486
if (Dsts.size() == 1) {
507487
for (auto &[Src, Dst] : FixedEdges) {
508488
replaceBranchTargets(Src, Dst, NewExit);
509-
replacePhiTargets(Dst, Src, NewExit);
510489
}
511490
ExitBuilder.CreateBr(Dsts[0]);
512491
return NewExit;
513492
}
514493

515-
PHINode *PhiNode =
516-
ExitBuilder.CreatePHI(ExitBuilder.getInt32Ty(), FixedEdges.size());
517-
494+
AllocaInst *Variable = CreateVariable(F, ExitBuilder.getInt32Ty(),
495+
F.begin()->getFirstInsertionPt());
518496
for (auto &[Src, Dst] : FixedEdges) {
519-
PhiNode->addIncoming(DstToIndex[Dst], Src);
497+
IRBuilder<> B2(Src);
498+
B2.SetInsertPoint(Src->getFirstInsertionPt());
499+
B2.CreateStore(DstToIndex[Dst], Variable);
520500
replaceBranchTargets(Src, Dst, NewExit);
521-
replacePhiTargets(Dst, Src, NewExit);
522501
}
523502

503+
llvm::Value *Load =
504+
ExitBuilder.CreateLoad(ExitBuilder.getInt32Ty(), Variable);
505+
524506
// If we can avoid an OpSwitch, generate an OpBranch. Reason is some
525507
// OpBranch are allowed to exist without a new OpSelectionMerge if one of
526508
// the branch is the parent's merge node, while OpSwitches are not.
527509
if (Dsts.size() == 2) {
528-
Value *Condition = ExitBuilder.CreateCmp(CmpInst::ICMP_EQ,
529-
DstToIndex[Dsts[0]], PhiNode);
510+
Value *Condition =
511+
ExitBuilder.CreateCmp(CmpInst::ICMP_EQ, DstToIndex[Dsts[0]], Load);
530512
ExitBuilder.CreateCondBr(Condition, Dsts[0], Dsts[1]);
531513
return NewExit;
532514
}
533515

534-
SwitchInst *Sw =
535-
ExitBuilder.CreateSwitch(PhiNode, Dsts[0], Dsts.size() - 1);
516+
SwitchInst *Sw = ExitBuilder.CreateSwitch(Load, Dsts[0], Dsts.size() - 1);
536517
for (auto It = Dsts.begin() + 1; It != Dsts.end(); ++It) {
537518
Sw->addCase(DstToIndex[*It], *It);
538519
}
@@ -576,7 +557,7 @@ class SPIRVStructurizer : public FunctionPass {
576557

577558
// Creates a new basic block in F with a single OpUnreachable instruction.
578559
BasicBlock *CreateUnreachable(Function &F) {
579-
BasicBlock *BB = BasicBlock::Create(F.getContext(), "new.exit", &F);
560+
BasicBlock *BB = BasicBlock::Create(F.getContext(), "unreachable", &F);
580561
IRBuilder<> Builder(BB);
581562
Builder.CreateUnreachable();
582563
return BB;
@@ -1027,17 +1008,8 @@ class SPIRVStructurizer : public FunctionPass {
10271008
return Modified;
10281009
}
10291010

1030-
bool IsRequiredForPhiNode(BasicBlock *BB) {
1031-
for (BasicBlock *Successor : successors(BB)) {
1032-
for (PHINode &Phi : Successor->phis()) {
1033-
if (Phi.getBasicBlockIndex(BB) != -1)
1034-
return true;
1035-
}
1036-
}
1037-
1038-
return false;
1039-
}
1040-
1011+
// Removes blocks not contributing to any structured CFG. This assumes there
1012+
// is no PHI nodes.
10411013
bool removeUselessBlocks(Function &F) {
10421014
std::vector<BasicBlock *> ToRemove;
10431015

@@ -1054,9 +1026,6 @@ class SPIRVStructurizer : public FunctionPass {
10541026
if (MergeBlocks.count(&BB) != 0 || ContinueBlocks.count(&BB) != 0)
10551027
continue;
10561028

1057-
if (IsRequiredForPhiNode(&BB))
1058-
continue;
1059-
10601029
if (BB.getUniqueSuccessor() == nullptr)
10611030
continue;
10621031

@@ -1127,6 +1096,18 @@ class SPIRVStructurizer : public FunctionPass {
11271096
continue;
11281097

11291098
Modified = true;
1099+
1100+
if (Merge == nullptr) {
1101+
Merge = *successors(Header).begin();
1102+
IRBuilder<> Builder(Header);
1103+
Builder.SetInsertPoint(Header->getTerminator());
1104+
1105+
auto MergeAddress = BlockAddress::get(Merge->getParent(), Merge);
1106+
SmallVector<Value *, 1> Args = {MergeAddress};
1107+
Builder.CreateIntrinsic(Intrinsic::spv_selection_merge, {}, {Args});
1108+
continue;
1109+
}
1110+
11301111
Instruction *SplitInstruction = Merge->getTerminator();
11311112
if (isMergeInstruction(SplitInstruction->getPrevNode()))
11321113
SplitInstruction = SplitInstruction->getPrevNode();

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/MC/TargetRegistry.h"
3030
#include "llvm/Pass.h"
3131
#include "llvm/Target/TargetOptions.h"
32+
#include "llvm/Transforms/Scalar/Reg2Mem.h"
3233
#include "llvm/Transforms/Utils.h"
3334
#include <optional>
3435

@@ -169,13 +170,21 @@ void SPIRVPassConfig::addIRPasses() {
169170
// - loops have a single back-edge.
170171
addPass(createLoopSimplifyPass());
171172

172-
// 2. Merge the convergence region exit nodes into one. After this step,
173+
// 2. Removes registers whose lifetime spans across basic blocks. Also
174+
// removes phi nodes. This will greatly simplify the next steps.
175+
addPass(createRegToMemWrapperPass());
176+
177+
// 3. Merge the convergence region exit nodes into one. After this step,
173178
// regions are single-entry, single-exit. This will help determine the
174179
// correct merge block.
175180
addPass(createSPIRVMergeRegionExitTargetsPass());
176181

177-
// 3. Structurize.
182+
// 4. Structurize.
178183
addPass(createSPIRVStructurizerPass());
184+
185+
// 5. Reduce the amount of variables required by pushing some operations
186+
// back to virtual registers.
187+
addPass(createPromoteMemoryToRegisterPass());
179188
}
180189

181190
addPass(createSPIRVRegularizerPass());

0 commit comments

Comments
 (0)