Skip to content

Commit 85cb6cf

Browse files
committed
[SPIR-V] Fix structurizer issues
The "topological" sorting was behaving incorrectly in some cases: the exit of a loop could have a lower rank than a node in the loop. This causes issues when structurizing some patterns, and also codegen issues as we could generate BBs in the incorrect order in regard to the SPIR-V spec. Fixing this ordering alone broke other parts of the structurizer, which by luck worked. Had to fix those. Added more test cases, especially to test basic patterns. I also needed to tweak/disable some tests for 2 reasons: - SPIR-V now required reg2mem/mem2reg to run. Meaning dead stores are optimized away. Some tests require tweaks to avoid having the whole function removed. - Mem2Reg will generate variable & load/stores. This generates G_BITCAST in several cases. And there is currently something wrong we do with G_BITCAST which causes MIR verifier to complain. Until this is resolved, I disabled -verify-machineinstrs flag on those tests. Signed-off-by: Nathan Gauër <[email protected]>
1 parent 7f7deaf commit 85cb6cf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+1140
-685
lines changed

llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
130130
assert(false && "Unhandled terminator type.");
131131
}
132132

133+
AllocaInst *CreateVariable(Function &F, Type *Type,
134+
BasicBlock::iterator Position) {
135+
const DataLayout &DL = F.getDataLayout();
136+
return new AllocaInst(Type, DL.getAllocaAddrSpace(), nullptr, "reg",
137+
Position);
138+
}
139+
133140
// Run the pass on the given convergence region, ignoring the sub-regions.
134141
// Returns true if the CFG changed, false otherwise.
135142
bool runOnConvergenceRegionNoRecurse(LoopInfo &LI,
@@ -152,6 +159,9 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
152159
auto NewExitTarget = BasicBlock::Create(F->getContext(), "new.exit", F);
153160
IRBuilder<> Builder(NewExitTarget);
154161

162+
AllocaInst *Variable = CreateVariable(*F, Builder.getInt32Ty(),
163+
F->begin()->getFirstInsertionPt());
164+
155165
// CodeGen output needs to be stable. Using the set as-is would order
156166
// the targets differently depending on the allocation pattern.
157167
// Sorting per basic-block ordering in the function.
@@ -176,18 +186,16 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
176186
std::vector<std::pair<BasicBlock *, Value *>> ExitToVariable;
177187
for (auto Exit : SortedExits) {
178188
llvm::Value *Value = createExitVariable(Exit, TargetToValue);
189+
IRBuilder<> B2(Exit);
190+
B2.SetInsertPoint(Exit->getFirstInsertionPt());
191+
B2.CreateStore(Value, Variable);
179192
ExitToVariable.emplace_back(std::make_pair(Exit, Value));
180193
}
181194

182-
// Gather the correct value depending on the exit we came from.
183-
llvm::PHINode *node =
184-
Builder.CreatePHI(Builder.getInt32Ty(), ExitToVariable.size());
185-
for (auto [BB, Value] : ExitToVariable) {
186-
node->addIncoming(Value, BB);
187-
}
195+
llvm::Value *Load = Builder.CreateLoad(Builder.getInt32Ty(), Variable);
188196

189197
// Creating the switch to jump to the correct exit target.
190-
llvm::SwitchInst *Sw = Builder.CreateSwitch(node, SortedExitTargets[0],
198+
llvm::SwitchInst *Sw = Builder.CreateSwitch(Load, SortedExitTargets[0],
191199
SortedExitTargets.size() - 1);
192200
for (size_t i = 1; i < SortedExitTargets.size(); i++) {
193201
BasicBlock *BB = SortedExitTargets[i];

llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp

Lines changed: 55 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ BasicBlock *getExitFor(const ConvergenceRegion *CR) {
8787
// Returns the merge block designated by I if I is a merge instruction, nullptr
8888
// otherwise.
8989
BasicBlock *getDesignatedMergeBlock(Instruction *I) {
90+
if (I == nullptr)
91+
return nullptr;
9092
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
9193
if (II == nullptr)
9294
return nullptr;
@@ -102,6 +104,8 @@ BasicBlock *getDesignatedMergeBlock(Instruction *I) {
102104
// Returns the continue block designated by I if I is an OpLoopMerge, nullptr
103105
// otherwise.
104106
BasicBlock *getDesignatedContinueBlock(Instruction *I) {
107+
if (I == nullptr)
108+
return nullptr;
105109
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
106110
if (II == nullptr)
107111
return nullptr;
@@ -447,55 +451,52 @@ class SPIRVStructurizer : public FunctionPass {
447451
// clang-format on
448452
std::vector<Edge>
449453
createAliasBlocksForComplexEdges(std::vector<Edge> Edges) {
450-
std::unordered_map<BasicBlock *, BasicBlock *> Seen;
454+
std::unordered_set<BasicBlock *> Seen;
451455
std::vector<Edge> Output;
452456
Output.reserve(Edges.size());
453457

454458
for (auto &[Src, Dst] : Edges) {
455-
auto [iterator, inserted] = Seen.insert({Src, Dst});
456-
if (inserted) {
457-
Output.emplace_back(Src, Dst);
458-
continue;
459+
auto [iterator, inserted] = Seen.insert(Src);
460+
if (!inserted) {
461+
// Src already a source node. Cannot have 2 edges from A to B.
462+
// Creating alias source block.
463+
BasicBlock *NewSrc =
464+
BasicBlock::Create(F.getContext(), "new.src", &F);
465+
replaceBranchTargets(Src, Dst, NewSrc);
466+
// replacePhiTargets(Dst, Src, NewSrc);
467+
IRBuilder<> Builder(NewSrc);
468+
Builder.CreateBr(Dst);
469+
Src = NewSrc;
459470
}
460471

461-
// The exact same edge was already seen. Ignoring.
462-
if (iterator->second == Dst)
463-
continue;
464-
465-
// The same Src block branches to 2 distinct blocks. This will be an
466-
// issue for the generated OpPhi. Creating alias block.
467-
BasicBlock *NewSrc =
468-
BasicBlock::Create(F.getContext(), "new.exit.src", &F);
469-
replaceBranchTargets(Src, Dst, NewSrc);
470-
replacePhiTargets(Dst, Src, NewSrc);
471-
472-
IRBuilder<> Builder(NewSrc);
473-
Builder.CreateBr(Dst);
474-
475-
Seen.emplace(NewSrc, Dst);
476-
Output.emplace_back(NewSrc, Dst);
472+
Output.emplace_back(Src, Dst);
477473
}
478474

479475
return Output;
480476
}
481477

478+
AllocaInst *CreateVariable(Function &F, Type *Type,
479+
BasicBlock::iterator Position) {
480+
const DataLayout &DL = F.getDataLayout();
481+
return new AllocaInst(Type, DL.getAllocaAddrSpace(), nullptr, "reg",
482+
Position);
483+
}
484+
482485
// Given a construct defined by |Header|, and a list of exiting edges
483486
// |Edges|, creates a new single exit node, fixing up those edges.
484487
BasicBlock *createSingleExitNode(BasicBlock *Header,
485488
std::vector<Edge> &Edges) {
486-
auto NewExit = BasicBlock::Create(F.getContext(), "new.exit", &F);
487-
IRBuilder<> ExitBuilder(NewExit);
488-
489-
std::vector<BasicBlock *> Dsts;
490-
std::unordered_map<BasicBlock *, ConstantInt *> DstToIndex;
491-
492489
// Given 2 edges: Src1 -> Dst, Src2 -> Dst:
493490
// If Dst has an PHI node, and Src1 and Src2 are both operands, both Src1
494491
// and Src2 cannot be hidden by NewExit. Create 2 new nodes: Alias1,
495492
// Alias2 to which NewExit will branch before going to Dst. Then, patchup
496493
// Dst PHI node to look for Alias1 and Alias2.
497494
std::vector<Edge> FixedEdges = createAliasBlocksForComplexEdges(Edges);
498495

496+
std::vector<BasicBlock *> Dsts;
497+
std::unordered_map<BasicBlock *, ConstantInt *> DstToIndex;
498+
auto NewExit = BasicBlock::Create(F.getContext(), "new.exit", &F);
499+
IRBuilder<> ExitBuilder(NewExit);
499500
for (auto &[Src, Dst] : FixedEdges) {
500501
if (DstToIndex.count(Dst) != 0)
501502
continue;
@@ -506,33 +507,38 @@ class SPIRVStructurizer : public FunctionPass {
506507
if (Dsts.size() == 1) {
507508
for (auto &[Src, Dst] : FixedEdges) {
508509
replaceBranchTargets(Src, Dst, NewExit);
509-
replacePhiTargets(Dst, Src, NewExit);
510+
// replacePhiTargets(Dst, Src, NewExit);
510511
}
511512
ExitBuilder.CreateBr(Dsts[0]);
512513
return NewExit;
513514
}
514515

515-
PHINode *PhiNode =
516-
ExitBuilder.CreatePHI(ExitBuilder.getInt32Ty(), FixedEdges.size());
516+
AllocaInst *Variable = CreateVariable(F, ExitBuilder.getInt32Ty(),
517+
F.begin()->getFirstInsertionPt());
518+
// PHINode *PhiNode = ExitBuilder.CreatePHI(ExitBuilder.getInt32Ty(),
519+
// FixedEdges.size());
517520

518521
for (auto &[Src, Dst] : FixedEdges) {
519-
PhiNode->addIncoming(DstToIndex[Dst], Src);
522+
IRBuilder<> B2(Src);
523+
B2.SetInsertPoint(Src->getFirstInsertionPt());
524+
B2.CreateStore(DstToIndex[Dst], Variable);
520525
replaceBranchTargets(Src, Dst, NewExit);
521-
replacePhiTargets(Dst, Src, NewExit);
522526
}
523527

528+
llvm::Value *Load =
529+
ExitBuilder.CreateLoad(ExitBuilder.getInt32Ty(), Variable);
530+
524531
// If we can avoid an OpSwitch, generate an OpBranch. Reason is some
525532
// OpBranch are allowed to exist without a new OpSelectionMerge if one of
526533
// the branch is the parent's merge node, while OpSwitches are not.
527534
if (Dsts.size() == 2) {
528-
Value *Condition = ExitBuilder.CreateCmp(CmpInst::ICMP_EQ,
529-
DstToIndex[Dsts[0]], PhiNode);
535+
Value *Condition =
536+
ExitBuilder.CreateCmp(CmpInst::ICMP_EQ, DstToIndex[Dsts[0]], Load);
530537
ExitBuilder.CreateCondBr(Condition, Dsts[0], Dsts[1]);
531538
return NewExit;
532539
}
533540

534-
SwitchInst *Sw =
535-
ExitBuilder.CreateSwitch(PhiNode, Dsts[0], Dsts.size() - 1);
541+
SwitchInst *Sw = ExitBuilder.CreateSwitch(Load, Dsts[0], Dsts.size() - 1);
536542
for (auto It = Dsts.begin() + 1; It != Dsts.end(); ++It) {
537543
Sw->addCase(DstToIndex[*It], *It);
538544
}
@@ -576,7 +582,7 @@ class SPIRVStructurizer : public FunctionPass {
576582

577583
// Creates a new basic block in F with a single OpUnreachable instruction.
578584
BasicBlock *CreateUnreachable(Function &F) {
579-
BasicBlock *BB = BasicBlock::Create(F.getContext(), "new.exit", &F);
585+
BasicBlock *BB = BasicBlock::Create(F.getContext(), "unreachable", &F);
580586
IRBuilder<> Builder(BB);
581587
Builder.CreateUnreachable();
582588
return BB;
@@ -1127,6 +1133,18 @@ class SPIRVStructurizer : public FunctionPass {
11271133
continue;
11281134

11291135
Modified = true;
1136+
1137+
if (Merge == nullptr) {
1138+
Merge = *successors(Header).begin();
1139+
IRBuilder<> Builder(Header);
1140+
Builder.SetInsertPoint(Header->getTerminator());
1141+
1142+
auto MergeAddress = BlockAddress::get(Merge->getParent(), Merge);
1143+
SmallVector<Value *, 1> Args = {MergeAddress};
1144+
Builder.CreateIntrinsic(Intrinsic::spv_selection_merge, {}, {Args});
1145+
continue;
1146+
}
1147+
11301148
Instruction *SplitInstruction = Merge->getTerminator();
11311149
if (isMergeInstruction(SplitInstruction->getPrevNode()))
11321150
SplitInstruction = SplitInstruction->getPrevNode();

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/MC/TargetRegistry.h"
3030
#include "llvm/Pass.h"
3131
#include "llvm/Target/TargetOptions.h"
32+
#include "llvm/Transforms/Scalar/Reg2Mem.h"
3233
#include "llvm/Transforms/Utils.h"
3334
#include <optional>
3435

@@ -162,20 +163,30 @@ void SPIRVPassConfig::addIRPasses() {
162163
TargetPassConfig::addIRPasses();
163164

164165
if (TM.getSubtargetImpl()->isVulkanEnv()) {
166+
addPass(createRegToMemWrapperPass());
167+
165168
// 1. Simplify loop for subsequent transformations. After this steps, loops
166169
// have the following properties:
167170
// - loops have a single entry edge (pre-header to loop header).
168171
// - all loop exits are dominated by the loop pre-header.
169172
// - loops have a single back-edge.
170173
addPass(createLoopSimplifyPass());
171174

172-
// 2. Merge the convergence region exit nodes into one. After this step,
175+
// 2. Removes registers whose lifetime spans across basic blocks. Also
176+
// removes phi nodes. This will greatly simplify the next steps.
177+
addPass(createRegToMemWrapperPass());
178+
179+
// 3. Merge the convergence region exit nodes into one. After this step,
173180
// regions are single-entry, single-exit. This will help determine the
174181
// correct merge block.
175182
addPass(createSPIRVMergeRegionExitTargetsPass());
176183

177-
// 3. Structurize.
184+
// 4. Structurize.
178185
addPass(createSPIRVStructurizerPass());
186+
187+
// 5. Reduce the amount of variables required by pushing some operations
188+
// back to virtual registers.
189+
addPass(createPromoteMemoryToRegisterPass());
179190
}
180191

181192
addPass(createSPIRVRegularizerPass());

llvm/lib/Target/SPIRV/SPIRVUtils.cpp

Lines changed: 77 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -460,53 +460,98 @@ PartialOrderingVisitor::getReachableFrom(BasicBlock *Start) {
460460
return Output;
461461
}
462462

463-
size_t PartialOrderingVisitor::visit(BasicBlock *BB, size_t Rank) {
464-
if (Visited.count(BB) != 0)
465-
return Rank;
463+
bool PartialOrderingVisitor::CanBeVisited(BasicBlock *BB) const {
464+
for (BasicBlock *P : predecessors(BB)) {
465+
// Ignore back-edges.
466+
if (DT.dominates(BB, P))
467+
continue;
466468

467-
Loop *L = LI.getLoopFor(BB);
468-
const bool isLoopHeader = LI.isLoopHeader(BB);
469+
// One of the predecessor hasn't been visited. Not ready yet.
470+
if (BlockToOrder.count(P) == 0)
471+
return false;
469472

470-
if (BlockToOrder.count(BB) == 0) {
471-
OrderInfo Info = {Rank, Visited.size()};
472-
BlockToOrder.emplace(BB, Info);
473-
} else {
474-
BlockToOrder[BB].Rank = std::max(BlockToOrder[BB].Rank, Rank);
473+
// If the block is a loop exit, the loop must be finished before
474+
// we can continue.
475+
Loop *L = LI.getLoopFor(P);
476+
if (L == nullptr || L->contains(BB))
477+
continue;
478+
479+
// SPIR-V requires a single back-edge. And the backend first
480+
// step transforms loops into the simplified format. If we have
481+
// more than 1 back-edge, something is wrong.
482+
assert(L->getNumBackEdges() <= 1);
483+
484+
// If the loop has no latch, loop's rank won't matter, so we can
485+
// proceed.
486+
BasicBlock *Latch = L->getLoopLatch();
487+
assert(Latch);
488+
if (Latch == nullptr)
489+
continue;
490+
491+
// The latch is not ready yet, let's wait.
492+
if (BlockToOrder.count(Latch) == 0)
493+
return false;
475494
}
476495

477-
for (BasicBlock *Predecessor : predecessors(BB)) {
478-
if (isLoopHeader && L->contains(Predecessor)) {
496+
return true;
497+
}
498+
499+
size_t PartialOrderingVisitor::GetNodeRank(BasicBlock *BB) const {
500+
size_t result = 0;
501+
502+
for (BasicBlock *P : predecessors(BB)) {
503+
// Ignore back-edges.
504+
if (DT.dominates(BB, P))
479505
continue;
480-
}
481506

482-
if (BlockToOrder.count(Predecessor) == 0) {
483-
return Rank;
507+
auto Iterator = BlockToOrder.end();
508+
Loop *L = LI.getLoopFor(P);
509+
BasicBlock *Latch = L ? L->getLoopLatch() : nullptr;
510+
511+
// If the predecessor is either outside a loop, or part of
512+
// the same loop, simply take its rank + 1.
513+
if (L == nullptr || L->contains(BB) || Latch == nullptr) {
514+
Iterator = BlockToOrder.find(P);
515+
} else {
516+
// Otherwise, take the loop's rank (highest rank in the loop) as base.
517+
// Since loops have a single latch, highest rank is easy to find.
518+
// If the loop has no latch, then it doesn't matter.
519+
Iterator = BlockToOrder.find(Latch);
484520
}
521+
522+
assert(Iterator != BlockToOrder.end());
523+
result = std::max(result, Iterator->second.Rank + 1);
485524
}
486525

487-
Visited.insert(BB);
526+
return result;
527+
}
528+
529+
size_t PartialOrderingVisitor::visit(BasicBlock *BB, size_t Unused) {
530+
ToVisit.push(BB);
531+
Queued.insert(BB);
488532

489-
SmallVector<BasicBlock *, 2> OtherSuccessors;
490-
SmallVector<BasicBlock *, 2> LoopSuccessors;
533+
while (ToVisit.size() != 0) {
534+
BasicBlock *BB = ToVisit.front();
535+
ToVisit.pop();
491536

492-
for (BasicBlock *Successor : successors(BB)) {
493-
// Ignoring back-edges.
494-
if (DT.dominates(Successor, BB))
537+
if (!CanBeVisited(BB)) {
538+
ToVisit.push(BB);
495539
continue;
540+
}
496541

497-
if (isLoopHeader && L->contains(Successor)) {
498-
LoopSuccessors.push_back(Successor);
499-
} else
500-
OtherSuccessors.push_back(Successor);
501-
}
542+
size_t Rank = GetNodeRank(BB);
543+
OrderInfo Info = {Rank, BlockToOrder.size()};
544+
BlockToOrder.emplace(BB, Info);
502545

503-
for (BasicBlock *BB : LoopSuccessors)
504-
Rank = std::max(Rank, visit(BB, Rank + 1));
546+
for (BasicBlock *S : successors(BB)) {
547+
if (Queued.count(S) != 0)
548+
continue;
549+
ToVisit.push(S);
550+
Queued.insert(S);
551+
}
552+
}
505553

506-
size_t OutputRank = Rank;
507-
for (BasicBlock *Item : OtherSuccessors)
508-
OutputRank = std::max(OutputRank, visit(Item, Rank + 1));
509-
return OutputRank;
554+
return 0;
510555
}
511556

512557
PartialOrderingVisitor::PartialOrderingVisitor(Function &F) {

0 commit comments

Comments
 (0)