-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SPIR-V] Add llvm.loop.unroll metadata lowering #132062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
.enable lowers to Unroll LoopControl; .disable lowers to DontUnroll LoopControl; .count lowers to PartialCount LoopControl; .full lowers to Unroll + PartialCount LoopControls. TODO: enable structurizer for non-vulkan targets. Signed-off-by: Sidorov, Dmitry <[email protected]>
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-spir-v Author: Dmitry Sidorov (MrSidims) Changes: .enable lowers to Unroll LoopControl. TODO: enable structurizer for non-vulkan targets. Full diff: https://github.com/llvm/llvm-project/pull/132062.diff 4 Files Affected:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index b188f36ca9a9e..ee93b9df3bc4e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2985,10 +2985,11 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_loop_merge: {
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpLoopMerge));
for (unsigned i = 1; i < I.getNumExplicitOperands(); ++i) {
- assert(I.getOperand(i).isMBB());
- MIB.addMBB(I.getOperand(i).getMBB());
+ if (I.getOperand(i).isMBB())
+ MIB.addMBB(I.getOperand(i).getMBB());
+ else
+ MIB.addImm(foldImm(I.getOperand(i), MRI));
}
- MIB.addImm(SPIRV::SelectionControl::None);
return MIB.constrainAllUses(TII, TRI, RBI);
}
case Intrinsic::spv_selection_merge: {
diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
index d20ea85f75909..8ba41b3c20702 100644
--- a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
@@ -611,6 +611,40 @@ class SPIRVStructurizer : public FunctionPass {
auto MergeAddress = BlockAddress::get(Merge->getParent(), Merge);
auto ContinueAddress = BlockAddress::get(Continue->getParent(), Continue);
SmallVector<Value *, 2> Args = {MergeAddress, ContinueAddress};
+ unsigned LC = SPIRV::LoopControl::None;
+ // Currently used only to store PartialCount value. Later when other
+ // LoopControls are added - this map should be sorted before making
+ // them loop_merge operands to satisfy 3.23. Loop Control requirements.
+ std::vector<std::pair<unsigned, unsigned>> MaskToValueMap;
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) {
+ LC |= SPIRV::LoopControl::DontUnroll;
+ } else {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) {
+ LC |= SPIRV::LoopControl::Unroll;
+ }
+ std::optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
+ if (Count && Count != 1) {
+ LC |= SPIRV::LoopControl::PartialCount;
+ MaskToValueMap.emplace_back(
+ std::make_pair(SPIRV::LoopControl::PartialCount, *Count));
+ }
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) {
+ // llvm.loop.unroll.full doesn't have a direct counterpart in SPIR-V,
+ // the closest thing we can do is to add Unroll mask and if the trip
+ // count is not known at compile time - either disable unrolling by
+ // setting PartialCount to 1 or reuse already available PartialCount.
+ LC |= SPIRV::LoopControl::Unroll;
+ if ((LC & SPIRV::LoopControl::PartialCount) == 0) {
+ LC |= SPIRV::LoopControl::PartialCount;
+ MaskToValueMap.emplace_back(
+ std::make_pair(SPIRV::LoopControl::PartialCount, 1));
+ }
+ }
+ }
+ Args.emplace_back(llvm::ConstantInt::get(Builder.getInt32Ty(), LC));
+ for (auto &[Mask, Val] : MaskToValueMap)
+ Args.emplace_back(llvm::ConstantInt::get(Builder.getInt32Ty(), Val));
Builder.CreateIntrinsic(Intrinsic::spv_loop_merge, {}, {Args});
Modified = true;
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 62b4b545f29bb..17a84757237bc 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -175,6 +175,8 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
// Create our unconditional branch.
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
NewBI->setDebugLoc(TI->getDebugLoc());
+ if (auto *LoopMD = TI->getMetadata(LLVMContext::MD_loop))
+ NewBI->setMetadata(LLVMContext::MD_loop, LoopMD);
// Insert the block into the function... right after the block TI lives in.
Function &F = *TIBB->getParent();
diff --git a/llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll b/llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll
new file mode 100644
index 0000000000000..296e7d3859f37
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll
@@ -0,0 +1,228 @@
+; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: OpName %[[#For:]] "for_loop"
+; CHECK-DAG: OpName %[[#While:]] "while_loop"
+; CHECK-DAG: OpName %[[#DoWhile:]] "do_while_loop"
+; CHECK-DAG: OpName %[[#Disable:]] "unroll_disable"
+; CHECK-DAG: OpName %[[#Count:]] "unroll_count"
+; CHECK-DAG: OpName %[[#Full:]] "unroll_full"
+; CHECK-DAG: OpName %[[#FullCount:]] "unroll_full_count"
+
+; CHECK: %[[#For]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll
+
+; CHECK: %[[#While]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll
+
+; CHECK: %[[#DoWhile]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll
+
+; CHECK: %[[#Disable]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] DontUnroll
+
+; CHECK: %[[#Count]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] PartialCount 4
+
+; CHECK: %[[#Full]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll|PartialCount 1
+
+; CHECK: %[[#FullCount]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll|PartialCount 4
+
+define dso_local void @for_loop(ptr noundef %0, i32 noundef %1) {
+ %3 = alloca ptr, align 8
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ store ptr %0, ptr %3, align 8
+ store i32 %1, ptr %4, align 4
+ store i32 0, ptr %5, align 4
+ br label %6
+
+6: ; preds = %15, %2
+ %7 = load i32, ptr %5, align 4
+ %8 = load i32, ptr %4, align 4
+ %9 = icmp slt i32 %7, %8
+ br i1 %9, label %10, label %18
+
+10: ; preds = %6
+ %11 = load i32, ptr %5, align 4
+ %12 = load ptr, ptr %3, align 8
+ %13 = load i32, ptr %12, align 4
+ %14 = add nsw i32 %13, %11
+ store i32 %14, ptr %12, align 4
+ br label %15
+
+15: ; preds = %10
+ %16 = load i32, ptr %5, align 4
+ %17 = add nsw i32 %16, 1
+ store i32 %17, ptr %5, align 4
+ br label %6, !llvm.loop !1
+
+18: ; preds = %6
+ ret void
+}
+
+define dso_local void @while_loop(ptr noundef %0, i32 noundef %1) {
+ %3 = alloca ptr, align 8
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ store ptr %0, ptr %3, align 8
+ store i32 %1, ptr %4, align 4
+ store i32 0, ptr %5, align 4
+ br label %6
+
+6: ; preds = %10, %2
+ %7 = load i32, ptr %5, align 4
+ %8 = load i32, ptr %4, align 4
+ %9 = icmp slt i32 %7, %8
+ br i1 %9, label %10, label %17
+
+10: ; preds = %6
+ %11 = load i32, ptr %5, align 4
+ %12 = load ptr, ptr %3, align 8
+ %13 = load i32, ptr %12, align 4
+ %14 = add nsw i32 %13, %11
+ store i32 %14, ptr %12, align 4
+ %15 = load i32, ptr %5, align 4
+ %16 = add nsw i32 %15, 1
+ store i32 %16, ptr %5, align 4
+ br label %6, !llvm.loop !3
+
+17: ; preds = %6
+ ret void
+}
+
+define dso_local void @do_while_loop(ptr noundef %0, i32 noundef %1) {
+ %3 = alloca ptr, align 8
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ store ptr %0, ptr %3, align 8
+ store i32 %1, ptr %4, align 4
+ store i32 0, ptr %5, align 4
+ br label %6
+
+6: ; preds = %13, %2
+ %7 = load i32, ptr %5, align 4
+ %8 = load ptr, ptr %3, align 8
+ %9 = load i32, ptr %8, align 4
+ %10 = add nsw i32 %9, %7
+ store i32 %10, ptr %8, align 4
+ %11 = load i32, ptr %5, align 4
+ %12 = add nsw i32 %11, 1
+ store i32 %12, ptr %5, align 4
+ br label %13
+
+13: ; preds = %6
+ %14 = load i32, ptr %5, align 4
+ %15 = load i32, ptr %4, align 4
+ %16 = icmp slt i32 %14, %15
+ br i1 %16, label %6, label %17, !llvm.loop !4
+
+17: ; preds = %13
+ ret void
+}
+
+define dso_local void @unroll_disable(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !5
+
+11: ; preds = %7
+ ret void
+}
+
+define dso_local void @unroll_count(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !7
+
+11: ; preds = %7
+ ret void
+}
+
+define dso_local void @unroll_full(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !9
+
+11: ; preds = %7
+ ret void
+}
+
+define dso_local void @unroll_full_count(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !11
+
+11: ; preds = %7
+ ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.unroll.enable"}
+!3 = distinct !{!3, !2}
+!4 = distinct !{!4, !2}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.unroll.disable"}
+!7 = distinct !{!7, !8}
+!8 = !{!"llvm.loop.unroll.count", i32 4}
+!9 = distinct !{!9, !10}
+!10 = !{!"llvm.loop.unroll.full"}
+!11 = distinct !{!11, !10, !8}
|
@@ -175,6 +175,8 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, | |||
// Create our unconditional branch. | |||
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); | |||
NewBI->setDebugLoc(TI->getDebugLoc()); | |||
if (auto *LoopMD = TI->getMetadata(LLVMContext::MD_loop)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally it should go into a separate PR, but the issue of not copying llvm.loop is exposed during a call to the reg2mem pass, which (if I understood correctly) is in a semi-deprecated state, and it's not trivial to test it in a stand-alone pull request.
ret void | ||
} | ||
|
||
define dso_local void @do_while_loop(ptr noundef %0, i32 noundef %1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For reference: this case exposed an issue in the reg2mem pass.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, looks good, just some comments!
LC |= SPIRV::LoopControl::PartialCount; | ||
MaskToValueMap.emplace_back( | ||
std::make_pair(SPIRV::LoopControl::PartialCount, 1)); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure I understand: why would `unroll.full` imply `unroll.count = 1`?
Isn't `unroll.full` the same as `LoopControl::Unroll`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The logic was that `llvm.loop.unroll.enable` implies that, if the trip count is not known at compile time, the loop can still be partially unrolled. Meanwhile, `llvm.loop.unroll.full` doesn't hint at any unrolling if the trip count is not known at compile time. So (as the SPIR-V spec doesn't distinguish between Unroll(enable) and Unroll(full)) `Unroll + PartialCount(1)` would mean to the backend: "please evaluate if it's possible to unroll the loop, and if not - do nothing with it". I followed this logic long ago in KhronosGroup/SPIRV-LLVM-Translator#1664.
Now, after some thinking, this logic seems incorrect to me: if the optimizer faces `Unroll + PartialCount(1)` loop controls, it should actually pick the conservative approach and not even try to unroll the loop. So I've removed this logic from the PR.
!1 = distinct !{!1, !2} | ||
!2 = !{!"llvm.loop.unroll.enable"} | ||
!3 = distinct !{!3, !2} | ||
!4 = distinct !{!4, !2} | ||
!5 = distinct !{!5, !6} | ||
!6 = !{!"llvm.loop.unroll.disable"} | ||
!7 = distinct !{!7, !8} | ||
!8 = !{!"llvm.loop.unroll.count", i32 4} | ||
!9 = distinct !{!9, !10} | ||
!10 = !{!"llvm.loop.unroll.full"} | ||
!11 = distinct !{!11, !10, !8} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it legal for LLVM to have both `unroll.disable` and `unroll.enable` set?
If yes, we should probably test that, no matter what, we pick one and emit valid SPIR-V.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it's legal. Added a test case.
unsigned LC = SPIRV::LoopControl::None; | ||
// Currently used only to store PartialCount value. Later when other | ||
// LoopControls are added - this map should be sorted before making | ||
// them loop_merge operands to satisfy 3.23. Loop Control requirements. | ||
std::vector<std::pair<unsigned, unsigned>> MaskToValueMap; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall this be wrapped in a utility function?
SmallVector<unsigned, 1> getSpirvLoopControlOperandsFromLoopMetadata(Loop *L);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Applied, thanks!
Signed-off-by: Sidorov, Dmitry <[email protected]>
@Keenuts @VyacheslavLevytskyy please take a look |
Sorry, I missed your reply to the last review. Thanks for working on this! |
.enable lowers to Unroll LoopControl
.disable lowers to DontUnroll LoopControl
.count lowers to PartialCount LoopControl
.full lowers to Unroll LoopControl
TODO in future patches: enable structurizer for non-vulkan targets.