-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[LV]Process alloca in isPredicatedInst for tail-folded analysis. #101743
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LV]Process alloca in isPredicatedInst for tail-folded analysis. #101743
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) ChangesPatch fixes the compiler crash when it tries to check is alloca in the Full diff: https://github.com/llvm/llvm-project/pull/101743.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6daa8043a3fbf..47113ae1ce851 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3342,7 +3342,7 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
if (!blockNeedsPredicationForAnyReason(I->getParent()) ||
isSafeToSpeculativelyExecute(I) ||
(isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
- isa<BranchInst, PHINode>(I))
+ isa<BranchInst, PHINode, AllocaInst>(I))
return false;
// If the instruction was executed conditionally in the original scalar loop,
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
new file mode 100644
index 0000000000000..c04299c4cf31d
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-width=4 < %s | FileCheck %s
+
+define i32 @test(ptr %vf1, i64 %n) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[VF1:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 200, i64 200, i64 200, i64 200>
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; CHECK: [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; CHECK: [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204
+; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 true, label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 204, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store ptr [[TMP18]], ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 200
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = alloca i8, i64 %n, align 16
+ %arrayidx = getelementptr inbounds ptr, ptr %vf1, i64 %indvars.iv
+ store ptr %0, ptr %arrayidx, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv, 200
+ br i1 %exitcond.not, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ ret i32 0
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
|
Ping! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks should definitely not crash. Would
Be interesting to know if there are any real world cases where alloca are in the loop body to double check we are doing the right thing in general
%exitcond.not = icmp eq i64 %indvars.iv, 200 | ||
br i1 %exitcond.not, label %for.end.loopexit, label %for.body | ||
|
||
for.end.loopexit: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: can be shortened to exit
The test itself is a reduced version from llvm-test-suite, compiled with folded tail. |
Created using spr 1.3.5
Patch fixes the compiler crash when it tries to check is alloca in the
loop is a predicated instruction.