-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LoopVectorize] LLVM fails to vectorise loops with multi-bool variables #89226
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
de0cd17
53a4a2f
9752839
f1dfe17
e77247f
e50cd3f
4135912
327f94d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,26 @@ | ||
; REQUIRES: asserts | ||
; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s | ||
; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S 2>&1 | FileCheck %s | ||
|
||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" | ||
target triple = "arm64-apple-ios5.0.0" | ||
|
||
define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) { | ||
; CHECK: LV: Checking a loop in 'selects_1' | ||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond = select i1 %cmp1, i32 10, i32 %and | ||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond6 = select i1 %cmp2, i32 30, i32 %and | ||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6 | ||
|
||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %cond = select i1 %cmp1, i32 10, i32 %and | ||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %cond6 = select i1 %cmp2, i32 30, i32 %and | ||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6 | ||
|
||
; CHECK-LABEL: define void @selects_1( | ||
; CHECK: vector.body: | ||
; CHECK: select <4 x i1> | ||
; CHECK: LV: Selecting VF: 4 | ||
|
||
entry: | ||
%cmp26 = icmp sgt i32 %N, 0 | ||
br i1 %cmp26, label %for.body.preheader, label %for.cond.cleanup | ||
|
||
for.body.preheader: ; preds = %entry | ||
%wide.trip.count = zext i32 %N to i64 | ||
%n = zext i32 %N to i64 | ||
br label %for.body | ||
|
||
for.body: ; preds = %for.body.preheader, %for.body | ||
|
@@ -38,7 +36,7 @@ for.body: ; preds = %for.body.preheader, | |
%cond11 = select i1 %cmp7, i32 %cond, i32 %cond6 | ||
store i32 %cond11, ptr %arrayidx, align 4 | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count | ||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | ||
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body | ||
|
||
for.cond.cleanup.loopexit: ; preds = %for.body | ||
|
@@ -47,3 +45,31 @@ for.cond.cleanup.loopexit: ; preds = %for.body | |
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry | ||
ret void | ||
} | ||
|
||
; multi_user_cmp: loop in which one compare (%cmp1) has two select users.
; Each iteration loads a float from %a and tests it with `fcmp olt ..., 0.0`.
; Two i1 values are carried around the loop through phis:
;   %.any.0.off0 - starts false, becomes true once any %cmp1 is true
;   %all.off     - starts true, becomes false once any %cmp1 is false
; The result is 1 when %all.off is true, otherwise 2 when %.any.0.off0 is
; true, otherwise 3.
; The cost-model lines below expect, at VF 16, a cost of 4 for the fcmp and 1
; for each select, and that VF 16 is the selected width.
define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { | ||
; CHECK: LV: Checking a loop in 'multi_user_cmp' | ||
; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction: %cmp1 = fcmp olt float %load1, 0.000000e+00 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry @dtemirbulatov I just have one more comment. I guess it may be my fault you're hitting this issue, but at the moment these CHECK lines are fragile and it's not obvious which function we are actually testing. That's because these CHECK lines could in theory match the CHECK lines in
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction: %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09 | ||
; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction: %all.off = select i1 %cmp1, i1 %all.off.next, i1 false | ||
; CHECK: LV: Selecting VF: 16. | ||
entry: | ||
; The entry block branches unconditionally into the loop, and the exit test
; sits at the bottom of the body, so the body executes at least once.
br label %for.body | ||
|
||
for.body: | ||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||
%all.off.next = phi i1 [ true, %entry ], [ %all.off, %for.body ] | ||
%any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ] | ||
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv | ||
%load1 = load float, ptr %arrayidx, align 4 | ||
%cmp1 = fcmp olt float %load1, 0.000000e+00 | ||
; Both selects below use %cmp1 as their condition, so the compare has more
; than one user.
%.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09 | ||
%all.off = select i1 %cmp1, i1 %all.off.next, i1 false | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | ||
br i1 %exitcond.not, label %exit, label %for.body | ||
|
||
exit: | ||
; Map the two carried i1 values onto the i32 result: %all.off wins (1), then
; %.any.0.off0 (2), otherwise 3.
%0 = select i1 %.any.0.off0, i32 2, i32 3 | ||
%1 = select i1 %all.off, i32 1, i32 %0 | ||
ret i32 %1 | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I ignore the code changes in this PR and simply remove the `m_OneUse` in this line of code, then all the tests added in this PR still pass. The only test failure I see is in the existing test `llvm/test/Transforms/LoopVectorize/select-cmp.ll`, where I think the vectorized result is not incorrect. I'm not sure if that would be different if the resulting PHI node (that uses the zero-extended result of the `icmp`) were used, which it currently isn't.
In either case, the lack of test failures suggests to me that the complicated logic added in this patch isn't really being tested by any of the tests?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think there are two negative tests in llvm/test/Transforms/LoopVectorize/multicmp.ll : multi_user_cmp_no_vectorise() and multi_user_cmp_no_vectorise(). I will add more.