-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[InstCombine][NFC] Precommit a test for folding a binary op of reductions. #121568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Mikhail Gudim (mgudim). Changes: full diff at https://github.com/llvm/llvm-project/pull/121568.diff (1 file affected):
diff --git a/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll b/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll
new file mode 100644
index 00000000000000..ad47165d100156
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define i32 @add_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) { ; add(reduce.add(v0), reduce.add(v1)): expected output keeps both reductions and the scalar add
+; CHECK-LABEL: define i32 @add_of_reduce_add(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+ %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
+ %res = add i32 %v0_red, %v1_red
+ ret i32 %res
+}
+
+define i32 @sub_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) { ; sub(reduce.add(v0), reduce.add(v1)): expected output is a single reduce.add over the vector sub v0 - v1
+; CHECK-LABEL: define i32 @sub_of_reduce_add(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+ %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
+ %res = sub i32 %v0_red, %v1_red
+ ret i32 %res
+}
+
+define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) { ; mul(reduce.mul(v0), reduce.mul(v1)): expected output keeps both reductions and the scalar mul
+; CHECK-LABEL: define i32 @mul_of_reduce_mul(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
+ %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
+ %res = mul i32 %v0_red, %v1_red
+ ret i32 %res
+}
+
+define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) { ; and(reduce.and(v0), reduce.and(v1)): expected output keeps both reductions and the scalar and
+; CHECK-LABEL: define i32 @and_of_reduce_and(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT: [[RES:%.*]] = and i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v0_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v0)
+ %v1_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v1)
+ %res = and i32 %v0_red, %v1_red
+ ret i32 %res
+}
+
+define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) { ; or(reduce.or(v0), reduce.or(v1)): expected output keeps both reductions and the scalar or
+; CHECK-LABEL: define i32 @or_of_reduce_or(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
+ %v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1)
+ %res = or i32 %v0_red, %v1_red
+ ret i32 %res
+}
+
+define i32 @xor_of_reduce_xor(<16 x i32> %v0, <16 x i32> %v1) { ; xor(reduce.xor(v0), reduce.xor(v1)): expected output keeps both reductions and the scalar xor
+; CHECK-LABEL: define i32 @xor_of_reduce_xor(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT: [[RES:%.*]] = xor i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v0_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v0)
+ %v1_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v1)
+ %res = xor i32 %v0_red, %v1_red
+ ret i32 %res
+}
+
+define i32 @add_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) { ; mismatch case: the scalar binop (add) differs from the reduction kind (mul); expected output keeps the IR as-is
+; CHECK-LABEL: define i32 @add_of_reduce_mul(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
+ %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
+ %res = add i32 %v0_red, %v1_red ; was xor, which contradicted the function name
+ ret i32 %res
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a negative test with mismatched vector types?
Also multi use tests (that should fail). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please also add negative tests where the intrinsic IDs are not the same or the vector types don't match, and add a test showing that flags are not preserved.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please also add negative tests where intrinsic IDs are not the same
I cannot find this test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reduction_does_not_match_binop
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess nikic means to add a test like reduce.mul + reduce.add.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
right, just realized that and pushed.
abe25d4
to
f161113
Compare
done |
done. |
48f9a77
to
364f0bb
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
ret i32 %res | ||
} | ||
|
||
define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1, ptr %p) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%p is unused.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed it, thanks
ret i32 %res | ||
} | ||
|
||
define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1, ptr %p) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%p is unused.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed it, thanks
364f0bb
to
2faa49e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, though adding one test using scalable vectors probably wouldn't hurt.
2faa49e
to
a9ee495
Compare
added vscale tests |
No description provided.