-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Use ISD::XOR instead of RISCVISD::VMXOR_VL in lowerVectorMaskVecReduction of scalable ISD::VECREDUCE_AND #121812
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…VecReduction of scalable ISD::VECREDUCE_AND. This allows combining the XOR with earlier ISD::ANDs inserted by type legalization.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: This allows combining the XOR with earlier ISD::ANDs inserted by type legalization. Full diff: https://github.com/llvm/llvm-project/pull/121812.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7efe3732d8be13..0d443cf7ec5c83 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10154,7 +10154,10 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
case ISD::VP_REDUCE_AND: {
// vcpop ~x == 0
SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
- Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
+ if (IsVP || VecVT.isFixedLengthVector())
+ Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
+ else
+ Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
CC = ISD::SETEQ;
break;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
index d99fd036b4fc92..ce9d6c5ab91a8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
@@ -785,8 +785,7 @@ define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
; CHECK-LABEL: vreduce_and_nxv128i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmand.mm v8, v0, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v0, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -814,8 +813,7 @@ define zeroext i1 @vreduce_smax_nxv128i1(<vscale x 128 x i1> %v) {
; CHECK-LABEL: vreduce_smax_nxv128i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmand.mm v8, v0, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v0, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -829,8 +827,7 @@ define zeroext i1 @vreduce_umin_nxv128i1(<vscale x 128 x i1> %v) {
; CHECK-LABEL: vreduce_umin_nxv128i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmand.mm v8, v0, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v0, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -892,8 +889,7 @@ define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v0, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -925,8 +921,7 @@ define zeroext i1 @vreduce_smax_nxv256i1(<vscale x 256 x i1> %v) {
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v0, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -942,8 +937,7 @@ define zeroext i1 @vreduce_umin_nxv256i1(<vscale x 256 x i1> %v) {
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v0, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -1019,8 +1013,7 @@ define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v0, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -1060,8 +1053,7 @@ define zeroext i1 @vreduce_smax_nxv512i1(<vscale x 512 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v0, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -1081,8 +1073,7 @@ define zeroext i1 @vreduce_umin_nxv512i1(<vscale x 512 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v0, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -1186,8 +1177,7 @@ define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v15, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -1243,8 +1233,7 @@ define zeroext i1 @vreduce_smax_nxv1024i1(<vscale x 1024 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v15, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -1272,8 +1261,7 @@ define zeroext i1 @vreduce_umin_nxv1024i1(<vscale x 1024 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v15, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM w/optional comment.
@@ -10154,7 +10154,10 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
case ISD::VP_REDUCE_AND: {
// vcpop ~x == 0
SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
if (IsVP || VecVT.isFixedLengthVector())
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need the special case? I'm somewhat surprised that the plain ISD::XOR doesn't work out fine for the fixed case at least. Any idea why? Even for the VP case, I think we only need the mask and VL on the vcpop itself?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It caused an extra vtype toggle for fixed vectors. I didn't dig any further.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/6462 Here is the relevant piece of the build log for reference:
|
This allows combining the XOR with earlier ISD::ANDs inserted by type legalization.