-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Allow undef elements in isDeinterleaveShuffle #114585
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Allow undef elements in isDeinterleaveShuffle #114585
Conversation
This allows us to form vnsrl deinterleaves from non-power-of-two shuffles after they've been legalized to a power of two.
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) Changes: This allows us to form vnsrl deinterleaves from non-power-of-two shuffles after they've been legalized to a power of two. Full diff: https://github.com/llvm/llvm-project/pull/114585.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 920b06c7ba6ecd..047f74f3e6db9f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4475,10 +4475,10 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
if (Mask[0] != 0 && Mask[0] != 1)
return false;
- // The others must increase by 2 each time.
- // TODO: Support undef elements?
+ // The others must increase by 2 each time (or be undef).
for (unsigned i = 1; i != Mask.size(); ++i)
- if (Mask[i] != Mask[i - 1] + 2)
+ if (Mask[i] != Mask[i - 1] + 2 &&
+ Mask[i] != -1)
return false;
return true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 5911e8248f2995..b3cc834c70a1cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -12,46 +12,20 @@ define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vle32.v v10, (a0)
-; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 2
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vwaddu.vv v8, v10, v9
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: vwmaccu.vx v8, a0, v9
-; RV32-NEXT: vmv.v.i v0, 4
-; RV32-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: vrgather.vi v8, v12, 0, v0.t
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vadd.vv v9, v9, v9
-; RV32-NEXT: vadd.vi v11, v9, 1
-; RV32-NEXT: vrgather.vv v9, v10, v11
-; RV32-NEXT: vrgather.vi v9, v12, 1, v0.t
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vnsrl.wi v8, v10, 0
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vnsrl.wx v9, v10, a0
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor2_v3:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0)
+; RV64-NEXT: li a0, 32
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vadd.vi v8, v8, 1
-; RV64-NEXT: vrgather.vv v9, v10, v8
-; RV64-NEXT: vmv.v.i v0, 4
-; RV64-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v10, 4
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT: vrgather.vi v9, v12, 1, v0.t
-; RV64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v11, v10, 2
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vwaddu.vv v8, v10, v11
-; RV64-NEXT: li a0, -1
-; RV64-NEXT: vwmaccu.vx v8, a0, v11
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT: vrgather.vi v8, v12, 0, v0.t
+; RV64-NEXT: vnsrl.wx v9, v10, a0
+; RV64-NEXT: vnsrl.wi v8, v10, 0
; RV64-NEXT: ret
%interleaved.vec = load <6 x i32>, ptr %ptr
%v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index bc32518b671953..b2973826d65ded 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -269,6 +269,3 @@ declare {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float>)
declare {<8 x half>, <8 x half>} @llvm.vector.deinterleave2.v16f16(<16 x half>)
declare {<4 x float>, <4 x float>} @llvm.vector.deinterleave2.v8f32(<8 x float>)
declare {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
for (unsigned i = 1; i != Mask.size(); ++i) | ||
if (Mask[i] != Mask[i - 1] + 2) | ||
if (Mask[i] != Mask[i - 1] + 2 && Mask[i] != -1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a bug here - specifically, this allows 1 in an arbitrary position following an undef (-1) index, because the check compares against the previous element and -1 + 2 == 1. Will refresh with a fixed version on Monday.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This allows us to form vnsrl deinterleaves from non-power-of-two shuffles after they've been legalized to a power of two.
This allows us to form vnsrl deinterleaves from non-power-of-two shuffles after they've been legalized to a power of two.