Skip to content

Commit ffe96ad

Browse files
authored
[RISCV] Allow undef elements in isDeinterleaveShuffle (#114585)
This allows us to form vnsrl deinterleaves from non-power-of-two shuffles after they've been legalized to a power of two.
1 parent 19ad7dd commit ffe96ad

File tree

4 files changed: +100 additions, -39 deletions

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4477,10 +4477,9 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
44774477
if (Mask[0] != 0 && Mask[0] != 1)
44784478
return false;
44794479

4480-
// The others must increase by 2 each time.
4481-
// TODO: Support undef elements?
4480+
// The others must increase by 2 each time (or be undef).
44824481
for (unsigned i = 1; i != Mask.size(); ++i)
4483-
if (Mask[i] != Mask[i - 1] + 2)
4482+
if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
44844483
return false;
44854484

44864485
return true;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,46 +12,20 @@ define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
1212
; RV32: # %bb.0:
1313
; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
1414
; RV32-NEXT: vle32.v v10, (a0)
15-
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
16-
; RV32-NEXT: vslidedown.vi v9, v10, 2
17-
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
18-
; RV32-NEXT: vwaddu.vv v8, v10, v9
19-
; RV32-NEXT: li a0, -1
20-
; RV32-NEXT: vwmaccu.vx v8, a0, v9
21-
; RV32-NEXT: vmv.v.i v0, 4
22-
; RV32-NEXT: vsetivli zero, 4, e32, m2, ta, ma
23-
; RV32-NEXT: vslidedown.vi v12, v10, 4
24-
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
25-
; RV32-NEXT: vrgather.vi v8, v12, 0, v0.t
26-
; RV32-NEXT: vid.v v9
27-
; RV32-NEXT: vadd.vv v9, v9, v9
28-
; RV32-NEXT: vadd.vi v11, v9, 1
29-
; RV32-NEXT: vrgather.vv v9, v10, v11
30-
; RV32-NEXT: vrgather.vi v9, v12, 1, v0.t
15+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
16+
; RV32-NEXT: vnsrl.wi v8, v10, 0
17+
; RV32-NEXT: li a0, 32
18+
; RV32-NEXT: vnsrl.wx v9, v10, a0
3119
; RV32-NEXT: ret
3220
;
3321
; RV64-LABEL: load_factor2_v3:
3422
; RV64: # %bb.0:
3523
; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
3624
; RV64-NEXT: vle32.v v10, (a0)
25+
; RV64-NEXT: li a0, 32
3726
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
38-
; RV64-NEXT: vid.v v8
39-
; RV64-NEXT: vadd.vv v8, v8, v8
40-
; RV64-NEXT: vadd.vi v8, v8, 1
41-
; RV64-NEXT: vrgather.vv v9, v10, v8
42-
; RV64-NEXT: vmv.v.i v0, 4
43-
; RV64-NEXT: vsetivli zero, 4, e32, m2, ta, ma
44-
; RV64-NEXT: vslidedown.vi v12, v10, 4
45-
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
46-
; RV64-NEXT: vrgather.vi v9, v12, 1, v0.t
47-
; RV64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
48-
; RV64-NEXT: vslidedown.vi v11, v10, 2
49-
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
50-
; RV64-NEXT: vwaddu.vv v8, v10, v11
51-
; RV64-NEXT: li a0, -1
52-
; RV64-NEXT: vwmaccu.vx v8, a0, v11
53-
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
54-
; RV64-NEXT: vrgather.vi v8, v12, 0, v0.t
27+
; RV64-NEXT: vnsrl.wx v9, v10, a0
28+
; RV64-NEXT: vnsrl.wi v8, v10, 0
5529
; RV64-NEXT: ret
5630
%interleaved.vec = load <6 x i32>, ptr %ptr
5731
%v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,3 +347,94 @@ entry:
347347
store <2 x double> %shuffle.i5, ptr %out, align 8
348348
ret void
349349
}
350+
351+
define void @vnsrl_0_i8_undef(ptr %in, ptr %out) {
352+
; CHECK-LABEL: vnsrl_0_i8_undef:
353+
; CHECK: # %bb.0: # %entry
354+
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
355+
; CHECK-NEXT: vle8.v v8, (a0)
356+
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
357+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
358+
; CHECK-NEXT: vse8.v v8, (a1)
359+
; CHECK-NEXT: ret
360+
entry:
361+
%0 = load <16 x i8>, ptr %in, align 1
362+
%shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 undef>
363+
store <8 x i8> %shuffle.i5, ptr %out, align 1
364+
ret void
365+
}
366+
367+
define void @vnsrl_0_i8_undef2(ptr %in, ptr %out) {
368+
; CHECK-LABEL: vnsrl_0_i8_undef2:
369+
; CHECK: # %bb.0: # %entry
370+
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
371+
; CHECK-NEXT: vle8.v v8, (a0)
372+
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
373+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
374+
; CHECK-NEXT: vse8.v v8, (a1)
375+
; CHECK-NEXT: ret
376+
entry:
377+
%0 = load <16 x i8>, ptr %in, align 1
378+
%shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14>
379+
store <8 x i8> %shuffle.i5, ptr %out, align 1
380+
ret void
381+
}
382+
383+
; TODO: Allow an undef initial element
384+
define void @vnsrl_0_i8_undef3(ptr %in, ptr %out) {
385+
; CHECK-LABEL: vnsrl_0_i8_undef3:
386+
; CHECK: # %bb.0: # %entry
387+
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
388+
; CHECK-NEXT: vle8.v v8, (a0)
389+
; CHECK-NEXT: lui a0, 24640
390+
; CHECK-NEXT: addi a0, a0, 6
391+
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
392+
; CHECK-NEXT: vmv.v.x v9, a0
393+
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
394+
; CHECK-NEXT: vrgather.vv v10, v8, v9
395+
; CHECK-NEXT: vid.v v9
396+
; CHECK-NEXT: vadd.vv v9, v9, v9
397+
; CHECK-NEXT: vadd.vi v9, v9, -8
398+
; CHECK-NEXT: li a0, -32
399+
; CHECK-NEXT: vmv.s.x v0, a0
400+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
401+
; CHECK-NEXT: vslidedown.vi v8, v8, 8
402+
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
403+
; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t
404+
; CHECK-NEXT: vse8.v v10, (a1)
405+
; CHECK-NEXT: ret
406+
entry:
407+
%0 = load <16 x i8>, ptr %in, align 1
408+
%shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 6, i32 10, i32 12, i32 14>
409+
store <8 x i8> %shuffle.i5, ptr %out, align 1
410+
ret void
411+
}
412+
413+
; Not a vnsrl (checking for a prior pattern matching bug)
414+
define void @vnsrl_0_i8_undef_negative(ptr %in, ptr %out) {
415+
; CHECK-LABEL: vnsrl_0_i8_undef_negative:
416+
; CHECK: # %bb.0: # %entry
417+
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
418+
; CHECK-NEXT: vle8.v v8, (a0)
419+
; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
420+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI17_0)
421+
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
422+
; CHECK-NEXT: vle8.v v9, (a0)
423+
; CHECK-NEXT: vrgather.vv v10, v8, v9
424+
; CHECK-NEXT: vid.v v9
425+
; CHECK-NEXT: vadd.vv v9, v9, v9
426+
; CHECK-NEXT: vadd.vi v9, v9, -8
427+
; CHECK-NEXT: li a0, 48
428+
; CHECK-NEXT: vmv.s.x v0, a0
429+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
430+
; CHECK-NEXT: vslidedown.vi v8, v8, 8
431+
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
432+
; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t
433+
; CHECK-NEXT: vse8.v v10, (a1)
434+
; CHECK-NEXT: ret
435+
entry:
436+
%0 = load <16 x i8>, ptr %in, align 1
437+
%shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 1>
438+
store <8 x i8> %shuffle.i5, ptr %out, align 1
439+
ret void
440+
}

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,3 @@ declare {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float>)
269269
declare {<8 x half>, <8 x half>} @llvm.vector.deinterleave2.v16f16(<16 x half>)
270270
declare {<4 x float>, <4 x float>} @llvm.vector.deinterleave2.v8f32(<8 x float>)
271271
declare {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double>)
272-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
273-
; RV32: {{.*}}
274-
; RV64: {{.*}}

0 commit comments

Comments (0)