Commit ac92dcc

[RISCV] Lower fixed reverse vector_shuffles through vector_reverse (#104461)
This teaches lowerVECTOR_REVERSE to handle fixed length vectors, and then lowers reverse vector_shuffles through it. The motivation for this is to allow fixed length vectors to share a potential optimization on vector_reverse in an upcoming patch (splitting up LMUL > 1 vrgathers.vv).
1 parent ef67601 commit ac92dcc
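
For illustration, the kind of shuffle this now catches is a single-source shufflevector whose mask is a full element reverse. A minimal sketch (a hypothetical function, not taken from this commit's tests):

define <8 x i32> @reverse_v8i32(<8 x i32> %a) {
  ; A reverse mask whose second operand is undef/poison is now lowered
  ; through ISD::VECTOR_REVERSE instead of the generic vrgather path
  ; that loads an index vector from the constant pool.
  %rev = shufflevector <8 x i32> %a, <8 x i32> poison,
                       <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %rev
}

As the updated tests below show, fixed-length reverses now select to a vid.v/vrsub.vx index computation feeding vrgather.vv, rather than a constant-pool load of the indices.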

3 files changed: +83 -69 lines changed


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 33 additions & 15 deletions
@@ -1087,8 +1087,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                            Custom);
 
-        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
-                           Custom);
+        setOperationAction(
+            {ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE}, VT,
+            Custom);
 
         setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                            VT, Custom);
@@ -1235,8 +1236,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         // expansion to a build_vector of 0s.
         setOperationAction(ISD::UNDEF, VT, Custom);
 
-        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
-                            ISD::EXTRACT_SUBVECTOR},
+        setOperationAction({ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE,
+                            ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
                            VT, Custom);
 
         // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
@@ -5160,6 +5161,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return convertFromScalableVector(VT, Res, DAG, Subtarget);
   }
 
+  if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
+    return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
+
   // If this is a deinterleave and we can widen the vector, then we can use
   // vnsrl to deinterleave.
   if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
@@ -10308,14 +10312,24 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
     SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
   }
-  unsigned EltSize = VecVT.getScalarSizeInBits();
-  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
+
+  MVT ContainerVT = VecVT;
+  SDValue Vec = Op.getOperand(0);
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VecVT);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+
+  unsigned EltSize = ContainerVT.getScalarSizeInBits();
+  unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
   unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
   unsigned MaxVLMAX =
-      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+      VecVT.isFixedLengthVector()
+          ? VecVT.getVectorNumElements()
+          : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
 
   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
-  MVT IntVT = VecVT.changeVectorElementTypeToInteger();
+  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
 
   // If this is SEW=8 and VLMAX is potentially more than 256, we need
   // to use vrgatherei16.vv.
@@ -10340,7 +10354,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
     }
 
     // Just promote the int type to i16 which will double the LMUL.
-    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
+    IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
   }
 
@@ -10354,12 +10368,13 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
   }
 
   MVT XLenVT = Subtarget.getXLenVT();
-  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
+  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
 
   // Calculate VLMAX-1 for the desired SEW.
-  SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
-                                 computeVLMax(VecVT, DL, DAG),
-                                 DAG.getConstant(1, DL, XLenVT));
+  SDValue VLMinus1 = DAG.getNode(
+      ISD::SUB, DL, XLenVT,
+      DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
+      DAG.getConstant(1, DL, XLenVT));
 
   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
   bool IsRV32E64 =
@@ -10375,8 +10390,11 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
   SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
                                 DAG.getUNDEF(IntVT), Mask, VL);
 
-  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
-                     DAG.getUNDEF(VecVT), Mask, VL);
+  SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
+                               DAG.getUNDEF(ContainerVT), Mask, VL);
+  if (VecVT.isFixedLengthVector())
+    Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
+  return Gather;
 }
 
 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll

Lines changed: 21 additions & 23 deletions
@@ -101,10 +101,10 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
 ; NO-ZVBB-LABEL: reverse_v32i1:
 ; NO-ZVBB:       # %bb.0:
 ; NO-ZVBB-NEXT:    li a0, 32
-; NO-ZVBB-NEXT:    lui a1, %hi(.LCPI4_0)
-; NO-ZVBB-NEXT:    addi a1, a1, %lo(.LCPI4_0)
 ; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; NO-ZVBB-NEXT:    vle8.v v8, (a1)
+; NO-ZVBB-NEXT:    vid.v v8
+; NO-ZVBB-NEXT:    li a0, 31
+; NO-ZVBB-NEXT:    vrsub.vx v8, v8, a0
 ; NO-ZVBB-NEXT:    vmv.v.i v10, 0
 ; NO-ZVBB-NEXT:    vmerge.vim v10, v10, 1, v0
 ; NO-ZVBB-NEXT:    vrgather.vv v12, v10, v8
@@ -124,10 +124,10 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
 ; NO-ZVBB-LABEL: reverse_v64i1:
 ; NO-ZVBB:       # %bb.0:
 ; NO-ZVBB-NEXT:    li a0, 64
-; NO-ZVBB-NEXT:    lui a1, %hi(.LCPI5_0)
-; NO-ZVBB-NEXT:    addi a1, a1, %lo(.LCPI5_0)
 ; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; NO-ZVBB-NEXT:    vle8.v v8, (a1)
+; NO-ZVBB-NEXT:    vid.v v8
+; NO-ZVBB-NEXT:    li a0, 63
+; NO-ZVBB-NEXT:    vrsub.vx v8, v8, a0
 ; NO-ZVBB-NEXT:    vmv.v.i v12, 0
 ; NO-ZVBB-NEXT:    vmerge.vim v12, v12, 1, v0
 ; NO-ZVBB-NEXT:    vrgather.vv v16, v12, v8
@@ -147,10 +147,10 @@ define <128 x i1> @reverse_v128i1(<128 x i1> %a) {
 ; CHECK-LABEL: reverse_v128i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI6_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a0, 127
+; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.v.i v16, 0
 ; CHECK-NEXT:    vmerge.vim v16, v16, 1, v0
 ; CHECK-NEXT:    vrgather.vv v24, v16, v8
@@ -229,10 +229,10 @@ define <32 x i8> @reverse_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: reverse_v32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI12_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v12, v10, a0
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
@@ -244,10 +244,10 @@ define <64 x i8> @reverse_v64i8(<64 x i8> %a) {
 ; CHECK-LABEL: reverse_v64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 64
-; CHECK-NEXT:    lui a1, %hi(.LCPI13_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI13_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    li a0, 63
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -324,11 +324,10 @@ define <32 x i16> @reverse_v32i16(<32 x i16> %a) {
 ; CHECK-LABEL: reverse_v32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI19_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a1)
-; CHECK-NEXT:    vsext.vf2 v16, v12
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -521,11 +520,10 @@ define <32 x half> @reverse_v32f16(<32 x half> %a) {
 ; CHECK-LABEL: reverse_v32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI34_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI34_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a1)
-; CHECK-NEXT:    vsext.vf2 v16, v12
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret

llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll

Lines changed: 29 additions & 31 deletions
@@ -106,21 +106,20 @@ define <16 x i8> @v16i8(<16 x i8> %a) {
 define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: v16i8_2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v12, v9
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI7_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a1)
-; CHECK-NEXT:    vmv1r.v v14, v9
-; CHECK-NEXT:    vrgather.vv v10, v8, v12
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vrsub.vi v8, v8, 15
+; CHECK-NEXT:    vid.v v14
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v16, v14, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v16
+; CHECK-NEXT:    vrsub.vi v8, v14, 15
 ; CHECK-NEXT:    lui a0, 16
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vrgather.vv v10, v14, v8, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v12, v8, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %v32i8 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -230,22 +229,21 @@ define <16 x i16> @v16i16(<16 x i16> %a) {
 define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: v16i16_2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v16, v10
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI15_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI15_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a1)
-; CHECK-NEXT:    vmv2r.v v20, v10
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vrgather.vv v8, v12, v16
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vrsub.vi v12, v12, 15
+; CHECK-NEXT:    vid.v v20
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v24, v20, a0
+; CHECK-NEXT:    vrgather.vv v12, v8, v24
+; CHECK-NEXT:    vrsub.vi v8, v20, 15
 ; CHECK-NEXT:    lui a0, 16
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vrgather.vv v8, v20, v12, v0.t
+; CHECK-NEXT:    vrgather.vv v12, v16, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i16> %v32i16
@@ -363,17 +361,17 @@ define <16 x i32> @v16i32(<16 x i32> %a) {
 define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: v16i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI23_0)
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vle16.v v20, (a1)
 ; CHECK-NEXT:    vmv4r.v v24, v12
 ; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vrgatherei16.vv v8, v16, v20
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vid.v v20
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v28, v20, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v8, v16, v28
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vrsub.vi v16, v16, 15
+; CHECK-NEXT:    vrsub.vi v16, v20, 15
 ; CHECK-NEXT:    lui a0, 16
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
@@ -549,10 +547,10 @@ define <32 x half> @v16f16_2(<16 x half> %a) {
 ; CHECK-LABEL: v16f16_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI35_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI35_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -720,10 +718,10 @@ define <32 x i8> @v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: v32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    lui a1, %hi(.LCPI46_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI46_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a0, 31
+; CHECK-NEXT:    vrsub.vx v12, v10, a0
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
