Skip to content

Commit 6098d7d

Browse files
committed
[RISCV] Lower shuffles as rotates without zvbb
Now that the codegen for the expanded ISD::ROTL sequence has been improved, it's probably profitable to lower a shuffle that's a rotate to the vsll+vsrl+vor sequence to avoid a vrgather where possible, even if we don't have the vror instruction. This patch relaxes the restriction on ISD::ROTL being legal in lowerVECTOR_SHUFFLEAsRotate. It also attempts the lowering twice: once, only if zvbb is enabled, before any of the interleave/deinterleave/vmerge lowerings, and a second time, unconditionally, just before falling back to the vrgather. This way it doesn't interfere with any of the above patterns that may be more profitable than the expanded ISD::ROTL sequence. Reviewed By: craig.topper. Differential Revision: https://reviews.llvm.org/D159353
1 parent bf46b0b commit 6098d7d

File tree

3 files changed

+256
-178
lines changed

3 files changed

+256
-178
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4230,8 +4230,8 @@ static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
42304230
}
42314231

42324232
// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4233-
// reinterpret it as a shuffle of v2i32 where the two i32s are bit rotated, and
4234-
// lower it as a vror.vi (if legal with zvbb enabled).
4233+
// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4234+
// as a vror.vi if we have zvbb, or otherwise as a vsll, vsrl and vor.
42354235
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
42364236
SelectionDAG &DAG,
42374237
const RISCVSubtarget &Subtarget) {
@@ -4248,8 +4248,7 @@ static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
42484248
NumElts / NumSubElts);
42494249

42504250
// We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4251-
if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::ROTL,
4252-
RotateVT))
4251+
if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
42534252
return SDValue();
42544253

42554254
SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
@@ -4276,12 +4275,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
42764275
unsigned NumElts = VT.getVectorNumElements();
42774276
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
42784277

4279-
// Lower to a vror.vi of a larger element type if possible. Do this before we
4280-
// promote i1s to i8s.
4281-
if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4282-
return V;
4283-
42844278
if (VT.getVectorElementType() == MVT::i1) {
4279+
// Lower to a vror.vi of a larger element type if possible before we promote
4280+
// i1s to i8s.
4281+
if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4282+
return V;
42854283
if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
42864284
return V;
42874285

@@ -4384,6 +4382,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
43844382
lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
43854383
return V;
43864384

4385+
// A bitrotate will be one instruction on zvbb, so try to lower to it first if
4386+
// available.
4387+
if (Subtarget.hasStdExtZvbb())
4388+
if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4389+
return V;
4390+
43874391
// Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
43884392
// be undef which can be handled with a single SLIDEDOWN/UP.
43894393
int LoSrc, HiSrc;
@@ -4510,6 +4514,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
45104514
if (IsSelect)
45114515
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
45124516

4517+
// We might be able to express the shuffle as a bitrotate. But even if we
4518+
// don't have zvbb and have to expand, the expanded sequence of approx. 2
4519+
// shifts and a vor will have a higher throughput than a vrgather.
4520+
if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4521+
return V;
4522+
45134523
if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
45144524
// On such a large vector we're unable to use i8 as the index type.
45154525
// FIXME: We could promote the index to i16 and use vrgatherei16, but that

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -787,23 +787,13 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) {
787787
; This interleaves the first 2 elements of a vector in opposite order. With
788788
; undefs for the remaining elements. We used to miscompile this.
789789
define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) {
790-
; V128-LABEL: unary_interleave_10uu_v4i8:
791-
; V128: # %bb.0:
792-
; V128-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
793-
; V128-NEXT: vid.v v9
794-
; V128-NEXT: vrsub.vi v10, v9, 1
795-
; V128-NEXT: vrgather.vv v9, v8, v10
796-
; V128-NEXT: vmv1r.v v8, v9
797-
; V128-NEXT: ret
798-
;
799-
; V512-LABEL: unary_interleave_10uu_v4i8:
800-
; V512: # %bb.0:
801-
; V512-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
802-
; V512-NEXT: vid.v v9
803-
; V512-NEXT: vrsub.vi v10, v9, 1
804-
; V512-NEXT: vrgather.vv v9, v8, v10
805-
; V512-NEXT: vmv1r.v v8, v9
806-
; V512-NEXT: ret
790+
; CHECK-LABEL: unary_interleave_10uu_v4i8:
791+
; CHECK: # %bb.0:
792+
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
793+
; CHECK-NEXT: vsrl.vi v9, v8, 8
794+
; CHECK-NEXT: vsll.vi v8, v8, 8
795+
; CHECK-NEXT: vor.vv v8, v8, v9
796+
; CHECK-NEXT: ret
807797
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
808798
ret <4 x i8> %a
809799
}

0 commit comments

Comments
 (0)