Skip to content

Commit 5aa5a2f

Browse files
authored
[RISCV] Disable exact VLEN splitting for bitrotate shuffles (#79468)
If we have a bitrotate shuffle, this is also by definition a vreg splittable shuffle when exact VLEN is known. However, there's no profit to be had from splitting the wider bitrotate lowering into individual m1 pieces. We'd rather leave it at the higher LMUL to reduce code size. This is a general problem for any linear-in-LMUL shuffle expansion when the vreg splitting still has to do linear work per piece. On first reflection it seems like element rotation might have the same interaction, but in that case, splitting can be done via a set of whole register moves (which may get folded into the consumer, depending on context), which is at least as good as a pair of slideup/slidedown. I think that bitrotate is the only shuffle expansion we have that actually needs to be handled here.
1 parent 7f409cd commit 5aa5a2f

File tree

2 files changed

+37
-25
lines changed

2 files changed

+37
-25
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4616,26 +4616,38 @@ static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
46164616
return Res;
46174617
}
46184618

4619-
// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4620-
// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4621-
// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4622-
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4623-
SelectionDAG &DAG,
4624-
const RISCVSubtarget &Subtarget) {
4619+
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4620+
SelectionDAG &DAG,
4621+
const RISCVSubtarget &Subtarget,
4622+
MVT &RotateVT, unsigned &RotateAmt) {
46254623
SDLoc DL(SVN);
46264624

46274625
EVT VT = SVN->getValueType(0);
46284626
unsigned NumElts = VT.getVectorNumElements();
46294627
unsigned EltSizeInBits = VT.getScalarSizeInBits();
4630-
unsigned NumSubElts, RotateAmt;
4628+
unsigned NumSubElts;
46314629
if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
46324630
NumElts, NumSubElts, RotateAmt))
4633-
return SDValue();
4634-
MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4631+
return false;
4632+
RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
46354633
NumElts / NumSubElts);
46364634

46374635
// We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4638-
if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
4636+
return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4637+
}
4638+
4639+
// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4640+
// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4641+
// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4642+
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4643+
SelectionDAG &DAG,
4644+
const RISCVSubtarget &Subtarget) {
4645+
SDLoc DL(SVN);
4646+
4647+
EVT VT = SVN->getValueType(0);
4648+
unsigned RotateAmt;
4649+
MVT RotateVT;
4650+
if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
46394651
return SDValue();
46404652

46414653
SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
@@ -4672,6 +4684,13 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
46724684
if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen)
46734685
return SDValue();
46744686

4687+
// Avoid picking up bitrotate patterns which we have a linear-in-lmul
4688+
// expansion for.
4689+
unsigned RotateAmt;
4690+
MVT RotateVT;
4691+
if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4692+
return SDValue();
4693+
46754694
MVT ElemVT = VT.getVectorElementType();
46764695
unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
46774696
unsigned VRegsPerSrc = NumElts / ElemsPerVReg;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -858,37 +858,30 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2)
858858
; RV32-LABEL: shuffle_v8f32_as_i64_exact:
859859
; RV32: # %bb.0:
860860
; RV32-NEXT: li a0, 32
861-
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
861+
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
862862
; RV32-NEXT: vmv.v.x v10, a0
863-
; RV32-NEXT: vrsub.vi v11, v10, 0
864863
; RV32-NEXT: li a0, 63
865-
; RV32-NEXT: vand.vx v11, v11, a0
866-
; RV32-NEXT: vsrl.vv v12, v8, v11
864+
; RV32-NEXT: vand.vx v12, v10, a0
865+
; RV32-NEXT: vsll.vv v12, v8, v12
866+
; RV32-NEXT: vrsub.vi v10, v10, 0
867867
; RV32-NEXT: vand.vx v10, v10, a0
868-
; RV32-NEXT: vsll.vv v8, v8, v10
869-
; RV32-NEXT: vor.vv v8, v8, v12
870-
; RV32-NEXT: vsrl.vv v11, v9, v11
871-
; RV32-NEXT: vsll.vv v9, v9, v10
872-
; RV32-NEXT: vor.vv v9, v9, v11
868+
; RV32-NEXT: vsrl.vv v8, v8, v10
869+
; RV32-NEXT: vor.vv v8, v12, v8
873870
; RV32-NEXT: ret
874871
;
875872
; RV64-LABEL: shuffle_v8f32_as_i64_exact:
876873
; RV64: # %bb.0:
877874
; RV64-NEXT: li a0, 32
878-
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
875+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
879876
; RV64-NEXT: vsrl.vx v10, v8, a0
880877
; RV64-NEXT: vsll.vx v8, v8, a0
881878
; RV64-NEXT: vor.vv v8, v8, v10
882-
; RV64-NEXT: vsrl.vx v10, v9, a0
883-
; RV64-NEXT: vsll.vx v9, v9, a0
884-
; RV64-NEXT: vor.vv v9, v9, v10
885879
; RV64-NEXT: ret
886880
;
887881
; ZVKB-V-LABEL: shuffle_v8f32_as_i64_exact:
888882
; ZVKB-V: # %bb.0:
889-
; ZVKB-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
883+
; ZVKB-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
890884
; ZVKB-V-NEXT: vror.vi v8, v8, 32
891-
; ZVKB-V-NEXT: vror.vi v9, v9, 32
892885
; ZVKB-V-NEXT: ret
893886
;
894887
; ZVKB-ZVE32X-LABEL: shuffle_v8f32_as_i64_exact:

0 commit comments

Comments
 (0)