-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Lower a shuffle which is nearly identity except one replicated element #135292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Lower a shuffle which is nearly identity except one replicated element #135292
Conversation
…d element This can be done with a vrgather.vi/vx and (possibly) a register move. The alternative is to do a vrgather.vv with a full-width index vector. We'd already caught the two-operand form of this shuffle; this patch specifically handles the single-operand form. Unfortunately only in the abstract, it would be nice if we canonicalized shuffles in some way, wouldn't it?
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) Changes: This can be done with a vrgather.vi/vx and (possibly) a register move. The alternative is to do a vrgather.vv with a full-width index vector. We'd already caught the two-operand form of this shuffle; this patch specifically handles the single-operand form. Unfortunately only in the abstract, it would be nice if we canonicalized shuffles in some way, wouldn't it? Full diff: https://github.com/llvm/llvm-project/pull/135292.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7d192756fd56..fd0562d141796 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4726,6 +4726,47 @@ static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
DAG.getVectorIdxConstant(0, DL));
}
+/// Match a single source shuffle which is an identity except that some
+/// particular element is repeated. This can be lowered as a masked
+/// vrgather.vi/vx. Note that the two source form of this is handled
+/// by the recursive splitting logic and doesn't need special handling.
+static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+
+ SDLoc DL(SVN);
+ MVT VT = SVN->getSimpleValueType(0);
+ SDValue V1 = SVN->getOperand(0);
+ assert(SVN->getOperand(1).isUndef());
+ ArrayRef<int> Mask = SVN->getMask();
+ const unsigned NumElts = VT.getVectorNumElements();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ std::optional<int> SplatIdx;
+ for (auto [I, M] : enumerate(Mask)) {
+ if (M == -1 || I == (unsigned)M)
+ continue;
+ if (SplatIdx && *SplatIdx != M)
+ return SDValue();
+ SplatIdx = M;
+ }
+
+ if (!SplatIdx)
+ return SDValue();
+
+ SmallVector<SDValue> MaskVals;
+ for (int MaskIndex : Mask) {
+ bool SelectMaskVal = MaskIndex == *SplatIdx;
+ MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
+ }
+ assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
+ SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
+ SmallVector<int>(NumElts, *SplatIdx));
+ return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
+}
+
// Lower the following shuffle to vslidedown.
// a)
// t49: v8i8 = extract_subvector t13, Constant:i64<0>
@@ -5852,6 +5893,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
return V;
+ if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
+ return V;
+
// Match a spread(4,8) which can be done via extend and shift. Spread(2)
// is fully covered in interleave(2) above, so it is ignored here.
if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
index b26fc5653afec..e6375e276d37f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
@@ -1419,11 +1419,11 @@ define <8 x i32> @shuffle_v8i32_locally_repeating_neg(<8 x i32> %a) {
define <8 x i8> @identity_splat0(<8 x i8> %v) {
; CHECK-LABEL: identity_splat0:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI88_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI88_0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: li a0, 25
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vrgather.vi v9, v8, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%shuf = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
@@ -1433,11 +1433,11 @@ define <8 x i8> @identity_splat0(<8 x i8> %v) {
define <8 x i8> @identity_splat2(<8 x i8> %v) {
; CHECK-LABEL: identity_splat2:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI89_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI89_0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: li a0, 28
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vrgather.vi v9, v8, 2, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%shuf = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 5, i32 6, i32 7>
@@ -1448,14 +1448,13 @@ define <8 x i8> @identity_splat2(<8 x i8> %v) {
define <8 x i8> @vmerge_vxm(<8 x i8> %v, i8 %s) {
; CHECK-LABEL: vmerge_vxm:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI90_0)
-; CHECK-NEXT: addi a1, a1, %lo(.LCPI90_0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v10, (a1)
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
+; CHECK-NEXT: li a1, 25
+; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vi v9, v8, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%ins = insertelement <8 x i8> %v, i8 %s, i32 0
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…d element (llvm#135292) This can be done with a vrgather.vi/vx and (possibly) a register move. The alternative is to do a vrgather.vv with a full-width index vector. We'd already caught the two-operand form of this shuffle; this patch specifically handles the single-operand form. Unfortunately only in the abstract, it would be nice if we canonicalized shuffles in some way, wouldn't it?
This can be done with a vrgather.vi/vx and (possibly) a register move. The alternative is to do a vrgather.vv with a full-width index vector.
We'd already caught the two-operand form of this shuffle; this patch specifically handles the single-operand form.
Unfortunately only in the abstract, it would be nice if we canonicalized shuffles in some way, wouldn't it?