Skip to content

Commit 0c7e044

Browse files
committed
[ARM] One-off identity shuffle
A One-Off Identity mask is a shuffle that is mostly an identity mask from a single source but contains a single element out-of-place, either from a different vector or from another position in the same vector. As opposed to lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert pair directly. Under ARM with individually accessible lane elements this often becomes a simple lane move. This also alters the LowerVECTOR_SHUFFLEUsingMovs code to use v4f32 (not v4i32), a more natural type for lane moves. Differential Revision: https://reviews.llvm.org/D95551
1 parent 0eda454 commit 0c7e044

File tree

6 files changed

+1358
-1516
lines changed

6 files changed

+1358
-1516
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8194,8 +8194,8 @@ static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
81948194
Input = Op->getOperand(1);
81958195
Elt -= 4;
81968196
}
8197-
SDValue BitCast = DAG.getBitcast(MVT::v4i32, Input);
8198-
Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, BitCast,
8197+
SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8198+
Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
81998199
DAG.getConstant(Elt, dl, MVT::i32));
82008200
}
82018201
}
@@ -8214,19 +8214,70 @@ static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
82148214
Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
82158215
SDValue NewShuffle = DAG.getVectorShuffle(
82168216
VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8217-
SDValue BitCast = DAG.getBitcast(MVT::v4i32, NewShuffle);
8217+
SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
82188218

82198219
for (int Part = 0; Part < 4; ++Part)
82208220
if (!Parts[Part])
8221-
Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
8221+
Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
82228222
BitCast, DAG.getConstant(Part, dl, MVT::i32));
82238223
}
82248224
// Build a vector out of the various parts and bitcast it back to the original
82258225
// type.
8226-
SDValue NewVec = DAG.getBuildVector(MVT::v4i32, dl, Parts);
8226+
SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
82278227
return DAG.getBitcast(VT, NewVec);
82288228
}
82298229

8230+
// Try to lower a vector shuffle whose mask is the identity of one operand in
// every defined lane but one into a single EXTRACT_VECTOR_ELT /
// INSERT_VECTOR_ELT pair.
//
// Op          - the shuffle node; operands 0 and 1 are the two input vectors.
// ShuffleMask - the shuffle mask; -1 entries are skipped when matching.
// DAG         - the SelectionDAG used to create the new nodes.
//
// Returns the INSERT_VECTOR_ELT node on success, or an empty SDValue() when the
// mask is not a one-off identity of either operand.
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8231+
ArrayRef<int> ShuffleMask,
8232+
SelectionDAG &DAG) {
8233+
SDValue V1 = Op.getOperand(0);
8234+
SDValue V2 = Op.getOperand(1);
8235+
EVT VT = Op.getValueType();
8236+
unsigned NumElts = VT.getVectorNumElements();
8237+
8238+
// A One-Off Identity mask is one that is mostly an identity mask from a
8239+
// single source but contains a single element out-of-place, either from a
8240+
// different vector or from another position in the same vector. As opposed to
8241+
// lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8242+
// pair directly.
8243+
// The lambda compares each non -1 mask entry against i + BaseOffset. It
// returns true only when exactly one entry differs and more than two entries
// are non -1; OffElement then holds the mask index of the differing entry.
// Note: the VT parameter is not referenced inside the lambda body.
auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8244+
int &OffElement) {
8245+
// -1 means "no out-of-place element seen yet".
OffElement = -1;
8246+
int NonUndef = 0;
8247+
for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8248+
if (Mask[i] == -1)
8249+
continue;
8250+
NonUndef++;
8251+
if (Mask[i] != i + BaseOffset) {
8252+
// Remember the first mismatch; a second mismatch rejects the mask.
if (OffElement == -1)
8253+
OffElement = i;
8254+
else
8255+
return false;
8256+
}
8257+
}
8258+
// Require more than two defined entries and exactly one mismatch.
return NonUndef > 2 && OffElement != -1;
8259+
};
8260+
int OffElement;
8261+
SDValue VInput;
8262+
// BaseOffset 0 matches an identity of V1; BaseOffset NumElts matches an
// identity of V2. Each call resets OffElement before scanning.
if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8263+
VInput = V1;
8264+
else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8265+
VInput = V2;
8266+
else
8267+
return SDValue();
8268+
8269+
SDLoc dl(Op);
8270+
// i8 and i16 elements are extracted as i32; any other element type is
// extracted as itself.
// NOTE(review): presumably because i8/i16 are not legal scalar types here --
// confirm.
EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8271+
? MVT::i32
8272+
: VT.getScalarType();
8273+
// Extract the out-of-place element: mask values below NumElts select from V1,
// the rest from V2, and the lane index is the mask value modulo NumElts.
SDValue Elt = DAG.getNode(
8274+
ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8275+
ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8276+
DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8277+
// Insert the extracted element into the identity source at the mismatching
// lane.
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8278+
DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8279+
}
8280+
82308281
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
82318282
const ARMSubtarget *ST) {
82328283
SDValue V1 = Op.getOperand(0);
@@ -8360,6 +8411,10 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
83608411
}
83618412
}
83628413

8414+
if (ST->hasMVEIntegerOps() && EltSize <= 32)
8415+
if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8416+
return V;
8417+
83638418
// If the shuffle is not directly supported and it has 4 elements, use
83648419
// the PerfectShuffle-generated table to synthesize it from other shuffles.
83658420
unsigned NumElts = VT.getVectorNumElements();

llvm/test/CodeGen/Thumb2/mve-float16regloops.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,15 +1481,11 @@ define void @arm_biquad_cascade_df2T_f16(%struct.arm_biquad_cascade_df2T_instanc
14811481
; CHECK-NEXT: vmovx.f16 s6, s12
14821482
; CHECK-NEXT: vfma.f16 q3, q6, r4
14831483
; CHECK-NEXT: vstr.16 s6, [r5, #2]
1484-
; CHECK-NEXT: vmov.f32 s12, s13
14851484
; CHECK-NEXT: vmovx.f16 s6, s13
1486-
; CHECK-NEXT: vmov q7, q3
1485+
; CHECK-NEXT: vmov.f32 s12, s13
14871486
; CHECK-NEXT: vins.f16 s12, s6
1488-
; CHECK-NEXT: vmov.16 q7[2], r7
14891487
; CHECK-NEXT: adds r5, #4
1490-
; CHECK-NEXT: vmov.f32 s13, s29
1491-
; CHECK-NEXT: vmov.f32 s14, s30
1492-
; CHECK-NEXT: vmov.f32 s15, s31
1488+
; CHECK-NEXT: vmov.16 q3[2], r7
14931489
; CHECK-NEXT: vmov q7, q3
14941490
; CHECK-NEXT: le lr, .LBB17_5
14951491
; CHECK-NEXT: .LBB17_6: @ %while.end

0 commit comments

Comments
 (0)