Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit ead04a9

Browse files
committed
[DAGCombiner] Call SimplifyDemandedVectorElts from EXTRACT_VECTOR_ELT
If we are only extracting vector elements via EXTRACT_VECTOR_ELT(s) we may be able to use SimplifyDemandedVectorElts to avoid unnecessary vector ops.

Differential Revision: https://reviews.llvm.org/D49262

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337258 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 15fa57a commit ead04a9

File tree

11 files changed

+366
-606
lines changed

11 files changed

+366
-606
lines changed

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ namespace {
242242
}
243243

244244
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
245-
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
245+
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
246+
bool AssumeSingleUse = false);
246247

247248
bool CombineToPreIndexedLoadStore(SDNode *N);
248249
bool CombineToPostIndexedLoadStore(SDNode *N);
@@ -1064,11 +1065,12 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
10641065
/// Check the specified vector node value to see if it can be simplified or
10651066
/// if things it uses can be simplified as it only uses some of the elements.
10661067
/// If so, return true.
1067-
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1068-
const APInt &Demanded) {
1068+
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1069+
bool AssumeSingleUse) {
10691070
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
10701071
APInt KnownUndef, KnownZero;
1071-
if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
1072+
if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1073+
0, AssumeSingleUse))
10721074
return false;
10731075

10741076
// Revisit the node.
@@ -15014,6 +15016,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
1501415016
}
1501515017
}
1501615018

15019+
// If only EXTRACT_VECTOR_ELT nodes use the source vector we can
15020+
// simplify it based on the (valid) extraction indices.
15021+
if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
15022+
return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15023+
Use->getOperand(0) == InVec &&
15024+
isa<ConstantSDNode>(Use->getOperand(1));
15025+
})) {
15026+
APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
15027+
for (SDNode *Use : InVec->uses()) {
15028+
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
15029+
if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
15030+
DemandedElts.setBit(CstElt->getZExtValue());
15031+
}
15032+
if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
15033+
return SDValue(N, 0);
15034+
}
15035+
1501715036
bool BCNumEltsChanged = false;
1501815037
EVT ExtVT = VT.getVectorElementType();
1501915038
EVT LVT = ExtVT;

lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3893,20 +3893,34 @@ static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
38933893
return nullptr;
38943894
}
38953895

3896-
// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
3896+
// Convert the mask of the given shuffle op into a byte-level mask,
38973897
// as if it had type vNi8.
3898-
static void getVPermMask(ShuffleVectorSDNode *VSN,
3898+
static bool getVPermMask(SDValue ShuffleOp,
38993899
SmallVectorImpl<int> &Bytes) {
3900-
EVT VT = VSN->getValueType(0);
3900+
EVT VT = ShuffleOp.getValueType();
39013901
unsigned NumElements = VT.getVectorNumElements();
39023902
unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3903-
Bytes.resize(NumElements * BytesPerElement, -1);
3904-
for (unsigned I = 0; I < NumElements; ++I) {
3905-
int Index = VSN->getMaskElt(I);
3906-
if (Index >= 0)
3903+
3904+
if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
3905+
Bytes.resize(NumElements * BytesPerElement, -1);
3906+
for (unsigned I = 0; I < NumElements; ++I) {
3907+
int Index = VSN->getMaskElt(I);
3908+
if (Index >= 0)
3909+
for (unsigned J = 0; J < BytesPerElement; ++J)
3910+
Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3911+
}
3912+
return true;
3913+
}
3914+
if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
3915+
isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
3916+
unsigned Index = ShuffleOp.getConstantOperandVal(1);
3917+
Bytes.resize(NumElements * BytesPerElement, -1);
3918+
for (unsigned I = 0; I < NumElements; ++I)
39073919
for (unsigned J = 0; J < BytesPerElement; ++J)
39083920
Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3921+
return true;
39093922
}
3923+
return false;
39103924
}
39113925

39123926
// Bytes is a VPERM-like permute vector, except that -1 is used for
@@ -4075,7 +4089,8 @@ bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
40754089
// See whether the bytes we need come from a contiguous part of one
40764090
// operand.
40774091
SmallVector<int, SystemZ::VectorBytes> OpBytes;
4078-
getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
4092+
if (!getVPermMask(Op, OpBytes))
4093+
break;
40794094
int NewByte;
40804095
if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
40814096
break;
@@ -5109,13 +5124,14 @@ SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
51095124
if (Opcode == ISD::BITCAST)
51105125
// Look through bitcasts.
51115126
Op = Op.getOperand(0);
5112-
else if (Opcode == ISD::VECTOR_SHUFFLE &&
5127+
else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
51135128
canTreatAsByteVector(Op.getValueType())) {
51145129
// Get a VPERM-like permute mask and see whether the bytes covered
51155130
// by the extracted element are a contiguous sequence from one
51165131
// source operand.
51175132
SmallVector<int, SystemZ::VectorBytes> Bytes;
5118-
getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
5133+
if (!getVPermMask(Op, Bytes))
5134+
break;
51195135
int First;
51205136
if (!getShuffleInput(Bytes, Index * BytesPerElement,
51215137
BytesPerElement, First))

test/CodeGen/AMDGPU/indirect-addressing-si.ll

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -480,38 +480,28 @@ bb7: ; preds = %bb4, %bb1
480480

481481
; GCN-LABEL: {{^}}multi_same_block:
482482

483-
; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000
484-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
485-
; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000
486-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
487-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
488-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
489-
; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
490-
; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16
491-
492-
; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
493-
; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
494-
; GCN-NOT: m0
495-
496-
; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
497-
; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
483+
; GCN: s_load_dword [[ARG:s[0-9]+]]
484+
485+
; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
486+
; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
487+
; MOVREL: s_waitcnt
488+
; MOVREL: s_add_i32 m0, [[ARG]], -16
489+
; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0
490+
; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0
491+
; MOVREL: s_mov_b32 m0, -1
492+
493+
494+
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
495+
; IDXMODE: s_waitcnt
496+
; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16
497+
; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
498+
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0
498499
; IDXMODE: s_set_gpr_idx_off
499-
500-
; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
501-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
502-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
503-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
504-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
505-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
506-
507-
; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
508-
509-
; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
510-
; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
500+
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
501+
; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
502+
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0
511503
; IDXMODE: s_set_gpr_idx_off
512504

513-
; PREGFX9: s_mov_b32 m0, -1
514-
; GFX9-NOT: s_mov_b32 m0
515505
; GCN: ds_write_b32
516506
; GCN: ds_write_b32
517507
; GCN: s_endpgm

test/CodeGen/ARM/func-argpassing-endian.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ define void @arg_double( double %val ) {
3838
define void @arg_v4i32(<4 x i32> %vec ) {
3939
; CHECK-LE-LABEL: arg_v4i32:
4040
; CHECK-LE: @ %bb.0:
41-
; CHECK-LE-NEXT: vmov d17, r2, r3
4241
; CHECK-LE-NEXT: vmov d16, r0, r1
4342
; CHECK-LE-NEXT: movw r0, :lower16:var32
4443
; CHECK-LE-NEXT: movt r0, :upper16:var32
@@ -47,7 +46,6 @@ define void @arg_v4i32(<4 x i32> %vec ) {
4746
;
4847
; CHECK-BE-LABEL: arg_v4i32:
4948
; CHECK-BE: @ %bb.0:
50-
; CHECK-BE-NEXT: vmov d17, r3, r2
5149
; CHECK-BE-NEXT: vmov d16, r1, r0
5250
; CHECK-BE-NEXT: movw r0, :lower16:var32
5351
; CHECK-BE-NEXT: movt r0, :upper16:var32

0 commit comments

Comments
 (0)