Skip to content

Commit bae4112

Browse files
authored
[DAG] replaceShuffleOfInsert - add support for shuffle_vector(scalar_to_vector(x),y) -> insert_vector_elt(y,x,c) (#127210)
Begin extending replaceShuffleOfInsert to handle other forms of scalar insertion into a vector. For now, this is limited to targets that have Custom or Legal ISD::INSERT_VECTOR_ELT handling for the vector type, although we can probably always perform this fold before LegalOperations.
1 parent e56a6a2 commit bae4112

File tree

2 files changed

+25
-27
lines changed

2 files changed

+25
-27
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -626,6 +626,7 @@ namespace {
626626
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
627627
SDValue combineRepeatedFPDivisors(SDNode *N);
628628
SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
629+
SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
629630
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
630631
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
631632
SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
@@ -26107,8 +26108,7 @@ static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
2610726108
/// If a shuffle inserts exactly one element from a source vector operand into
2610826109
/// another vector operand and we can access the specified element as a scalar,
2610926110
/// then we can eliminate the shuffle.
26110-
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
26111-
SelectionDAG &DAG) {
26111+
SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
2611226112
// First, check if we are taking one element of a vector and shuffling that
2611326113
// element into another vector.
2611426114
ArrayRef<int> Mask = Shuf->getMask();
@@ -26131,7 +26131,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
2613126131
// Now see if we can access that element as a scalar via a real insert element
2613226132
// instruction.
2613326133
// TODO: We can try harder to locate the element as a scalar. Examples: it
26134-
// could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
26134+
// could be an operand of BUILD_VECTOR, or a constant.
2613526135
assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
2613626136
"Shuffle mask value must be from operand 0");
2613726137

@@ -26154,6 +26154,16 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
2615426154
Op1, Elt, NewInsIndex);
2615526155
}
2615626156

26157+
if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
26158+
return SDValue();
26159+
26160+
if (sd_match(Op0, m_UnaryOp(ISD::SCALAR_TO_VECTOR, m_Value(Elt))) &&
26161+
Mask[ShufOp0Index] == 0) {
26162+
SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
26163+
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
26164+
Op1, Elt, NewInsIndex);
26165+
}
26166+
2615726167
return SDValue();
2615826168
}
2615926169

@@ -26225,7 +26235,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
2622526235
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
2622626236
}
2622726237

26228-
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
26238+
if (SDValue InsElt = replaceShuffleOfInsert(SVN))
2622926239
return InsElt;
2623026240

2623126241
// A shuffle of a single vector that is a splatted value can always be folded.

llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll

Lines changed: 11 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -239,13 +239,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
239239
;
240240
; CHECK-LE-P9-LABEL: test_none_v4i32:
241241
; CHECK-LE-P9: # %bb.0: # %entry
242-
; CHECK-LE-P9-NEXT: li r3, 0
243-
; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2
244-
; CHECK-LE-P9-NEXT: mtfprwz f0, r3
245242
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
246243
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
247-
; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
248-
; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1
244+
; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
245+
; CHECK-LE-P9-NEXT: xxperm v2, v2, vs0
249246
; CHECK-LE-P9-NEXT: stxv v2, 0(r5)
250247
; CHECK-LE-P9-NEXT: blr
251248
;
@@ -263,14 +260,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
263260
;
264261
; CHECK-BE-P9-LABEL: test_none_v4i32:
265262
; CHECK-BE-P9: # %bb.0: # %entry
266-
; CHECK-BE-P9-NEXT: li r3, 0
267-
; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2
268-
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
269263
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
270264
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
271-
; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
272-
; CHECK-BE-P9-NEXT: xxperm vs0, v2, vs1
273-
; CHECK-BE-P9-NEXT: stxv vs0, 0(r5)
265+
; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
266+
; CHECK-BE-P9-NEXT: xxperm v2, v2, vs0
267+
; CHECK-BE-P9-NEXT: stxv v2, 0(r5)
274268
; CHECK-BE-P9-NEXT: blr
275269
;
276270
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
@@ -286,13 +280,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
286280
;
287281
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
288282
; CHECK-AIX-64-P9: # %bb.0: # %entry
289-
; CHECK-AIX-64-P9-NEXT: li r4, 0
290-
; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
291-
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
292283
; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0
293-
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
294-
; CHECK-AIX-64-P9-NEXT: xxperm vs0, v2, vs1
295-
; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
284+
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
285+
; CHECK-AIX-64-P9-NEXT: xxperm v2, v2, vs0
286+
; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
296287
; CHECK-AIX-64-P9-NEXT: blr
297288
;
298289
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
@@ -308,13 +299,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
308299
;
309300
; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
310301
; CHECK-AIX-32-P9: # %bb.0: # %entry
311-
; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
312-
; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
313-
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
314302
; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0
315-
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4)
316-
; CHECK-AIX-32-P9-NEXT: xxperm vs0, v2, vs1
317-
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
303+
; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r4)
304+
; CHECK-AIX-32-P9-NEXT: xxperm v2, v2, vs0
305+
; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
318306
; CHECK-AIX-32-P9-NEXT: blr
319307
entry:
320308
%0 = extractelement <2 x i32> %vec, i64 0

0 commit comments

Comments
 (0)