Skip to content

[DAG] replaceShuffleOfInsert - add support for shuffle_vector(scalar_to_vector(x),y) -> insert_vector_elt(y,x,c) #127210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ namespace {
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
Expand Down Expand Up @@ -26124,8 +26125,7 @@ static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
// First, check if we are taking one element of a vector and shuffling that
// element into another vector.
ArrayRef<int> Mask = Shuf->getMask();
Expand All @@ -26148,7 +26148,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
// Now see if we can access that element as a scalar via a real insert element
// instruction.
// TODO: We can try harder to locate the element as a scalar. Examples: it
// could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
// could be an operand of BUILD_VECTOR, or a constant.
assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
"Shuffle mask value must be from operand 0");

Expand All @@ -26171,6 +26171,16 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
Op1, Elt, NewInsIndex);
}

if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
return SDValue();

if (sd_match(Op0, m_UnaryOp(ISD::SCALAR_TO_VECTOR, m_Value(Elt))) &&
Mask[ShufOp0Index] == 0) {
SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
Op1, Elt, NewInsIndex);
}

return SDValue();
}

Expand Down Expand Up @@ -26242,7 +26252,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}

if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
if (SDValue InsElt = replaceShuffleOfInsert(SVN))
return InsElt;

// A shuffle of a single vector that is a splatted value can always be folded.
Expand Down
34 changes: 11 additions & 23 deletions llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-LE-P9-LABEL: test_none_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: li r3, 0
; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: xxperm v2, v2, vs0
; CHECK-LE-P9-NEXT: stxv v2, 0(r5)
; CHECK-LE-P9-NEXT: blr
;
Expand All @@ -263,14 +260,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: li r3, 0
; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, v2, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r5)
; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: xxperm v2, v2, vs0
; CHECK-BE-P9-NEXT: stxv v2, 0(r5)
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
Expand All @@ -286,13 +280,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: li r4, 0
; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxperm vs0, v2, vs1
; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxperm v2, v2, vs0
; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
Expand All @@ -308,13 +299,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-32-P9-NEXT: xxperm vs0, v2, vs1
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r4)
; CHECK-AIX-32-P9-NEXT: xxperm v2, v2, vs0
; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = extractelement <2 x i32> %vec, i64 0
Expand Down