Skip to content

Commit 4930f69

Browse files
committed
[X86] canonicalizeShuffleWithOp - add initial shuffle(sint_to_fp(x)) -> sint_to_fp(shuffle(x)) handling
Another tentative step towards folding shuffles through conversion ops - initially limited to cases where the source and destination scalar types have the same bit width.
1 parent 9a34a4f commit 4930f69

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41404,6 +41404,7 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4140441404
N->isOnlyUserOf(N.getOperand(0).getNode())) {
4140541405
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
4140641406
unsigned SrcOpcode = N0.getOpcode();
41407+
EVT OpVT = N0.getValueType();
4140741408
if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
4140841409
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
4140941410
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
@@ -41421,13 +41422,23 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4142141422
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00);
4142241423
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01);
4142341424
}
41424-
EVT OpVT = N0.getValueType();
4142541425
return DAG.getBitcast(ShuffleVT,
4142641426
DAG.getNode(SrcOpcode, DL, OpVT,
4142741427
DAG.getBitcast(OpVT, LHS),
4142841428
DAG.getBitcast(OpVT, RHS)));
4142941429
}
4143041430
}
41431+
if (SrcOpcode == ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
41432+
OpVT.getScalarSizeInBits() ==
41433+
N0.getOperand(0).getScalarValueSizeInBits()) {
41434+
SDValue Op00 = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
41435+
SDValue Res =
41436+
N.getNumOperands() == 2
41437+
? DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1))
41438+
: DAG.getNode(Opc, DL, ShuffleVT, Op00);
41439+
Res = DAG.getBitcast(N0.getOperand(0).getValueType(), Res);
41440+
return DAG.getBitcast(ShuffleVT, DAG.getNode(SrcOpcode, DL, OpVT, Res));
41441+
}
4143141442
}
4143241443
break;
4143341444
}

llvm/test/CodeGen/X86/pr50609.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ define void @PR50609(ptr noalias nocapture %RET, ptr noalias %aFOO, <16 x i32> %
1111
; CHECK-NEXT: vpsrad $31, %xmm2, %xmm3
1212
; CHECK-NEXT: vpsrld $30, %xmm3, %xmm3
1313
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
14+
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1415
; CHECK-NEXT: vpsrad $2, %xmm2, %xmm2
1516
; CHECK-NEXT: vcvtdq2ps %xmm2, %xmm2
16-
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
1717
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
1818
; CHECK-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
1919
; CHECK-NEXT: vmaskmovps %ymm2, %ymm1, 32(%rdi)

0 commit comments

Comments
 (0)