Skip to content

Commit 813459e

Browse files
committed
[X86] combineSelect - fold 'smin' style pattern select(pcmpgt(RHS, LHS), LHS, RHS) -> select(pcmpgt(LHS, RHS), RHS, LHS) if pcmpgt(LHS, RHS) already exists
Avoids emitting repeated, commuted comparisons when we're matching min/max and clamp patterns.
1 parent d4b4747 commit 813459e

File tree

4 files changed

+129
-136
lines changed

4 files changed

+129
-136
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44857,12 +44857,29 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4485744857
if (SDValue CondNot = IsNOT(Cond, DAG))
4485844858
return DAG.getNode(N->getOpcode(), DL, VT,
4485944859
DAG.getBitcast(CondVT, CondNot), RHS, LHS);
44860-
// pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the signbit.
44861-
if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() &&
44862-
ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
44863-
Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
44864-
DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
44865-
return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
44860+
44861+
if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse()) {
44862+
// pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the
44863+
// signbit.
44864+
if (ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
44865+
Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
44866+
DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
44867+
return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
44868+
}
44869+
44870+
// smin(LHS, RHS) : select(pcmpgt(RHS, LHS), LHS, RHS)
44871+
// -> select(pcmpgt(LHS, RHS), RHS, LHS)
44872+
// iff the commuted pcmpgt() already exists.
44873+
// TODO: Could DAGCombiner::combine cse search for SETCC nodes, like it
44874+
// does for commutative binops?
44875+
if (Cond.getOperand(0) == RHS && Cond.getOperand(1) == LHS) {
44876+
if (SDNode *FlipCond =
44877+
DAG.getNodeIfExists(X86ISD::PCMPGT, DAG.getVTList(CondVT),
44878+
{Cond.getOperand(1), Cond.getOperand(0)})) {
44879+
return DAG.getNode(N->getOpcode(), DL, VT, SDValue(FlipCond, 0), RHS,
44880+
LHS);
44881+
}
44882+
}
4486644883
}
4486744884
}
4486844885

llvm/test/CodeGen/X86/midpoint-int-vec-128.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -956,8 +956,7 @@ define <2 x i64> @vec128_i64_signed_reg_reg(<2 x i64> %a1, <2 x i64> %a2) nounwi
956956
; AVX1-FALLBACK: # %bb.0:
957957
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
958958
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
959-
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
960-
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
959+
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
961960
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
962961
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
963962
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
@@ -976,8 +975,7 @@ define <2 x i64> @vec128_i64_signed_reg_reg(<2 x i64> %a1, <2 x i64> %a2) nounwi
976975
; AVX2-FALLBACK: # %bb.0:
977976
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
978977
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
979-
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
980-
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
978+
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
981979
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
982980
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
983981
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
@@ -1401,8 +1399,7 @@ define <2 x i64> @vec128_i64_signed_mem_reg(ptr %a1_addr, <2 x i64> %a2) nounwin
14011399
; AVX1-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
14021400
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
14031401
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1404-
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
1405-
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
1402+
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm4
14061403
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
14071404
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm0, %xmm0
14081405
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm0, %xmm2
@@ -1422,8 +1419,7 @@ define <2 x i64> @vec128_i64_signed_mem_reg(ptr %a1_addr, <2 x i64> %a2) nounwin
14221419
; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
14231420
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
14241421
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1425-
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
1426-
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
1422+
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm4
14271423
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
14281424
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm0, %xmm0
14291425
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm0, %xmm2
@@ -1624,8 +1620,7 @@ define <2 x i64> @vec128_i64_signed_reg_mem(<2 x i64> %a1, ptr %a2_addr) nounwin
16241620
; AVX1-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
16251621
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
16261622
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1627-
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
1628-
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
1623+
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
16291624
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
16301625
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
16311626
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
@@ -1645,8 +1640,7 @@ define <2 x i64> @vec128_i64_signed_reg_mem(<2 x i64> %a1, ptr %a2_addr) nounwin
16451640
; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
16461641
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
16471642
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1648-
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
1649-
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
1643+
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
16501644
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
16511645
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
16521646
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
@@ -1850,8 +1844,7 @@ define <2 x i64> @vec128_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
18501844
; AVX1-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1
18511845
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
18521846
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1853-
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
1854-
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
1847+
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
18551848
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
18561849
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
18571850
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
@@ -1872,8 +1865,7 @@ define <2 x i64> @vec128_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
18721865
; AVX2-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1
18731866
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
18741867
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1875-
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
1876-
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
1868+
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
18771869
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
18781870
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
18791871
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2

0 commit comments

Comments
 (0)