Skip to content

Commit afa8211

Browse files
committed
[X86] Improve lowering of (v2i64 (setgt X, -1)) on pre-SSE4.2 targets. Enable v2i64 in foldVectorXorShiftIntoCmp.
Similar to D72302, but for the canonical form of the opposite case. I've changed foldVectorXorShiftIntoCmp to form a target-independent setcc node instead of PCMPGT, and enabled it for v2i64 on pre-SSE4.2 targets. The setcc should eventually get lowered to PCMPGT or the new v2i64 sequence. Differential Revision: https://reviews.llvm.org/D72318
1 parent b937669 commit afa8211

File tree

2 files changed

+21
-13
lines changed

2 files changed

+21
-13
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21597,6 +21597,17 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
2159721597
return DAG.getBitcast(VT, Result);
2159821598
}
2159921599

21600+
if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) {
21601+
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
21602+
Op1 = DAG.getConstant(-1, dl, MVT::v4i32);
21603+
21604+
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
21605+
static const int MaskHi[] = { 1, 1, 3, 3 };
21606+
SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
21607+
21608+
return DAG.getBitcast(VT, Result);
21609+
}
21610+
2160021611
// Since SSE has no unsigned integer comparisons, we need to flip the sign
2160121612
// bits of the inputs before performing those operations. The lower
2160221613
// compare is always unsigned.
@@ -40814,8 +40825,8 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
4081440825
default: return SDValue();
4081540826
case MVT::v16i8:
4081640827
case MVT::v8i16:
40817-
case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break;
40818-
case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break;
40828+
case MVT::v4i32:
40829+
case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break;
4081940830
case MVT::v32i8:
4082040831
case MVT::v16i16:
4082140832
case MVT::v8i32:
@@ -40839,7 +40850,7 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
4083940850

4084040851
// Create a greater-than comparison against -1. We don't use the more obvious
4084140852
// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.
40842-
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
40853+
return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT);
4084340854
}
4084440855

4084540856
/// Detect patterns of truncation with unsigned saturation:

llvm/test/CodeGen/X86/vector-pcmp.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
6161
define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
6262
; SSE2-LABEL: test_pcmpgtq:
6363
; SSE2: # %bb.0:
64-
; SSE2-NEXT: psrad $31, %xmm0
65-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
66-
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
67-
; SSE2-NEXT: pxor %xmm1, %xmm0
64+
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
65+
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
66+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
6867
; SSE2-NEXT: retq
6968
;
7069
; SSE42-LABEL: test_pcmpgtq:
@@ -187,13 +186,11 @@ define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
187186
define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
188187
; SSE2-LABEL: test_pcmpgtq_256:
189188
; SSE2: # %bb.0:
190-
; SSE2-NEXT: psrad $31, %xmm1
191-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
192-
; SSE2-NEXT: psrad $31, %xmm0
193-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
194189
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
195-
; SSE2-NEXT: pxor %xmm2, %xmm0
196-
; SSE2-NEXT: pxor %xmm2, %xmm1
190+
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
191+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
192+
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
193+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
197194
; SSE2-NEXT: retq
198195
;
199196
; SSE42-LABEL: test_pcmpgtq_256:

0 commit comments

Comments
 (0)