Skip to content

Commit 098d2d6

Browse files
RKSimon authored and Jaddyen committed
[X86] SimplifyDemandedVectorEltsForTargetNode - replace packed fcmp node with scalar fcmp node if only element0 is demanded (llvm#140563)
This unnecessary vectorisation can appear because fplogic opcodes are only available for 128-bit types, which can prevent folds that only work on the scalar source types and can also lead to an fcmp of garbage data in the upper elements. Fixes llvm#140534
1 parent 27f5cf0 commit 098d2d6

File tree

9 files changed

+99
-118
lines changed

9 files changed

+99
-118
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43450,6 +43450,28 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4345043450
KnownZero = LHSZero;
4345143451
break;
4345243452
}
43453+
case X86ISD::CMPM:
43454+
case X86ISD::CMPP: {
43455+
// Scalarize packed fp comparison if we only require element 0.
43456+
if (DemandedElts == 1) {
43457+
SDLoc dl(Op);
43458+
MVT VT = Op.getSimpleValueType();
43459+
MVT OpSVT = Op.getOperand(0).getSimpleValueType().getScalarType();
43460+
SDValue LHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(0), 0);
43461+
SDValue RHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(1), 0);
43462+
SDValue CC = Op.getOperand(2);
43463+
if (Opc == X86ISD::CMPM) {
43464+
SDValue Cmp =
43465+
TLO.DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, CC);
43466+
return TLO.CombineTo(
43467+
Op, TLO.DAG.getInsertSubvector(dl, TLO.DAG.getUNDEF(VT), Cmp, 0));
43468+
}
43469+
SDValue Cmp = TLO.DAG.getNode(X86ISD::FSETCC, dl, OpSVT, LHS, RHS, CC);
43470+
return TLO.CombineTo(Op,
43471+
TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Cmp));
43472+
}
43473+
break;
43474+
}
4345343475
case X86ISD::PCMPEQ:
4345443476
case X86ISD::PCMPGT: {
4345543477
APInt LHSUndef, LHSZero;

llvm/test/CodeGen/X86/and-or-setcc.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@ define i1 @and_ord(float %a, float %b) {
1717
; X64-LABEL: and_ord:
1818
; X64: # %bb.0:
1919
; X64-NEXT: xorps %xmm2, %xmm2
20-
; X64-NEXT: cmpordps %xmm2, %xmm1
21-
; X64-NEXT: cmpordps %xmm2, %xmm0
20+
; X64-NEXT: cmpordss %xmm2, %xmm1
21+
; X64-NEXT: cmpordss %xmm2, %xmm0
2222
; X64-NEXT: andps %xmm1, %xmm0
2323
; X64-NEXT: movd %xmm0, %eax
2424
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -44,8 +44,8 @@ define i1 @or_uno(float %a, float %b) {
4444
; X64-LABEL: or_uno:
4545
; X64: # %bb.0:
4646
; X64-NEXT: xorps %xmm2, %xmm2
47-
; X64-NEXT: cmpunordps %xmm2, %xmm1
48-
; X64-NEXT: cmpunordps %xmm2, %xmm0
47+
; X64-NEXT: cmpunordss %xmm2, %xmm1
48+
; X64-NEXT: cmpunordss %xmm2, %xmm0
4949
; X64-NEXT: orps %xmm1, %xmm0
5050
; X64-NEXT: movd %xmm0, %eax
5151
; X64-NEXT: # kill: def $al killed $al killed $eax

llvm/test/CodeGen/X86/extract-vselect-setcc.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,9 @@ define void @PR117684(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 {
55
; CHECK-LABEL: PR117684:
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
8-
; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1
8+
; CHECK-NEXT: vcmpnltss %xmm1, %xmm0, %k1
99
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
1010
; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z}
11-
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1211
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
1312
; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1413
; CHECK-NEXT: vbroadcastss %xmm2, %ymm2

llvm/test/CodeGen/X86/extractelement-fp.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ define void @extsetcc(<4 x float> %x) {
319319
define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
320320
; X64-LABEL: extvselectsetcc_crash:
321321
; X64: # %bb.0:
322-
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
322+
; X64-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
323323
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
324324
; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
325325
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -328,7 +328,7 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
328328
;
329329
; X86-LABEL: extvselectsetcc_crash:
330330
; X86: # %bb.0:
331-
; X86-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
331+
; X86-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
332332
; X86-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
333333
; X86-NEXT: vandpd %xmm2, %xmm1, %xmm1
334334
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0

0 commit comments

Comments
 (0)