Skip to content

Commit 169db80

Browse files
committed
[X86] Canonicalize fp zero vectors from bitcasted integer zero vectors
Generic code is supposed to handle this, but it can be blocked by hasOneUse checks. Noticed while investigating #26392.
1 parent 539e60c commit 169db80

11 files changed

+136
-579
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42930,6 +42930,12 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
4293042930
}
4293142931
}
4293242932

42933+
// Canonicalize fp zero vectors - these sometimes don't fold due to one use
42934+
// limits.
42935+
if (VT.isVector() && TLI.isTypeLegal(VT) && ISD::isBuildVectorAllZeros(N) &&
42936+
(VT.getScalarType() == MVT::f32 || VT.getScalarType() == MVT::f64))
42937+
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N0));
42938+
4293342939
// Try to remove a bitcast of constant vXi1 vector. We have to legalize
4293442940
// most of these to scalar anyway.
4293542941
if (Subtarget.hasAVX512() && VT.isScalarInteger() &&

llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,12 @@ entry:
4949
define void @zero_test() {
5050
; X86-LABEL: zero_test:
5151
; X86: # %bb.0: # %entry
52-
; X86-NEXT: xorps %xmm0, %xmm0
53-
; X86-NEXT: movlps %xmm0, (%eax)
52+
; X86-NEXT: movl $0, (%eax)
5453
; X86-NEXT: retl
5554
;
5655
; X64-LABEL: zero_test:
5756
; X64: # %bb.0: # %entry
58-
; X64-NEXT: xorps %xmm0, %xmm0
59-
; X64-NEXT: movlps %xmm0, (%rax)
57+
; X64-NEXT: movq $0, (%rax)
6058
; X64-NEXT: retq
6159
entry:
6260
%0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer

llvm/test/CodeGen/X86/2012-07-10-extload64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ define void @store_64(ptr %ptr) {
2929
; X86-LABEL: store_64:
3030
; X86: # %bb.0: # %BB
3131
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
32-
; X86-NEXT: xorps %xmm0, %xmm0
33-
; X86-NEXT: movlps %xmm0, (%eax)
32+
; X86-NEXT: movl $0, 4(%eax)
33+
; X86-NEXT: movl $0, (%eax)
3434
; X86-NEXT: retl
3535
;
3636
; X64-LABEL: store_64:

llvm/test/CodeGen/X86/fold-load-vec.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ define void @sample_test(ptr %source, ptr %dest) nounwind {
1010
; CHECK-NEXT: subq $24, %rsp
1111
; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
1212
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
13+
; CHECK-NEXT: movq $0, (%rsp)
1314
; CHECK-NEXT: xorps %xmm0, %xmm0
14-
; CHECK-NEXT: movlps %xmm0, (%rsp)
1515
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1616
; CHECK-NEXT: movlps %xmm0, (%rsp)
1717
; CHECK-NEXT: movlps %xmm0, (%rsi)

llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
5151
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
5252
; X32-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
5353
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
54-
; X32-NEXT: xorps %xmm0, %xmm0
55-
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
56-
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
57-
; X32-NEXT: mulps %xmm0, %xmm0
58-
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
5954
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
6055
; X32-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
6156
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -64,8 +59,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
6459
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
6560
; X32-NEXT: cmpunordps %xmm0, %xmm0
6661
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
62+
; X32-NEXT: xorps %xmm0, %xmm0
63+
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
6764
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
68-
; X32-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
65+
; X32-NEXT: minps %xmm0, %xmm0
6966
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
7067
; X32-NEXT: xorps %xmm0, %xmm0
7168
; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
@@ -135,11 +132,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
135132
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
136133
; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
137134
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
138-
; X64-NEXT: xorps %xmm0, %xmm0
139-
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
140-
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
141-
; X64-NEXT: mulps %xmm0, %xmm0
142-
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
143135
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
144136
; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
145137
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
@@ -148,8 +140,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
148140
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
149141
; X64-NEXT: cmpunordps %xmm0, %xmm0
150142
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
143+
; X64-NEXT: xorps %xmm0, %xmm0
144+
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
151145
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
152-
; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
146+
; X64-NEXT: minps %xmm0, %xmm0
153147
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
154148
; X64-NEXT: xorl %ebx, %ebx
155149
; X64-NEXT: xorps %xmm3, %xmm3

llvm/test/CodeGen/X86/half.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,12 +1082,11 @@ define void @main.158() #0 {
10821082
; BWON-F16C-LABEL: main.158:
10831083
; BWON-F16C: # %bb.0: # %entry
10841084
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
1085-
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1086-
; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1087-
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1088-
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1089-
; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
1090-
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
1085+
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1
1086+
; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1087+
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
1088+
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1089+
; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2
10911090
; BWON-F16C-NEXT: jae .LBB20_2
10921091
; BWON-F16C-NEXT: # %bb.1: # %entry
10931092
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -1100,8 +1099,7 @@ define void @main.158() #0 {
11001099
; CHECK-I686-LABEL: main.158:
11011100
; CHECK-I686: # %bb.0: # %entry
11021101
; CHECK-I686-NEXT: subl $12, %esp
1103-
; CHECK-I686-NEXT: pxor %xmm0, %xmm0
1104-
; CHECK-I686-NEXT: movd %xmm0, (%esp)
1102+
; CHECK-I686-NEXT: movl $0, (%esp)
11051103
; CHECK-I686-NEXT: calll __truncsfhf2
11061104
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
11071105
; CHECK-I686-NEXT: movw %ax, (%esp)

0 commit comments

Comments
 (0)