Commit 2567fea

[X86] Add fabs/fneg rmw style test coverage for #117557
Missed opportunity to avoid use of the FPU for store(fabs(load())) style patterns
1 parent 7790834 commit 2567fea
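The missed optimization: when the only use of a scalar fabs/fneg is a store (and the only use of its operand is a load), the sign-bit twiddling can be done entirely on the integer unit, avoiding the XMM (or x87) round trip the new tests currently show. A minimal C sketch of the integer-only equivalent for the f64 read-modify-write case (the helper name is illustrative, not part of the commit):

#include <stdint.h>
#include <string.h>

/* store(fabs(load(p))) on a double: clear bit 63 (the sign bit) with a
   single 64-bit integer AND, instead of movsd + andps + movlps. */
static void fabs_f64_rmw(double *p) {
    uint64_t bits;
    memcpy(&bits, p, sizeof bits);   /* reinterpret the double as raw bits */
    bits &= 0x7FFFFFFFFFFFFFFFULL;   /* mask off the sign bit */
    memcpy(p, &bits, sizeof bits);   /* store back; no FP unit involved */
}

On x86-64 this can lower to an integer load/and/store (or a memory-operand btrq $63), with no constant-pool mask load.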

File tree

2 files changed: +144 -0 lines changed

llvm/test/CodeGen/X86/combine-fabs.ll
llvm/test/CodeGen/X86/combine-fneg.ll

llvm/test/CodeGen/X86/combine-fabs.ll

Lines changed: 63 additions & 0 deletions
@@ -135,6 +135,69 @@ define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %2
 }
 
+; TODO: store(fabs(load())) - convert scalar to integer
+define void @combine_fabs_int_rmw_f64(ptr %ptr) {
+; SSE-LABEL: combine_fabs_int_rmw_f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    movlps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_fabs_int_rmw_f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmovlps %xmm0, (%rdi)
+; AVX-NEXT:    retq
+  %1 = load double, ptr %ptr
+  %2 = call double @llvm.fabs.f64(double %1)
+  store double %2, ptr %ptr
+  ret void
+}
+
+define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
+; SSE-LABEL: combine_fabs_int_f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    movss %xmm0, (%rsi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_fabs_int_f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovss %xmm0, (%rsi)
+; AVX-NEXT:    retq
+  %1 = load float, ptr %src
+  %2 = call float @llvm.fabs.f32(float %1)
+  store float %2, ptr %dst
+  ret void
+}
+
+; don't convert vector to scalar
+define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
+; SSE-LABEL: combine_fabs_vec_int_v4f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps (%rdi), %xmm0
+; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    movaps %xmm0, (%rsi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_fabs_vec_int_v4f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; AVX-NEXT:    vandps (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovaps %xmm0, (%rsi)
+; AVX-NEXT:    retq
+  %1 = load <4 x float>, ptr %src
+  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
+  store <4 x float> %2, ptr %dst
+  ret void
+}
 declare float @llvm.fabs.f32(float %p)
 declare float @llvm.copysign.f32(float %Mag, float %Sgn)
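The same trick applies to the f32 copy variant above: a 32-bit integer AND against 0x7FFFFFFF between a plain integer load and store. The vector test documents the boundary condition: for v4f32 the SIMD andps is already the right answer, so the combine should be limited to scalars. A hedged sketch of the scalar case (the helper name is illustrative):

#include <stdint.h>
#include <string.h>

/* store(fabs(load(src)), dst) on a float: one 32-bit integer AND. */
static void fabs_f32_copy(const float *src, float *dst) {
    uint32_t bits;
    memcpy(&bits, src, sizeof bits);
    bits &= 0x7FFFFFFFu;             /* clear bit 31, the sign bit */
    memcpy(dst, &bits, sizeof bits);
}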

llvm/test/CodeGen/X86/combine-fneg.ll

Lines changed: 81 additions & 0 deletions
@@ -205,4 +205,85 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
   ret <4 x float> %tmp
 }
 
+; TODO: store(fneg(load())) - convert scalar to integer
+define void @fneg_int_rmw_f32(ptr %ptr) {
+; X86-SSE-LABEL: fneg_int_rmw_f32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%eax)
+; X86-SSE-NEXT:    retl
+;
+; X64-SSE-LABEL: fneg_int_rmw_f32:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE-NEXT:    movss %xmm0, (%rdi)
+; X64-SSE-NEXT:    retq
+  %1 = load float, ptr %ptr
+  %2 = fneg float %1
+  store float %2, ptr %ptr
+  ret void
+}
+
+define void @fneg_int_f64(ptr %src, ptr %dst) {
+; X86-SSE1-LABEL: fneg_int_f64:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT:    fldl (%ecx)
+; X86-SSE1-NEXT:    fchs
+; X86-SSE1-NEXT:    fstpl (%eax)
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: fneg_int_f64:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE2-NEXT:    retl
+;
+; X64-SSE1-LABEL: fneg_int_f64:
+; X64-SSE1:       # %bb.0:
+; X64-SSE1-NEXT:    fldl (%rdi)
+; X64-SSE1-NEXT:    fchs
+; X64-SSE1-NEXT:    fstpl (%rsi)
+; X64-SSE1-NEXT:    retq
+;
+; X64-SSE2-LABEL: fneg_int_f64:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE2-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT:    movlps %xmm0, (%rsi)
+; X64-SSE2-NEXT:    retq
+  %1 = load double, ptr %src
+  %2 = fneg double %1
+  store double %2, ptr %dst
+  ret void
+}
 
+; don't convert vector to scalar
+define void @fneg_int_v4f32(ptr %src, ptr %dst) {
+; X86-SSE-LABEL: fneg_int_v4f32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movaps (%ecx), %xmm0
+; X86-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT:    movaps %xmm0, (%eax)
+; X86-SSE-NEXT:    retl
+;
+; X64-SSE-LABEL: fneg_int_v4f32:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE-NEXT:    movaps %xmm0, (%rsi)
+; X64-SSE-NEXT:    retq
+  %1 = load <4 x float>, ptr %src
+  %2 = fneg <4 x float> %1
+  store <4 x float> %2, ptr %dst
+  ret void
+}
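fneg is the same pattern with XOR instead of AND: flipping the sign bit negates the value without touching the FP units. A sketch of the f32 read-modify-write case that @fneg_int_rmw_f32 exercises (the helper name is illustrative, not part of the commit):

#include <stdint.h>
#include <string.h>

/* store(fneg(load(p))) on a float: toggle bit 31 with a 32-bit integer XOR,
   instead of movss + xorps + movss. */
static void fneg_f32_rmw(float *p) {
    uint32_t bits;
    memcpy(&bits, p, sizeof bits);
    bits ^= 0x80000000u;             /* flip the sign bit */
    memcpy(p, &bits, sizeof bits);
}

Note the X86-SSE1 f64 output above, which currently round-trips through the x87 stack (fldl/fchs/fstpl); an integer lowering would only need to XOR the sign bit in the high 32-bit half.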
