Skip to content

Commit 722a568

Browse files
committed
[X86] Add test coverage for f16/bf16 fabs/fneg load-store tests
Future extension to #118680
1 parent 0cda970 commit 722a568

File tree

2 files changed

+205
-0
lines changed

2 files changed

+205
-0
lines changed

llvm/test/CodeGen/X86/combine-fabs.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,70 @@ define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
172172
ret void
173173
}
174174

175+
; bfloat read-modify-write - store(fabs(load())) through a single pointer.
; The recorded SSE and AVX output shifts the loaded 16 bits left by 16,
; masks with pand/vpand, then calls __truncsfbf2 before the 16-bit store.
define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
176+
; SSE-LABEL: combine_fabs_int_rmw_bfloat:
177+
; SSE: # %bb.0:
178+
; SSE-NEXT: pushq %rbx
179+
; SSE-NEXT: movq %rdi, %rbx
180+
; SSE-NEXT: movzwl (%rdi), %eax
181+
; SSE-NEXT: shll $16, %eax
182+
; SSE-NEXT: movd %eax, %xmm0
183+
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
184+
; SSE-NEXT: callq __truncsfbf2@PLT
185+
; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
186+
; SSE-NEXT: popq %rbx
187+
; SSE-NEXT: retq
188+
;
189+
; AVX-LABEL: combine_fabs_int_rmw_bfloat:
190+
; AVX: # %bb.0:
191+
; AVX-NEXT: pushq %rbx
192+
; AVX-NEXT: movq %rdi, %rbx
193+
; AVX-NEXT: movzwl (%rdi), %eax
194+
; AVX-NEXT: shll $16, %eax
195+
; AVX-NEXT: vmovd %eax, %xmm0
196+
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
197+
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
198+
; AVX-NEXT: callq __truncsfbf2@PLT
199+
; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
200+
; AVX-NEXT: popq %rbx
201+
; AVX-NEXT: retq
202+
%1 = load bfloat, ptr %ptr
203+
%2 = call bfloat @llvm.fabs.bf16(bfloat %1)
204+
store bfloat %2, ptr %ptr
205+
ret void
206+
}
207+
208+
; half with distinct source and destination - store(fabs(load())).
; The recorded SSE and AVX output calls __extendhfsf2, masks with
; pand/vpand, then calls __truncsfhf2 before the 16-bit store to %dst.
define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
209+
; SSE-LABEL: combine_fabs_int_half:
210+
; SSE: # %bb.0:
211+
; SSE-NEXT: pushq %rbx
212+
; SSE-NEXT: movq %rsi, %rbx
213+
; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
214+
; SSE-NEXT: callq __extendhfsf2@PLT
215+
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
216+
; SSE-NEXT: callq __truncsfhf2@PLT
217+
; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
218+
; SSE-NEXT: popq %rbx
219+
; SSE-NEXT: retq
220+
;
221+
; AVX-LABEL: combine_fabs_int_half:
222+
; AVX: # %bb.0:
223+
; AVX-NEXT: pushq %rbx
224+
; AVX-NEXT: movq %rsi, %rbx
225+
; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
226+
; AVX-NEXT: callq __extendhfsf2@PLT
227+
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
228+
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
229+
; AVX-NEXT: callq __truncsfhf2@PLT
230+
; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
231+
; AVX-NEXT: popq %rbx
232+
; AVX-NEXT: retq
233+
%1 = load half, ptr %src
234+
%2 = call half @llvm.fabs.f16(half %1)
235+
store half %2, ptr %dst
236+
ret void
237+
}
238+
175239
; don't convert vector to scalar
176240
define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
177241
; SSE-LABEL: combine_fabs_vec_int_v4f32:

llvm/test/CodeGen/X86/combine-fneg.ll

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,147 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
206206
}
207207

208208
; store(fneg(load())) - convert scalar to integer
209+
; half read-modify-write - store(fneg(load())) through a single pointer.
; The recorded output applies xorps/pxor between two conversion libcalls -
; __gnu_h2f_ieee/__gnu_f2h_ieee for the SSE1 prefixes and
; __extendhfsf2/__truncsfhf2 for the SSE2 prefixes.
define void @fneg_int_rmw_half(ptr %ptr) nounwind {
210+
; X86-SSE1-LABEL: fneg_int_rmw_half:
211+
; X86-SSE1: # %bb.0:
212+
; X86-SSE1-NEXT: pushl %esi
213+
; X86-SSE1-NEXT: subl $8, %esp
214+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
215+
; X86-SSE1-NEXT: movzwl (%esi), %eax
216+
; X86-SSE1-NEXT: movl %eax, (%esp)
217+
; X86-SSE1-NEXT: calll __gnu_h2f_ieee
218+
; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
219+
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220+
; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
221+
; X86-SSE1-NEXT: movss %xmm0, (%esp)
222+
; X86-SSE1-NEXT: calll __gnu_f2h_ieee
223+
; X86-SSE1-NEXT: movw %ax, (%esi)
224+
; X86-SSE1-NEXT: addl $8, %esp
225+
; X86-SSE1-NEXT: popl %esi
226+
; X86-SSE1-NEXT: retl
227+
;
228+
; X86-SSE2-LABEL: fneg_int_rmw_half:
229+
; X86-SSE2: # %bb.0:
230+
; X86-SSE2-NEXT: pushl %esi
231+
; X86-SSE2-NEXT: subl $8, %esp
232+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
233+
; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
234+
; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
235+
; X86-SSE2-NEXT: movw %ax, (%esp)
236+
; X86-SSE2-NEXT: calll __extendhfsf2
237+
; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
238+
; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
239+
; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
240+
; X86-SSE2-NEXT: movd %xmm0, (%esp)
241+
; X86-SSE2-NEXT: calll __truncsfhf2
242+
; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
243+
; X86-SSE2-NEXT: movw %ax, (%esi)
244+
; X86-SSE2-NEXT: addl $8, %esp
245+
; X86-SSE2-NEXT: popl %esi
246+
; X86-SSE2-NEXT: retl
247+
;
248+
; X64-SSE1-LABEL: fneg_int_rmw_half:
249+
; X64-SSE1: # %bb.0:
250+
; X64-SSE1-NEXT: pushq %rbx
251+
; X64-SSE1-NEXT: movq %rdi, %rbx
252+
; X64-SSE1-NEXT: movzwl (%rdi), %edi
253+
; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
254+
; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
255+
; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
256+
; X64-SSE1-NEXT: movw %ax, (%rbx)
257+
; X64-SSE1-NEXT: popq %rbx
258+
; X64-SSE1-NEXT: retq
259+
;
260+
; X64-SSE2-LABEL: fneg_int_rmw_half:
261+
; X64-SSE2: # %bb.0:
262+
; X64-SSE2-NEXT: pushq %rbx
263+
; X64-SSE2-NEXT: movq %rdi, %rbx
264+
; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
265+
; X64-SSE2-NEXT: callq __extendhfsf2@PLT
266+
; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267+
; X64-SSE2-NEXT: callq __truncsfhf2@PLT
268+
; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
269+
; X64-SSE2-NEXT: movw %ax, (%rbx)
270+
; X64-SSE2-NEXT: popq %rbx
271+
; X64-SSE2-NEXT: retq
272+
%1 = load half, ptr %ptr
273+
%2 = fneg half %1
274+
store half %2, ptr %ptr
275+
ret void
276+
}
277+
278+
; bfloat with distinct source and destination - store(fneg(load())).
; For all four check prefixes the recorded output shifts the loaded 16 bits
; left by 16, applies xorps/pxor, and calls __truncsfbf2 before the 16-bit
; store to %dst.
define void @fneg_int_bfloat(ptr %src, ptr %dst) nounwind {
279+
; X86-SSE1-LABEL: fneg_int_bfloat:
280+
; X86-SSE1: # %bb.0:
281+
; X86-SSE1-NEXT: pushl %esi
282+
; X86-SSE1-NEXT: subl $8, %esp
283+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
284+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
285+
; X86-SSE1-NEXT: movzwl (%eax), %eax
286+
; X86-SSE1-NEXT: shll $16, %eax
287+
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
288+
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
289+
; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
290+
; X86-SSE1-NEXT: movss %xmm0, (%esp)
291+
; X86-SSE1-NEXT: calll __truncsfbf2
292+
; X86-SSE1-NEXT: movw %ax, (%esi)
293+
; X86-SSE1-NEXT: addl $8, %esp
294+
; X86-SSE1-NEXT: popl %esi
295+
; X86-SSE1-NEXT: retl
296+
;
297+
; X86-SSE2-LABEL: fneg_int_bfloat:
298+
; X86-SSE2: # %bb.0:
299+
; X86-SSE2-NEXT: pushl %esi
300+
; X86-SSE2-NEXT: pushl %eax
301+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
302+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
303+
; X86-SSE2-NEXT: movzwl (%eax), %eax
304+
; X86-SSE2-NEXT: shll $16, %eax
305+
; X86-SSE2-NEXT: movd %eax, %xmm0
306+
; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
307+
; X86-SSE2-NEXT: movd %xmm0, (%esp)
308+
; X86-SSE2-NEXT: calll __truncsfbf2
309+
; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
310+
; X86-SSE2-NEXT: movw %ax, (%esi)
311+
; X86-SSE2-NEXT: addl $4, %esp
312+
; X86-SSE2-NEXT: popl %esi
313+
; X86-SSE2-NEXT: retl
314+
;
315+
; X64-SSE1-LABEL: fneg_int_bfloat:
316+
; X64-SSE1: # %bb.0:
317+
; X64-SSE1-NEXT: pushq %rbx
318+
; X64-SSE1-NEXT: subq $16, %rsp
319+
; X64-SSE1-NEXT: movq %rsi, %rbx
320+
; X64-SSE1-NEXT: movzwl (%rdi), %eax
321+
; X64-SSE1-NEXT: shll $16, %eax
322+
; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
323+
; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
324+
; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
325+
; X64-SSE1-NEXT: callq __truncsfbf2@PLT
326+
; X64-SSE1-NEXT: movw %ax, (%rbx)
327+
; X64-SSE1-NEXT: addq $16, %rsp
328+
; X64-SSE1-NEXT: popq %rbx
329+
; X64-SSE1-NEXT: retq
330+
;
331+
; X64-SSE2-LABEL: fneg_int_bfloat:
332+
; X64-SSE2: # %bb.0:
333+
; X64-SSE2-NEXT: pushq %rbx
334+
; X64-SSE2-NEXT: movq %rsi, %rbx
335+
; X64-SSE2-NEXT: movzwl (%rdi), %eax
336+
; X64-SSE2-NEXT: shll $16, %eax
337+
; X64-SSE2-NEXT: movd %eax, %xmm0
338+
; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
339+
; X64-SSE2-NEXT: callq __truncsfbf2@PLT
340+
; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
341+
; X64-SSE2-NEXT: movw %ax, (%rbx)
342+
; X64-SSE2-NEXT: popq %rbx
343+
; X64-SSE2-NEXT: retq
344+
%1 = load bfloat, ptr %src
345+
%2 = fneg bfloat %1
346+
store bfloat %2, ptr %dst
347+
ret void
348+
}
349+
209350
define void @fneg_int_rmw_f32(ptr %ptr) {
210351
; X86-SSE-LABEL: fneg_int_rmw_f32:
211352
; X86-SSE: # %bb.0:

0 commit comments

Comments
 (0)