Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit d948fa6

Browse files
committed
[x86] add tests for maxnum/minnum intrinsics with nnan; NFC
Clang 6.0 was updated to create these intrinsics rather than libcalls or fcmp/select, but the backend wasn't prepared to handle that optimally. This bug is not the primary reason for PR37403 (https://bugs.llvm.org/show_bug.cgi?id=37403), but it's probably more important for x86 performance. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@331988 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e21fc64 commit d948fa6

File tree

2 files changed

+197
-0
lines changed

2 files changed

+197
-0
lines changed

test/CodeGen/X86/fmaxnum.ll

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -285,3 +285,101 @@ define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y)
285285
ret <8 x double> %z
286286
}
287287

; FIXME: The IR-level FMF should propagate to the node.

; The 'nnan' fast-math flag is on the call itself, so the backend could lower
; this to a bare maxsd; currently it still emits the NaN-safe cmp/select form.
define double @maxnum_intrinsic_nnan_fmf_f64(double %a, double %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    maxsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan double @llvm.maxnum.f64(double %a, double %b)
  ret double %r
}
312+
; FIXME: Make sure vectors work too.

; Vector variant: 'nnan' on the call; should become a bare maxps eventually.
define <4 x float> @maxnum_intrinsic_nnan_fmf_f432(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f432:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    maxps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f432:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}
335+
; FIXME: Current (but legacy someday): a function-level attribute should also enable the fold.

; Note: the call carries no 'nnan' flag here; the no-NaNs hint comes only from
; the "no-nans-fp-math" function attribute (#0, defined at the end of the file).
define float @maxnum_intrinsic_nnan_fmf_f32(float %a, float %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call float @llvm.maxnum.f32(float %a, float %b)
  ret float %r
}
360+
; FIXME: Make sure vectors work too.

; Vector + attribute-only variant: no per-call FMF; relies on attribute #0.
define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double> %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_attr_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    maxpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_attr_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}
383+
; Attribute group used by the attribute-based (non-FMF) tests above.
attributes #0 = { "no-nans-fp-math"="true" }

test/CodeGen/X86/fminnum.ll

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,3 +276,102 @@ define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y)
276276
%z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
277277
ret <8 x double> %z
278278
}
; FIXME: The IR-level FMF should propagate to the node.

; The 'nnan' fast-math flag is on the call itself, so the backend could lower
; this to a bare minss; currently it still emits the NaN-safe cmp/select form.
define float @minnum_intrinsic_nnan_fmf_f32(float %a, float %b) {
; SSE-LABEL: minnum_intrinsic_nnan_fmf_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    minss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_fmf_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan float @llvm.minnum.f32(float %a, float %b)
  ret float %r
}
304+
; FIXME: Make sure vectors work too.

; Vector variant: 'nnan' on the call; should become a bare minpd eventually.
define <2 x double> @minnum_intrinsic_nnan_fmf_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    minpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}
327+
; FIXME: Current (but legacy someday): a function-level attribute should also enable the fold.

; Note: as written, this test has BOTH the #0 "no-nans-fp-math" attribute and a
; per-call 'nnan' flag — so either mechanism alone should be enough for a fold.
define double @minnum_intrinsic_nnan_fmf_f64(double %a, double %b) #0 {
; SSE-LABEL: minnum_intrinsic_nnan_fmf_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    minsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_fmf_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan double @llvm.minnum.f64(double %a, double %b)
  ret double %r
}
352+
; FIXME: Make sure vectors work too.

; Vector + attribute-only variant: no per-call FMF; relies on attribute #0.
define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; SSE-LABEL: minnum_intrinsic_nnan_attr_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    minps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_attr_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %r = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}
375+
; Attribute group used by the attribute-based (non-FMF) tests above.
attributes #0 = { "no-nans-fp-math"="true" }

0 commit comments

Comments (0)