@@ -276,3 +276,102 @@ define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y)
   %z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
   ret <8 x double> %z
 }
+
+; FIXME: The IR-level FMF should propagate to the node.
+
+define float @minnum_intrinsic_nnan_fmf_f32(float %a, float %b) {
+; SSE-LABEL: minnum_intrinsic_nnan_fmf_f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps %xmm0, %xmm2
+; SSE-NEXT:    cmpunordss %xmm0, %xmm2
+; SSE-NEXT:    movaps %xmm2, %xmm3
+; SSE-NEXT:    andps %xmm1, %xmm3
+; SSE-NEXT:    minss %xmm0, %xmm1
+; SSE-NEXT:    andnps %xmm1, %xmm2
+; SSE-NEXT:    orps %xmm3, %xmm2
+; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: minnum_intrinsic_nnan_fmf_f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm2
+; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    retq
+  %r = tail call nnan float @llvm.minnum.f32(float %a, float %b)
+  ret float %r
+}
+
+; FIXME: Make sure vectors work too.
+
+define <2 x double> @minnum_intrinsic_nnan_fmf_v2f64(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movapd %xmm1, %xmm2
+; SSE-NEXT:    minpd %xmm0, %xmm2
+; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
+; SSE-NEXT:    andpd %xmm0, %xmm1
+; SSE-NEXT:    andnpd %xmm2, %xmm0
+; SSE-NEXT:    orpd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminpd %xmm0, %xmm1, %xmm2
+; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    retq
+  %r = tail call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %r
+}
+
+; FIXME: The current (but someday legacy) function-level attribute should also enable the fold.
+
+define double @minnum_intrinsic_nnan_attr_f64(double %a, double %b) #0 {
+; SSE-LABEL: minnum_intrinsic_nnan_attr_f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movapd %xmm0, %xmm2
+; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
+; SSE-NEXT:    movapd %xmm2, %xmm3
+; SSE-NEXT:    andpd %xmm1, %xmm3
+; SSE-NEXT:    minsd %xmm0, %xmm1
+; SSE-NEXT:    andnpd %xmm1, %xmm2
+; SSE-NEXT:    orpd %xmm3, %xmm2
+; SSE-NEXT:    movapd %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: minnum_intrinsic_nnan_attr_f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm2
+; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    retq
+  %r = tail call double @llvm.minnum.f64(double %a, double %b)
+  ret double %r
+}
+
+; FIXME: Make sure vectors work too.
+
+define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float> %b) #0 {
+; SSE-LABEL: minnum_intrinsic_nnan_attr_v4f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps %xmm1, %xmm2
+; SSE-NEXT:    minps %xmm0, %xmm2
+; SSE-NEXT:    cmpunordps %xmm0, %xmm0
+; SSE-NEXT:    andps %xmm0, %xmm1
+; SSE-NEXT:    andnps %xmm2, %xmm0
+; SSE-NEXT:    orps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: minnum_intrinsic_nnan_attr_v4f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminps %xmm0, %xmm1, %xmm2
+; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    retq
+  %r = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %r
+}
+
+attributes #0 = { "no-nans-fp-math"="true" }
+
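Note (a sketch, not output produced by this commit): the FIXME lines above anticipate that once the nnan flag or the "no-nans-fp-math" attribute reaches the FMINNUM node, the cmpunord/blend sequences become unnecessary and each of these lowerings should reduce to a single min instruction, e.g. for the scalar f32 AVX case roughly:

; AVX (expected once the fold is implemented; hypothetical checks):
;   vminss %xmm1, %xmm0, %xmm0
;   retq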