Skip to content

Commit e805d83

Browse files
authored
AMDGPU: Add more tests for fmed3 instcombine folds (#139529)
Add tests with sNaN literals, and run the tests both with and without amdgpu-ieee
1 parent 9bdeb00 commit e805d83

File tree

2 files changed

+289
-5
lines changed

2 files changed

+289
-5
lines changed

llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -605,9 +605,38 @@ define float @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1
605605
ret float %med3
606606
}
607607

608+
; strictfp case: the three half operands are widened with constrained fpext
; intrinsics before being passed to llvm.amdgcn.fmed3.f32. Every check prefix
; below (UNKNOWN, GFX8, GFX9) expects the constrained fpext calls and the f32
; fmed3 call to remain in the output; the only difference between prefixes is
; the attribute-group capture on the fmed3 call.
define float @fmed3_f32_fpext_f16_strictfp(half %arg0, half %arg1, half %arg2) #2 {
609+
; UNKNOWN-LABEL: define float @fmed3_f32_fpext_f16_strictfp
610+
; UNKNOWN-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
611+
; UNKNOWN-NEXT: [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
612+
; UNKNOWN-NEXT: [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
613+
; UNKNOWN-NEXT: [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
614+
; UNKNOWN-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR2]]
615+
; UNKNOWN-NEXT: ret float [[MED3]]
616+
;
617+
; GFX8-LABEL: define float @fmed3_f32_fpext_f16_strictfp
618+
; GFX8-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
619+
; GFX8-NEXT: [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
620+
; GFX8-NEXT: [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
621+
; GFX8-NEXT: [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
622+
; GFX8-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR4:[0-9]+]]
623+
; GFX8-NEXT: ret float [[MED3]]
624+
;
625+
; GFX9-LABEL: define float @fmed3_f32_fpext_f16_strictfp
626+
; GFX9-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
627+
; GFX9-NEXT: [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
628+
; GFX9-NEXT: [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
629+
; GFX9-NEXT: [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
630+
; GFX9-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR5:[0-9]+]]
631+
; GFX9-NEXT: ret float [[MED3]]
632+
;
633+
%arg0.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg0, metadata !"fpexcept.strict")
634+
%arg1.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg1, metadata !"fpexcept.strict")
635+
%arg2.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg2, metadata !"fpexcept.strict")
636+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) #2
637+
ret float %med3
638+
}
608639

609640
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
610641
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn }
611-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
612-
; GFX8: {{.*}}
613-
; UNKNOWN: {{.*}}
642+
attributes #2 = { strictfp }

llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll

Lines changed: 257 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s
2+
3+
; Test with "amdgpu-ieee" set to true and to false; the sed-based RUN lines flip the attribute value.
4+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine -mcpu=gfx600 < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
5+
; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
6+
7+
; Also test with gfx12, which no longer has an IEEE mode bit.
8+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
9+
; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
10+
311
; --------------------------------------------------------------------
412
; llvm.amdgcn.fmed3
513
; --------------------------------------------------------------------
@@ -328,5 +336,252 @@ define float @fmed3_x_y_poison_f32(float %x, float %y) #1 {
328336
ret float %med3
329337
}
330338

339+
; sNaN literal (0x7FF4000000000000) as operand 0 of fmed3.
; Expected output: the call is replaced by llvm.minnum.f32 of the two
; remaining operands. Checked under the common prefix, so the expectation is
; the same for every run configuration of this file.
define float @fmed3_snan1_x_y_f32(float %x, float %y) #1 {
340+
; CHECK-LABEL: define float @fmed3_snan1_x_y_f32(
341+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
342+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
343+
; CHECK-NEXT: ret float [[MED3]]
344+
;
345+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
346+
ret float %med3
347+
}
348+
349+
; sNaN literal (0x7FF4000000000000) as operand 1 of fmed3.
; Expected output: llvm.minnum.f32 of the two remaining operands (%x, %y).
define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
350+
; CHECK-LABEL: define float @fmed3_x_snan1_y_f32(
351+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
352+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
353+
; CHECK-NEXT: ret float [[MED3]]
354+
;
355+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
356+
ret float %med3
357+
}
358+
359+
; sNaN literal (0x7FF4000000000000) as operand 2 of fmed3.
; Unlike the operand-0 and operand-1 cases, the expected output here is
; llvm.maxnum.f32 of the two remaining operands (%x, %y).
define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
360+
; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
361+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
362+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
363+
; CHECK-NEXT: ret float [[MED3]]
364+
;
365+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
366+
ret float %med3
367+
}
368+
369+
; Two NaN literals with different payloads (0x7FF0000020000000 and
; 0x7FF0000040000000) as operands 0 and 2; %x is operand 1.
; Expected output: the call folds away and %x is returned directly.
define float @fmed3_snan1_x_snan2_f32(float %x) #1 {
370+
; CHECK-LABEL: define float @fmed3_snan1_x_snan2_f32(
371+
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
372+
; CHECK-NEXT: ret float [[X]]
373+
;
374+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000)
375+
ret float %med3
376+
}
377+
378+
; Two NaN literals with different payloads (0x7FF0000020000000 and
; 0x7FF0000040000000) as operands 1 and 2; %x is operand 0.
; Expected output: the call folds away and %x is returned directly.
define float @fmed3_x_snan1_snan2_f32(float %x) #1 {
379+
; CHECK-LABEL: define float @fmed3_x_snan1_snan2_f32(
380+
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
381+
; CHECK-NEXT: ret float [[X]]
382+
;
383+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000)
384+
ret float %med3
385+
}
386+
387+
; All three operands are NaN literals with distinct bit patterns; the %x
; parameter is not used by the body.
; Expected output: a constant fold to 0x7FF8000020000000.
; NOTE(review): that constant looks like operand 1's payload with the quiet
; bit set - confirm against the fold implementation.
define float @fmed3_snan1_snan2_snan3_f32(float %x) #1 {
388+
; CHECK-LABEL: define float @fmed3_snan1_snan2_snan3_f32(
389+
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
390+
; CHECK-NEXT: ret float 0x7FF8000020000000
391+
;
392+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 0x7FF0000020000000, float 0x7FF0000040000000)
393+
ret float %med3
394+
}
395+
396+
; All-constant operands: (sNaN, 1.0, 2.0). The %x and %y parameters are not
; used by the body.
; Expected output: a constant fold to 1.0, the smaller of the two non-NaN
; constants.
define float @fmed3_snan1_1_2_f32(float %x, float %y) #1 {
397+
; CHECK-LABEL: define float @fmed3_snan1_1_2_f32(
398+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
399+
; CHECK-NEXT: ret float 1.000000e+00
400+
;
401+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 1.0, float 2.0)
402+
ret float %med3
403+
}
404+
405+
; All-constant operands: (-1.0, sNaN, 2.0). The %x and %y parameters are not
; used by the body.
; Expected output: a constant fold to -1.0, the smaller of the two non-NaN
; constants.
; NOTE(review): the function name lists the sNaN first, but in the call the
; sNaN literal is operand 1 and -1.0 is operand 0.
define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 {
406+
; CHECK-LABEL: define float @fmed3_snan1_neg1_2_f32(
407+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
408+
; CHECK-NEXT: ret float -1.000000e+00
409+
;
410+
%med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0x7FF4000000000000, float 2.0)
411+
ret float %med3
412+
}
413+
414+
; All-constant operands: (-2.0, 3.0, sNaN). The %x and %y parameters are not
; used by the body.
; Expected output: a constant fold to 3.0, the larger of the two non-NaN
; constants (sNaN in the last operand position).
define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
415+
; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
416+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
417+
; CHECK-NEXT: ret float 3.000000e+00
418+
;
419+
%med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4000000000000)
420+
ret float %med3
421+
}
422+
423+
; --------------------------------------------------------------------
424+
; llvm.amdgcn.fmed3 with default mode implied by shader CC
425+
; --------------------------------------------------------------------
426+
427+
; amdgpu_ps function declared without an attribute group, so "amdgpu-ieee"
; is not set explicitly here. sNaN literal as operand 0.
; Expected output: llvm.minnum.f32 of %x and %y.
define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32(float %x, float %y) {
428+
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32(
429+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
430+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
431+
; CHECK-NEXT: ret float [[MED3]]
432+
;
433+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
434+
ret float %med3
435+
}
436+
437+
; amdgpu_ps function declared without an attribute group, so "amdgpu-ieee"
; is not set explicitly here. sNaN literal as operand 1.
; Expected output: llvm.minnum.f32 of %x and %y.
define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y) {
438+
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(
439+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
440+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
441+
; CHECK-NEXT: ret float [[MED3]]
442+
;
443+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
444+
ret float %med3
445+
}
446+
447+
; amdgpu_ps function declared without an attribute group, so "amdgpu-ieee"
; is not set explicitly here. sNaN literal as operand 2.
; Expected output: llvm.maxnum.f32 of %x and %y.
define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) {
448+
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
449+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
450+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
451+
; CHECK-NEXT: ret float [[MED3]]
452+
;
453+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
454+
ret float %med3
455+
}
456+
; --------------------------------------------------------------------
457+
; llvm.amdgcn.fmed3 with shader CC (default mode) plus an explicit amdgpu-ieee attribute
458+
; --------------------------------------------------------------------
459+
460+
; amdgpu_ps function using attribute group #1, which carries an explicit
; "amdgpu-ieee" value. sNaN literal as operand 0.
; Expected output: llvm.minnum.f32 of %x and %y.
define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32(float %x, float %y) #1 {
461+
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32(
462+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
463+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
464+
; CHECK-NEXT: ret float [[MED3]]
465+
;
466+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
467+
ret float %med3
468+
}
469+
470+
; amdgpu_ps function using attribute group #1, which carries an explicit
; "amdgpu-ieee" value. sNaN literal as operand 1.
; Expected output: llvm.minnum.f32 of %x and %y.
define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(float %x, float %y) #1 {
471+
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(
472+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
473+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
474+
; CHECK-NEXT: ret float [[MED3]]
475+
;
476+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
477+
ret float %med3
478+
}
479+
480+
; amdgpu_ps function using attribute group #1, which carries an explicit
; "amdgpu-ieee" value. sNaN literal as operand 2.
; Expected output: llvm.maxnum.f32 of %x and %y.
define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #1 {
481+
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(
482+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
483+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
484+
; CHECK-NEXT: ret float [[MED3]]
485+
;
486+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
487+
ret float %med3
488+
}
489+
490+
; --------------------------------------------------------------------
491+
; llvm.amdgcn.fmed3 with strictfp calls
492+
; --------------------------------------------------------------------
493+
494+
; strictfp function (attribute group #2) with a strictfp call site.
; Quiet NaN literal (0x7FF8000000000000) as operand 0.
; Expected output: the call is still replaced by llvm.minnum.f32 of %x and %y.
define float @fmed3_qnan0_x_y_f32_strictfp(float %x, float %y) #2 {
495+
; CHECK-LABEL: define float @fmed3_qnan0_x_y_f32_strictfp(
496+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] {
497+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
498+
; CHECK-NEXT: ret float [[MED3]]
499+
;
500+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) strictfp
501+
ret float %med3
502+
}
503+
504+
; strictfp function (attribute group #2) with a strictfp call site.
; Quiet NaN literal (0x7FF8000000000000) as operand 1.
; Expected output: llvm.minnum.f32 of %x and %y.
define float @fmed3_x_qnan0_y_f32_strictfp(float %x, float %y) #2 {
505+
; CHECK-LABEL: define float @fmed3_x_qnan0_y_f32_strictfp(
506+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
507+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
508+
; CHECK-NEXT: ret float [[MED3]]
509+
;
510+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) strictfp
511+
ret float %med3
512+
}
513+
514+
; strictfp function (attribute group #2) with a strictfp call site.
; Quiet NaN literal (0x7FF8000000000000) as operand 2.
; Expected output: llvm.maxnum.f32 of %x and %y.
define float @fmed3_x_y_qnan0_f32_strictfp(float %x, float %y) #2 {
515+
; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32_strictfp(
516+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
517+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
518+
; CHECK-NEXT: ret float [[MED3]]
519+
;
520+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) strictfp
521+
ret float %med3
522+
}
523+
524+
; strictfp function (attribute group #2) with a strictfp call site.
; sNaN literal (0x7FF4000000000000) as operand 0.
; Expected output: llvm.minnum.f32 of %x and %y, the same as in the
; non-strictfp variant of this test.
define float @fmed3_snan1_x_y_f32_strictfp(float %x, float %y) #2 {
525+
; CHECK-LABEL: define float @fmed3_snan1_x_y_f32_strictfp(
526+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
527+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
528+
; CHECK-NEXT: ret float [[MED3]]
529+
;
530+
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y) strictfp
531+
ret float %med3
532+
}
533+
534+
; strictfp function (attribute group #2) with a strictfp call site.
; sNaN literal (0x7FF4000000000000) as operand 1.
; Expected output: llvm.minnum.f32 of %x and %y.
define float @fmed3_x_snan1_y_f32_strictfp(float %x, float %y) #2 {
535+
; CHECK-LABEL: define float @fmed3_x_snan1_y_f32_strictfp(
536+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
537+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
538+
; CHECK-NEXT: ret float [[MED3]]
539+
;
540+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y) strictfp
541+
ret float %med3
542+
}
543+
544+
; strictfp function (attribute group #2) with a strictfp call site.
; sNaN literal (0x7FF4000000000000) as operand 2.
; Expected output: llvm.maxnum.f32 of %x and %y.
define float @fmed3_x_y_snan1_f32_strictfp(float %x, float %y) #2 {
545+
; CHECK-LABEL: define float @fmed3_x_y_snan1_f32_strictfp(
546+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
547+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
548+
; CHECK-NEXT: ret float [[MED3]]
549+
;
550+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000) strictfp
551+
ret float %med3
552+
}
553+
554+
; strictfp function (attribute group #2) with a strictfp call site.
; poison as operand 0.
; Expected output: the whole call folds to poison.
define float @fmed3_poison_x_y_f32_strictfp(float %x, float %y) #2 {
555+
; CHECK-LABEL: define float @fmed3_poison_x_y_f32_strictfp(
556+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
557+
; CHECK-NEXT: ret float poison
558+
;
559+
%med3 = call float @llvm.amdgcn.fmed3.f32(float poison, float %x, float %y) strictfp
560+
ret float %med3
561+
}
562+
563+
; strictfp function (attribute group #2) with a strictfp call site.
; poison as operand 1.
; Expected output: the whole call folds to poison.
define float @fmed3_x_poison_y_f32_strictfp(float %x, float %y) #2 {
564+
; CHECK-LABEL: define float @fmed3_x_poison_y_f32_strictfp(
565+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
566+
; CHECK-NEXT: ret float poison
567+
;
568+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float poison, float %y) strictfp
569+
ret float %med3
570+
}
571+
572+
; strictfp function (attribute group #2) with a strictfp call site.
; poison as operand 2.
; Expected output: the whole call folds to poison.
define float @fmed3_x_y_poison_f32_strictfp(float %x, float %y) #2 {
573+
; CHECK-LABEL: define float @fmed3_x_y_poison_f32_strictfp(
574+
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
575+
; CHECK-NEXT: ret float poison
576+
;
577+
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float poison) strictfp
578+
ret float %med3
579+
}
580+
331581
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
332-
attributes #1 = { nounwind }
582+
attributes #1 = { nounwind "amdgpu-ieee"="true" }
583+
attributes #2 = { nounwind strictfp "amdgpu-ieee"="true" }
584+
585+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
586+
; IEEE0: {{.*}}
587+
; IEEE1: {{.*}}

0 commit comments

Comments
 (0)