Skip to content

Commit 75cf309

Browse files
committed
AMDGPU: Assume f32 denormals are enabled by default
This will likely introduce catastrophic performance regressions on older subtargets, but should be correct. A follow up change will remove the old fp32-denormals subtarget features, and switch to using the new denormal-fp-math/denormal-fp-math-f32 attributes. Frontends should be making sure to add the denormal-fp-math-f32 attribute when appropriate to avoid performance regressions.
1 parent 0c85c48 commit 75cf309

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+138
-129
lines changed

llvm/docs/ReleaseNotes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ During this release ...
100100
Changes to the AMDGPU Target
101101
-----------------------------
102102

103+
* The backend default denormal handling mode has been switched to on
104+
for all targets for all compute function types. Frontends wishing to
105+
retain the old behavior should explicitly request f32 denormal
106+
flushing.
107+
103108
Changes to the AVR Target
104109
-----------------------------
105110

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
9797
// denormals, but should be checked. Should we issue a warning somewhere
9898
// if someone tries to enable these?
9999
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
100-
FullFS += "+fp64-fp16-denormals,";
100+
FullFS += "+fp64-fp16-denormals,+fp32-denormals,";
101101
} else {
102102
FullFS += "-fp32-denormals,";
103103
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -704,8 +704,8 @@ struct SIModeRegisterDefaults {
704704
SIModeRegisterDefaults Mode;
705705
Mode.DX10Clamp = true;
706706
Mode.IEEE = IsCompute;
707-
Mode.FP32InputDenormals = false; // FIXME: Should be on by default.
708-
Mode.FP32OutputDenormals = false; // FIXME: Should be on by default.
707+
Mode.FP32InputDenormals = true;
708+
Mode.FP32OutputDenormals = true;
709709
Mode.FP64FP16InputDenormals = true;
710710
Mode.FP64FP16OutputDenormals = true;
711711
return Mode;

llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3-
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
2+
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3+
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
44

55
; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
66

llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3-
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
2+
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3+
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
44

55
; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
66

llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
; GCN-LABEL: {{^}}kernel_ieee_mode_default:
44
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
55
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
6-
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
7-
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
6+
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
7+
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
88
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
99
; GCN-NOT: v_mul_f32
1010
define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
@@ -18,8 +18,8 @@ define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
1818
; GCN-LABEL: {{^}}kernel_ieee_mode_on:
1919
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
2020
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
21-
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
22-
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
21+
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
22+
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
2323
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
2424
; GCN-NOT: v_mul_f32
2525
define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
@@ -48,8 +48,8 @@ define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
4848
; GCN-LABEL: {{^}}func_ieee_mode_default:
4949
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
5050
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
51-
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
52-
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
51+
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
52+
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
5353
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
5454
; GCN-NOT: v_mul_f32
5555
define void @func_ieee_mode_default() #0 {
@@ -63,8 +63,8 @@ define void @func_ieee_mode_default() #0 {
6363
; GCN-LABEL: {{^}}func_ieee_mode_on:
6464
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
6565
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
66-
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
67-
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
66+
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
67+
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
6868
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
6969
; GCN-NOT: v_mul_f32
7070
define void @func_ieee_mode_on() #1 {
@@ -93,8 +93,8 @@ define void @func_ieee_mode_off() #2 {
9393
; GCN-LABEL: {{^}}cs_ieee_mode_default:
9494
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
9595
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
96-
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
97-
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
96+
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
97+
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
9898
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
9999
; GCN-NOT: v_mul_f32
100100
define amdgpu_cs void @cs_ieee_mode_default() #0 {
@@ -108,8 +108,8 @@ define amdgpu_cs void @cs_ieee_mode_default() #0 {
108108
; GCN-LABEL: {{^}}cs_ieee_mode_on:
109109
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
110110
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
111-
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
112-
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
111+
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
112+
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
113113
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
114114
; GCN-NOT: v_mul_f32
115115
define amdgpu_cs void @cs_ieee_mode_on() #1 {
@@ -150,11 +150,12 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
150150
ret void
151151
}
152152

153+
; FIXME: Should have denormals off by default.
153154
; GCN-LABEL: {{^}}ps_ieee_mode_on:
154155
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
155156
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
156-
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
157-
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
157+
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
158+
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
158159
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
159160
; GCN-NOT: v_mul_f32
160161
define amdgpu_ps void @ps_ieee_mode_on() #1 {

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, f
338338
}
339339

340340
attributes #0 = { nounwind optnone noinline }
341-
attributes #1 = { nounwind }
341+
attributes #1 = { nounwind "target-features"="-fp32-denormals" }
342342
attributes #2 = { nounwind "target-features"="+fp32-denormals" }
343343

344344
!0 = !{float 2.500000e+00}

llvm/test/CodeGen/AMDGPU/clamp-modifier.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
389389

390390
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
391391

392-
attributes #0 = { nounwind }
392+
attributes #0 = { nounwind "target-features"="-fp32-denormals" }
393393
attributes #1 = { nounwind readnone }
394394
attributes #2 = { nounwind "target-features"="+fp32-denormals" }
395395
attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" }

llvm/test/CodeGen/AMDGPU/clamp.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -767,8 +767,8 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
767767
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
768768
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
769769

770-
attributes #0 = { nounwind }
770+
attributes #0 = { nounwind "target-features"="-fp32-denormals" }
771771
attributes #1 = { nounwind readnone }
772-
attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "no-nans-fp-math"="false" }
773-
attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
774-
attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
772+
attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,-fp-exceptions" "no-nans-fp-math"="false" }
773+
attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
774+
attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }

llvm/test/CodeGen/AMDGPU/default-fp-mode.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
22

33
; GCN-LABEL: {{^}}test_default_si:
4-
; GCN: FloatMode: 192
4+
; GCN: FloatMode: 240
55
; GCN: IeeeMode: 1
66
define amdgpu_kernel void @test_default_si(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
77
store float 0.0, float addrspace(1)* %out0
@@ -10,7 +10,7 @@ define amdgpu_kernel void @test_default_si(float addrspace(1)* %out0, double add
1010
}
1111

1212
; GCN-LABEL: {{^}}test_default_vi:
13-
; GCN: FloatMode: 192
13+
; GCN: FloatMode: 240
1414
; GCN: IeeeMode: 1
1515
define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
1616
store float 0.0, float addrspace(1)* %out0
@@ -19,7 +19,7 @@ define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double add
1919
}
2020

2121
; GCN-LABEL: {{^}}test_f64_denormals:
22-
; GCN: FloatMode: 192
22+
; GCN: FloatMode: 240
2323
; GCN: IeeeMode: 1
2424
define amdgpu_kernel void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
2525
store float 0.0, float addrspace(1)* %out0
@@ -55,7 +55,7 @@ define amdgpu_kernel void @test_no_denormals(float addrspace(1)* %out0, double a
5555
}
5656

5757
; GCN-LABEL: {{^}}test_f16_f64_denormals:
58-
; GCN: FloatMode: 192
58+
; GCN: FloatMode: 240
5959
; GCN: IeeeMode: 1
6060
define amdgpu_kernel void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
6161
store half 0.0, half addrspace(1)* %out0
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_f16_f64_denormals(half addrspace(1)* %out0, doub
6464
}
6565

6666
; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
67-
; GCN: FloatMode: 0
67+
; GCN: FloatMode: 48
6868
; GCN: IeeeMode: 1
6969
define amdgpu_kernel void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
7070
store half 0.0, half addrspace(1)* %out0
@@ -82,7 +82,9 @@ define amdgpu_kernel void @test_f32_f16_f64_denormals(half addrspace(1)* %out0,
8282
ret void
8383
}
8484

85+
; FIXME: Denormals should be off by default
8586
; GCN-LABEL: {{^}}kill_gs_const:
87+
; GCN: FloatMode: 240
8688
; GCN: IeeeMode: 0
8789
define amdgpu_gs void @kill_gs_const() {
8890
main_body:
@@ -94,6 +96,7 @@ main_body:
9496
}
9597

9698
; GCN-LABEL: {{^}}kill_vcc_implicit_def:
99+
; GCN: FloatMode: 240
97100
; GCN: IeeeMode: 0
98101
define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* inreg, [17 x <16 x i8>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [34 x <8 x i32>] addrspace(4)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
99102
entry:

llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,6 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
739739
}
740740

741741
attributes #0 = { nounwind readnone }
742-
attributes #1 = { nounwind }
743-
attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
744-
attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }
742+
attributes #1 = { nounwind "target-features"="-fp32-denormals" }
743+
attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
744+
attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }

llvm/test/CodeGen/AMDGPU/fcanonicalize.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
625625
}
626626

627627
attributes #0 = { nounwind readnone }
628-
attributes #1 = { nounwind }
628+
attributes #1 = { nounwind "target-features"="-fp32-denormals" }
629629
attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
630630
attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
631631
attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }

llvm/test/CodeGen/AMDGPU/fdiv.f16.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
3-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
4-
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
5-
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
1+
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
3+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
4+
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
5+
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
66

77
; Make sure fdiv is promoted to f32.
88

llvm/test/CodeGen/AMDGPU/fdot2.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
; RUN: llc -march=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
2-
; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
3-
; RUN: llc -march=amdgcn -mcpu=gfx1011 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
4-
; RUN: llc -march=amdgcn -mcpu=gfx1012 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
5-
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
1+
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
2+
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
3+
; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
4+
; RUN: llc -march=amdgcn -mcpu=gfx1012 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
5+
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
66
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
77
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
88
; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)

llvm/test/CodeGen/AMDGPU/fma-combine.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
2-
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
1+
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
2+
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
33
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
44

55
; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs

llvm/test/CodeGen/AMDGPU/fmaxnum.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2-
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
1+
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
33

44
; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
55
; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}

llvm/test/CodeGen/AMDGPU/fminnum.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2-
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
1+
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
33

44
; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on:
55
; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}

0 commit comments

Comments
 (0)