access-softek
diff --git a/‎llvm/docs/ReleaseNotes.rst
Lines changed: 5 additions & 0 deletions b/‎llvm/docs/ReleaseNotes.rst
Lines changed: 5 additions & 0 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Lines changed: 1 addition & 1 deletion b/‎llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Lines changed: 2 additions & 2 deletions b/‎llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
Lines changed: 15 additions & 14 deletions b/‎llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
Lines changed: 15 additions & 14 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
Lines changed: 1 addition & 1 deletion b/‎llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
Lines changed: 1 addition & 1 deletion b/‎llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/test/CodeGen/AMDGPU/clamp.ll
Lines changed: 4 additions & 4 deletions b/‎llvm/test/CodeGen/AMDGPU/clamp.ll
Lines changed: 4 additions & 4 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
Lines changed: 8 additions & 5 deletions b/‎llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
Lines changed: 8 additions & 5 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Lines changed: 3 additions & 3 deletions b/‎llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Lines changed: 3 additions & 3 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
Lines changed: 1 addition & 1 deletion b/‎llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
Lines changed: 5 additions & 5 deletions b/‎llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
Lines changed: 5 additions & 5 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/fdot2.ll
Lines changed: 5 additions & 5 deletions b/‎llvm/test/CodeGen/AMDGPU/fdot2.ll
Lines changed: 5 additions & 5 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/fma-combine.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/CodeGen/AMDGPU/fma-combine.ll
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/fmaxnum.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/CodeGen/AMDGPU/fmaxnum.ll
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/fminnum.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/CodeGen/AMDGPU/fminnum.ll
Lines changed: 2 additions & 2 deletions
@@ -100,6 +100,11 @@ During this release ...
 Changes to the AMDGPU Target
 -----------------------------
 
+* The backend default denormal handling mode has been switched to on
+  for all targets for all compute function types. Frontends wishing to
+  retain the old behavior should explicitly request f32 denormal
+  flushing.
+
 Changes to the AVR Target
 -----------------------------
 
 
@@ -97,7 +97,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
   if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    FullFS += "+fp64-fp16-denormals,";
+    FullFS += "+fp64-fp16-denormals,+fp32-denormals,";
   } else {
     FullFS += "-fp32-denormals,";
   }
 
@@ -704,8 +704,8 @@ struct SIModeRegisterDefaults {
     SIModeRegisterDefaults Mode;
     Mode.DX10Clamp = true;
     Mode.IEEE = IsCompute;
-    Mode.FP32InputDenormals = false; // FIXME: Should be on by default.
-    Mode.FP32OutputDenormals = false; // FIXME: Should be on by default.
+    Mode.FP32InputDenormals = true;
+    Mode.FP32OutputDenormals = true;
     Mode.FP64FP16InputDenormals = true;
     Mode.FP64FP16OutputDenormals = true;
     return Mode;
 
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
 
 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
 
 
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
 
 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
 
 
@@ -3,8 +3,8 @@
 ; GCN-LABEL: {{^}}kernel_ieee_mode_default:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
 ; GCN-NOT: v_mul_f32
 define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
@@ -18,8 +18,8 @@ define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
 ; GCN-LABEL: {{^}}kernel_ieee_mode_on:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
 ; GCN-NOT: v_mul_f32
 define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
@@ -48,8 +48,8 @@ define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
 ; GCN-LABEL: {{^}}func_ieee_mode_default:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
 ; GCN-NOT: v_mul_f32
 define void @func_ieee_mode_default() #0 {
@@ -63,8 +63,8 @@ define void @func_ieee_mode_default() #0 {
 ; GCN-LABEL: {{^}}func_ieee_mode_on:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
 ; GCN-NOT: v_mul_f32
 define void @func_ieee_mode_on() #1 {
@@ -93,8 +93,8 @@ define void @func_ieee_mode_off() #2 {
 ; GCN-LABEL: {{^}}cs_ieee_mode_default:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
 ; GCN-NOT: v_mul_f32
 define amdgpu_cs void @cs_ieee_mode_default() #0 {
@@ -108,8 +108,8 @@ define amdgpu_cs void @cs_ieee_mode_default() #0 {
 ; GCN-LABEL: {{^}}cs_ieee_mode_on:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
 ; GCN-NOT: v_mul_f32
 define amdgpu_cs void @cs_ieee_mode_on() #1 {
@@ -150,11 +150,12 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
   ret void
 }
 
+; FIXME: Should have denormals off by default.
 ; GCN-LABEL: {{^}}ps_ieee_mode_on:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
 ; GCN-NOT: v_mul_f32
 define amdgpu_ps void @ps_ieee_mode_on() #1 {
 
@@ -338,7 +338,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, f
 }
 
 attributes #0 = { nounwind optnone noinline }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "target-features"="-fp32-denormals" }
 attributes #2 = { nounwind "target-features"="+fp32-denormals" }
 
 !0 = !{float 2.500000e+00}
 
@@ -389,7 +389,7 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
 
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-features"="-fp32-denormals" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "target-features"="+fp32-denormals" }
 attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" }
 
@@ -767,8 +767,8 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
 
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-features"="-fp32-denormals" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "no-nans-fp-math"="false" }
-attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
-attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,-fp-exceptions" "no-nans-fp-math"="false" }
+attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_default_si:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
 ; GCN: IeeeMode: 1
 define amdgpu_kernel void @test_default_si(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
   store float 0.0, float addrspace(1)* %out0
@@ -10,7 +10,7 @@ define amdgpu_kernel void @test_default_si(float addrspace(1)* %out0, double add
 }
 
 ; GCN-LABEL: {{^}}test_default_vi:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
 ; GCN: IeeeMode: 1
 define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
   store float 0.0, float addrspace(1)* %out0
@@ -19,7 +19,7 @@ define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double add
 }
 
 ; GCN-LABEL: {{^}}test_f64_denormals:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
 ; GCN: IeeeMode: 1
 define amdgpu_kernel void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
   store float 0.0, float addrspace(1)* %out0
@@ -55,7 +55,7 @@ define amdgpu_kernel void @test_no_denormals(float addrspace(1)* %out0, double a
 }
 
 ; GCN-LABEL: {{^}}test_f16_f64_denormals:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
 ; GCN: IeeeMode: 1
 define amdgpu_kernel void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
   store half 0.0, half addrspace(1)* %out0
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_f16_f64_denormals(half addrspace(1)* %out0, doub
 }
 
 ; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
-; GCN: FloatMode: 0
+; GCN: FloatMode: 48
 ; GCN: IeeeMode: 1
 define amdgpu_kernel void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
   store half 0.0, half addrspace(1)* %out0
@@ -82,7 +82,9 @@ define amdgpu_kernel void @test_f32_f16_f64_denormals(half addrspace(1)* %out0,
   ret void
 }
 
+; FIXME: Denormals should be off by default
 ; GCN-LABEL: {{^}}kill_gs_const:
+; GCN: FloatMode: 240
 ; GCN: IeeeMode: 0
 define amdgpu_gs void @kill_gs_const() {
 main_body:
@@ -94,6 +96,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}kill_vcc_implicit_def:
+; GCN: FloatMode: 240
 ; GCN: IeeeMode: 0
 define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* inreg, [17 x <16 x i8>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [34 x <8 x i32>] addrspace(4)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
 entry:
 
@@ -739,6 +739,6 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }
+attributes #1 = { nounwind "target-features"="-fp32-denormals" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
@@ -625,7 +625,7 @@ define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "target-features"="-fp32-denormals" }
 attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
 attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
 attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }
 
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
 
 ; Make sure fdiv is promoted to f32.
 
 
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
-; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -march=amdgcn -mcpu=gfx1011 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx1012 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
+; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx1012 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
 ; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)
 
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
 
 ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs
 
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
 ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
 
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on:
 ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,`
`97`	`97`	`// denormals, but should be checked. Should we issue a warning somewhere`
`98`	`98`	`// if someone tries to enable these?`
`99`	`99`	`if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {`
`100`		`- FullFS += "+fp64-fp16-denormals,";`
	`100`	`+ FullFS += "+fp64-fp16-denormals,+fp32-denormals,";`
`101`	`101`	`} else {`
`102`	`102`	`FullFS += "-fp32-denormals,";`
`103`	`103`	`}`
Original file line number	Diff line number	Diff line change
`@@ -338,7 +338,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, f`
`338`	`338`	`}`
`339`	`339`
`340`	`340`	`attributes #0 = { nounwind optnone noinline }`
`341`		`-attributes #1 = { nounwind }`
	`341`	`+attributes #1 = { nounwind "target-features"="-fp32-denormals" }`
`342`	`342`	`attributes #2 = { nounwind "target-features"="+fp32-denormals" }`
`343`	`343`
`344`	`344`	`!0 = !{float 2.500000e+00}`
Original file line number	Diff line number	Diff line change
`@@ -739,6 +739,6 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half`
`739`	`739`	`}`
`740`	`740`
`741`	`741`	`attributes #0 = { nounwind readnone }`
`742`		`-attributes #1 = { nounwind }`
`743`		`-attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }`
`744`		`-attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }`
	`742`	`+attributes #1 = { nounwind "target-features"="-fp32-denormals" }`
	`743`	`+attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }`
	`744`	`+attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }`
Original file line number	Diff line number	Diff line change
`@@ -625,7 +625,7 @@ define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {`
`625`	`625`	`}`
`626`	`626`
`627`	`627`	`attributes #0 = { nounwind readnone }`
`628`		`-attributes #1 = { nounwind }`
	`628`	`+attributes #1 = { nounwind "target-features"="-fp32-denormals" }`
`629`	`629`	`attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }`
`630`	`630`	`attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }`
`631`	`631`	`attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }`