Skip to content

Commit a6867fd

Browse files
committed
AMDGPU: Combine fp16/fp64 subtarget features
The same control register controls both, and they are set to the same defaults. Keep the old names around as aliases. llvm-svn: 292837
1 parent f86d385 commit a6867fd

13 files changed

+117
-63
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -206,12 +206,6 @@ def FeatureDPP : SubtargetFeature<"dpp",
206206
// Subtarget Features (options and debugging)
207207
//===------------------------------------------------------------===//
208208

209-
def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
210-
"FP16Denormals",
211-
"true",
212-
"Enable half precision denormal handling"
213-
>;
214-
215209
// Some instructions do not support denormals despite this flag. Using
216210
// fp32 denormals also causes instructions to run at the double
217211
// precision rate for the device.
@@ -221,13 +215,30 @@ def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
221215
"Enable single precision denormal handling"
222216
>;
223217

224-
def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
225-
"FP64Denormals",
218+
// Denormal handling for fp64 and fp16 is controlled by the same
219+
// config register when fp16 is supported.
220+
// TODO: Do we need a separate f16 setting when not legal?
221+
def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
222+
"FP64FP16Denormals",
226223
"true",
227-
"Enable double precision denormal handling",
224+
"Enable double and half precision denormal handling",
228225
[FeatureFP64]
229226
>;
230227

228+
def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
229+
"FP64FP16Denormals",
230+
"true",
231+
"Enable double and half precision denormal handling",
232+
[FeatureFP64, FeatureFP64FP16Denormals]
233+
>;
234+
235+
def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
236+
"FP64FP16Denormals",
237+
"true",
238+
"Enable half precision denormal handling",
239+
[FeatureFP64FP16Denormals]
240+
>;
241+
231242
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
232243
"FPExceptions",
233244
"true",

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,10 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
4141
// for SI has the unhelpful behavior that it unsets everything else if you
4242
// disable it.
4343

44-
SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
44+
SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
4545
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
4646
FullFS += "+flat-for-global,+unaligned-buffer-access,";
47+
4748
FullFS += FS;
4849

4950
ParseSubtargetFeatures(GPU, FullFS);
@@ -52,9 +53,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
5253
// denormals, but should be checked. Should we issue a warning somewhere
5354
// if someone tries to enable these?
5455
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
55-
FP16Denormals = false;
56+
FP64FP16Denormals = false;
5657
FP32Denormals = false;
57-
FP64Denormals = false;
5858
}
5959

6060
// Set defaults if needed.
@@ -78,9 +78,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
7878
FastFMAF32(false),
7979
HalfRate64Ops(false),
8080

81-
FP16Denormals(false),
8281
FP32Denormals(false),
83-
FP64Denormals(false),
82+
FP64FP16Denormals(false),
8483
FPExceptions(false),
8584
FlatForGlobal(false),
8685
UnalignedScratchAccess(false),

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,8 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
8181
bool HalfRate64Ops;
8282

8383
// Dynamically set bits that enable features.
84-
bool FP16Denormals;
8584
bool FP32Denormals;
86-
bool FP64Denormals;
85+
bool FP64FP16Denormals;
8786
bool FPExceptions;
8887
bool FlatForGlobal;
8988
bool UnalignedScratchAccess;
@@ -282,15 +281,15 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
282281
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
283282

284283
bool hasFP16Denormals() const {
285-
return FP16Denormals;
284+
return FP64FP16Denormals;
286285
}
287286

288287
bool hasFP32Denormals() const {
289288
return FP32Denormals;
290289
}
291290

292291
bool hasFP64Denormals() const {
293-
return FP64Denormals;
292+
return FP64FP16Denormals;
294293
}
295294

296295
bool hasFPExceptions() const {

llvm/test/CodeGen/AMDGPU/default-fp-mode.ll

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,34 @@ define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %
5454
ret void
5555
}
5656

57+
; GCN-LABEL: {{^}}test_f16_f64_denormals:
58+
; GCN: FloatMode: 192
59+
; GCN: IeeeMode: 1
60+
define void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
61+
store half 0.0, half addrspace(1)* %out0
62+
store double 0.0, double addrspace(1)* %out1
63+
ret void
64+
}
65+
66+
; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
67+
; GCN: FloatMode: 0
68+
; GCN: IeeeMode: 1
69+
define void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
70+
store half 0.0, half addrspace(1)* %out0
71+
store double 0.0, double addrspace(1)* %out1
72+
ret void
73+
}
74+
75+
; GCN-LABEL: {{^}}test_f32_f16_f64_denormals:
76+
; GCN: FloatMode: 240
77+
; GCN: IeeeMode: 1
78+
define void @test_f32_f16_f64_denormals(half addrspace(1)* %out0, float addrspace(1)* %out1, double addrspace(1)* %out2) #8 {
79+
store half 0.0, half addrspace(1)* %out0
80+
store float 0.0, float addrspace(1)* %out1
81+
store double 0.0, double addrspace(1)* %out2
82+
ret void
83+
}
84+
5785
; GCN-LABEL: {{^}}kill_gs_const:
5886
; GCN: IeeeMode: 0
5987
define amdgpu_gs void @kill_gs_const() {
@@ -87,4 +115,7 @@ attributes #1 = { nounwind "target-cpu"="fiji" }
87115
attributes #2 = { nounwind "target-features"="+fp64-denormals" }
88116
attributes #3 = { nounwind "target-features"="+fp32-denormals" }
89117
attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
90-
attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
118+
attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
119+
attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" }
120+
attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" }
121+
attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }

llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ define void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
6969
ret void
7070
}
7171

72-
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16:
73-
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
72+
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
73+
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
7474
; GCN: buffer_store_short [[REG]]
75-
define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
75+
define void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
7676
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
7777
store half %canonicalized, half addrspace(1)* %out
7878
ret void
@@ -87,10 +87,10 @@ define void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %
8787
ret void
8888
}
8989

90-
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16:
91-
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
90+
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
91+
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
9292
; GCN: buffer_store_short [[REG]]
93-
define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
93+
define void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
9494
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
9595
store half %canonicalized, half addrspace(1)* %out
9696
ret void
@@ -282,7 +282,7 @@ define void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out)
282282
}
283283

284284
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
285-
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
285+
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
286286
; GCN: buffer_store_dword [[REG]]
287287
define void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
288288
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
@@ -300,7 +300,7 @@ define void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspa
300300
}
301301

302302
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
303-
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
303+
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
304304
; GCN: buffer_store_dword [[REG]]
305305
define void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
306306
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
@@ -382,5 +382,5 @@ define void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %
382382

383383
attributes #0 = { nounwind readnone }
384384
attributes #1 = { nounwind }
385-
attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" }
386-
attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
385+
attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
386+
attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }

llvm/test/CodeGen/AMDGPU/fcanonicalize.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,5 +347,5 @@ define void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #
347347

348348
attributes #0 = { nounwind readnone }
349349
attributes #1 = { nounwind }
350-
attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
351-
attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
350+
attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
351+
attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }

llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; XUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2-
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
2+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
3+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
4+
35

46
; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
57
; add an instruction if the fadd has more than one use.
@@ -115,7 +117,8 @@ define void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, float %y) #0 {
115117
; VI: v_cndmask_b32_e32
116118
; VI: v_add_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
117119
; VI: v_mul_f16_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
118-
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
120+
; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
121+
; VI-DENORM: v_fma_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
119122
define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
120123
%x = bitcast i16 %x.arg to half
121124
%y = bitcast i16 %y.arg to half
@@ -136,7 +139,10 @@ define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.
136139

137140
; GCN-LABEL: {{^}}multiple_use_fadd_fmac_f16:
138141
; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}}
139-
; GCN-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
142+
143+
; VI-FLUSH-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
144+
; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}}
145+
140146
; GCN-DAG: buffer_store_short [[MUL2]]
141147
; GCN-DAG: buffer_store_short [[MAD]]
142148
; GCN: s_endpgm
@@ -153,7 +159,10 @@ define void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i16 zeroext %x.
153159

154160
; GCN-LABEL: {{^}}multiple_use_fadd_fmad_f16:
155161
; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}|
156-
; GCN-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
162+
163+
; VI-FLUSH-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
164+
; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
165+
157166
; GCN-DAG: buffer_store_short [[MUL2]]
158167
; GCN-DAG: buffer_store_short [[MAD]]
159168
; GCN: s_endpgm
@@ -170,8 +179,12 @@ define void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.
170179
}
171180

172181
; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad_f16:
173-
; GCN: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
174-
; GCN: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
182+
; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
183+
; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
184+
185+
; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
186+
; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
187+
175188
define void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
176189
%x = bitcast i16 %x.arg to half
177190
%y = bitcast i16 %y.arg to half

llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
2-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
3-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
4-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
5-
6-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
7-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
8-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
9-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
1+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
2+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
3+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
4+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
5+
6+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
7+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
8+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
9+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
1010

1111
declare i32 @llvm.amdgcn.workitem.id.x() #1
1212
declare half @llvm.fmuladd.f16(half, half, half) #1

llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %
6262

6363
attributes #0 = { nounwind "target-cpu"="kaveri" }
6464
attributes #1 = { nounwind "target-cpu"="fiji" }
65-
attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" }
66-
attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" }
67-
attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
68-
attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
65+
attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
66+
attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-fp16-denormals" }
67+
attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
68+
attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }

llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
; RUN: llc -march=amdgcn -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
2-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
3-
; RUN: llc -march=amdgcn -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
4-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
1+
; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
2+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
3+
; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
4+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
55

66
declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
77
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)

llvm/test/CodeGen/AMDGPU/v_mac.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
2-
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
2+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
3+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
34

45
; GCN-LABEL: {{^}}mac_vvv:
56
; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
@@ -250,8 +251,8 @@ bb:
250251
; FIXME: How is this not folded?
251252
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, 0x3c00
252253

253-
; VI: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
254-
; VI: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
254+
; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
255+
; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
255256
define void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {
256257
bb:
257258
%tid = call i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/v_mac_f16.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2-
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
1+
; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
33

44
; GCN-LABEL: {{^}}mac_f16
55
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
@@ -604,5 +604,5 @@ entry:
604604
ret void
605605
}
606606

607-
attributes #0 = {"unsafe-fp-math"="false"}
608-
attributes #1 = {"unsafe-fp-math"="true"}
607+
attributes #0 = { nounwind "unsafe-fp-math"="false" }
608+
attributes #1 = { nounwind "unsafe-fp-math"="true" }

llvm/test/CodeGen/AMDGPU/v_madak_f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2-
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
1+
; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
33

44
; GCN-LABEL: {{^}}madak_f16
55
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]

0 commit comments

Comments
 (0)