Skip to content

Commit ce2258c

Browse files
committed
clang/AMDGPU: Stop setting old denormal subtarget features
1 parent 75cf309 commit ce2258c

File tree

4 files changed

+27
-52
lines changed

4 files changed

+27
-52
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -233,28 +233,6 @@ bool AMDGPUTargetInfo::initFeatureMap(
233233
return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
234234
}
235235

236-
void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
237-
TargetOptions &TargetOpts) const {
238-
bool hasFP32Denormals = false;
239-
bool hasFP64Denormals = false;
240-
241-
for (auto &I : TargetOpts.FeaturesAsWritten) {
242-
if (I == "+fp32-denormals" || I == "-fp32-denormals")
243-
hasFP32Denormals = true;
244-
if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
245-
hasFP64Denormals = true;
246-
}
247-
if (!hasFP32Denormals)
248-
TargetOpts.Features.push_back(
249-
(Twine(hasFastFMAF() && hasFullRateDenormalsF32() &&
250-
CGOpts.FP32DenormalMode.Output == llvm::DenormalMode::IEEE
251-
? '+' : '-') + Twine("fp32-denormals"))
252-
.str());
253-
// Always do not flush fp64 or fp16 denorms.
254-
if (!hasFP64Denormals && hasFP64())
255-
TargetOpts.Features.push_back("+fp64-fp16-denormals");
256-
}
257-
258236
void AMDGPUTargetInfo::fillValidCPUList(
259237
SmallVectorImpl<StringRef> &Values) const {
260238
if (isAMDGCN(getTriple()))

clang/lib/Basic/Targets/AMDGPU.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
208208
StringRef CPU,
209209
const std::vector<std::string> &FeatureVec) const override;
210210

211-
void adjustTargetOptions(const CodeGenOptions &CGOpts,
212-
TargetOptions &TargetOpts) const override;
213-
214211
ArrayRef<Builtin::Info> getTargetBuiltins() const override;
215212

216213
void getTargetDefines(const LangOptions &Opts,
Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
// RUN: %clang_cc1 -fcuda-is-device \
22
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
3-
// RUN: FileCheck -check-prefix=NOFTZ %s
3+
// RUN: FileCheck -check-prefixes=NOFTZ,PTXNOFTZ %s
44

55
// RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=ieee \
66
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
7-
// RUN: FileCheck -check-prefix=NOFTZ %s
7+
// RUN: FileCheck -check-prefixes=NOFTZ,PTXNOFTZ %s
88

99
// RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \
1010
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
11-
// RUN: FileCheck -check-prefix=FTZ %s
11+
// RUN: FileCheck -check-prefixes=FTZ,PTXFTZ %s
1212

1313
// RUN: %clang_cc1 -fcuda-is-device -x hip \
1414
// RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \
15-
// RUN: FileCheck -check-prefix=AMDNOFTZ %s
15+
// RUN: FileCheck -check-prefix=NOFTZ %s
1616

1717
// RUN: %clang_cc1 -fcuda-is-device -x hip \
1818
// RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -fdenormal-fp-math-f32=ieee -emit-llvm -o - %s | \
19-
// RUN: FileCheck -check-prefix=AMDNOFTZ %s
19+
// RUN: FileCheck -check-prefix=NOFTZ %s
2020

2121
// RUN: %clang_cc1 -fcuda-is-device -x hip -fdenormal-fp-math-f32=preserve-sign \
2222
// RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \
23-
// RUN: FileCheck -check-prefix=AMDFTZ %s
23+
// RUN: FileCheck -check-prefix=FTZ %s
2424

2525
#include "Inputs/cuda.h"
2626

@@ -29,10 +29,13 @@
2929
// -fdenormal-fp-math-f32. Further, check that we reflect the presence or
3030
// absence of -fcuda-flush-denormals-to-zero in a module flag.
3131

32-
// AMDGCN targets always have +fp64-fp16-denormals.
33-
// AMDGCN targets without fast FMAF (e.g. gfx803) always have +fp32-denormals.
34-
// For AMDGCN target with fast FMAF (e.g. gfx900), it has +fp32-denormals
35-
// by default and -fp32-denormals when there is option
32+
// AMDGCN targets always have f64/f16 denormals enabled.
33+
//
34+
// AMDGCN targets without fast FMAF (e.g. gfx803) have f32 denormal
35+
// flushing by default.
36+
//
37+
// For AMDGCN target with fast FMAF (e.g. gfx900), it has ieee denormals by
38+
// default and preserve-sign when used with the option
3639
// -fcuda-flush-denormals-to-zero.
3740

3841
// CHECK-LABEL: define void @foo() #0
@@ -41,11 +44,8 @@ extern "C" __device__ void foo() {}
4144
// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
4245
// NOFTZ-NOT: "denormal-fp-math-f32"
4346

44-
// AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals
45-
// AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals
46-
47-
// FTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]}
48-
// FTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
47+
// PTXFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]}
48+
// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
4949

50-
// NOFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]}
51-
// NOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
50+
// PTXNOFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]}
51+
// PTXNOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@
1414
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
1515
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
1616

17-
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
18-
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
19-
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime"
20-
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
21-
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
22-
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
23-
// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
24-
// GFX700: "target-features"="+ci-insts,+flat-address-space,+fp64-fp16-denormals,-fp32-denormals"
25-
// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
26-
// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
17+
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime"
18+
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime"
19+
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime"
20+
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
21+
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
22+
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
23+
// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime"
24+
// GFX700: "target-features"="+ci-insts,+flat-address-space"
25+
// GFX600-NOT: "target-features"
26+
// GFX601-NOT: "target-features"
2727

2828
kernel void test() {}

0 commit comments

Comments
 (0)