Skip to content

Commit fa7cd54

Browse files
committed
clang: Guess at some platform FTZ/DAZ default settings
This is to avoid performance regressions when the default attribute behavior is fixed to assume IEEE. I tested the default on x86_64 Ubuntu, which seems to default to FTZ/DAZ, but I am guessing for x86 and PS4.
1 parent 2452f85 commit fa7cd54

File tree

8 files changed

+79
-14
lines changed

8 files changed

+79
-14
lines changed

clang/include/clang/Driver/ToolChain.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,12 +573,19 @@ class ToolChain {
573573
virtual void AddCCKextLibArgs(const llvm::opt::ArgList &Args,
574574
llvm::opt::ArgStringList &CmdArgs) const;
575575

576+
/// If a runtime library exists that sets global flags for unsafe floating
577+
/// point math, return true.
578+
///
579+
/// This checks for presence of the -Ofast, -ffast-math or
/// -funsafe-math-optimizations flags.
///
/// \param Args the driver arguments to inspect.
/// \param [out] Path in the base implementation, set to the result of the
/// crtfastmath.o lookup once the flag check has passed; it is left untouched
/// when the flag check fails and is only meaningful when this returns true.
580+
virtual bool isFastMathRuntimeAvailable(
581+
const llvm::opt::ArgList &Args, std::string &Path) const;
582+
576583
/// addFastMathRuntimeIfAvailable - If a runtime library exists that sets
577584
/// global flags for unsafe floating point math, add it and return true.
578585
///
579586
/// This checks for presence of the -Ofast, -ffast-math or
/// -funsafe-math-optimizations flags.
580-
virtual bool AddFastMathRuntimeIfAvailable(
581-
const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const;
587+
bool addFastMathRuntimeIfAvailable(
588+
const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const;
582589

583590
/// addProfileRTLibs - When -fprofile-instr-profile is specified, try to pass
584591
/// a suitable profile runtime library to the linker.

clang/lib/Driver/ToolChain.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -917,28 +917,35 @@ void ToolChain::AddCCKextLibArgs(const ArgList &Args,
917917
CmdArgs.push_back("-lcc_kext");
918918
}
919919

920-
bool ToolChain::AddFastMathRuntimeIfAvailable(const ArgList &Args,
921-
ArgStringList &CmdArgs) const {
920+
// Returns true when fast-math was requested and crtfastmath.o was located.
// Path is only written after the flag check passes; on the early
// `return false` below it is left untouched.
bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args,
921+
std::string &Path) const {
922922
// Do not check for -fno-fast-math or -fno-unsafe-math-optimizations when
923923
// -Ofast is passed (to keep the linker options consistent with gcc and clang itself).
924924
if (!isOptimizationLevelFast(Args)) {
925925
// Check whether the last of the fast-math / unsafe-math flags enables them.
926926
Arg *A =
927-
Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
928-
options::OPT_funsafe_math_optimizations,
929-
options::OPT_fno_unsafe_math_optimizations);
927+
Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
928+
options::OPT_funsafe_math_optimizations,
929+
options::OPT_fno_unsafe_math_optimizations);
930930

931931
if (!A || A->getOption().getID() == options::OPT_fno_fast_math ||
932932
A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations)
933933
return false;
934934
}
935935
// Look up crtfastmath.o; adding it to the link arguments is left to the caller.
936-
std::string Path = GetFilePath("crtfastmath.o");
937-
if (Path == "crtfastmath.o") // Not found.
938-
return false;
936+
Path = GetFilePath("crtfastmath.o");
937+
return (Path != "crtfastmath.o"); // A bare, unchanged name means not found.
938+
}
939+
940+
// Appends the located fast-math runtime object to CmdArgs and returns true
// when isFastMathRuntimeAvailable() reports one; otherwise leaves CmdArgs
// untouched and returns false.
bool ToolChain::addFastMathRuntimeIfAvailable(const ArgList &Args,
941+
ArgStringList &CmdArgs) const {
942+
// Filled in by isFastMathRuntimeAvailable() when it returns true.
std::string Path;
943+
if (isFastMathRuntimeAvailable(Args, Path)) {
944+
CmdArgs.push_back(Args.MakeArgString(Path));
945+
return true;
946+
}
939947

940-
CmdArgs.push_back(Args.MakeArgString(Path));
941-
return true;
948+
return false;
942949
}
943950

944951
SanitizerMask ToolChain::getSupportedSanitizers() const {

clang/lib/Driver/ToolChains/Gnu.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
502502
}
503503

504504
// Add crtfastmath.o if available and fast math is enabled.
505-
ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
505+
ToolChain.addFastMathRuntimeIfAvailable(Args, CmdArgs);
506506
}
507507

508508
Args.AddAllArgs(CmdArgs, options::OPT_L);

clang/lib/Driver/ToolChains/Linux.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,3 +988,22 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args,
988988
Twine("-u", llvm::getInstrProfRuntimeHookVarName())));
989989
ToolChain::addProfileRTLibs(Args, CmdArgs);
990990
}
991+
992+
// Picks the default denormal handling mode for this toolchain.
// On x86 and x86_64 the result is preserve-sign when neither -nostdlib nor
// -nostartfiles is given and isFastMathRuntimeAvailable() returns true;
// every other case yields IEEE. DeviceOffloadKind and FPType are not
// consulted here.
llvm::DenormalMode Linux::getDefaultDenormalModeForType(
993+
const llvm::opt::ArgList &DriverArgs,
994+
Action::OffloadKind DeviceOffloadKind,
995+
const llvm::fltSemantics *FPType) const {
996+
switch (getTriple().getArch()) {
997+
case llvm::Triple::x86:
998+
case llvm::Triple::x86_64: {
999+
// Only the boolean result is needed; the located path is discarded.
std::string Unused;
1000+
// DAZ and FTZ are turned on in crtfastmath.o, so they only apply when
// that object would be linked (i.e. not with -nostdlib/-nostartfiles).
1001+
if (!DriverArgs.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) &&
1002+
isFastMathRuntimeAvailable(DriverArgs, Unused))
1003+
return llvm::DenormalMode::getPreserveSign();
1004+
return llvm::DenormalMode::getIEEE();
1005+
}
1006+
default:
1007+
return llvm::DenormalMode::getIEEE();
1008+
}
1009+
}

clang/lib/Driver/ToolChains/Linux.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF {
4646

4747
std::vector<std::string> ExtraOpts;
4848

49+
/// Returns the default denormal handling mode for the Linux toolchain.
// NOTE(review): this override declares a `= nullptr` default for FPType while
// the PS4CPU override does not; default arguments on virtual functions are
// resolved by static type, so confirm this matches the base declaration.
llvm::DenormalMode getDefaultDenormalModeForType(
50+
const llvm::opt::ArgList &DriverArgs,
51+
Action::OffloadKind DeviceOffloadKind,
52+
const llvm::fltSemantics *FPType = nullptr) const override;
53+
4954
protected:
5055
Tool *buildAssembler() const override;
5156
Tool *buildLinker() const override;

clang/lib/Driver/ToolChains/MinGW.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
300300

301301
if (!Args.hasArg(options::OPT_nostartfiles)) {
302302
// Add crtfastmath.o if available and fast math is enabled.
303-
TC.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
303+
TC.addFastMathRuntimeIfAvailable(Args, CmdArgs);
304304

305305
CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o")));
306306
}

clang/lib/Driver/ToolChains/PS4CPU.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,14 @@ class LLVM_LIBRARY_VISIBILITY PS4CPU : public Generic_ELF {
8888
// capable of unit splitting.
8989
bool canSplitThinLTOUnit() const override { return false; }
9090

91+
/// Returns the default denormal handling mode for PS4: always preserve-sign,
/// regardless of the driver arguments, offload kind or FP type.
llvm::DenormalMode getDefaultDenormalModeForType(
92+
const llvm::opt::ArgList &DriverArgs,
93+
Action::OffloadKind DeviceOffloadKind,
94+
const llvm::fltSemantics *FPType) const override {
95+
// DAZ and FTZ are on by default.
// NOTE(review): the commit message describes the PS4 default as a guess —
// confirm against the platform's actual startup behavior.
96+
return llvm::DenormalMode::getPreserveSign();
97+
}
98+
9199
protected:
92100
Tool *buildAssembler() const override;
93101
Tool *buildLinker() const override;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: %clang -### -target arm-unknown-linux-gnu -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s
2+
// RUN: %clang -### -target i386-unknown-linux-gnu -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s
3+
4+
// RUN: %clang -### -target x86_64-unknown-linux-gnu --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s
5+
6+
// crtfastmath enables ftz and daz
7+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -ffast-math --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s
8+
9+
// crtfastmath.o is not linked in with -nostartfiles
10+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -ffast-math -nostartfiles --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s
11+
12+
// If there's no crtfastmath, don't assume ftz/daz
13+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -ffast-math --sysroot=/dev/null -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s
14+
15+
// RUN: %clang -### -target x86_64-scei-ps4 -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s
16+
17+
18+
// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee
19+
// CHECK-PRESERVESIGN: -fdenormal-fp-math=preserve-sign,preserve-sign

0 commit comments

Comments
 (0)