Skip to content

Commit c4c76ea

Browse files
authored
[flang] IEEE underflow control for Arm (#124617)
Update IEEE_SUPPORT_UNDERFLOW_CONTROL, IEEE_GET_UNDERFLOW_MODE, and IEEE_SET_UNDERFLOW_MODE code for Arm.
1 parent 08a18ef commit c4c76ea

File tree

2 files changed

+45
-22
lines changed

2 files changed

+45
-22
lines changed

flang/include/flang/Tools/TargetSetup.h

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,34 +24,35 @@ namespace Fortran::tools {
2424
const std::string &compilerVersion, const std::string &compilerOptions) {
2525

2626
const llvm::Triple &targetTriple{targetMachine.getTargetTriple()};
27-
// FIXME: Handle real(3) ?
28-
if (targetTriple.getArch() != llvm::Triple::ArchType::x86_64) {
29-
targetCharacteristics.DisableType(
30-
Fortran::common::TypeCategory::Real, /*kind=*/10);
31-
}
27+
28+
targetCharacteristics.set_ieeeFeature(evaluate::IeeeFeature::Halting, true);
29+
3230
if (targetTriple.getArch() == llvm::Triple::ArchType::x86_64) {
3331
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/3);
3432
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/4);
3533
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/8);
3634
}
35+
3736
if (targetTriple.isARM() || targetTriple.isAArch64()) {
3837
targetCharacteristics.set_haltingSupportIsUnknownAtCompileTime();
3938
targetCharacteristics.set_ieeeFeature(
4039
evaluate::IeeeFeature::Halting, false);
41-
} else {
42-
targetCharacteristics.set_ieeeFeature(evaluate::IeeeFeature::Halting);
40+
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/3);
41+
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/4);
42+
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/8);
43+
}
44+
45+
if (targetTriple.getArch() != llvm::Triple::ArchType::x86_64) {
46+
targetCharacteristics.DisableType(
47+
Fortran::common::TypeCategory::Real, /*kind=*/10);
4348
}
4449

45-
// Figure out if we can support F128: see
46-
// flang/runtime/Float128Math/math-entries.h
47-
// TODO: this should be taken from TargetInfo::getLongDoubleFormat to support
48-
// cross-compilation
50+
// Check for kind=16 support. See flang/runtime/Float128Math/math-entries.h.
51+
// TODO: Take this from TargetInfo::getLongDoubleFormat for cross compilation.
4952
#ifdef FLANG_RUNTIME_F128_MATH_LIB
50-
// we can use libquadmath wrappers
51-
constexpr bool f128Support = true;
53+
constexpr bool f128Support = true; // use libquadmath wrappers
5254
#elif HAS_LDBL128
53-
// we can use libm wrappers
54-
constexpr bool f128Support = true;
55+
constexpr bool f128Support = true; // use libm wrappers
5556
#else
5657
constexpr bool f128Support = false;
5758
#endif

flang/runtime/exceptions.cpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
#include "flang/Runtime/exceptions.h"
1212
#include "terminator.h"
1313
#include <cfenv>
14-
#if __x86_64__
14+
#if __aarch64__
15+
#include <fpu_control.h>
16+
#elif __x86_64__
1517
#include <xmmintrin.h>
1618
#endif
1719

@@ -90,20 +92,40 @@ bool RTNAME(SupportHalting)([[maybe_unused]] uint32_t except) {
9092
#endif
9193
}
9294

95+
// A hardware FZ (flush to zero) bit is the negation of the
96+
// ieee_[get|set]_underflow_mode GRADUAL argument.
97+
#if defined(_MM_FLUSH_ZERO_MASK)
98+
// The x86_64 MXCSR FZ bit affects computations of real kinds 3, 4, and 8.
99+
#elif defined(_FPU_GETCW)
100+
// The aarch64 FPCR FZ bit affects computations of real kinds 3, 4, and 8.
101+
// bit 24: FZ -- single, double precision flush to zero bit
102+
// bit 19: FZ16 -- half precision flush to zero bit [not currently relevant, but included in the mask below, so it is tested and set together with FZ]
103+
#define _FPU_FPCR_FZ_MASK_ 0x01080000
104+
#endif
105+
93106
bool RTNAME(GetUnderflowMode)(void) {
94-
#if _MM_FLUSH_ZERO_MASK
95-
// The MXCSR Flush to Zero flag is the negation of the ieee_get_underflow_mode
96-
// GRADUAL argument. It affects real computations of kinds 3, 4, and 8.
107+
#if defined(_MM_FLUSH_ZERO_MASK)
97108
return _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_OFF;
109+
#elif defined(_FPU_GETCW)
110+
uint64_t fpcr;
111+
_FPU_GETCW(fpcr);
112+
return (fpcr & _FPU_FPCR_FZ_MASK_) == 0;
98113
#else
99114
return false;
100115
#endif
101116
}
102117
void RTNAME(SetUnderflowMode)(bool flag) {
103-
#if _MM_FLUSH_ZERO_MASK
104-
// The MXCSR Flush to Zero flag is the negation of the ieee_set_underflow_mode
105-
// GRADUAL argument. It affects real computations of kinds 3, 4, and 8.
118+
#if defined(_MM_FLUSH_ZERO_MASK)
106119
_MM_SET_FLUSH_ZERO_MODE(flag ? _MM_FLUSH_ZERO_OFF : _MM_FLUSH_ZERO_ON);
120+
#elif defined(_FPU_GETCW)
121+
uint64_t fpcr;
122+
_FPU_GETCW(fpcr);
123+
if (flag) {
124+
fpcr &= ~_FPU_FPCR_FZ_MASK_;
125+
} else {
126+
fpcr |= _FPU_FPCR_FZ_MASK_;
127+
}
128+
_FPU_SETCW(fpcr);
107129
#endif
108130
}
109131

0 commit comments

Comments
 (0)