Skip to content

Commit dcb8348

Browse files
committed
[AMDGPU] Split SIModeRegisterDefaults out of AMDGPUBaseInfo. NFC.
This is only used by CodeGen. Moving it out of AMDGPUBaseInfo simplifies future changes to make some of it depend on the subtarget. Differential Revision: https://reviews.llvm.org/D144650
1 parent 1812e13 commit dcb8348

14 files changed

+181
-150
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ using namespace llvm::AMDGPU;
6565
// We want to use these instructions, and using fp32 denormals also causes
6666
// instructions to run at the double precision rate for the device so it's
6767
// probably best to just report no single precision denormals.
68-
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
68+
static uint32_t getFPMode(SIModeRegisterDefaults Mode) {
6969
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
7070
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
7171
FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,16 @@
1414

1515
#include "AMDGPU.h"
1616
#include "AMDGPUTargetMachine.h"
17+
#include "SIModeRegisterDefaults.h"
1718
#include "llvm/Analysis/AssumptionCache.h"
1819
#include "llvm/Analysis/ConstantFolding.h"
1920
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
2021
#include "llvm/Analysis/ValueTracking.h"
2122
#include "llvm/CodeGen/TargetPassConfig.h"
2223
#include "llvm/IR/Dominators.h"
24+
#include "llvm/IR/IRBuilder.h"
2325
#include "llvm/IR/InstVisitor.h"
2426
#include "llvm/IR/IntrinsicsAMDGPU.h"
25-
#include "llvm/IR/IRBuilder.h"
2627
#include "llvm/InitializePasses.h"
2728
#include "llvm/Pass.h"
2829
#include "llvm/Support/KnownBits.h"
@@ -1425,7 +1426,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
14251426

14261427
HasUnsafeFPMath = hasUnsafeFPMath(F);
14271428

1428-
AMDGPU::SIModeRegisterDefaults Mode(F);
1429+
SIModeRegisterDefaults Mode(F);
14291430
HasFP32Denormals = Mode.allFP32Denormals();
14301431

14311432
bool MadeChange = false;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
131131
}
132132
#endif
133133
Subtarget = &MF.getSubtarget<GCNSubtarget>();
134-
Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
134+
Mode = SIModeRegisterDefaults(MF.getFunction());
135135
return SelectionDAGISel::runOnMachineFunction(MF);
136136
}
137137

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "GCNSubtarget.h"
1818
#include "SIMachineFunctionInfo.h"
19+
#include "SIModeRegisterDefaults.h"
1920
#include "llvm/CodeGen/SelectionDAGISel.h"
2021
#include "llvm/Target/TargetMachine.h"
2122

@@ -82,7 +83,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
8283
const GCNSubtarget *Subtarget;
8384

8485
// Default FP mode for the current function.
85-
AMDGPU::SIModeRegisterDefaults Mode;
86+
SIModeRegisterDefaults Mode;
8687

8788
bool EnableLateStructurizeCFG;
8889

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ bool AMDGPUInstructionSelector::selectG_FMA_FMAD(MachineInstr &I) const {
547547
#ifndef NDEBUG
548548
const SIMachineFunctionInfo *MFI =
549549
I.getMF()->getInfo<SIMachineFunctionInfo>();
550-
AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode();
550+
SIModeRegisterDefaults Mode = MFI->getMode();
551551
assert((IsFMA || !Mode.allFP32Denormals()) &&
552552
"fmad selected with denormals enabled");
553553
#endif

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3875,10 +3875,9 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
38753875

38763876
// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
38773877
// to enable denorm mode. When 'Enable' is false, disable denorm mode.
3878-
static void toggleSPDenormMode(bool Enable,
3879-
MachineIRBuilder &B,
3878+
static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
38803879
const GCNSubtarget &ST,
3881-
AMDGPU::SIModeRegisterDefaults Mode) {
3880+
SIModeRegisterDefaults Mode) {
38823881
// Set SP denorm mode to this value.
38833882
unsigned SPDenormMode =
38843883
Enable ? FP_DENORM_FLUSH_NONE : Mode.fpDenormModeSPValue();
@@ -3913,7 +3912,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
39133912
Register LHS = MI.getOperand(1).getReg();
39143913
Register RHS = MI.getOperand(2).getReg();
39153914
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
3916-
AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode();
3915+
SIModeRegisterDefaults Mode = MFI->getMode();
39173916

39183917
uint16_t Flags = MI.getFlags();
39193918

llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class AMDGPURegBankCombinerHelper {
7575
void applyClamp(MachineInstr &MI, Register &Reg);
7676

7777
private:
78-
AMDGPU::SIModeRegisterDefaults getMode();
78+
SIModeRegisterDefaults getMode();
7979
bool getIEEE();
8080
bool getDX10Clamp();
8181
bool isFminnumIeee(const MachineInstr &MI);
@@ -328,7 +328,7 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
328328
MI.eraseFromParent();
329329
}
330330

331-
AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
331+
SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
332332
return MF.getInfo<SIMachineFunctionInfo>()->getMode();
333333
}
334334

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "AMDGPUTargetTransformInfo.h"
1818
#include "AMDGPUTargetMachine.h"
1919
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20+
#include "SIModeRegisterDefaults.h"
2021
#include "llvm/Analysis/InlineCost.h"
2122
#include "llvm/Analysis/LoopInfo.h"
2223
#include "llvm/Analysis/ValueTracking.h"
@@ -293,7 +294,7 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
293294
ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
294295
TLI(ST->getTargetLowering()), CommonTTI(TM, F),
295296
IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
296-
AMDGPU::SIModeRegisterDefaults Mode(F);
297+
SIModeRegisterDefaults Mode(F);
297298
HasFP32Denormals = Mode.allFP32Denormals();
298299
HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
299300
}
@@ -1148,8 +1149,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
11481149

11491150
// FIXME: dx10_clamp can just take the caller setting, but there seems to be
11501151
// no way to support merge for backend defined attributes.
1151-
AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
1152-
AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
1152+
SIModeRegisterDefaults CallerMode(*Caller);
1153+
SIModeRegisterDefaults CalleeMode(*Callee);
11531154
if (!CallerMode.isInlineCompatible(CalleeMode))
11541155
return false;
11551156

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ add_llvm_target(AMDGPUCodeGen
152152
SIMachineScheduler.cpp
153153
SIMemoryLegalizer.cpp
154154
SIModeRegister.cpp
155+
SIModeRegisterDefaults.cpp
155156
SIOptimizeExecMasking.cpp
156157
SIOptimizeExecMaskingPreRA.cpp
157158
SIOptimizeVGPRLiveRange.cpp

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "AMDGPUTargetMachine.h"
1919
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
2020
#include "SIInstrInfo.h"
21+
#include "SIModeRegisterDefaults.h"
2122
#include "llvm/ADT/SetVector.h"
2223
#include "llvm/CodeGen/MIRYamlMapping.h"
2324
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -215,7 +216,7 @@ struct SIMode {
215216

216217
SIMode() = default;
217218

218-
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
219+
SIMode(const SIModeRegisterDefaults &Mode) {
219220
IEEE = Mode.IEEE;
220221
DX10Clamp = Mode.DX10Clamp;
221222
FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
@@ -359,7 +360,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
359360
friend class GCNTargetMachine;
360361

361362
// State of MODE register, assumed FP mode.
362-
AMDGPU::SIModeRegisterDefaults Mode;
363+
SIModeRegisterDefaults Mode;
363364

364365
// Registers that may be reserved for spilling purposes. These may be the same
365366
// as the input registers.
@@ -551,9 +552,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
551552

552553
void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
553554

554-
AMDGPU::SIModeRegisterDefaults getMode() const {
555-
return Mode;
556-
}
555+
SIModeRegisterDefaults getMode() const { return Mode; }
557556

558557
ArrayRef<SIRegisterInfo::SpilledReg>
559558
getSGPRSpillToVGPRLanes(int FrameIndex) const {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "SIModeRegisterDefaults.h"
10+
11+
using namespace llvm;
12+
13+
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
14+
*this = getDefaultForCallingConv(F.getCallingConv());
15+
16+
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
17+
if (!IEEEAttr.empty())
18+
IEEE = IEEEAttr == "true";
19+
20+
StringRef DX10ClampAttr =
21+
F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
22+
if (!DX10ClampAttr.empty())
23+
DX10Clamp = DX10ClampAttr == "true";
24+
25+
StringRef DenormF32Attr =
26+
F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
27+
if (!DenormF32Attr.empty())
28+
FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);
29+
30+
StringRef DenormAttr =
31+
F.getFnAttribute("denormal-fp-math").getValueAsString();
32+
if (!DenormAttr.empty()) {
33+
DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
34+
if (DenormF32Attr.empty())
35+
FP32Denormals = DenormMode;
36+
FP64FP16Denormals = DenormMode;
37+
}
38+
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//===-- SIModeRegisterDefaults.h --------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
10+
#define LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
11+
12+
#include "Utils/AMDGPUBaseInfo.h"
13+
#include "llvm/ADT/FloatingPointMode.h"
14+
15+
namespace llvm {
16+
17+
// Track defaults for fields in the MODE register.
18+
struct SIModeRegisterDefaults {
19+
/// Floating point opcodes that support exception flag gathering quiet and
20+
/// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
21+
/// become IEEE 754-2008 compliant due to signaling NaN propagation and
22+
/// quieting.
23+
bool IEEE : 1;
24+
25+
/// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
26+
/// clamp NaN to zero; otherwise, pass NaN through.
27+
bool DX10Clamp : 1;
28+
29+
/// If this is set, neither input nor output denormals are flushed for most f32
30+
/// instructions.
31+
DenormalMode FP32Denormals;
32+
33+
/// If this is set, neither input nor output denormals are flushed for both f64
34+
/// and f16/v2f16 instructions.
35+
DenormalMode FP64FP16Denormals;
36+
37+
SIModeRegisterDefaults()
38+
: IEEE(true), DX10Clamp(true), FP32Denormals(DenormalMode::getIEEE()),
39+
FP64FP16Denormals(DenormalMode::getIEEE()) {}
40+
41+
SIModeRegisterDefaults(const Function &F);
42+
43+
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
44+
SIModeRegisterDefaults Mode;
45+
Mode.IEEE = !AMDGPU::isShader(CC);
46+
return Mode;
47+
}
48+
49+
bool operator==(const SIModeRegisterDefaults Other) const {
50+
return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
51+
FP32Denormals == Other.FP32Denormals &&
52+
FP64FP16Denormals == Other.FP64FP16Denormals;
53+
}
54+
55+
bool allFP32Denormals() const {
56+
return FP32Denormals == DenormalMode::getIEEE();
57+
}
58+
59+
bool allFP64FP16Denormals() const {
60+
return FP64FP16Denormals == DenormalMode::getIEEE();
61+
}
62+
63+
/// Get the encoding value for the FP_DENORM bits of the mode register for the
64+
/// FP32 denormal mode.
65+
uint32_t fpDenormModeSPValue() const {
66+
if (FP32Denormals == DenormalMode::getPreserveSign())
67+
return FP_DENORM_FLUSH_IN_FLUSH_OUT;
68+
if (FP32Denormals.Output == DenormalMode::PreserveSign)
69+
return FP_DENORM_FLUSH_OUT;
70+
if (FP32Denormals.Input == DenormalMode::PreserveSign)
71+
return FP_DENORM_FLUSH_IN;
72+
return FP_DENORM_FLUSH_NONE;
73+
}
74+
75+
/// Get the encoding value for the FP_DENORM bits of the mode register for the
76+
/// FP64/FP16 denormal mode.
77+
uint32_t fpDenormModeDPValue() const {
78+
if (FP64FP16Denormals == DenormalMode::getPreserveSign())
79+
return FP_DENORM_FLUSH_IN_FLUSH_OUT;
80+
if (FP64FP16Denormals.Output == DenormalMode::PreserveSign)
81+
return FP_DENORM_FLUSH_OUT;
82+
if (FP64FP16Denormals.Input == DenormalMode::PreserveSign)
83+
return FP_DENORM_FLUSH_IN;
84+
return FP_DENORM_FLUSH_NONE;
85+
}
86+
87+
/// Returns true if the flag values match, or if the flag is enabled in the
88+
/// callee but disabled in the caller.
89+
static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
90+
return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
91+
}
92+
93+
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
94+
// be able to override.
95+
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
96+
if (DX10Clamp != CalleeMode.DX10Clamp)
97+
return false;
98+
if (IEEE != CalleeMode.IEEE)
99+
return false;
100+
101+
// Allow inlining a denormals-enabled callee into a denormals-flushed caller.
102+
return oneWayCompatible(FP64FP16Denormals.Input !=
103+
DenormalMode::PreserveSign,
104+
CalleeMode.FP64FP16Denormals.Input !=
105+
DenormalMode::PreserveSign) &&
106+
oneWayCompatible(FP64FP16Denormals.Output !=
107+
DenormalMode::PreserveSign,
108+
CalleeMode.FP64FP16Denormals.Output !=
109+
DenormalMode::PreserveSign) &&
110+
oneWayCompatible(FP32Denormals.Input != DenormalMode::PreserveSign,
111+
CalleeMode.FP32Denormals.Input !=
112+
DenormalMode::PreserveSign) &&
113+
oneWayCompatible(FP32Denormals.Output != DenormalMode::PreserveSign,
114+
CalleeMode.FP32Denormals.Output !=
115+
DenormalMode::PreserveSign);
116+
}
117+
};
118+
119+
} // end namespace llvm
120+
121+
#endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2598,31 +2598,6 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
25982598
return 13;
25992599
}
26002600

2601-
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
2602-
*this = getDefaultForCallingConv(F.getCallingConv());
2603-
2604-
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
2605-
if (!IEEEAttr.empty())
2606-
IEEE = IEEEAttr == "true";
2607-
2608-
StringRef DX10ClampAttr
2609-
= F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
2610-
if (!DX10ClampAttr.empty())
2611-
DX10Clamp = DX10ClampAttr == "true";
2612-
2613-
StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
2614-
if (!DenormF32Attr.empty())
2615-
FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);
2616-
2617-
StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
2618-
if (!DenormAttr.empty()) {
2619-
DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
2620-
if (DenormF32Attr.empty())
2621-
FP32Denormals = DenormMode;
2622-
FP64FP16Denormals = DenormMode;
2623-
}
2624-
}
2625-
26262601
namespace {
26272602

26282603
struct SourceOfDivergence {

0 commit comments

Comments
 (0)