Skip to content

Commit fbe4ff8

Browse files
committed
AMDGPU: Partially fix not respecting dynamic denormal mode
The most notable issue was that v_mad_f32 was still being produced in functions using the dynamic denormal mode, even though that instruction ignores the mode. fdiv lowering is still somewhat broken because it involves a mode switch, and we need to query the original mode in order to restore it afterwards.
1 parent 9b8ed1e commit fbe4ff8

14 files changed

+1162
-981
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ class AMDGPUCodeGenPrepareImpl
9999
Module *Mod = nullptr;
100100
const DataLayout *DL = nullptr;
101101
bool HasUnsafeFPMath = false;
102-
bool HasFP32Denormals = false;
102+
bool HasFP32DenormalFlush = false;
103103
bool FlowChanged = false;
104104

105105
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
@@ -793,8 +793,8 @@ static Value *optimizeWithRcp(Value *Num, Value *Den, bool AllowInaccurateRcp,
793793
//
794794
// NOTE: optimizeWithRcp should be tried first because rcp is the preference.
795795
static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
796-
bool HasDenormals, IRBuilder<> &Builder,
797-
Module *Mod) {
796+
bool HasFP32DenormalFlush,
797+
IRBuilder<> &Builder, Module *Mod) {
798798
// fdiv.fast can achieve 2.5 ULP accuracy.
799799
if (ReqdAccuracy < 2.5f)
800800
return nullptr;
@@ -811,7 +811,7 @@ static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
811811
}
812812

813813
// fdiv does not support denormals, but it is always fine to use for 1.0/x.
814-
if (HasDenormals && !NumIsOne)
814+
if (!HasFP32DenormalFlush && !NumIsOne)
815815
return nullptr;
816816

817817
Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
@@ -851,7 +851,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
851851
// rcp_f16 is accurate to 0.51 ulp.
852852
// rcp_f32 is accurate for !fpmath >= 1.0ulp and denormals are flushed.
853853
// rcp_f64 is never accurate.
854-
const bool RcpIsAccurate = !HasFP32Denormals && ReqdAccuracy >= 1.0f;
854+
const bool RcpIsAccurate = HasFP32DenormalFlush && ReqdAccuracy >= 1.0f;
855855

856856
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
857857
Builder.setFastMathFlags(FMF);
@@ -873,8 +873,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
873873
Value *NewElt = optimizeWithRcp(NumEltI, DenEltI, AllowInaccurateRcp,
874874
RcpIsAccurate, Builder, Mod);
875875
if (!NewElt) // Try fdiv.fast.
876-
NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy,
877-
HasFP32Denormals, Builder, Mod);
876+
NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy,
877+
HasFP32DenormalFlush, Builder, Mod);
878878
if (!NewElt) // Keep the original.
879879
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
880880

@@ -885,8 +885,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
885885
NewFDiv = optimizeWithRcp(Num, Den, AllowInaccurateRcp, RcpIsAccurate,
886886
Builder, Mod);
887887
if (!NewFDiv) { // Try fdiv.fast.
888-
NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy, HasFP32Denormals,
889-
Builder, Mod);
888+
NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy,
889+
HasFP32DenormalFlush, Builder, Mod);
890890
}
891891
}
892892

@@ -1832,7 +1832,8 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
18321832
Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
18331833
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
18341834
SIModeRegisterDefaults Mode(F);
1835-
Impl.HasFP32Denormals = Mode.allFP32Denormals();
1835+
Impl.HasFP32DenormalFlush =
1836+
Mode.FP32Denormals == DenormalMode::getPreserveSign();
18361837
return Impl.run(F);
18371838
}
18381839

@@ -1848,7 +1849,8 @@ PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
18481849
Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
18491850
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
18501851
SIModeRegisterDefaults Mode(F);
1851-
Impl.HasFP32Denormals = Mode.allFP32Denormals();
1852+
Impl.HasFP32DenormalFlush =
1853+
Mode.FP32Denormals == DenormalMode::getPreserveSign();
18521854
PreservedAnalyses PA = PreservedAnalyses::none();
18531855
if (!Impl.FlowChanged)
18541856
PA.preserveSet<CFGAnalyses>();

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,7 +1883,8 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
18831883
bool UseFmadFtz = false;
18841884
if (Subtarget->isGCN()) {
18851885
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1886-
UseFmadFtz = MFI->getMode().allFP32Denormals();
1886+
UseFmadFtz =
1887+
MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign();
18871888
}
18881889

18891890
// float fr = mad(fqneg, fb, fa);
@@ -1975,11 +1976,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
19751976
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
19761977

19771978
// Compute denominator reciprocal.
1978-
unsigned FMAD = !Subtarget->hasMadMacF32Insts() ?
1979-
(unsigned)ISD::FMA :
1980-
!MFI->getMode().allFP32Denormals() ?
1981-
(unsigned)ISD::FMAD :
1982-
(unsigned)AMDGPUISD::FMAD_FTZ;
1979+
unsigned FMAD =
1980+
!Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
1981+
: MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()
1982+
? (unsigned)ISD::FMAD
1983+
: (unsigned)AMDGPUISD::FMAD_FTZ;
19831984

19841985
SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
19851986
SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,12 @@ let GIIgnoreCopies = 1 in
110110
class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;
111111

112112
let RecomputePerFunction = 1 in {
113-
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
114-
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
115-
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
116-
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
117-
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
118-
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
113+
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
114+
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
115+
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
116+
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
117+
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
118+
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
119119
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
120120
}
121121

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2965,9 +2965,11 @@ bool AMDGPULegalizerInfo::legalizeFMad(
29652965

29662966
// TODO: Always legal with future ftz flag.
29672967
// FIXME: Do we need just output?
2968-
if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals())
2968+
if (Ty == LLT::scalar(32) &&
2969+
MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign())
29692970
return true;
2970-
if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals())
2971+
if (Ty == LLT::scalar(16) &&
2972+
MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign())
29712973
return true;
29722974

29732975
MachineIRBuilder HelperBuilder(MI);
@@ -4642,7 +4644,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
46424644

46434645
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
46444646
// aren't modeled as reading it.
4645-
if (!Mode.allFP32Denormals())
4647+
if (Mode.FP32Denormals != DenormalMode::getIEEE())
46464648
toggleSPDenormMode(true, B, ST, Mode);
46474649

46484650
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
@@ -4652,7 +4654,9 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
46524654
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
46534655
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
46544656

4655-
if (!Mode.allFP32Denormals())
4657+
// FIXME: This mishandles dynamic denormal mode. We need to query the
4658+
// current mode and restore the original.
4659+
if (Mode.FP32Denormals != DenormalMode::getIEEE())
46564660
toggleSPDenormMode(false, B, ST, Mode);
46574661

46584662
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,9 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
297297
TLI(ST->getTargetLowering()), CommonTTI(TM, F),
298298
IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
299299
SIModeRegisterDefaults Mode(F);
300-
HasFP32Denormals = Mode.allFP32Denormals();
301-
HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
300+
HasFP32Denormals = Mode.FP32Denormals != DenormalMode::getPreserveSign();
301+
HasFP64FP16Denormals =
302+
Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
302303
}
303304

304305
bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,14 @@ static cl::opt<bool> UseDivergentRegisterIndexing(
5959
cl::desc("Use indirect register addressing for divergent indexes"),
6060
cl::init(false));
6161

62-
static bool hasFP32Denormals(const MachineFunction &MF) {
62+
static bool denormalModeIsFlushAllF32(const MachineFunction &MF) {
6363
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
64-
return Info->getMode().allFP32Denormals();
64+
return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign();
6565
}
6666

67-
static bool hasFP64FP16Denormals(const MachineFunction &MF) {
67+
static bool denormalModeIsFlushAllF64F16(const MachineFunction &MF) {
6868
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
69-
return Info->getMode().allFP64FP16Denormals();
69+
return Info->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign();
7070
}
7171

7272
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
@@ -830,10 +830,10 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
830830
EVT DestVT, EVT SrcVT) const {
831831
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
832832
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
833-
DestVT.getScalarType() == MVT::f32 &&
834-
SrcVT.getScalarType() == MVT::f16 &&
835-
// TODO: This probably only requires no input flushing?
836-
!hasFP32Denormals(DAG.getMachineFunction());
833+
DestVT.getScalarType() == MVT::f32 &&
834+
SrcVT.getScalarType() == MVT::f16 &&
835+
// TODO: This probably only requires no input flushing?
836+
denormalModeIsFlushAllF32(DAG.getMachineFunction());
837837
}
838838

839839
bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
@@ -843,7 +843,7 @@ bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
843843
DestTy.getScalarSizeInBits() == 32 &&
844844
SrcTy.getScalarSizeInBits() == 16 &&
845845
// TODO: This probably only requires no input flushing?
846-
!hasFP32Denormals(*MI.getMF());
846+
denormalModeIsFlushAllF32(*MI.getMF());
847847
}
848848

849849
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
@@ -4646,7 +4646,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
46464646
// Otherwise f32 mad is always full rate and returns the same result as
46474647
// the separate operations so should be preferred over fma.
46484648
// However does not support denormals.
4649-
if (hasFP32Denormals(MF))
4649+
if (!denormalModeIsFlushAllF32(MF))
46504650
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
46514651

46524652
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
@@ -4655,7 +4655,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
46554655
case MVT::f64:
46564656
return true;
46574657
case MVT::f16:
4658-
return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF);
4658+
return Subtarget->has16BitInsts() && !denormalModeIsFlushAllF64F16(MF);
46594659
default:
46604660
break;
46614661
}
@@ -4684,9 +4684,10 @@ bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
46844684
return false;
46854685

46864686
if (Ty.getScalarSizeInBits() == 16)
4687-
return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF());
4687+
return Subtarget->hasMadF16() && denormalModeIsFlushAllF64F16(*MI.getMF());
46884688
if (Ty.getScalarSizeInBits() == 32)
4689-
return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
4689+
return Subtarget->hasMadMacF32Insts() &&
4690+
denormalModeIsFlushAllF32(*MI.getMF());
46904691

46914692
return false;
46924693
}
@@ -4698,10 +4699,10 @@ bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
46984699
EVT VT = N->getValueType(0);
46994700
if (VT == MVT::f32)
47004701
return Subtarget->hasMadMacF32Insts() &&
4701-
!hasFP32Denormals(DAG.getMachineFunction());
4702+
denormalModeIsFlushAllF32(DAG.getMachineFunction());
47024703
if (VT == MVT::f16) {
47034704
return Subtarget->hasMadF16() &&
4704-
!hasFP64FP16Denormals(DAG.getMachineFunction());
4705+
denormalModeIsFlushAllF64F16(DAG.getMachineFunction());
47054706
}
47064707

47074708
return false;
@@ -9307,15 +9308,13 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
93079308

93089309
// Returns immediate value for setting the F32 denorm mode when using the
93099310
// S_DENORM_MODE instruction.
9310-
static SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
9311-
const SDLoc &SL, const GCNSubtarget *ST) {
9311+
static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG,
9312+
const SIMachineFunctionInfo *Info,
9313+
const GCNSubtarget *ST) {
93129314
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
9313-
int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
9314-
? FP_DENORM_FLUSH_NONE
9315-
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
9316-
9317-
int Mode = SPDenormMode | (DPDenormModeDefault << 2);
9318-
return DAG.getTargetConstant(Mode, SL, MVT::i32);
9315+
uint32_t DPDenormModeDefault = Info->getMode().fpDenormModeDPValue();
9316+
uint32_t Mode = SPDenormMode | (DPDenormModeDefault << 2);
9317+
return DAG.getTargetConstant(Mode, SDLoc(), MVT::i32);
93199318
}
93209319

93219320
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
@@ -9353,7 +9352,11 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
93539352
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
93549353
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
93559354

9356-
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
9355+
const MachineFunction &MF = DAG.getMachineFunction();
9356+
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
9357+
const DenormalMode DenormMode = Info->getMode().FP32Denormals;
9358+
9359+
const bool HasFP32Denormals = DenormMode == DenormalMode::getIEEE();
93579360

93589361
if (!HasFP32Denormals) {
93599362
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
@@ -9365,7 +9368,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
93659368
SDNode *EnableDenorm;
93669369
if (Subtarget->hasDenormModeInst()) {
93679370
const SDValue EnableDenormValue =
9368-
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
9371+
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget);
93699372

93709373
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
93719374
DAG.getEntryNode(), EnableDenormValue).getNode();
@@ -9405,10 +9408,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
94059408
NumeratorScaled, Fma3, Flags);
94069409

94079410
if (!HasFP32Denormals) {
9411+
// FIXME: This mishandles dynamic denormal mode. We need to query the
9412+
// current mode and restore the original.
9413+
94089414
SDNode *DisableDenorm;
94099415
if (Subtarget->hasDenormModeInst()) {
9410-
const SDValue DisableDenormValue =
9411-
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
9416+
const SDValue DisableDenormValue = getSPDenormModeValue(
9417+
FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget);
94129418

94139419
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
94149420
Fma4.getValue(1), DisableDenormValue,
@@ -11782,10 +11788,11 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
1178211788

1178311789
// Only do this if we are not trying to support denormals. v_mad_f32 does not
1178411790
// support denormals ever.
11785-
if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
11786-
(VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
11787-
getSubtarget()->hasMadF16())) &&
11788-
isOperationLegal(ISD::FMAD, VT))
11791+
if (((VT == MVT::f32 &&
11792+
denormalModeIsFlushAllF32(DAG.getMachineFunction())) ||
11793+
(VT == MVT::f16 && Subtarget->hasMadF16() &&
11794+
denormalModeIsFlushAllF64F16(DAG.getMachineFunction()))) &&
11795+
isOperationLegal(ISD::FMAD, VT))
1178911796
return ISD::FMAD;
1179011797

1179111798
const TargetOptions &Options = DAG.getTarget().Options;
@@ -13743,10 +13750,10 @@ bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
1374313750
EVT VT) const {
1374413751
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
1374513752
case MVT::f32:
13746-
return hasFP32Denormals(DAG.getMachineFunction());
13753+
return !denormalModeIsFlushAllF32(DAG.getMachineFunction());
1374713754
case MVT::f64:
1374813755
case MVT::f16:
13749-
return hasFP64FP16Denormals(DAG.getMachineFunction());
13756+
return !denormalModeIsFlushAllF64F16(DAG.getMachineFunction());
1375013757
default:
1375113758
return false;
1375213759
}
@@ -13756,10 +13763,10 @@ bool SITargetLowering::denormalsEnabledForType(LLT Ty,
1375613763
MachineFunction &MF) const {
1375713764
switch (Ty.getScalarSizeInBits()) {
1375813765
case 32:
13759-
return hasFP32Denormals(MF);
13766+
return !denormalModeIsFlushAllF32(MF);
1376013767
case 64:
1376113768
case 16:
13762-
return hasFP64FP16Denormals(MF);
13769+
return !denormalModeIsFlushAllF64F16(MF);
1376313770
default:
1376413771
return false;
1376513772
}

llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,6 @@ struct SIModeRegisterDefaults {
5454
FP64FP16Denormals == Other.FP64FP16Denormals;
5555
}
5656

57-
bool allFP32Denormals() const {
58-
return FP32Denormals == DenormalMode::getIEEE();
59-
}
60-
61-
bool allFP64FP16Denormals() const {
62-
return FP64FP16Denormals == DenormalMode::getIEEE();
63-
}
64-
6557
/// Get the encoding value for the FP_DENORM bits of the mode register for the
6658
/// FP32 denormal mode.
6759
uint32_t fpDenormModeSPValue() const {

0 commit comments

Comments
 (0)