@@ -59,14 +59,14 @@ static cl::opt<bool> UseDivergentRegisterIndexing(
59
59
cl::desc("Use indirect register addressing for divergent indexes"),
60
60
cl::init(false));
61
61
62
- static bool hasFP32Denormals (const MachineFunction &MF) {
62
+ static bool denormalModeIsFlushAllF32 (const MachineFunction &MF) {
63
63
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
64
- return Info->getMode().allFP32Denormals ();
64
+ return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign ();
65
65
}
66
66
67
- static bool hasFP64FP16Denormals (const MachineFunction &MF) {
67
+ static bool denormalModeIsFlushAllF64F16 (const MachineFunction &MF) {
68
68
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
69
- return Info->getMode().allFP64FP16Denormals ();
69
+ return Info->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign ();
70
70
}
71
71
72
72
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
@@ -830,10 +830,10 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
830
830
EVT DestVT, EVT SrcVT) const {
831
831
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
832
832
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
833
- DestVT.getScalarType() == MVT::f32 &&
834
- SrcVT.getScalarType() == MVT::f16 &&
835
- // TODO: This probably only requires no input flushing?
836
- !hasFP32Denormals (DAG.getMachineFunction());
833
+ DestVT.getScalarType() == MVT::f32 &&
834
+ SrcVT.getScalarType() == MVT::f16 &&
835
+ // TODO: This probably only requires no input flushing?
836
+ denormalModeIsFlushAllF32 (DAG.getMachineFunction());
837
837
}
838
838
839
839
bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
@@ -843,7 +843,7 @@ bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
843
843
DestTy.getScalarSizeInBits() == 32 &&
844
844
SrcTy.getScalarSizeInBits() == 16 &&
845
845
// TODO: This probably only requires no input flushing?
846
- !hasFP32Denormals (*MI.getMF());
846
+ denormalModeIsFlushAllF32 (*MI.getMF());
847
847
}
848
848
849
849
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
@@ -4646,7 +4646,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
4646
4646
// Otherwise f32 mad is always full rate and returns the same result as
4647
4647
// the separate operations so should be preferred over fma.
4648
4648
// However does not support denormals.
4649
- if (hasFP32Denormals (MF))
4649
+ if (!denormalModeIsFlushAllF32 (MF))
4650
4650
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
4651
4651
4652
4652
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
@@ -4655,7 +4655,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
4655
4655
case MVT::f64:
4656
4656
return true;
4657
4657
case MVT::f16:
4658
- return Subtarget->has16BitInsts() && hasFP64FP16Denormals (MF);
4658
+ return Subtarget->has16BitInsts() && !denormalModeIsFlushAllF64F16 (MF);
4659
4659
default:
4660
4660
break;
4661
4661
}
@@ -4684,9 +4684,10 @@ bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
4684
4684
return false;
4685
4685
4686
4686
if (Ty.getScalarSizeInBits() == 16)
4687
- return Subtarget->hasMadF16() && !hasFP64FP16Denormals (*MI.getMF());
4687
+ return Subtarget->hasMadF16() && denormalModeIsFlushAllF64F16 (*MI.getMF());
4688
4688
if (Ty.getScalarSizeInBits() == 32)
4689
- return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
4689
+ return Subtarget->hasMadMacF32Insts() &&
4690
+ denormalModeIsFlushAllF32(*MI.getMF());
4690
4691
4691
4692
return false;
4692
4693
}
@@ -4698,10 +4699,10 @@ bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
4698
4699
EVT VT = N->getValueType(0);
4699
4700
if (VT == MVT::f32)
4700
4701
return Subtarget->hasMadMacF32Insts() &&
4701
- !hasFP32Denormals (DAG.getMachineFunction());
4702
+ denormalModeIsFlushAllF32 (DAG.getMachineFunction());
4702
4703
if (VT == MVT::f16) {
4703
4704
return Subtarget->hasMadF16() &&
4704
- !hasFP64FP16Denormals (DAG.getMachineFunction());
4705
+ denormalModeIsFlushAllF64F16 (DAG.getMachineFunction());
4705
4706
}
4706
4707
4707
4708
return false;
@@ -9307,15 +9308,13 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
9307
9308
9308
9309
// Returns immediate value for setting the F32 denorm mode when using the
9309
9310
// S_DENORM_MODE instruction.
9310
- static SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
9311
- const SDLoc &SL, const GCNSubtarget *ST) {
9311
+ static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG,
9312
+ const SIMachineFunctionInfo *Info,
9313
+ const GCNSubtarget *ST) {
9312
9314
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
9313
- int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
9314
- ? FP_DENORM_FLUSH_NONE
9315
- : FP_DENORM_FLUSH_IN_FLUSH_OUT;
9316
-
9317
- int Mode = SPDenormMode | (DPDenormModeDefault << 2);
9318
- return DAG.getTargetConstant(Mode, SL, MVT::i32);
9315
+ uint32_t DPDenormModeDefault = Info->getMode().fpDenormModeDPValue();
9316
+ uint32_t Mode = SPDenormMode | (DPDenormModeDefault << 2);
9317
+ return DAG.getTargetConstant(Mode, SDLoc(), MVT::i32);
9319
9318
}
9320
9319
9321
9320
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
@@ -9353,7 +9352,11 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
9353
9352
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
9354
9353
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
9355
9354
9356
- const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
9355
+ const MachineFunction &MF = DAG.getMachineFunction();
9356
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
9357
+ const DenormalMode DenormMode = Info->getMode().FP32Denormals;
9358
+
9359
+ const bool HasFP32Denormals = DenormMode == DenormalMode::getIEEE();
9357
9360
9358
9361
if (!HasFP32Denormals) {
9359
9362
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
@@ -9365,7 +9368,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
9365
9368
SDNode *EnableDenorm;
9366
9369
if (Subtarget->hasDenormModeInst()) {
9367
9370
const SDValue EnableDenormValue =
9368
- getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL , Subtarget);
9371
+ getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info , Subtarget);
9369
9372
9370
9373
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
9371
9374
DAG.getEntryNode(), EnableDenormValue).getNode();
@@ -9405,10 +9408,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
9405
9408
NumeratorScaled, Fma3, Flags);
9406
9409
9407
9410
if (!HasFP32Denormals) {
9411
+ // FIXME: This mishandles dynamic denormal mode. We need to query the
9412
+ // current mode and restore the original.
9413
+
9408
9414
SDNode *DisableDenorm;
9409
9415
if (Subtarget->hasDenormModeInst()) {
9410
- const SDValue DisableDenormValue =
9411
- getSPDenormModeValue( FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL , Subtarget);
9416
+ const SDValue DisableDenormValue = getSPDenormModeValue(
9417
+ FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info , Subtarget);
9412
9418
9413
9419
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
9414
9420
Fma4.getValue(1), DisableDenormValue,
@@ -11782,10 +11788,11 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
11782
11788
11783
11789
// Only do this if we are not trying to support denormals. v_mad_f32 does not
11784
11790
// support denormals ever.
11785
- if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
11786
- (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
11787
- getSubtarget()->hasMadF16())) &&
11788
- isOperationLegal(ISD::FMAD, VT))
11791
+ if (((VT == MVT::f32 &&
11792
+ denormalModeIsFlushAllF32(DAG.getMachineFunction())) ||
11793
+ (VT == MVT::f16 && Subtarget->hasMadF16() &&
11794
+ denormalModeIsFlushAllF64F16(DAG.getMachineFunction()))) &&
11795
+ isOperationLegal(ISD::FMAD, VT))
11789
11796
return ISD::FMAD;
11790
11797
11791
11798
const TargetOptions &Options = DAG.getTarget().Options;
@@ -13743,10 +13750,10 @@ bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
13743
13750
EVT VT) const {
13744
13751
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
13745
13752
case MVT::f32:
13746
- return hasFP32Denormals (DAG.getMachineFunction());
13753
+ return !denormalModeIsFlushAllF32 (DAG.getMachineFunction());
13747
13754
case MVT::f64:
13748
13755
case MVT::f16:
13749
- return hasFP64FP16Denormals (DAG.getMachineFunction());
13756
+ return !denormalModeIsFlushAllF64F16 (DAG.getMachineFunction());
13750
13757
default:
13751
13758
return false;
13752
13759
}
@@ -13756,10 +13763,10 @@ bool SITargetLowering::denormalsEnabledForType(LLT Ty,
13756
13763
MachineFunction &MF) const {
13757
13764
switch (Ty.getScalarSizeInBits()) {
13758
13765
case 32:
13759
- return hasFP32Denormals (MF);
13766
+ return !denormalModeIsFlushAllF32 (MF);
13760
13767
case 64:
13761
13768
case 16:
13762
- return hasFP64FP16Denormals (MF);
13769
+ return !denormalModeIsFlushAllF64F16 (MF);
13763
13770
default:
13764
13771
return false;
13765
13772
}
0 commit comments