Skip to content

Commit 4a396b2

Browse files
committed
lower fminimum/fmaximum
1 parent 051a1f6 commit 4a396b2

File tree

4 files changed

+74
-0
lines changed

4 files changed

+74
-0
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ class LegalizerHelper {
426426
LegalizeResult lowerMinMax(MachineInstr &MI);
427427
LegalizeResult lowerFCopySign(MachineInstr &MI);
428428
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
429+
LegalizeResult lowerFMinimum_FMaximum(MachineInstr &MI);
429430
LegalizeResult lowerFMad(MachineInstr &MI);
430431
LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
431432
LegalizeResult lowerFFloor(MachineInstr &MI);

llvm/include/llvm/CodeGen/GlobalISel/Utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,8 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
343343
return isKnownNeverNaN(Val, MRI, true);
344344
}
345345

346+
/// Returns true if \p Val is matched (via m_GFCstOrSplat) as a floating-point
/// constant whose value is not zero. Returns false in every other case,
/// including when \p Val is not a recognised constant, so a false result means
/// "may be zero", not "is zero".
bool isKnownNeverZeroFloat(Register Val, const MachineRegisterInfo &MRI);
347+
346348
Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO);
347349

348350
/// Return a virtual register corresponding to the incoming argument register \p

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
//===----------------------------------------------------------------------===//
1414

1515
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16+
#include "llvm/ADT/APFloat.h"
1617
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
1718
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
1819
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -32,6 +33,7 @@
3233
#include "llvm/CodeGen/TargetLowering.h"
3334
#include "llvm/CodeGen/TargetOpcodes.h"
3435
#include "llvm/CodeGen/TargetSubtargetInfo.h"
36+
#include "llvm/IR/InstrTypes.h"
3537
#include "llvm/IR/Instructions.h"
3638
#include "llvm/Support/Debug.h"
3739
#include "llvm/Support/MathExtras.h"
@@ -4594,6 +4596,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
45944596
case G_FMINNUM:
45954597
case G_FMAXNUM:
45964598
return lowerFMinNumMaxNum(MI);
4599+
case G_FMINIMUM:
4600+
case G_FMAXIMUM:
4601+
return lowerFMinimum_FMaximum(MI);
45974602
case G_MERGE_VALUES:
45984603
return lowerMergeValues(MI);
45994604
case G_UNMERGE_VALUES:
@@ -8165,6 +8170,62 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
81658170
return Legalized;
81668171
}
81678172

8173+
/// Lower G_FMINIMUM / G_FMAXIMUM.
///
/// The expansion is built in three steps:
///   1. Compute a base min/max with the first available option out of
///      G_FMINNUM_IEEE/G_FMAXNUM_IEEE, G_FMINNUM/G_FMAXNUM, or an ordered
///      G_FCMP + G_SELECT.
///   2. Unless the instruction carries the no-NaNs flag or both sources are
///      known never to be NaN, select a NaN constant when the operands compare
///      unordered.
///   3. Unless step 1 used the IEEE variant, the instruction carries the nsz
///      flag, or one source is known never to be zero, fix up the result so
///      that -0.0 is ordered before +0.0.
///
/// All uses of the original destination are then rewritten to the computed
/// value and \p MI is erased.
///
/// \returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinimum_FMaximum(MachineInstr &MI) {
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  const LLT Ty = MRI.getType(Dst);
  const bool IsMax = MI.getOpcode() == TargetOpcode::G_FMAXIMUM;

  const unsigned CompOpcIeee =
      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
  const unsigned CompOpc =
      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
  const CmpInst::Predicate CompPred =
      IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT;

  // Boolean results must have the same shape as the operands: s1 for a scalar,
  // <N x s1> for a vector. A plain s1 would be an invalid result type for a
  // vector compare and an invalid condition for the vector selects below.
  const LLT CmpTy = Ty.changeElementSize(1);

  // The float semantics are a property of the element type, so query them on
  // the scalar type rather than on a (possibly vector) Ty.
  const fltSemantics &FPSem = getFltSemanticForLLT(Ty.getScalarType());

  Register MinMax;
  bool MinMaxMustRespectOrderedZero = false;

  if (LI.isLegalOrCustom({CompOpcIeee, Ty})) {
    MinMax = MIRBuilder.buildInstr(CompOpcIeee, {Ty}, {Src0, Src1}).getReg(0);
    // The IEEE variants are treated as already ordering -0.0 before +0.0, so
    // the signed-zero fix-up below is skipped for them.
    MinMaxMustRespectOrderedZero = true;
  } else if (LI.isLegalOrCustom({CompOpc, Ty})) {
    MinMax = MIRBuilder.buildInstr(CompOpc, {Ty}, {Src0, Src1}).getReg(0);
  } else {
    // NaN (if present) is propagated below, so it does not matter which
    // operand an ordered compare picks when one of them is NaN.
    auto Comp = MIRBuilder.buildFCmp(CompPred, CmpTy, Src0, Src1);
    MinMax = MIRBuilder.buildSelect(Ty, Comp, Src0, Src1).getReg(0);
  }

  // Propagate NaN from either operand.
  if (!MI.getFlag(MachineInstr::FmNoNans) &&
      (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
    auto FPNaN = MIRBuilder.buildFConstant(Ty, APFloat::getNaN(FPSem));
    auto IsUnordered =
        MIRBuilder.buildFCmp(CmpInst::Predicate::FCMP_UNO, CmpTy, Src0, Src1);
    MinMax = MIRBuilder.buildSelect(Ty, IsUnordered, FPNaN, MinMax).getReg(0);
  }

  // fminimum/fmaximum require -0.0 to be less than +0.0.
  if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz) &&
      !isKnownNeverZeroFloat(Src0, MRI) && !isKnownNeverZeroFloat(Src1, MRI)) {
    auto Zero = MIRBuilder.buildFConstant(Ty, APFloat::getZero(FPSem));
    auto IsZero = MIRBuilder.buildFCmp(CmpInst::Predicate::FCMP_OEQ, CmpTy,
                                       MinMax, Zero);

    // For a maximum the preferred zero is +0.0, for a minimum it is -0.0.
    const unsigned TestZeroMask = IsMax ? fcPosZero : fcNegZero;

    // When the base result is a zero, prefer whichever source holds the
    // preferred zero; Src1 is checked last and therefore takes priority.
    auto Src0Zero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestZeroMask);
    auto Src0Comp = MIRBuilder.buildSelect(Ty, Src0Zero, Src0, MinMax);

    auto Src1Zero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestZeroMask);
    auto Src1Comp = MIRBuilder.buildSelect(Ty, Src1Zero, Src1, Src0Comp);

    MinMax = MIRBuilder.buildSelect(Ty, IsZero, Src1Comp, MinMax).getReg(0);
  }

  MRI.replaceRegWith(Dst, MinMax);
  MI.eraseFromParent();
  return Legalized;
}
8228+
81688229
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
81698230
// Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
81708231
Register DstReg = MI.getOperand(0).getReg();

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,16 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
890890
return false;
891891
}
892892

893+
/// Return true only when \p Reg is matched by m_GFCstOrSplat as a
/// floating-point constant and that constant is not zero. Any register that is
/// not matched is conservatively reported as "may be zero" (false).
bool llvm::isKnownNeverZeroFloat(Register Reg, const MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> MaybeCst;
  // Short-circuit keeps MaybeCst from being dereferenced unless the match
  // succeeded and filled it in.
  const bool IsConstant = mi_match(Reg, MRI, m_GFCstOrSplat(MaybeCst));
  return IsConstant && !MaybeCst->Value.isZero();
}
902+
893903
Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
894904
const MachinePointerInfo &MPO) {
895905
auto PSV = dyn_cast_if_present<const PseudoSourceValue *>(MPO.V);

0 commit comments

Comments
 (0)