Skip to content

Commit 00f5af3

Browse files
committed
AMDGPU/GlobalISel: Start legalizing minimumnum and maximumnum
This is the bare minimum needed to get the intrinsic to compile for AMDGPU, and it is not optimal. We need to follow the existing G_FMINNUM/G_FMAXNUM handling more closely, with custom lowering to handle the IEEE=0 case better. For now, just reuse the existing lowering for the old semantics of G_FMINNUM/G_FMAXNUM. This does not change G_FMINNUM/G_FMAXNUM's treatment, nor does it try to handle the general expansion without an underlying min/max variant (or with G_FMINIMUM/G_FMAXIMUM).
1 parent 80913b4 commit 00f5af3

File tree

4 files changed

+15221
-7751
lines changed

4 files changed

+15221
-7751
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3221,6 +3221,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
32213221
case TargetOpcode::G_FMAXNUM_IEEE:
32223222
case TargetOpcode::G_FMINIMUM:
32233223
case TargetOpcode::G_FMAXIMUM:
3224+
case TargetOpcode::G_FMINIMUMNUM:
3225+
case TargetOpcode::G_FMAXIMUMNUM:
32243226
case TargetOpcode::G_FDIV:
32253227
case TargetOpcode::G_FREM:
32263228
case TargetOpcode::G_FCEIL:
@@ -4591,6 +4593,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
45914593
return lowerFCopySign(MI);
45924594
case G_FMINNUM:
45934595
case G_FMAXNUM:
4596+
case G_FMINIMUMNUM:
4597+
case G_FMAXIMUMNUM:
45944598
return lowerFMinNumMaxNum(MI);
45954599
case G_MERGE_VALUES:
45964600
return lowerMergeValues(MI);
@@ -5379,6 +5383,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
53795383
case G_FMAXNUM_IEEE:
53805384
case G_FMINIMUM:
53815385
case G_FMAXIMUM:
5386+
case G_FMINIMUMNUM:
5387+
case G_FMAXIMUMNUM:
53825388
case G_FSHL:
53835389
case G_FSHR:
53845390
case G_ROTL:
@@ -6090,6 +6096,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
60906096
case TargetOpcode::G_FMAXNUM_IEEE:
60916097
case TargetOpcode::G_FMINIMUM:
60926098
case TargetOpcode::G_FMAXIMUM:
6099+
case TargetOpcode::G_FMINIMUMNUM:
6100+
case TargetOpcode::G_FMAXIMUMNUM:
60936101
case TargetOpcode::G_STRICT_FADD:
60946102
case TargetOpcode::G_STRICT_FSUB:
60956103
case TargetOpcode::G_STRICT_FMUL:
@@ -8139,8 +8147,27 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
81398147

81408148
LegalizerHelper::LegalizeResult
81418149
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8142-
unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8143-
TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8150+
// FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8151+
// identical handling. fminimumnum/fmaximumnum also need a path that does not
8152+
// depend on fminnum/fmaxnum.
8153+
8154+
unsigned NewOp;
8155+
switch (MI.getOpcode()) {
8156+
case TargetOpcode::G_FMINNUM:
8157+
NewOp = TargetOpcode::G_FMINNUM_IEEE;
8158+
break;
8159+
case TargetOpcode::G_FMINIMUMNUM:
8160+
NewOp = TargetOpcode::G_FMINNUM;
8161+
break;
8162+
case TargetOpcode::G_FMAXNUM:
8163+
NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8164+
break;
8165+
case TargetOpcode::G_FMAXIMUMNUM:
8166+
NewOp = TargetOpcode::G_FMAXNUM;
8167+
break;
8168+
default:
8169+
llvm_unreachable("unexpected min/max opcode");
8170+
}
81448171

81458172
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
81468173
LLT Ty = MRI.getType(Dst);

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
960960
auto &MinNumMaxNum = getActionDefinitionsBuilder({
961961
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
962962

963+
// TODO: These should be custom lowered and are directly legal with IEEE=0
964+
auto &MinimumNumMaximumNum =
965+
getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
966+
963967
if (ST.hasVOP3PInsts()) {
964968
MinNumMaxNum.customFor(FPTypesPK16)
965969
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
@@ -976,6 +980,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
976980
.scalarize(0);
977981
}
978982

983+
MinimumNumMaximumNum.lower();
984+
979985
if (ST.hasVOP3PInsts())
980986
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
981987

@@ -2102,6 +2108,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
21022108
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
21032109
}
21042110

2111+
2112+
21052113
getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
21062114
.lower();
21072115

0 commit comments

Comments
 (0)