Skip to content

Commit 8d35ab8

Browse files
authored
AArch64: Add FMINNUM_IEEE and FMAXNUM_IEEE support (#107855)
The FMINNM/FMAXNM instructions of AArch64 follow IEEE 754-2008, so we can use them to canonicalize a floating-point number. FMINNUM_IEEE/FMAXNUM_IEEE are also used elsewhere, for example when expanding FMINIMUMNUM/FMAXIMUMNUM, so let's define them. Update combine_andor_with_cmps.ll. Add fp-maximumnum-minimumnum.ll, with nnan test cases only. v1f64 is not supported yet. If we mark v1f64 as legal, FMINNUM/FMAXNUM will run into a problem: the expansion of both checks `if (isOperationLegalOrCustom(FMAXNUM_IEEE, VT))`, while AArch64 depends on `expandFMINNUM_FMAXNUM` returning `SDValue()` for FMAXNUM and FMINNUM. This should be fixed, but that is left for a future patch.
1 parent f93258e commit 8d35ab8

File tree

5 files changed

+527
-15
lines changed

5 files changed

+527
-15
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
12871287
case ISD::VP_FMINIMUM:
12881288
case ISD::FMAXIMUM:
12891289
case ISD::VP_FMAXIMUM:
1290+
case ISD::FMINIMUMNUM:
1291+
case ISD::FMAXIMUMNUM:
12901292
case ISD::SDIV: case ISD::VP_SDIV:
12911293
case ISD::UDIV: case ISD::VP_UDIV:
12921294
case ISD::FDIV: case ISD::VP_FDIV:

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -861,12 +861,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
861861
setOperationAction(ISD::FP_ROUND, MVT::v4bf16, Custom);
862862

863863
// AArch64 has implementations of a lot of rounding-like FP operations.
864+
// clang-format off
864865
for (auto Op :
865866
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
866867
ISD::FRINT, ISD::FTRUNC, ISD::FROUND,
867868
ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
868869
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND,
869870
ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
871+
ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
870872
ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT,
871873
ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
872874
ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
@@ -877,6 +879,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
877879
if (Subtarget->hasFullFP16())
878880
setOperationAction(Op, MVT::f16, Legal);
879881
}
882+
// clang-format on
880883

881884
// Basic strict FP operations are legal
882885
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
@@ -1194,6 +1197,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11941197
ISD::FEXP10, ISD::FRINT, ISD::FROUND,
11951198
ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM,
11961199
ISD::FMAXNUM, ISD::FMINIMUM, ISD::FMAXIMUM,
1200+
ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
11971201
ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
11981202
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FCEIL,
11991203
ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT,
@@ -1202,6 +1206,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
12021206
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM})
12031207
setOperationAction(Op, MVT::v1f64, Expand);
12041208
// clang-format on
1209+
12051210
for (auto Op :
12061211
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
12071212
ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
@@ -1345,12 +1350,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13451350
}
13461351
}
13471352

1348-
// AArch64 has implementations of a lot of rounding-like FP operations.
13491353
for (auto Op :
13501354
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
1351-
ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR,
1352-
ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT,
1353-
ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) {
1355+
ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
1356+
ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL,
1357+
ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUND,
1358+
ISD::STRICT_FROUNDEVEN}) {
13541359
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
13551360
setOperationAction(Op, Ty, Legal);
13561361
if (Subtarget->hasFullFP16())
@@ -1961,10 +1966,10 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
19611966
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
19621967
for (unsigned Opcode :
19631968
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
1964-
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM,
1965-
ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
1966-
ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
1967-
ISD::STRICT_FSQRT})
1969+
ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, ISD::STRICT_FMINIMUM,
1970+
ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
1971+
ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1972+
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
19681973
setOperationAction(Opcode, VT, Legal);
19691974

19701975
// Strict fp extend and trunc are legal

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5050,6 +5050,19 @@ def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
50505050
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
50515051
(FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
50525052

5053+
def : Pat<(fminnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
5054+
(FMINNMDrr FPR64:$a, FPR64:$b)>;
5055+
def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
5056+
(FMINNMSrr FPR32:$a, FPR32:$b)>;
5057+
def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
5058+
(FMINNMHrr FPR16:$a, FPR16:$b)>;
5059+
def : Pat<(fmaxnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
5060+
(FMAXNMDrr FPR64:$a, FPR64:$b)>;
5061+
def : Pat<(fmaxnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
5062+
(FMAXNMSrr FPR32:$a, FPR32:$b)>;
5063+
def : Pat<(fmaxnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
5064+
(FMAXNMHrr FPR16:$a, FPR16:$b)>;
5065+
50535066
//===----------------------------------------------------------------------===//
50545067
// Floating point three operand instructions.
50555068
//===----------------------------------------------------------------------===//
@@ -5554,6 +5567,27 @@ defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
55545567
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
55555568
defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
55565569

5570+
def : Pat<(v2f64 (fminnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
5571+
(v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
5572+
def : Pat<(v4f32 (fminnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
5573+
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
5574+
def : Pat<(v8f16 (fminnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5575+
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5576+
def : Pat<(v2f32 (fminnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5577+
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
5578+
def : Pat<(v4f16 (fminnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5579+
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
5580+
def : Pat<(v2f64 (fmaxnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
5581+
(v2f64 (FMAXNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
5582+
def : Pat<(v4f32 (fmaxnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
5583+
(v4f32 (FMAXNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
5584+
def : Pat<(v8f16 (fmaxnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5585+
(v8f16 (FMAXNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5586+
def : Pat<(v2f32 (fmaxnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5587+
(v2f32 (FMAXNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
5588+
def : Pat<(v4f16 (fmaxnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5589+
(v4f16 (FMAXNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
5590+
55575591
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
55585592
// instruction expects the addend first, while the fma intrinsic puts it last.
55595593
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",

llvm/test/CodeGen/AArch64/combine_andor_with_cmps.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,15 @@ define i1 @test2(double %arg1, double %arg2, double %arg3) #0 {
3131
ret i1 %or1
3232
}
3333

34-
; It is illegal to apply the optimization in the following two test cases
35-
; because FMINNUM_IEEE and FMAXNUM_IEEE are not supported.
36-
3734
define i1 @test3(float %arg1, float %arg2, float %arg3) {
3835
; CHECK-LABEL: test3:
3936
; CHECK: // %bb.0:
4037
; CHECK-NEXT: fmov s3, #1.00000000
4138
; CHECK-NEXT: fadd s0, s0, s3
4239
; CHECK-NEXT: fmov s3, #2.00000000
4340
; CHECK-NEXT: fadd s1, s1, s3
44-
; CHECK-NEXT: fcmp s1, s2
45-
; CHECK-NEXT: fccmp s0, s2, #0, lt
41+
; CHECK-NEXT: fmaxnm s0, s0, s1
42+
; CHECK-NEXT: fcmp s0, s2
4643
; CHECK-NEXT: cset w0, lt
4744
; CHECK-NEXT: ret
4845
%add1 = fadd nnan float %arg1, 1.0
@@ -60,8 +57,8 @@ define i1 @test4(float %arg1, float %arg2, float %arg3) {
6057
; CHECK-NEXT: fadd s0, s0, s3
6158
; CHECK-NEXT: fmov s3, #2.00000000
6259
; CHECK-NEXT: fadd s1, s1, s3
63-
; CHECK-NEXT: fcmp s1, s2
64-
; CHECK-NEXT: fccmp s0, s2, #4, gt
60+
; CHECK-NEXT: fminnm s0, s0, s1
61+
; CHECK-NEXT: fcmp s0, s2
6562
; CHECK-NEXT: cset w0, gt
6663
; CHECK-NEXT: ret
6764
%add1 = fadd nnan float %arg1, 1.0

0 commit comments

Comments
 (0)