Skip to content

Commit 8798df3

Browse files
Your Name authored and wzssyqa committed
neon fullfp16 may be disabled
1 parent cfc50bd commit 8798df3

File tree

3 files changed

+531
-100
lines changed

3 files changed

+531
-100
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -880,6 +880,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
880880
setOperationAction(Op, MVT::f16, Legal);
881881
}
882882
// clang-format on
883+
if (!Subtarget->hasFullFP16())
884+
setOperationPromotedToType(ISD::FCANONICALIZE, MVT::f16, MVT::f32);
883885

884886
// Basic strict FP operations are legal
885887
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
@@ -1362,6 +1364,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13621364
for (MVT Ty : {MVT::v4f16, MVT::v8f16})
13631365
setOperationAction(Op, Ty, Legal);
13641366
}
1367+
if (!Subtarget->hasFullFP16()) {
1368+
setOperationPromotedToType(ISD::FCANONICALIZE, MVT::v4f16, MVT::v4f32);
1369+
setOperationPromotedToType(ISD::FCANONICALIZE, MVT::v8f16, MVT::v8f32);
1370+
}
13651371

13661372
// LRINT and LLRINT.
13671373
for (auto Op : {ISD::LRINT, ISD::LLRINT}) {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 33 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -5052,23 +5052,25 @@ def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
50525052

50535053
def : Pat<(fminnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
50545054
(FMINNMDrr FPR64:$a, FPR64:$b)>;
5055-
def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
5056-
(FMINNMSrr FPR32:$a, FPR32:$b)>;
5057-
def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
5058-
(FMINNMHrr FPR16:$a, FPR16:$b)>;
50595055
def : Pat<(fmaxnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
50605056
(FMAXNMDrr FPR64:$a, FPR64:$b)>;
5057+
def : Pat<(f64 (fcanonicalize f64:$a)),
5058+
(FMINNMDrr f64:$a, f64:$a)>;
5059+
def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
5060+
(FMINNMSrr FPR32:$a, FPR32:$b)>;
50615061
def : Pat<(fmaxnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
50625062
(FMAXNMSrr FPR32:$a, FPR32:$b)>;
5063+
def : Pat<(f32 (fcanonicalize f32:$a)),
5064+
(FMINNMSrr f32:$a, f32:$a)>;
5065+
5066+
let Predicates = [HasFullFP16] in {
5067+
def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
5068+
(FMINNMHrr FPR16:$a, FPR16:$b)>;
50635069
def : Pat<(fmaxnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
50645070
(FMAXNMHrr FPR16:$a, FPR16:$b)>;
5065-
50665071
def : Pat<(f16 (fcanonicalize f16:$a)),
50675072
(FMINNMHrr f16:$a, f16:$a)>;
5068-
def : Pat<(f32 (fcanonicalize f32:$a)),
5069-
(FMINNMSrr f32:$a, f32:$a)>;
5070-
def : Pat<(f64 (fcanonicalize f64:$a)),
5071-
(FMINNMDrr f64:$a, f64:$a)>;
5073+
}
50725074
//===----------------------------------------------------------------------===//
50735075
// Floating point three operand instructions.
50745076
//===----------------------------------------------------------------------===//
@@ -5573,37 +5575,41 @@ defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
55735575
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
55745576
defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
55755577

5578+
let Predicates = [HasNEON] in {
55765579
def : Pat<(v2f64 (fminnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
55775580
(v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
5578-
def : Pat<(v4f32 (fminnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
5579-
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
5580-
def : Pat<(v8f16 (fminnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5581-
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5582-
def : Pat<(v2f32 (fminnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5583-
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
5584-
def : Pat<(v4f16 (fminnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5585-
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
55865581
def : Pat<(v2f64 (fmaxnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
55875582
(v2f64 (FMAXNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
5583+
def : Pat<(v2f64 (fcanonicalize (v2f64 V128:$Rn))),
5584+
(v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rn)))>;
5585+
def : Pat<(v4f32 (fminnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
5586+
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
55885587
def : Pat<(v4f32 (fmaxnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
55895588
(v4f32 (FMAXNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
5590-
def : Pat<(v8f16 (fmaxnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5591-
(v8f16 (FMAXNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5589+
def : Pat<(v4f32 (fcanonicalize (v4f32 V128:$Rn))),
5590+
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rn)))>;
5591+
def : Pat<(v2f32 (fminnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5592+
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
55925593
def : Pat<(v2f32 (fmaxnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
55935594
(v2f32 (FMAXNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
5594-
def : Pat<(v4f16 (fmaxnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5595-
(v4f16 (FMAXNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
5595+
def : Pat<(v2f32 (fcanonicalize (v2f32 V64:$Rn))),
5596+
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rn)))>;
5597+
}
55965598

5597-
def : Pat<(v2f64 (fcanonicalize (v2f64 V128:$Rn))),
5598-
(v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rn)))>;
5599-
def : Pat<(v4f32 (fcanonicalize (v4f32 V128:$Rn))),
5600-
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rn)))>;
5599+
let Predicates = [HasNEON, HasFullFP16] in {
5600+
def : Pat<(v8f16 (fminnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5601+
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5602+
def : Pat<(v8f16 (fmaxnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5603+
(v8f16 (FMAXNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
56015604
def : Pat<(v8f16 (fcanonicalize (v8f16 V128:$Rn))),
56025605
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rn)))>;
5603-
def : Pat<(v2f32 (fcanonicalize (v2f32 V64:$Rn))),
5604-
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rn)))>;
5606+
def : Pat<(v4f16 (fminnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5607+
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
5608+
def : Pat<(v4f16 (fmaxnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5609+
(v4f16 (FMAXNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
56055610
def : Pat<(v4f16 (fcanonicalize (v4f16 V64:$Rn))),
56065611
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rn)))>;
5612+
}
56075613

56085614
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
56095615
// instruction expects the addend first, while the fma intrinsic puts it last.

0 commit comments

Comments
 (0)