@@ -171,7 +171,8 @@ def SDT_AArch64Arith : SDTypeProfile<1, 3, [
171
171
172
172
// Type profile for predicated FMA nodes: 1 result and 4 operands, all vectors.
// Operand 1 must have i1 elements. The '+' lines additionally require operand 1
// to have the same element count as the result, and constrain operands 2, 3
// and 4 to the result type directly (0==2, 0==3, 0==4) instead of by chaining.
def SDT_AArch64FMA : SDTypeProfile<1, 4, [
173
173
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
174
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
174
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>,
175
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>
175
176
]>;
176
177
177
178
// Predicated operations with the result of inactive lanes being unspecified.
@@ -244,6 +245,11 @@ def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arit
244
245
// SDNodes bound to the REVW_MERGE_PASSTHRU and REVD_MERGE_PASSTHRU ISD
// opcodes; both use the SDT_AArch64Arith type profile.
def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>;
245
246
def AArch64revd_mt : SDNode<"AArch64ISD::REVD_MERGE_PASSTHRU", SDT_AArch64Arith>;
246
247
248
// Matches AArch64fneg_mt (predicate, operand, passthru) only when the matched
// node carries the no-signed-zeros flag, as checked by the C++ predicate below.
+ def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
249
+ (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
250
+ return N->getFlags().hasNoSignedZeros();
251
+ }]>;
252
+
247
253
// These are like the above but we don't yet have need for ISD nodes. They allow
248
254
// a single pattern to match intrinsic and ISD operand layouts.
249
255
def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>;
@@ -349,19 +355,25 @@ def AArch64fmul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
349
355
// Predicated absolute difference: AArch64fabs_mt applied to
// AArch64fsub_p($op1, $op2) under the same predicate $pg, with an undef
// passthru operand.
def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
350
356
(AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
351
357
352
- // FMAs with a negated multiplication operand can be commuted.
353
- def AArch64fmls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
354
- [(AArch64fma_p node:$pred, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op2, node:$op3),
355
- (AArch64fma_p node:$pred, node:$op2, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op3)]>;
358
// Zd = Za + Zn * Zm. Operands are (pg, za, zn, zm); note that AArch64fma_p
// itself takes the accumulator last. Two alternatives are accepted:
//   1. AArch64fma_p predicated directly on $pg;
//   2. a vselect on $pg between an AArch64fma_p computed under
//      (AArch64ptrue 31) and the unmodified accumulator $za.
+ def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
359
+ [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za),
360
+ (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), node:$zn, node:$zm, node:$za), node:$za)]>;
361
+
362
// Zd = Za + -Zn * Zm. Operands are (pg, za, zn, zm). Alternatives:
//   1. AArch64fma_p on $pg with the negated $zn as the first multiplicand;
//   2. the same with the negated $zn as the second multiplicand (FMAs with a
//      negated multiplication operand can be commuted);
//   3. a vselect on $pg between the (AArch64ptrue 31) form of the negated FMA
//      and the unmodified accumulator $za.
+ def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
363
+ [(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za),
364
+ (AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za),
365
+ (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), (AArch64fneg_mt (AArch64ptrue 31), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>;
366
+
367
// Operands are (pg, za, zn, zm). Alternatives:
//   1. Zd = -Za + -Zn * Zm: AArch64fma_p with both $zn and $za negated;
//   2. Zd = -(Za + Zn * Zm): a negation of the whole FMA, accepted only
//      through AArch64fneg_mt_nsz (i.e. when the negate is no-signed-zeros).
+ def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
368
+ [(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))),
369
+ (AArch64fneg_mt_nsz node:$pg, (AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), (undef))]>;
370
+
371
// Zd = -Za + Zn * Zm. Operands are (pg, za, zn, zm); the single alternative is
// AArch64fma_p on $pg with only the accumulator $za negated.
+ def AArch64fnmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
372
+ [(AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>;
356
373
357
374
// Reversed predicated subtract: AArch64fsub_p with its two data operands
// swapped, i.e. $op2 - $op1.
def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
358
375
(AArch64fsub_p node:$pg, node:$op2, node:$op1)>;
359
376
360
- def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
361
- (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
362
- return N->getFlags().hasNoSignedZeros();
363
- }]>;
364
-
365
377
def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
366
378
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
367
379
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
@@ -649,7 +661,7 @@ let Predicates = [HasSVE] in {
649
661
} // End HasSVE
650
662
651
663
let Predicates = [HasSVEorSME] in {
652
- defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
664
+ defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
653
665
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
654
666
655
667
defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", AArch64fmla_m1, "FMAD_ZPmZZ">;
@@ -662,48 +674,10 @@ let Predicates = [HasSVEorSME] in {
662
674
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>;
663
675
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>;
664
676
665
- defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
666
- defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
667
- defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
668
- defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
669
-
670
- multiclass fma<ValueType Ty, ValueType PredTy, string Suffix> {
671
- // Zd = Za + Zn * Zm
672
- def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)),
673
- (!cast<Instruction>("FMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
674
-
675
- // Zd = Za + -Zn * Zm
676
- def : Pat<(Ty (AArch64fmls_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)),
677
- (!cast<Instruction>("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
678
-
679
- // Zd = -Za + Zn * Zm
680
- def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, (AArch64fneg_mt PredTy:$P, Ty:$Za, (Ty (undef))))),
681
- (!cast<Instruction>("FNMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
682
-
683
- // Zd = -Za + -Zn * Zm
684
- def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, (AArch64fneg_mt PredTy:$P, Ty:$Za, (Ty (undef))))),
685
- (!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
686
-
687
- // Zd = -(Za + Zn * Zm)
688
- // (with nsz neg.)
689
- def : Pat<(AArch64fneg_mt_nsz PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
690
- (!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
691
-
692
- // Zda = Zda + Zn * Zm
693
- def : Pat<(vselect (PredTy PPR:$Pg), (Ty (AArch64fma_p (PredTy (AArch64ptrue 31)), ZPR:$Zn, ZPR:$Zm, ZPR:$Za)), ZPR:$Za),
694
- (!cast<Instruction>("FMLA_ZPmZZ_"#Suffix) PPR:$Pg, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
695
-
696
- // Zda = Zda + -Zn * Zm
697
- def : Pat<(vselect (PredTy PPR:$Pg), (Ty (AArch64fma_p (PredTy (AArch64ptrue 31)), (AArch64fneg_mt (PredTy (AArch64ptrue 31)), Ty:$Zn, (Ty (undef))), ZPR:$Zm, ZPR:$Za)), ZPR:$Za),
698
- (!cast<Instruction>("FMLS_ZPmZZ_"#Suffix) PPR:$Pg, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
699
- }
700
-
701
- defm : fma<nxv8f16, nxv8i1, "H">;
702
- defm : fma<nxv4f16, nxv4i1, "H">;
703
- defm : fma<nxv2f16, nxv2i1, "H">;
704
- defm : fma<nxv4f32, nxv4i1, "S">;
705
- defm : fma<nxv2f32, nxv2i1, "S">;
706
- defm : fma<nxv2f64, nxv2i1, "D">;
677
// One sve_fp_3op_pred_hfd instantiation per FMA variant, each given the
// PatFrags that selects it. The multiclass is defined outside this view.
// NOTE(review): presumably it also supplies the per-type patterns that the
// removed 'fma' multiclass used to spell out by hand; confirm against its
// definition.
+ defm FMLA_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fmla_p>;
678
+ defm FMLS_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fmls_p>;
679
+ defm FNMLA_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fnmla_p>;
680
+ defm FNMLS_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fnmls_p>;
707
681
} // End HasSVEorSME
708
682
709
683
let Predicates = [HasSVE] in {
0 commit comments