@@ -469,6 +469,89 @@ def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
469
469
V1UnitV, V1UnitV, V1UnitV,
470
470
V1UnitV, V1UnitV, V1UnitV]>;
471
471
472
+ //===----------------------------------------------------------------------===//
473
+ // Define forwarded types
474
+
475
+ // NOTE: SOG, p. 20, n. 2: Accumulator forwarding is not supported for
476
+ // consumers of 64 bit multiply high operations?
477
+ // 2-cycle write on V1UnitM; chosen by V1WriteIM when NeoverseMULIdiomPred
+ // holds.
+ def V1Wr_IM : SchedWriteRes<[V1UnitM]> { let Latency = 2; }
478
+ // 2-cycle write on V1UnitM0; the fallback variant of V1WriteIM and the only
+ // producer listed as a valid write for V1Rd_IMA.
+ def V1Wr_IMA : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
479
+ // Variant write: the NeoverseMULIdiomPred entry is listed first, NoSchedPred
+ // is the catch-all.
+ // NOTE(review): presumably the predicate identifies a plain multiply (no real
+ // accumulator) - confirm against its definition.
+ def V1WriteIM : SchedWriteVariant<
480
+ [SchedVar<NeoverseMULIdiomPred, [V1Wr_IM]>,
481
+ SchedVar<NoSchedPred, [V1Wr_IMA]>]>;
482
+ // Read advanced by 1 cycle when the producer is V1Wr_IMA.
+ def V1Rd_IMA : SchedReadAdvance<1, [V1Wr_IMA]>;
483
+
484
+ // FP multiply accumulate: 4-cycle write on V1UnitV.
+ def V1Wr_FMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
485
+ // Read advanced by 2 cycles when the producer is either WriteFMul or
+ // V1Wr_FMA.
+ def V1Rd_FMA : SchedReadAdvance<2, [WriteFMul, V1Wr_FMA]>;
486
+
487
+ def V1Wr_ADA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
488
+ def V1Rd_ADA : SchedReadAdvance<3, [V1Wr_ADA]>;
489
+
490
+ // ASIMD dot product: write on V1UnitV with Latency = 3.
+ // NOTE(review): the InstRW this replaces used V1Write_2c_1V, whose name
+ // suggests 2 cycles; confirm against the SOG that the latency change (and the
+ // 2-cycle advance below) is intended.
+ def V1Wr_VDOT : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
491
+ // Read advanced by 2 cycles when the producer is V1Wr_VDOT.
+ def V1Rd_VDOT : SchedReadAdvance<2, [V1Wr_VDOT]>;
492
+
493
+ def V1Wr_VMMA : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
494
+ def V1Rd_VMMA : SchedReadAdvance<2, [V1Wr_VMMA]>;
495
+
496
+ def V1Wr_VMA : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
497
+ def V1Rd_VMA : SchedReadAdvance<3, [V1Wr_VMA]>;
498
+
499
+ def V1Wr_VMAL : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
500
+ def V1Rd_VMAL : SchedReadAdvance<3, [V1Wr_VMAL]>;
501
+
502
+ def V1Wr_VSA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
503
+ def V1Rd_VSA : SchedReadAdvance<3, [V1Wr_VSA]>;
504
+
505
+ def V1Wr_FCMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
506
+ def V1Rd_FCMA : SchedReadAdvance<2, [V1Wr_FCMA]>;
507
+
508
+ def V1Wr_FPM : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
509
+ def V1Wr_FPMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
510
+ def V1Rd_FPMA : SchedReadAdvance<2, [V1Wr_FPM, V1Wr_FPMA]>;
511
+
512
+ def V1Wr_FPMAL : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
513
+ def V1Rd_FPMAL : SchedReadAdvance<3, [V1Wr_FPMAL]>;
514
+
515
+ def V1Wr_BFD : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
516
+ def V1Rd_BFD : SchedReadAdvance<2, [V1Wr_BFD]>;
517
+
518
+ def V1Wr_BFMMA : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
519
+ def V1Rd_BFMMA : SchedReadAdvance<2, [V1Wr_BFMMA]>;
520
+
521
+ def V1Wr_BFMLA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
522
+ def V1Rd_BFMLA : SchedReadAdvance<2, [V1Wr_BFMLA]>;
523
+
524
+ def V1Wr_CRC : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
525
+ def V1Rd_CRC : SchedReadAdvance<1, [V1Wr_CRC]>;
526
+
527
+ def V1Wr_ZDOTB : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
528
+ def V1Rd_ZDOTB : SchedReadAdvance<2, [V1Wr_ZDOTB]>;
529
+
530
+ def V1Wr_ZUDOTB : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
531
+ def V1Rd_ZUDOTB : SchedReadAdvance<2, [V1Wr_ZUDOTB]>;
532
+
533
+ def V1Wr_ZDOTH : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
534
+ def V1Rd_ZDOTH : SchedReadAdvance<3, [V1Wr_ZDOTH]>;
535
+
536
+ def V1Wr_ZMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
537
+ def V1Rd_ZMMA : SchedReadAdvance<2, [V1Wr_ZMMA]>;
538
+
539
+ // The brace-less `let ... in` applies only to the single def that follows:
+ // V1Wr_ZMAD is 2 micro-ops, both on V1UnitV0, with a 5-cycle latency.
+ let Latency = 5, NumMicroOps = 2 in
540
+ def V1Wr_ZMAD : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
541
+ // Read advanced by 3 cycles when the producer is V1Wr_ZMAD.
+ def V1Rd_ZMAD : SchedReadAdvance<3, [V1Wr_ZMAD]>;
542
+
543
+ def V1Wr_ZFCMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
544
+ def V1Rd_ZFCMA : SchedReadAdvance<3, [V1Wr_ZFCMA]>;
545
+
546
+ def V1Wr_ZFMA : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
547
+ def V1Rd_ZFMA : SchedReadAdvance<2, [V1Wr_ZFMA]>;
548
+
549
+ def V1Wr_ZBFDOT : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
550
+ def V1Rd_ZBFDOT : SchedReadAdvance<2, [V1Wr_ZBFDOT]>;
551
+ def V1Wr_ZBFMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
552
+ def V1Rd_ZBFMMA : SchedReadAdvance<2, [V1Wr_ZBFMMA]>;
553
+ def V1Wr_ZBFMAL : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
554
+ def V1Rd_ZBFMAL : SchedReadAdvance<3, [V1Wr_ZBFMAL]>;
472
555
473
556
// Miscellaneous Instructions
474
557
// -----------------------------------------------------------------------------
@@ -553,16 +636,19 @@ def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>;
553
636
def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
554
637
def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
555
638
639
+ def : SchedAlias<WriteIM32, V1Write_2c_1M>;
640
+ def : SchedAlias<WriteIM64, V1Write_2c_1M>;
641
+
556
642
// Multiply
557
- // Multiply accumulate
558
- // Multiply accumulate, long
559
- // Multiply long
560
- def V1WriteIM : SchedWriteVariant<
561
- [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>,
562
- SchedVar<NoSchedPred, [V1Write_2c_1M0]>]>;
563
- def : SchedAlias<WriteIM32, V1WriteIM>;
564
- def : SchedAlias<WriteIM64, V1WriteIM>;
643
+ // Multiply accumulate, W-form
644
+ // Multiply accumulate, X-form
645
+ def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
646
+ (instregex "^M(ADD|SUB)[WX]rrr$")>;
565
647
648
+ // Multiply accumulate long
649
+ // Multiply long
650
+ def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
651
+ (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
566
652
// Multiply high
567
653
def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
568
654
@@ -680,10 +766,11 @@ def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
680
766
def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
681
767
682
768
// FP multiply
683
- def : SchedAlias <WriteFMul, V1Write_3c_1V>;
769
+ def : WriteRes <WriteFMul, [V1UnitV]> { let Latency = 3; }
684
770
685
771
// FP multiply accumulate
686
- def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
772
+ def : InstRW<[V1Wr_FMA, ReadDefault, ReadDefault, V1Rd_FMA],
773
+ (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
687
774
688
775
// FP round to integral
689
776
def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
@@ -824,7 +911,7 @@ def : SchedAlias<WriteVq, V1Write_2c_1V>;
824
911
// ASIMD absolute diff accum
825
912
// ASIMD absolute diff accum long
826
913
// ASIMD pairwise add and accumulate long
827
- def : InstRW<[V1Write_4c_1V13 ], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
914
+ def : InstRW<[V1Wr_ADA, V1Rd_ADA ], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
828
915
829
916
// ASIMD arith, reduce, 4H/4S
830
917
// ASIMD max/min, reduce, 4H/4S
@@ -843,23 +930,26 @@ def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
843
930
844
931
// ASIMD dot product
845
932
// ASIMD dot product using signed and unsigned integers
846
- def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
933
+ def : InstRW<[V1Wr_VDOT, V1Rd_VDOT],
934
+ (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
847
935
848
- // ASIMD matrix multiply- accumulate
849
- def : InstRW<[V1Write_3c_1V ], (instrs SMMLA, UMMLA, USMMLA)>;
936
+ // ASIMD matrix multiply-accumulate
937
+ def : InstRW<[V1Wr_VMMA, V1Rd_VMMA ], (instrs SMMLA, UMMLA, USMMLA)>;
850
938
851
939
// ASIMD multiply
940
+ def : InstRW<[V1Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
941
+
852
942
// ASIMD multiply accumulate
943
+ def : InstRW<[V1Wr_VMA, V1Rd_VMA], (instregex "^MLAv", "^MLSv")>;
944
+
853
945
// ASIMD multiply accumulate long
946
+ def : InstRW<[V1Wr_VMAL, V1Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
947
+
854
948
// ASIMD multiply accumulate high
949
+ def : InstRW<[V1Write_4c_1V02], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
950
+
855
951
// ASIMD multiply accumulate saturating long
856
- def : InstRW<[V1Write_4c_1V02],
857
- (instregex "^MUL(v[148]i16|v[124]i32)$",
858
- "^SQR?DMULH(v[48]i16|v[24]i32)$",
859
- "^ML[AS](v[148]i16|v[124]i32)$",
860
- "^[SU]ML[AS]Lv",
861
- "^SQRDML[AS]H(v[148]i16|v[124]i32)$",
862
- "^SQDML[AS]Lv")>;
952
+ def : InstRW<[V1Write_4c_1V02], (instregex "^SQDML[AS]L[iv]")>;
863
953
864
954
// ASIMD multiply/multiply long (8x8) polynomial
865
955
def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
@@ -868,11 +958,12 @@ def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
868
958
def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
869
959
870
960
// ASIMD shift accumulate
961
+ def : InstRW<[V1Wr_VSA, V1Rd_VSA], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
962
+
871
963
// ASIMD shift by immed, complex
872
964
// ASIMD shift by register, complex
873
965
def : InstRW<[V1Write_4c_1V13],
874
- (instregex "^[SU]R?SRAv",
875
- "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
966
+ (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
876
967
"^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
877
968
"^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
878
969
"^[SU]Q?RSHLv", "^[SU]QSHLv")>;
@@ -890,16 +981,25 @@ def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
890
981
// ASIMD FP absolute value/difference
891
982
// ASIMD FP arith, normal
892
983
// ASIMD FP compare
893
- // ASIMD FP complex add
894
984
// ASIMD FP max/min, normal
895
985
// ASIMD FP max/min, pairwise
896
986
// ASIMD FP negate
897
987
// Covered by "SchedAlias (WriteV[dq]...)" above
898
988
989
+ // ASIMD FP complex add
990
+ def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$")>;
991
+
899
992
// ASIMD FP complex multiply add
993
+ def : InstRW<[V1Wr_FCMA, V1Rd_FCMA], (instregex "^FCMLAv")>;
994
+
995
+ // ASIMD FP multiply
996
+ def : InstRW<[V1Wr_FPM], (instregex "^FMULX?v")>;
997
+
900
998
// ASIMD FP multiply accumulate
901
- def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$",
902
- "^FML[AS]v")>;
999
+ def : InstRW<[V1Wr_FPMA, V1Rd_FPMA], (instregex "^FML[AS]v")>;
1000
+
1001
+ // ASIMD FP multiply accumulate long
1002
+ def : InstRW<[V1Wr_FPMAL, V1Rd_FPMAL], (instregex "^FML[AS]L2?v")>;
903
1003
904
1004
// ASIMD FP convert, long (F16 to F32)
905
1005
def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
@@ -953,12 +1053,6 @@ def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
953
1053
// ASIMD FP max/min, reduce, Q-form F16
954
1054
def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
955
1055
956
- // ASIMD FP multiply
957
- def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>;
958
-
959
- // ASIMD FP multiply accumulate long
960
- def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
961
-
962
1056
// ASIMD FP round, D-form F32 and Q-form F64
963
1057
def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
964
1058
@@ -976,13 +1070,13 @@ def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
976
1070
def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
977
1071
978
1072
// ASIMD dot product
979
- def : InstRW<[V1Write_4c_1V ], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
1073
+ def : InstRW<[V1Wr_BFD, V1Rd_BFD ], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
980
1074
981
1075
// ASIMD matrix multiply accumulate
982
- def : InstRW<[V1Write_5c_1V ], (instrs BFMMLA)>;
1076
+ def : InstRW<[V1Wr_BFMMA, V1Rd_BFMMA ], (instrs BFMMLA)>;
983
1077
984
1078
// ASIMD multiply accumulate long
985
- def : InstRW<[V1Write_4c_1V ], (instregex "^BFMLAL[BT](Idx)?$")>;
1079
+ def : InstRW<[V1Wr_BFMLA, V1Rd_BFMLA ], (instregex "^BFMLAL[BT](Idx)?$")>;
986
1080
987
1081
// Scalar convert, F32 to BF16
988
1082
def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
@@ -1300,7 +1394,7 @@ def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1300
1394
// -----------------------------------------------------------------------------
1301
1395
1302
1396
// CRC checksum ops
1303
- def : InstRW<[V1Write_2c_1M0 ], (instregex "^CRC32C?[BHWX]rr$")>;
1397
+ def : InstRW<[V1Wr_CRC, V1Rd_CRC ], (instregex "^CRC32C?[BHWX]rr$")>;
1304
1398
1305
1399
1306
1400
// SVE Predicate instructions
@@ -1440,13 +1534,14 @@ def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
1440
1534
"^[SU]DIV_ZPZZ_D")>;
1441
1535
1442
1536
// Dot product, 8 bit
1443
- def : InstRW<[V1Write_3c_1V01 ], (instregex "^[SU]DOT_ZZZI?_S$")>;
1537
+ def : InstRW<[V1Wr_ZDOTB, V1Rd_ZDOTB ], (instregex "^[SU]DOT_ZZZI?_S$")>;
1444
1538
1445
1539
// Dot product, 8 bit, using signed and unsigned integers
1446
- def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
1540
+ def : InstRW<[V1Wr_ZUDOTB, V1Rd_ZUDOTB],
1541
+ (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
1447
1542
1448
1543
// Dot product, 16 bit
1449
- def : InstRW<[V1Write_4c_1V0 ], (instregex "^[SU]DOT_ZZZI?_D$")>;
1544
+ def : InstRW<[V1Wr_ZDOTH, V1Rd_ZDOTH ], (instregex "^[SU]DOT_ZZZI?_D$")>;
1450
1545
1451
1546
// Duplicate, immediate and indexed form
1452
1547
def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
@@ -1488,7 +1583,7 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1488
1583
"^MOVPRFX_ZZ$")>;
1489
1584
1490
1585
// Matrix multiply-accumulate
1491
- def : InstRW<[V1Write_3c_1V01 ], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1586
+ def : InstRW<[V1Wr_ZMMA, V1Rd_ZMMA ], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1492
1587
1493
1588
// Multiply, B, H, S element size
1494
1589
def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
@@ -1497,12 +1592,16 @@ def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
1497
1592
"^[SU]MULH_ZPZZ_[BHS]")>;
1498
1593
1499
1594
// Multiply, D element size
1500
- // Multiply accumulate, D element size
1501
1595
def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
1502
1596
"^MUL_ZPZZ_D",
1503
1597
"^[SU]MULH_(ZPmZ|ZZZ)_D",
1504
- "^[SU]MULH_ZPZZ_D",
1505
- "^(MLA|MLS|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>;
1598
+ "^[SU]MULH_ZPZZ_D")>;
1599
+
1600
+ // Multiply accumulate, D element size
1601
+ def : InstRW<[V1Wr_ZMAD, V1Rd_ZMAD],
1602
+ (instregex "^ML[AS]_ZPZZZ_D")>;
1603
+ def : InstRW<[V1Wr_ZMAD, ReadDefault, V1Rd_ZMAD],
1604
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
1506
1605
1507
1606
// Multiply accumulate, B, H, S element size
1508
1607
// NOTE: This is not specified in the SOG.
@@ -1583,8 +1682,8 @@ def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
1583
1682
def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1584
1683
1585
1684
// Floating point complex multiply add
1586
- def : InstRW<[V1Write_5c_1V01 ], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1587
- "^FCMLA_ZZZI_[HS]$ ")>;
1685
+ // ZPmZZ forms: a ReadDefault operand precedes the forwarded read.
+ def : InstRW<[V1Wr_ZFCMA, ReadDefault, V1Rd_ZFCMA],
+ (instregex "^FCMLA_ZPmZZ_[HSD]")>;
1686
+ // ZZZI forms: the forwarded read is the first read operand.
+ def : InstRW<[V1Wr_ZFCMA, V1Rd_ZFCMA],
+ (instregex "^FCMLA_ZZZI_[HS]")>;
1588
1687
1589
1688
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1590
1689
// Floating point convert to integer, F32
@@ -1623,11 +1722,15 @@ def : InstRW<[V1Write_3c_1V01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
1623
1722
"^FMUL_ZPZ[IZ]_[HSD]")>;
1624
1723
1625
1724
// Floating point multiply accumulate
1725
+ def : InstRW<[V1Wr_ZFMA, ReadDefault, V1Rd_ZFMA],
1726
+ (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
1727
+ "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
1728
+ def : InstRW<[V1Wr_ZFMA, V1Rd_ZFMA],
1729
+ (instregex "^FML[AS]_ZZZI_[HSD]",
1730
+ "^FN?ML[AS]_ZPZZZ_[HSD]")>;
1731
+
1626
1732
// Floating point reciprocal step
1627
- def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
1628
- "^FN?ML[AS]_ZPZZZ_[HSD]",
1629
- "^FML[AS]_ZZZI_[HSD]$",
1630
- "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
1733
+ def : InstRW<[V1Write_4c_1V01], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
1631
1734
1632
1735
// Floating point reciprocal estimate, F16
1633
1736
def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
@@ -1681,13 +1784,13 @@ def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
1681
1784
def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
1682
1785
1683
1786
// Dot product
1684
- def : InstRW<[V1Write_4c_1V01 ], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
1787
+ def : InstRW<[V1Wr_ZBFDOT, V1Rd_ZBFDOT ], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
1685
1788
1686
1789
// Matrix multiply accumulate
1687
- def : InstRW<[V1Write_5c_1V01 ], (instrs BFMMLA_ZZZ)>;
1790
+ def : InstRW<[V1Wr_ZBFMMA, V1Rd_ZBFMMA ], (instrs BFMMLA_ZZZ)>;
1688
1791
1689
1792
// Multiply accumulate long
1690
- def : InstRW<[V1Write_5c_1V01 ], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
1793
+ def : InstRW<[V1Wr_ZBFMAL, V1Rd_ZBFMAL ], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
1691
1794
1692
1795
1693
1796
// SVE Load instructions
0 commit comments