Skip to content

Commit 303c8d2

Browse files
author
Rin Dobrescu
authored
[AArch64] Add SchedReadAdvance to Neoverse-V1 scheduling model. (#111538)
Introduce a description of late forwarding to the Neoverse-V1 Scheduling model.
1 parent b5ea5be commit 303c8d2

File tree

3 files changed

+1645
-121
lines changed

3 files changed

+1645
-121
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td

Lines changed: 155 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,89 @@ def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
469469
V1UnitV, V1UnitV, V1UnitV,
470470
V1UnitV, V1UnitV, V1UnitV]>;
471471

472+
//===----------------------------------------------------------------------===//
473+
// Define forwarded types: write resources and the read advances that describe
// late forwarding of their results.
474+
475+
// NOTE: SOG, p. 20, n. 2: Accumulator forwarding may not be supported for
476+
// consumers of 64-bit multiply-high operations (to be confirmed).
477+
def V1Wr_IM : SchedWriteRes<[V1UnitM]> { let Latency = 2; }
478+
def V1Wr_IMA : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
479+
// Variant write used by the integer multiply(-accumulate) InstRW entries.
// Resolves to V1Wr_IM when NeoverseMULIdiomPred holds, and to V1Wr_IMA
// otherwise. Only V1Wr_IMA is listed as a producer in V1Rd_IMA, so only the
// second variant takes part in the read advance.
def V1WriteIM : SchedWriteVariant<
480+
[SchedVar<NeoverseMULIdiomPred, [V1Wr_IM]>,
481+
SchedVar<NoSchedPred, [V1Wr_IMA]>]>;
482+
def V1Rd_IMA : SchedReadAdvance<1, [V1Wr_IMA]>;
483+
484+
// FP multiply-accumulate write: one V1UnitV resource, 4-cycle latency.
// V1Rd_FMA is a read advance of 2 cycles that applies when the producing write
// is either WriteFMul or V1Wr_FMA; other producers are not listed here.
def V1Wr_FMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
485+
def V1Rd_FMA : SchedReadAdvance<2, [WriteFMul, V1Wr_FMA]>;
486+
487+
def V1Wr_ADA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
488+
def V1Rd_ADA : SchedReadAdvance<3, [V1Wr_ADA]>;
489+
490+
def V1Wr_VDOT : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
491+
def V1Rd_VDOT : SchedReadAdvance<2, [V1Wr_VDOT]>;
492+
493+
def V1Wr_VMMA : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
494+
def V1Rd_VMMA : SchedReadAdvance<2, [V1Wr_VMMA]>;
495+
496+
def V1Wr_VMA : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
497+
def V1Rd_VMA : SchedReadAdvance<3, [V1Wr_VMA]>;
498+
499+
def V1Wr_VMAL : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
500+
def V1Rd_VMAL : SchedReadAdvance<3, [V1Wr_VMAL]>;
501+
502+
def V1Wr_VSA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
503+
def V1Rd_VSA : SchedReadAdvance<3, [V1Wr_VSA]>;
504+
505+
def V1Wr_FCMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
506+
def V1Rd_FCMA : SchedReadAdvance<2, [V1Wr_FCMA]>;
507+
508+
def V1Wr_FPM : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
509+
def V1Wr_FPMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
510+
def V1Rd_FPMA : SchedReadAdvance<2, [V1Wr_FPM, V1Wr_FPMA]>;
511+
512+
def V1Wr_FPMAL : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
513+
def V1Rd_FPMAL : SchedReadAdvance<3, [V1Wr_FPMAL]>;
514+
515+
def V1Wr_BFD : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
516+
def V1Rd_BFD : SchedReadAdvance<2, [V1Wr_BFD]>;
517+
518+
def V1Wr_BFMMA : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
519+
def V1Rd_BFMMA : SchedReadAdvance<2, [V1Wr_BFMMA]>;
520+
521+
def V1Wr_BFMLA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
522+
def V1Rd_BFMLA : SchedReadAdvance<2, [V1Wr_BFMLA]>;
523+
524+
def V1Wr_CRC : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
525+
def V1Rd_CRC : SchedReadAdvance<1, [V1Wr_CRC]>;
526+
527+
def V1Wr_ZDOTB : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
528+
def V1Rd_ZDOTB : SchedReadAdvance<2, [V1Wr_ZDOTB]>;
529+
530+
def V1Wr_ZUDOTB : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
531+
def V1Rd_ZUDOTB : SchedReadAdvance<2, [V1Wr_ZUDOTB]>;
532+
533+
def V1Wr_ZDOTH : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
534+
def V1Rd_ZDOTH : SchedReadAdvance<3, [V1Wr_ZDOTH]>;
535+
536+
def V1Wr_ZMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
537+
def V1Rd_ZMMA : SchedReadAdvance<2, [V1Wr_ZMMA]>;
538+
539+
// Write resource for the D-element multiply-accumulate InstRW entries:
// two micro-ops, each listed against V1UnitV0, with a 5-cycle latency.
// The `let ... in` applies to this single def only.
let Latency = 5, NumMicroOps = 2 in
540+
def V1Wr_ZMAD : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
541+
def V1Rd_ZMAD : SchedReadAdvance<3, [V1Wr_ZMAD]>;
542+
543+
def V1Wr_ZFCMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
544+
def V1Rd_ZFCMA : SchedReadAdvance<3, [V1Wr_ZFCMA]>;
545+
546+
def V1Wr_ZFMA : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
547+
def V1Rd_ZFMA : SchedReadAdvance<2, [V1Wr_ZFMA]>;
548+
549+
def V1Wr_ZBFDOT : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
550+
def V1Rd_ZBFDOT : SchedReadAdvance<2, [V1Wr_ZBFDOT]>;
551+
def V1Wr_ZBFMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
552+
def V1Rd_ZBFMMA : SchedReadAdvance<2, [V1Wr_ZBFMMA]>;
553+
def V1Wr_ZBFMAL : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
554+
def V1Rd_ZBFMAL : SchedReadAdvance<3, [V1Wr_ZBFMAL]>;
472555

473556
// Miscellaneous Instructions
474557
// -----------------------------------------------------------------------------
@@ -553,16 +636,19 @@ def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>;
553636
def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
554637
def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
555638

639+
def : SchedAlias<WriteIM32, V1Write_2c_1M>;
640+
def : SchedAlias<WriteIM64, V1Write_2c_1M>;
641+
556642
// Multiply
557-
// Multiply accumulate
558-
// Multiply accumulate, long
559-
// Multiply long
560-
def V1WriteIM : SchedWriteVariant<
561-
[SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>,
562-
SchedVar<NoSchedPred, [V1Write_2c_1M0]>]>;
563-
def : SchedAlias<WriteIM32, V1WriteIM>;
564-
def : SchedAlias<WriteIM64, V1WriteIM>;
643+
// Multiply accumulate, W-form
644+
// Multiply accumulate, X-form
645+
def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
646+
(instregex "^M(ADD|SUB)[WX]rrr$")>;
565647

648+
// Multiply accumulate long
649+
// Multiply long
650+
def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
651+
(instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
566652
// Multiply high
567653
def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
568654

@@ -680,10 +766,11 @@ def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
680766
def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
681767

682768
// FP multiply
683-
def : SchedAlias<WriteFMul, V1Write_3c_1V>;
769+
def : WriteRes<WriteFMul, [V1UnitV]> { let Latency = 3; }
684770

685771
// FP multiply accumulate
686-
def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
772+
def : InstRW<[V1Wr_FMA, ReadDefault, ReadDefault, V1Rd_FMA],
773+
(instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
687774

688775
// FP round to integral
689776
def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
@@ -824,7 +911,7 @@ def : SchedAlias<WriteVq, V1Write_2c_1V>;
824911
// ASIMD absolute diff accum
825912
// ASIMD absolute diff accum long
826913
// ASIMD pairwise add and accumulate long
827-
def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
914+
def : InstRW<[V1Wr_ADA, V1Rd_ADA], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
828915

829916
// ASIMD arith, reduce, 4H/4S
830917
// ASIMD max/min, reduce, 4H/4S
@@ -843,23 +930,26 @@ def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
843930

844931
// ASIMD dot product
845932
// ASIMD dot product using signed and unsigned integers
846-
def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
933+
def : InstRW<[V1Wr_VDOT, V1Rd_VDOT],
934+
(instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
847935

848-
// ASIMD matrix multiply- accumulate
849-
def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>;
936+
// ASIMD matrix multiply-accumulate
937+
def : InstRW<[V1Wr_VMMA, V1Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
850938

851939
// ASIMD multiply
940+
def : InstRW<[V1Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
941+
852942
// ASIMD multiply accumulate
943+
def : InstRW<[V1Wr_VMA, V1Rd_VMA], (instregex "^MLAv", "^MLSv")>;
944+
853945
// ASIMD multiply accumulate long
946+
def : InstRW<[V1Wr_VMAL, V1Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
947+
854948
// ASIMD multiply accumulate high
949+
def : InstRW<[V1Write_4c_1V02], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
950+
855951
// ASIMD multiply accumulate saturating long
856-
def : InstRW<[V1Write_4c_1V02],
857-
(instregex "^MUL(v[148]i16|v[124]i32)$",
858-
"^SQR?DMULH(v[48]i16|v[24]i32)$",
859-
"^ML[AS](v[148]i16|v[124]i32)$",
860-
"^[SU]ML[AS]Lv",
861-
"^SQRDML[AS]H(v[148]i16|v[124]i32)$",
862-
"^SQDML[AS]Lv")>;
952+
def : InstRW<[V1Write_4c_1V02], (instregex "^SQDML[AS]L[iv]")>;
863953

864954
// ASIMD multiply/multiply long (8x8) polynomial
865955
def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
@@ -868,11 +958,12 @@ def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
868958
def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
869959

870960
// ASIMD shift accumulate
961+
def : InstRW<[V1Wr_VSA, V1Rd_VSA], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
962+
871963
// ASIMD shift by immed, complex
872964
// ASIMD shift by register, complex
873965
def : InstRW<[V1Write_4c_1V13],
874-
(instregex "^[SU]R?SRAv",
875-
"^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
966+
(instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
876967
"^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
877968
"^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
878969
"^[SU]Q?RSHLv", "^[SU]QSHLv")>;
@@ -890,16 +981,25 @@ def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
890981
// ASIMD FP absolute value/difference
891982
// ASIMD FP arith, normal
892983
// ASIMD FP compare
893-
// ASIMD FP complex add
894984
// ASIMD FP max/min, normal
895985
// ASIMD FP max/min, pairwise
896986
// ASIMD FP negate
897987
// Covered by "SchedAlias (WriteV[dq]...)" above
898988

989+
// ASIMD FP complex add
990+
def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$")>;
991+
899992
// ASIMD FP complex multiply add
993+
def : InstRW<[V1Wr_FCMA, V1Rd_FCMA], (instregex "^FCMLAv")>;
994+
995+
// ASIMD FP multiply
996+
def : InstRW<[V1Wr_FPM], (instregex "^FMULX?v")>;
997+
900998
// ASIMD FP multiply accumulate
901-
def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$",
902-
"^FML[AS]v")>;
999+
def : InstRW<[V1Wr_FPMA, V1Rd_FPMA], (instregex "^FML[AS]v")>;
1000+
1001+
// ASIMD FP multiply accumulate long
1002+
def : InstRW<[V1Wr_FPMAL, V1Rd_FPMAL], (instregex "^FML[AS]L2?v")>;
9031003

9041004
// ASIMD FP convert, long (F16 to F32)
9051005
def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
@@ -953,12 +1053,6 @@ def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
9531053
// ASIMD FP max/min, reduce, Q-form F16
9541054
def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
9551055

956-
// ASIMD FP multiply
957-
def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>;
958-
959-
// ASIMD FP multiply accumulate long
960-
def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
961-
9621056
// ASIMD FP round, D-form F32 and Q-form F64
9631057
def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
9641058

@@ -976,13 +1070,13 @@ def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
9761070
def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
9771071

9781072
// ASIMD dot product
979-
def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
1073+
def : InstRW<[V1Wr_BFD, V1Rd_BFD], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
9801074

9811075
// ASIMD matrix multiply accumulate
982-
def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>;
1076+
def : InstRW<[V1Wr_BFMMA, V1Rd_BFMMA], (instrs BFMMLA)>;
9831077

9841078
// ASIMD multiply accumulate long
985-
def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>;
1079+
def : InstRW<[V1Wr_BFMLA, V1Rd_BFMLA], (instregex "^BFMLAL[BT](Idx)?$")>;
9861080

9871081
// Scalar convert, F32 to BF16
9881082
def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
@@ -1300,7 +1394,7 @@ def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
13001394
// -----------------------------------------------------------------------------
13011395

13021396
// CRC checksum ops
1303-
def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>;
1397+
def : InstRW<[V1Wr_CRC, V1Rd_CRC], (instregex "^CRC32C?[BHWX]rr$")>;
13041398

13051399

13061400
// SVE Predicate instructions
@@ -1440,13 +1534,14 @@ def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
14401534
"^[SU]DIV_ZPZZ_D")>;
14411535

14421536
// Dot product, 8 bit
1443-
def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>;
1537+
def : InstRW<[V1Wr_ZDOTB, V1Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S$")>;
14441538

14451539
// Dot product, 8 bit, using signed and unsigned integers
1446-
def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
1540+
def : InstRW<[V1Wr_ZUDOTB, V1Rd_ZUDOTB],
1541+
(instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
14471542

14481543
// Dot product, 16 bit
1449-
def : InstRW<[V1Write_4c_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
1544+
def : InstRW<[V1Wr_ZDOTH, V1Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D$")>;
14501545

14511546
// Duplicate, immediate and indexed form
14521547
def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
@@ -1488,7 +1583,7 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
14881583
"^MOVPRFX_ZZ$")>;
14891584

14901585
// Matrix multiply-accumulate
1491-
def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1586+
def : InstRW<[V1Wr_ZMMA, V1Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
14921587

14931588
// Multiply, B, H, S element size
14941589
def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
@@ -1497,12 +1592,16 @@ def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
14971592
"^[SU]MULH_ZPZZ_[BHS]")>;
14981593

14991594
// Multiply, D element size
1500-
// Multiply accumulate, D element size
15011595
def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
15021596
"^MUL_ZPZZ_D",
15031597
"^[SU]MULH_(ZPmZ|ZZZ)_D",
1504-
"^[SU]MULH_ZPZZ_D",
1505-
"^(MLA|MLS|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>;
1598+
"^[SU]MULH_ZPZZ_D")>;
1599+
1600+
// Multiply accumulate, D element size
1601+
def : InstRW<[V1Wr_ZMAD, V1Rd_ZMAD],
1602+
(instregex "^ML[AS]_ZPZZZ_D")>;
1603+
// Predicated (ZPmZZ) D-element multiply-accumulate forms. The read list is
// positional: ReadDefault covers the first source operand and V1Rd_ZMAD the
// second.
// NOTE(review): the first source operand is presumably the governing
// predicate, which would put the advance on the accumulator for MLA/MLS. For
// MAD/MSB the addend may be a different operand - confirm against the
// instruction definitions.
def : InstRW<[V1Wr_ZMAD, ReadDefault, V1Rd_ZMAD],
1604+
(instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
15061605

15071606
// Multiply accumulate, B, H, S element size
15081607
// NOTE: This is not specified in the SOG.
@@ -1583,8 +1682,8 @@ def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
15831682
def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
15841683

15851684
// Floating point complex multiply add
1586-
def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1587-
"^FCMLA_ZZZI_[HS]$")>;
1685+
def : InstRW<[V1Wr_ZFCMA, ReadDefault, V1Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
1686+
def : InstRW<[V1Wr_ZFCMA, V1Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;
15881687

15891688
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
15901689
// Floating point convert to integer, F32
@@ -1623,11 +1722,15 @@ def : InstRW<[V1Write_3c_1V01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
16231722
"^FMUL_ZPZ[IZ]_[HSD]")>;
16241723

16251724
// Floating point multiply accumulate
1725+
def : InstRW<[V1Wr_ZFMA, ReadDefault, V1Rd_ZFMA],
1726+
(instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
1727+
"^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
1728+
def : InstRW<[V1Wr_ZFMA, V1Rd_ZFMA],
1729+
(instregex "^FML[AS]_ZZZI_[HSD]",
1730+
"^FN?ML[AS]_ZPZZZ_[HSD]")>;
1731+
16261732
// Floating point reciprocal step
1627-
def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
1628-
"^FN?ML[AS]_ZPZZZ_[HSD]",
1629-
"^FML[AS]_ZZZI_[HSD]$",
1630-
"^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
1733+
def : InstRW<[V1Write_4c_1V01], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
16311734

16321735
// Floating point reciprocal estimate, F16
16331736
def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
@@ -1681,13 +1784,13 @@ def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
16811784
def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
16821785

16831786
// Dot product
1684-
def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
1787+
def : InstRW<[V1Wr_ZBFDOT, V1Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
16851788

16861789
// Matrix multiply accumulate
1687-
def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>;
1790+
def : InstRW<[V1Wr_ZBFMMA, V1Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
16881791

16891792
// Multiply accumulate long
1690-
def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
1793+
def : InstRW<[V1Wr_ZBFMAL, V1Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
16911794

16921795

16931796
// SVE Load instructions

0 commit comments

Comments
 (0)