Skip to content

Commit d36d805

Browse files
committed
[AArch64] Ensure Neoverse-N2 scheduling model includes all SVE pseudos.
Similar to #84187, this enables the existing test that checks that the scheduling info of the SVE pseudos matches that of the real instructions, and adjusts the scheduling info in the NeoverseN2 model so that all cases are handled.
1 parent ef1eb03 commit d36d805

File tree

2 files changed

+114
-100
lines changed

2 files changed

+114
-100
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td

Lines changed: 110 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,7 +1577,8 @@ def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
15771577
// -----------------------------------------------------------------------------
15781578

15791579
// Arithmetic, absolute diff
1580-
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;
1580+
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
1581+
"^[SU]ABD_ZPZZ_[BHSD]")>;
15811582

15821583
// Arithmetic, absolute diff accum
15831584
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
@@ -1590,24 +1591,25 @@ def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
15901591

15911592
// Arithmetic, basic
15921593
def : InstRW<[N2Write_2cyc_1V],
1593-
(instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
1594-
"^(ADD|SUB)_ZZZ_[BHSD]$",
1595-
"^(ADD|SUB|SUBR)_ZI_[BHSD]$",
1596-
"^ADR_[SU]XTW_ZZZ_D_[0123]$",
1597-
"^ADR_LSL_ZZZ_[SD]_[0123]$",
1598-
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
1599-
"^SADDLBT_ZZZ_[HSD]$",
1600-
"^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
1601-
"^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
1594+
(instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
1595+
"^(ADD|SUB)_ZZZ_[BHSD]",
1596+
"^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
1597+
"^(ADD|SUB|SUBR)_ZI_[BHSD]",
1598+
"^ADR_[SU]XTW_ZZZ_D_[0123]",
1599+
"^ADR_LSL_ZZZ_[SD]_[0123]",
1600+
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
1601+
"^SADDLBT_ZZZ_[HSD]",
1602+
"^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
1603+
"^SSUBL(BT|TB)_ZZZ_[HSD]")>;
16021604

16031605
// Arithmetic, complex
16041606
def : InstRW<[N2Write_2cyc_1V],
1605-
(instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
1606-
"^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
1607-
"^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
1608-
"^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
1609-
"^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
1610-
"^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
1607+
(instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
1608+
"^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
1609+
"^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
1610+
"^[SU]Q(ADD|SUB)_ZI_[BHSD]",
1611+
"^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
1612+
"^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
16111613

16121614
// Arithmetic, large integer
16131615
def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
@@ -1620,12 +1622,13 @@ def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
16201622

16211623
// Arithmetic, shift
16221624
def : InstRW<[N2Write_2cyc_1V1],
1623-
(instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
1624-
"^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
1625-
"^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
1626-
"^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
1627-
"^(ASR|LSL|LSR)_ZZI_[BHSD]$",
1628-
"^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
1625+
(instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
1626+
"^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
1627+
"^(ASR|LSL|LSR)_ZPmI_[BHSD]",
1628+
"^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
1629+
"^(ASR|LSL|LSR)_ZZI_[BHSD]",
1630+
"^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
1631+
"^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
16291632

16301633
// Arithmetic, shift and accumulate
16311634
def : InstRW<[N2Write_4cyc_1V1],
@@ -1638,29 +1641,29 @@ def : InstRW<[N2Write_2cyc_1V1],
16381641

16391642
// Arithmetic, shift complex
16401643
def : InstRW<[N2Write_4cyc_1V1],
1641-
(instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
1642-
"^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
1643-
"^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
1644-
"^SQSHRU?N[BT]_ZZI_[BHS]$",
1645-
"^UQR?SHRN[BT]_ZZI_[BHS]$")>;
1644+
(instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
1645+
"^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
1646+
"^[SU]QR?SHL_ZPZZ_[BHSD]",
1647+
"^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
1648+
"^SQSHRU?N[BT]_ZZI_[BHS]",
1649+
"^UQR?SHRN[BT]_ZZI_[BHS]")>;
16461650

16471651
// Arithmetic, shift right for divide
1648-
def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>;
1652+
def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
16491653

16501654
// Arithmetic, shift rounding
1651-
def : InstRW<[N2Write_4cyc_1V1],
1652-
(instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
1653-
"^[SU]RSHR_ZPmI_[BHSD]$")>;
1655+
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
1656+
"^[SU]RSHL_ZPZZ_[BHSD]",
1657+
"^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
16541658

16551659
// Bit manipulation
1656-
def : InstRW<[N2Write_6cyc_2V1],
1657-
(instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;
1660+
def : InstRW<[N2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
16581661

16591662
// Bitwise select
16601663
def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
16611664

16621665
// Count/reverse bits
1663-
def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
1666+
def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
16641667

16651668
// Broadcast logical bitmask immediate to vector
16661669
def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;
@@ -1695,19 +1698,14 @@ def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
16951698
"^SPLICE_ZPZZ?_[BHSD]$")>;
16961699

16971700
// Convert to floating point, 64b to float or convert to double
1698-
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;
1699-
1700-
// Convert to floating point, 64b to half
1701-
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;
1701+
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
1702+
"^[SU]CVTF_ZPmZ_StoD")>;
17021703

17031704
// Convert to floating point, 32b to single or half
1704-
def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
1705-
1706-
// Convert to floating point, 32b to double
1707-
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>;
1705+
def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
17081706

17091707
// Convert to floating point, 16b to half
1710-
def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
1708+
def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
17111709

17121710
// Copy, scalar
17131711
def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
@@ -1717,10 +1715,12 @@ def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
17171715
"^CPY_ZPzI_[BHSD]$")>;
17181716

17191717
// Divides, 32 bit
1720-
def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
1718+
def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
1719+
"^[SU]DIV_ZPZZ_S")>;
17211720

17221721
// Divides, 64 bit
1723-
def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
1722+
def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
1723+
"^[SU]DIV_ZPZZ_D")>;
17241724

17251725
// Dot product, 8 bit
17261726
def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
@@ -1739,9 +1739,9 @@ def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
17391739
def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
17401740

17411741
// Extend, sign or zero
1742-
def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
1743-
"^[SU]XTH_ZPmZ_[SD]$",
1744-
"^[SU]XTW_ZPmZ_[D]$")>;
1742+
def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]",
1743+
"^[SU]XTH_ZPmZ_[SD]",
1744+
"^[SU]XTW_ZPmZ_[D]")>;
17451745

17461746
// Extract
17471747
def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
@@ -1778,14 +1778,16 @@ def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
17781778

17791779
// Logical
17801780
def : InstRW<[N2Write_2cyc_1V],
1781-
(instregex "^(AND|EOR|ORR)_ZI$",
1782-
"^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
1783-
"^EOR(BT|TB)_ZZZ_[BHSD]$",
1784-
"^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
1781+
(instregex "^(AND|EOR|ORR)_ZI",
1782+
"^(AND|BIC|EOR|ORR)_ZZZ",
1783+
"^EOR(BT|TB)_ZZZ_[BHSD]",
1784+
"^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
1785+
"^NOT_ZPmZ_[BHSD]")>;
17851786

17861787
// Max/min, basic and pairwise
1787-
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
1788-
"^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;
1788+
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
1789+
"^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
1790+
"^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
17891791

17901792
// Matching operations
17911793
def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
@@ -1798,24 +1800,28 @@ def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
17981800
"^MOVPRFX_ZZ$")>;
17991801

18001802
// Multiply, B, H, S element size
1801-
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
1802-
"^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
1803+
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
1804+
"^MUL_ZPZZ_[BHS]",
1805+
"^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
1806+
"^[SU]MULH_ZPZZ_[BHS]")>;
18031807

18041808
// Multiply, D element size
1805-
def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
1806-
"^[SU]MULH_(ZPmZ|ZZZ)_D$")>;
1809+
def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
1810+
"^MUL_ZPZZ_D",
1811+
"^[SU]MULH_(ZPmZ|ZZZ)_D",
1812+
"^[SU]MULH_ZPZZ_D")>;
18071813

18081814
// Multiply long
18091815
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
18101816
"^[SU]MULL[BT]_ZZZ_[HSD]$")>;
18111817

18121818
// Multiply accumulate, B, H, S element size
18131819
def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
1814-
"^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
1820+
"^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
18151821

18161822
// Multiply accumulate, D element size
18171823
def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
1818-
"^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
1824+
"^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>;
18191825

18201826
// Multiply accumulate long
18211827
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
@@ -1864,7 +1870,7 @@ def : InstRW<[N2Write_2cyc_1V0],
18641870
(instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
18651871

18661872
// Reciprocal estimate
1867-
def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
1873+
def : InstRW<[N2Write_4cyc_2V0], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
18681874

18691875
// Reduction, arithmetic, B form
18701876
def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
@@ -1909,13 +1915,17 @@ def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
19091915
// -----------------------------------------------------------------------------
19101916

19111917
// Floating point absolute value/difference
1912-
def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;
1918+
def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
1919+
"^FABD_ZPZZ_[HSD]",
1920+
"^FABS_ZPmZ_[HSD]")>;
19131921

19141922
// Floating point arithmetic
1915-
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
1916-
"^FADDP_ZPmZZ_[HSD]$",
1917-
"^FNEG_ZPmZ_[HSD]$",
1918-
"^FSUBR_ZPm[IZ]_[HSD]$")>;
1923+
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
1924+
"^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
1925+
"^FADDP_ZPmZZ_[HSD]",
1926+
"^FNEG_ZPmZ_[HSD]",
1927+
"^FSUBR_ZPm[IZ]_[HSD]",
1928+
"^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
19191929

19201930
// Floating point associative add, F16
19211931
def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
@@ -1940,80 +1950,80 @@ def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
19401950
"^FCMLA_ZZZI_[HS]$")>;
19411951

19421952
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1943-
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
1944-
"^FCVTLT_ZPmZ_HtoS$",
1945-
"^FCVTNT_ZPmZ_StoH$")>;
1953+
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
1954+
"^FCVTLT_ZPmZ_HtoS",
1955+
"^FCVTNT_ZPmZ_StoH")>;
19461956

19471957
// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
19481958
// or F64 to F16)
1949-
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
1950-
"^FCVTLT_ZPmZ_StoD$",
1951-
"^FCVTNT_ZPmZ_DtoS$")>;
1959+
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
1960+
"^FCVTLT_ZPmZ_StoD",
1961+
"^FCVTNT_ZPmZ_DtoS")>;
19521962

19531963
// Floating point convert, round to odd
19541964
def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
19551965

19561966
// Floating point base2 log, F16
1957-
def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>;
1967+
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
19581968

19591969
// Floating point base2 log, F32
1960-
def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>;
1970+
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
19611971

19621972
// Floating point base2 log, F64
1963-
def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>;
1973+
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
19641974

19651975
// Floating point convert to integer, F16
1966-
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
1976+
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
19671977

19681978
// Floating point convert to integer, F32
1969-
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
1979+
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
19701980

19711981
// Floating point convert to integer, F64
19721982
def : InstRW<[N2Write_3cyc_1V0],
1973-
(instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
1983+
(instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
19741984

19751985
// Floating point copy
19761986
def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
19771987
"^FDUP_ZI_[HSD]$")>;
19781988

19791989
// Floating point divide, F16
1980-
def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
1990+
def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
19811991

19821992
// Floating point divide, F32
1983-
def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
1993+
def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
19841994

19851995
// Floating point divide, F64
1986-
def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
1996+
def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
19871997

19881998
// Floating point min/max pairwise
1989-
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
1999+
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
19902000

19912001
// Floating point min/max
1992-
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
2002+
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2003+
"^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
19932004

19942005
// Floating point multiply
1995-
def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
1996-
"^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
2006+
def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2007+
"^FMULX_ZPZZ_[HSD]",
2008+
"^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2009+
"^FMUL_ZPZ[IZ]_[HSD]")>;
19972010

19982011
// Floating point multiply accumulate
1999-
def : InstRW<[N2Write_4cyc_1V],
2000-
(instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
2001-
"^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;
2012+
def : InstRW<[N2Write_4cyc_1V], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
2013+
"^FN?ML[AS]_ZPZZZ_[HSD]",
2014+
"^FML[AS]_ZZZI_[HSD]$")>;
20022015

20032016
// Floating point multiply add/sub accumulate long
20042017
def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
20052018

20062019
// Floating point reciprocal estimate, F16
2007-
def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
2008-
FRSQRTE_ZZ_H)>;
2020+
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
20092021

20102022
// Floating point reciprocal estimate, F32
2011-
def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
2012-
FRSQRTE_ZZ_S)>;
2023+
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
20132024

20142025
// Floating point reciprocal estimate, F64
2015-
def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
2016-
FRSQRTE_ZZ_D)>;
2026+
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
20172027

20182028
// Floating point reciprocal step
20192029
def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
@@ -2031,22 +2041,22 @@ def : InstRW<[N2Write_2cyc_1V],
20312041
(instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
20322042

20332043
// Floating point round to integral, F16
2034-
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
2044+
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
20352045

20362046
// Floating point round to integral, F32
2037-
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
2047+
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
20382048

20392049
// Floating point round to integral, F64
2040-
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
2050+
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
20412051

20422052
// Floating point square root, F16
2043-
def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;
2053+
def : InstRW<[N2Write_13cyc_1V0], (instregex "^FSQRT_ZPmZ_H")>;
20442054

20452055
// Floating point square root, F32
2046-
def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;
2056+
def : InstRW<[N2Write_10cyc_1V0], (instregex "^FSQRT_ZPmZ_S")>;
20472057

20482058
// Floating point square root, F64
2049-
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;
2059+
def : InstRW<[N2Write_16cyc_1V0], (instregex "^FSQRT_ZPmZ_D")>;
20502060

20512061
// Floating point trigonometric exponentiation
20522062
def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;

0 commit comments

Comments
 (0)