Skip to content

Commit 69b4784

Browse files
author
Rin Dobrescu
authored
[AArch64] Fix scheduling information for arithmetic and logical instructions. (#113542)
This patch corrects scheduling information relating to Neoverse-V1 and Neoverse-V2 arithmetic and logical instructions, modelling flag-setting instructions with a dedicated flags resource.
1 parent 46829e5 commit 69b4784

13 files changed

+11299
-11203
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -55,10 +55,10 @@ def V1UnitV0 : ProcResource<1>; // FP/ASIMD 0
5555
def V1UnitV1 : ProcResource<1>; // FP/ASIMD 1
5656
def V1UnitV2 : ProcResource<1>; // FP/ASIMD 2
5757
def V1UnitV3 : ProcResource<1>; // FP/ASIMD 3
58+
def V1UnitFlg : ProcResource<3>; // Flags
5859

5960
def V1UnitI : ProcResGroup<[V1UnitS,
6061
V1UnitM0, V1UnitM1]>; // Integer units
61-
def V1UnitJ : ProcResGroup<[V1UnitS, V1UnitM0]>; // Integer 0-2 units
6262
def V1UnitM : ProcResGroup<[V1UnitM0, V1UnitM1]>; // Integer multicycle units
6363
def V1UnitL : ProcResGroup<[V1UnitL01, V1UnitL2]>; // Load units
6464
def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1,
@@ -98,13 +98,14 @@ def V1Write_0c_0Z : SchedWriteRes<[]>;
9898

9999
def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; }
100100
def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; }
101-
def V1Write_1c_1J : SchedWriteRes<[V1UnitJ]> { let Latency = 1; }
101+
def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1; }
102102
def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; }
103103
def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; }
104104
def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
105105
def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
106106
def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
107107
def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; }
108+
def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2; }
108109
def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; }
109110
def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; }
110111
def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; }
@@ -595,7 +596,7 @@ def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
595596
def : SchedAlias<WriteI, V1Write_1c_1I>;
596597

597598
// ALU, basic, flagset
598-
def : InstRW<[V1Write_1c_1J],
599+
def : InstRW<[V1Write_1c_1I_1Flg],
599600
(instregex "^(ADD|SUB)S[WX]r[ir]$",
600601
"^(ADC|SBC)S[WX]r$",
601602
"^ANDS[WX]ri$",
@@ -614,19 +615,19 @@ def : SchedAlias<WriteISReg, V1WriteISReg>;
614615
// Arithmetic, flagset, LSL shift, shift <= 4
615616
// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4
616617
def V1WriteISRegS : SchedWriteVariant<
617-
[SchedVar<IsCheapLSL, [V1Write_1c_1J]>,
618-
SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
618+
[SchedVar<IsCheapLSL, [V1Write_1c_1I_1Flg]>,
619+
SchedVar<NoSchedPred, [V1Write_2c_1M_1Flg]>]>;
619620
def : InstRW<[V1WriteISRegS],
620621
(instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
621622

622623
// Logical, shift, no flagset
623624
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
624625

625626
// Logical, shift, flagset
626-
def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
627+
def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
627628

628629
// Flag manipulation instructions
629-
def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>;
630+
def : InstRW<[V1Write_1c_1I_1Flg], (instrs SETF8, SETF16, RMIF, CFINV)>;
630631

631632

632633
// Divide and multiply instructions

llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ def V2UnitV3 : ProcResource<1>; // FP/ASIMD 3
5252
def V2UnitL01 : ProcResource<2>; // Load/Store 0/1
5353
def V2UnitL2 : ProcResource<1>; // Load 2
5454
def V2UnitD : ProcResource<2>; // Store data 0/1
55+
def V2UnitFlg : ProcResource<3>; // Flags
5556

5657
def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>; // Integer single-cycle 0/1
5758
def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>; // Integer single-cycle 0/1/2/3
@@ -97,11 +98,13 @@ def V2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
9798

9899
def V2Write_1c_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
99100
def V2Write_1c_1F : SchedWriteRes<[V2UnitF]> { let Latency = 1; }
101+
def V2Write_1c_1F_1Flg : SchedWriteRes<[V2UnitF, V2UnitFlg]> { let Latency = 1; }
100102
def V2Write_1c_1I : SchedWriteRes<[V2UnitI]> { let Latency = 1; }
101103
def V2Write_1c_1M : SchedWriteRes<[V2UnitM]> { let Latency = 1; }
102104
def V2Write_1c_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 1; }
103105
def V2Write_1c_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 1; }
104106
def V2Write_2c_1M : SchedWriteRes<[V2UnitM]> { let Latency = 2; }
107+
def V2Write_2c_1M_1Flg : SchedWriteRes<[V2UnitM, V2UnitFlg]> { let Latency = 2; }
105108
def V2Write_3c_1M : SchedWriteRes<[V2UnitM]> { let Latency = 3; }
106109
def V2Write_2c_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
107110
def V2Write_3c_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 3; }
@@ -886,12 +889,12 @@ def V2Write_ArithI : SchedWriteVariant<[
886889
SchedVar<NoSchedPred, [V2Write_2c_1M]>]>;
887890

888891
def V2Write_ArithF : SchedWriteVariant<[
889-
SchedVar<IsCheapLSL, [V2Write_1c_1F]>,
890-
SchedVar<NoSchedPred, [V2Write_2c_1M]>]>;
892+
SchedVar<IsCheapLSL, [V2Write_1c_1F_1Flg]>,
893+
SchedVar<NoSchedPred, [V2Write_2c_1M_1Flg]>]>;
891894

892895
def V2Write_Logical : SchedWriteVariant<[
893-
SchedVar<NeoverseNoLSL, [V2Write_1c_1F]>,
894-
SchedVar<NoSchedPred, [V2Write_2c_1M]>]>;
896+
SchedVar<NeoverseNoLSL, [V2Write_1c_1F_1Flg]>,
897+
SchedVar<NoSchedPred, [V2Write_2c_1M_1Flg]>]>;
895898

896899
def V2Write_Extr : SchedWriteVariant<[
897900
SchedVar<IsRORImmIdiomPred, [V2Write_1c_1I]>,
@@ -1106,19 +1109,19 @@ def : InstRW<[V2Write_1c_1B_1R], (instrs BL, BLR)>;
11061109
// -----------------------------------------------------------------------------
11071110

11081111
// ALU, basic
1109-
// ALU, basic, flagset
11101112
def : SchedAlias<WriteI, V2Write_1c_1I>;
1111-
def : InstRW<[V2Write_1c_1F], (instregex "^(ADD|SUB)S[WX]r[ir]$",
1113+
1114+
// ALU, basic, flagset
1115+
def : InstRW<[V2Write_1c_1F_1Flg],
1116+
(instregex "^(ADD|SUB)S[WX]r[ir]$",
11121117
"^(ADC|SBC)S[WX]r$",
1113-
"^ANDS[WX]ri$")>;
1118+
"^ANDS[WX]ri$",
1119+
"^(AND|BIC)S[WX]rr$")>;
11141120
def : InstRW<[V2Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
11151121

11161122
// ALU, extend and shift
11171123
def : SchedAlias<WriteIEReg, V2Write_2c_1M>;
11181124

1119-
// Conditional compare
1120-
def : InstRW<[V2Write_1c_1F], (instregex "^CCM[NP][WX][ir]")>;
1121-
11221125
// Arithmetic, LSL shift, shift <= 4
11231126
// Arithmetic, flagset, LSL shift, shift <= 4
11241127
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
@@ -1129,6 +1132,9 @@ def : InstRW<[V2Write_ArithF],
11291132
// Arithmetic, immediate to logical address tag
11301133
def : InstRW<[V2Write_2c_1M], (instrs ADDG, SUBG)>;
11311134

1135+
// Conditional compare
1136+
def : InstRW<[V2Write_1c_1F_1Flg], (instregex "^CCM[NP][WX][ir]")>;
1137+
11321138
// Convert floating-point condition flags
11331139
// Flag manipulation instructions
11341140
def : WriteRes<WriteSys, []> { let Latency = 1; }
@@ -1138,8 +1144,10 @@ def : InstRW<[V2Write_2c_1M], (instrs IRG, IRGstack)>;
11381144

11391145
// Insert Tag Mask
11401146
// Subtract Pointer
1147+
def : InstRW<[V2Write_1c_1I], (instrs GMI, SUBP)>;
1148+
11411149
// Subtract Pointer, flagset
1142-
def : InstRW<[V2Write_1c_1I], (instrs GMI, SUBP, SUBPS)>;
1150+
def : InstRW<[V2Write_1c_1F_1Flg], (instrs SUBPS)>;
11431151

11441152
// Logical, shift, no flagset
11451153
def : InstRW<[V2Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;

llvm/test/tools/llvm-mca/AArch64/Neoverse/512tvb-sve-instructions.s

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -21,22 +21,25 @@ abs z0.b, p0/m, z0.b
2121
# CHECK-NEXT: [0.1] - V1UnitB
2222
# CHECK-NEXT: [1.0] - V1UnitD
2323
# CHECK-NEXT: [1.1] - V1UnitD
24-
# CHECK-NEXT: [2] - V1UnitL2
25-
# CHECK-NEXT: [3.0] - V1UnitL01
26-
# CHECK-NEXT: [3.1] - V1UnitL01
27-
# CHECK-NEXT: [4] - V1UnitM0
28-
# CHECK-NEXT: [5] - V1UnitM1
29-
# CHECK-NEXT: [6.0] - V1UnitS
30-
# CHECK-NEXT: [6.1] - V1UnitS
31-
# CHECK-NEXT: [7] - V1UnitV0
32-
# CHECK-NEXT: [8] - V1UnitV1
33-
# CHECK-NEXT: [9] - V1UnitV2
34-
# CHECK-NEXT: [10] - V1UnitV3
24+
# CHECK-NEXT: [2.0] - V1UnitFlg
25+
# CHECK-NEXT: [2.1] - V1UnitFlg
26+
# CHECK-NEXT: [2.2] - V1UnitFlg
27+
# CHECK-NEXT: [3] - V1UnitL2
28+
# CHECK-NEXT: [4.0] - V1UnitL01
29+
# CHECK-NEXT: [4.1] - V1UnitL01
30+
# CHECK-NEXT: [5] - V1UnitM0
31+
# CHECK-NEXT: [6] - V1UnitM1
32+
# CHECK-NEXT: [7.0] - V1UnitS
33+
# CHECK-NEXT: [7.1] - V1UnitS
34+
# CHECK-NEXT: [8] - V1UnitV0
35+
# CHECK-NEXT: [9] - V1UnitV1
36+
# CHECK-NEXT: [10] - V1UnitV2
37+
# CHECK-NEXT: [11] - V1UnitV3
3538

3639
# CHECK: Resource pressure per iteration:
37-
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10]
38-
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - -
40+
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11]
41+
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - -
3942

4043
# CHECK: Resource pressure by instruction:
41-
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10] Instructions:
42-
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - abs z0.b, p0/m, z0.b
44+
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] Instructions:
45+
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z0.b, p0/m, z0.b

0 commit comments

Comments
 (0)