Skip to content

Commit 8e35762

Browse files
committed
[AArch64] Initial sched model for Neoverse N3
1 parent 24bbf27 commit 8e35762

File tree

4 files changed: +157 -157 lines changed

4 files changed: +157 -157 lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -230,19 +230,26 @@ def N3Write_8c_1V_1L : SchedWriteRes<[N3UnitV, N3UnitL]> {
230230
let NumMicroOps = 2;
231231
}
232232

233-
//===----------------------------------------------------------------------===//
234-
// Define generic 3 micro-op types
235-
236-
def N3Write_5c_1M0_2V : SchedWriteRes<[N3UnitM0, N3UnitV, N3UnitV]> {
233+
def N3Write_5c_1M0_1V : SchedWriteRes<[N3UnitM0, N3UnitV]> {
237234
let Latency = 5;
238-
let NumMicroOps = 3;
235+
let NumMicroOps = 2;
239236
}
240237

241-
def N3Write_5c_1V1_2V : SchedWriteRes<[N3UnitV1, N3UnitV, N3UnitV]> {
238+
def N3Write_5c_1V1_1V : SchedWriteRes<[N3UnitV1, N3UnitV]> {
242239
let Latency = 5;
243-
let NumMicroOps = 3;
240+
let NumMicroOps = 2;
244241
}
245242

243+
def N3Write_8c_1M0_1V : SchedWriteRes<[N3UnitM0, N3UnitV]> {
244+
let Latency = 8;
245+
let NumMicroOps = 2;
246+
}
247+
248+
//===----------------------------------------------------------------------===//
249+
// Define generic 3 micro-op types
250+
251+
252+
246253
def N3Write_6c_3V : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV]> {
247254
let Latency = 6;
248255
let NumMicroOps = 3;
@@ -263,10 +270,6 @@ def N3Write_8c_2L_1V : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitV]> {
263270
let NumMicroOps = 3;
264271
}
265272

266-
def N3Write_8c_1M0_2V : SchedWriteRes<[N3UnitM0, N3UnitV, N3UnitV]> {
267-
let Latency = 8;
268-
let NumMicroOps = 3;
269-
}
270273

271274
def N3Write_7c_2V_1V1 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV1]> {
272275
let Latency = 7;
@@ -278,6 +281,11 @@ def N3Write_5c_2V_1V1 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV1]> {
278281
let NumMicroOps = 3;
279282
}
280283

284+
def N3Write_7c_1M_1M0_1V : SchedWriteRes<[N3UnitM, N3UnitM0, N3UnitV]> {
285+
let Latency = 7;
286+
let NumMicroOps = 3;
287+
}
288+
281289
//===----------------------------------------------------------------------===//
282290
// Define generic 4 micro-op types
283291

@@ -351,14 +359,6 @@ def N3Write_2c_1L01_2I_1V : SchedWriteRes<[N3UnitL01, N3UnitI, N3UnitI, N3UnitV]
351359
let NumMicroOps = 4;
352360
}
353361

354-
//===----------------------------------------------------------------------===//
355-
// Define generic 5 micro-op types
356-
357-
def N3Write_7c_2M_1M0_2V : SchedWriteRes<[N3UnitM, N3UnitM, N3UnitM0, N3UnitV, N3UnitV]> {
358-
let Latency = 7;
359-
let NumMicroOps = 5;
360-
}
361-
362362
//===----------------------------------------------------------------------===//
363363
// Define generic 6 micro-op types
364364

@@ -902,7 +902,7 @@ def : InstRW<[N3Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
902902
def : InstRW<[N3Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
903903

904904
// FP transfer, from gen to high half of vec reg
905-
def : InstRW<[N3Write_5c_1M0_2V], (instrs FMOVXDHighr)>;
905+
def : InstRW<[N3Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
906906

907907
// FP transfer, from vec to gen reg
908908
def : SchedAlias<WriteFCopy, N3Write_3c_1V>;
@@ -1017,7 +1017,7 @@ def : InstRW<[N3Write_4c_1V1], (instregex "^[SU]ABAL?v",
10171017
def : InstRW<[N3Write_3c_1V1], (instregex "^[SU]?ADDL?Vv4i(16|32)v$")>;
10181018

10191019
// ASIMD arith, reduce, 8B/8H
1020-
def : InstRW<[N3Write_5c_1V1_2V], (instregex "^[SU]?ADDL?Vv8i(8|16)v$")>;
1020+
def : InstRW<[N3Write_5c_1V1_1V], (instregex "^[SU]?ADDL?Vv8i(8|16)v$")>;
10211021

10221022
// ASIMD arith, reduce, 16B
10231023
def : InstRW<[N3Write_6c_2V1], (instregex "^[SU]?ADDL?Vv16i8v$")>;
@@ -1033,7 +1033,7 @@ def : InstRW<[N3Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>;
10331033
def : InstRW<[N3Write_3c_1V1], (instregex "^[SU](MAX|MIN)Vv4i(16|32)v$")>;
10341034

10351035
// ASIMD max/min, reduce, 8B/8H
1036-
def : InstRW<[N3Write_5c_1V1_2V], (instregex "^[SU](MAX|MIN)Vv8i(8|16)v$")>;
1036+
def : InstRW<[N3Write_5c_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i(8|16)v$")>;
10371037

10381038
// ASIMD max/min, reduce, 16B
10391039
def : InstRW<[N3Write_6c_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
@@ -1278,7 +1278,7 @@ def : InstRW<[N3Write_2c_2V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
12781278
"^UMOVvi(8|16|32|64)$")>;
12791279

12801280
// ASIMD transfer, gen reg to element
1281-
def : InstRW<[N3Write_5c_1M0_2V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1281+
def : InstRW<[N3Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
12821282

12831283
// ASIMD load instructions
12841284
// -----------------------------------------------------------------------------
@@ -1587,7 +1587,7 @@ def : InstRW<[N3Write_2c_1M],
15871587
"^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
15881588

15891589
// Predicate counting vector, active predicate
1590-
def : InstRW<[N3Write_7c_2M_1M0_2V],
1590+
def : InstRW<[N3Write_7c_1M_1M0_1V],
15911591
(instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
15921592

15931593
// Predicate logical
@@ -1741,7 +1741,7 @@ def : InstRW<[N3Write_4c_1V0], (instregex "^CMLA_ZZZ_[BHS]$", "^CMLA_ZZZI_[HS]$"
17411741
def : InstRW<[N3Write_5c_2V0], (instrs CMLA_ZZZ_D)>;
17421742

17431743
// Conditional extract operations, scalar form
1744-
def : InstRW<[N3Write_8c_1M0_2V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1744+
def : InstRW<[N3Write_8c_1M0_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
17451745

17461746
// Conditional extract operations, SIMD&FP scalar and vector forms
17471747
def : InstRW<[N3Write_2c_1V], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
@@ -1759,7 +1759,7 @@ def : InstRW<[N3Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
17591759
def : InstRW<[N3Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
17601760

17611761
// Copy, scalar
1762-
def : InstRW<[N3Write_5c_1M0_2V], (instregex "^CPY_ZPmR_[BHSD]$")>;
1762+
def : InstRW<[N3Write_5c_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
17631763

17641764
// Copy, scalar SIMD&FP or imm
17651765
def : InstRW<[N3Write_2c_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
@@ -1820,13 +1820,13 @@ def : InstRW<[N3Write_2c_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
18201820
def : InstRW<[N3Write_2c_1V], (instregex "^INDEX_II_[BHS]$")>;
18211821

18221822
// Horizontal operations, B, H, S form, scalar, immediate operands / scalar operands only / immediate, scalar operands
1823-
def : InstRW<[N3Write_5c_1M0_2V], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1823+
def : InstRW<[N3Write_5c_1M0_1V], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
18241824

18251825
// Horizontal operations, D form, immediate operands only
18261826
def : InstRW<[N3Write_2c_1V], (instrs INDEX_II_D)>;
18271827

18281828
// Horizontal operations, D form, scalar, immediate operands / scalar operands only / immediate, scalar operands
1829-
def : InstRW<[N3Write_5c_1M0_2V], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1829+
def : InstRW<[N3Write_5c_1M0_1V], (instregex "^INDEX_(IR|RI|RR)_D$")>;
18301830

18311831
// Logical
18321832
def : InstRW<[N3Write_2c_1V],

llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2091,7 +2091,7 @@ drps
20912091
# CHECK-NEXT: 1 3 0.50 fmov x20, d31
20922092
# CHECK-NEXT: 1 3 1.00 fmov d1, x15
20932093
# CHECK-NEXT: 1 3 0.50 fmov x3, v12.d[1]
2094-
# CHECK-NEXT: 3 5 1.00 fmov v1.d[1], x19
2094+
# CHECK-NEXT: 2 5 1.00 fmov v1.d[1], x19
20952095
# CHECK-NEXT: 1 2 0.50 fmov s2, #0.12500000
20962096
# CHECK-NEXT: 1 2 0.50 fmov s3, #1.00000000
20972097
# CHECK-NEXT: 1 2 0.50 fmov d30, #16.00000000
@@ -2557,7 +2557,7 @@ drps
25572557

25582558
# CHECK: Resource pressure per iteration:
25592559
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
2560-
# CHECK-NEXT: 11.00 11.00 33.00 33.00 99.33 163.33 163.33 345.50 200.50 144.00 144.00 185.00 65.00
2560+
# CHECK-NEXT: 11.00 11.00 33.00 33.00 99.33 163.33 163.33 345.50 200.50 144.00 144.00 184.50 64.50
25612561

25622562
# CHECK: Resource pressure by instruction:
25632563
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
@@ -3275,7 +3275,7 @@ drps
32753275
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov x20, d31
32763276
# CHECK-NEXT: - - - - - - - 1.00 - - - - - fmov d1, x15
32773277
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov x3, v12.d[1]
3278-
# CHECK-NEXT: - - - - - - - 1.00 - - - 1.00 1.00 fmov v1.d[1], x19
3278+
# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 fmov v1.d[1], x19
32793279
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov s2, #0.12500000
32803280
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov s3, #1.00000000
32813281
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov d30, #16.00000000

llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1441,10 +1441,10 @@ zip2 v0.8h, v0.8h, v0.8h
14411441
# CHECK-NEXT: 1 2 0.50 mov v2.h[1], v0.h[1]
14421442
# CHECK-NEXT: 1 2 0.50 mov v2.s[2], v0.s[2]
14431443
# CHECK-NEXT: 1 2 0.50 mov v2.d[1], v0.d[1]
1444-
# CHECK-NEXT: 3 5 1.00 mov v0.b[0], w8
1445-
# CHECK-NEXT: 3 5 1.00 mov v0.h[1], w8
1446-
# CHECK-NEXT: 3 5 1.00 mov v0.s[2], w8
1447-
# CHECK-NEXT: 3 5 1.00 mov v0.d[1], x8
1444+
# CHECK-NEXT: 2 5 1.00 mov v0.b[0], w8
1445+
# CHECK-NEXT: 2 5 1.00 mov v0.h[1], w8
1446+
# CHECK-NEXT: 2 5 1.00 mov v0.s[2], w8
1447+
# CHECK-NEXT: 2 5 1.00 mov v0.d[1], x8
14481448
# CHECK-NEXT: 1 2 0.50 mov v0.16b, v0.16b
14491449
# CHECK-NEXT: 1 2 0.50 mov v0.8b, v0.8b
14501450
# CHECK-NEXT: 1 2 0.50 movi d15, #0xff00ff00ff00ff
@@ -2163,7 +2163,7 @@ zip2 v0.8h, v0.8h, v0.8h
21632163

21642164
# CHECK: Resource pressure per iteration:
21652165
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
2166-
# CHECK-NEXT: - - - - 39.00 57.50 57.50 18.75 7.75 7.75 7.75 651.00 586.00
2166+
# CHECK-NEXT: - - - - 39.00 57.50 57.50 18.75 7.75 7.75 7.75 649.00 584.00
21672167

21682168
# CHECK: Resource pressure by instruction:
21692169
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
@@ -2530,10 +2530,10 @@ zip2 v0.8h, v0.8h, v0.8h
25302530
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.h[1], v0.h[1]
25312531
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.s[2], v0.s[2]
25322532
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.d[1], v0.d[1]
2533-
# CHECK-NEXT: - - - - - - - 1.00 - - - 1.00 1.00 mov v0.b[0], w8
2534-
# CHECK-NEXT: - - - - - - - 1.00 - - - 1.00 1.00 mov v0.h[1], w8
2535-
# CHECK-NEXT: - - - - - - - 1.00 - - - 1.00 1.00 mov v0.s[2], w8
2536-
# CHECK-NEXT: - - - - - - - 1.00 - - - 1.00 1.00 mov v0.d[1], x8
2533+
# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.b[0], w8
2534+
# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.h[1], w8
2535+
# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.s[2], w8
2536+
# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.d[1], x8
25372537
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v0.16b, v0.16b
25382538
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v0.8b, v0.8b
25392539
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 movi d15, #0xff00ff00ff00ff

0 commit comments

Comments
 (0)