Skip to content

Commit 78a871a

Browse files
dpenry authored and davemgreen committed
[ARM] Use ProcResGroup in Cortex-M7 scheduling model
ProcResGroup is used to model structural hazards on FP issue, where some instructions take up two issue slots and others one, as well as similar structural hazards on load issue, where some instructions take up two load lanes and others one. Differential Revision: https://reviews.llvm.org/D98977
1 parent 3c54762 commit 78a871a

File tree

5 files changed

+97
-51
lines changed

5 files changed

+97
-51
lines changed

llvm/lib/Target/ARM/ARMScheduleM7.td

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ def CortexM7Model : SchedMachineModel {
1919
let CompleteModel = 0;
2020
}
2121

22+
let SchedModel = CortexM7Model in {
23+
2224
//===--------------------------------------------------------------------===//
2325
// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
2426
// pipe. The stages relevant to scheduling are as follows:
@@ -33,22 +35,24 @@ def CortexM7Model : SchedMachineModel {
3335
// for scheduling, so simple ALU operations executing in EX2 will have
3436
// ReadAdvance<0> (the default) for their source operands and Latency = 1.
3537

36-
def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
38+
def M7UnitLoadL : ProcResource<1> { let BufferSize = 0; }
39+
def M7UnitLoadH : ProcResource<1> { let BufferSize = 0; }
40+
def M7UnitLoad : ProcResGroup<[M7UnitLoadL,M7UnitLoadH]> { let BufferSize = 0; }
3741
def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
3842
def M7UnitALU : ProcResource<2>;
3943
def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
4044
def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
4145
def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
4246
def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
4347
def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
44-
def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
48+
def M7UnitVPortL : ProcResource<1> { let BufferSize = 0; }
49+
def M7UnitVPortH : ProcResource<1> { let BufferSize = 0; }
50+
def M7UnitVPort : ProcResGroup<[M7UnitVPortL,M7UnitVPortH]> { let BufferSize = 0; }
4551
def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
4652

4753
//===---------------------------------------------------------------------===//
4854
// Subtarget-specific SchedWrite types which map ProcResources and set latency.
4955

50-
let SchedModel = CortexM7Model in {
51-
5256
def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
5357

5458
// Basic ALU with shifts.
@@ -105,39 +109,42 @@ def : WriteRes<WriteNoop, []> { let Latency = 0; }
105109
// Floating point conversions.
106110
def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
107111
def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
112+
def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]> {
113+
let Latency = 3;
114+
}
108115

109116
// The FP pipeline has a latency of 3 cycles.
110117
// ALU operations (32/64-bit). These go down the FP pipeline.
111118
def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
112-
def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
119+
def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
113120
let Latency = 4;
114121
let BeginGroup = 1;
115122
}
116123

117124
// Multiplication
118125
def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
119-
def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
126+
def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
120127
let Latency = 7;
121128
let BeginGroup = 1;
122129
}
123130

124131
// Multiply-accumulate. FPMAC goes down the FP Pipeline.
125132
def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
126-
def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
133+
def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
127134
let Latency = 11;
128135
let BeginGroup = 1;
129136
}
130137

131138
// Division. Effective scheduling latency is 3, though real latency is larger
132139
def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
133-
def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
140+
def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
134141
let Latency = 30;
135142
let BeginGroup = 1;
136143
}
137144

138145
// Square-root. Effective scheduling latency is 3; real latency is larger
139146
def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
140-
def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
147+
def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
141148
let Latency = 30;
142149
let BeginGroup = 1;
143150
}
@@ -283,12 +290,12 @@ def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
283290
// VFP loads and stores
284291

285292
def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
286-
def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> {
293+
def M7LoadDP : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH, M7UnitVPortL, M7UnitVPortH]> {
287294
let Latency = 2;
288295
let SingleIssue = 1;
289296
}
290297
def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
291-
def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> {
298+
def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH]> {
292299
let SingleIssue = 1;
293300
}
294301

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple arm-arm-eabi -mcpu=cortex-m7 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck %s
3+
---
4+
name: test_groups
5+
alignment: 2
6+
tracksRegLiveness: true
7+
liveins:
8+
- { reg: '$d0' }
9+
- { reg: '$r0' }
10+
- { reg: '$r1' }
11+
- { reg: '$r2' }
12+
- { reg: '$r3' }
13+
- { reg: '$r4' }
14+
frameInfo:
15+
maxAlignment: 1
16+
maxCallFrameSize: 0
17+
machineFunctionInfo: {}
18+
body: |
19+
bb.0:
20+
liveins: $d0, $r0, $r1, $r2, $r3, $r4
21+
22+
; CHECK-LABEL: name: test_groups
23+
; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
24+
; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
25+
; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
26+
; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
27+
; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
28+
; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
29+
; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
30+
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
31+
renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
32+
renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
33+
VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
34+
renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
35+
t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
36+
renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
37+
tBX_RET 14 /* CC::al */, $noreg, implicit $d0
38+
39+
...

llvm/test/tools/llvm-mca/ARM/m7-fp.s

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -253,23 +253,23 @@ vstr.f32 s0, [r0]
253253
# CHECK-NEXT: [0.0] - M7UnitALU
254254
# CHECK-NEXT: [0.1] - M7UnitALU
255255
# CHECK-NEXT: [1] - M7UnitBranch
256-
# CHECK-NEXT: [2.0] - M7UnitLoad
257-
# CHECK-NEXT: [2.1] - M7UnitLoad
258-
# CHECK-NEXT: [3] - M7UnitMAC
259-
# CHECK-NEXT: [4] - M7UnitSIMD
260-
# CHECK-NEXT: [5] - M7UnitShift1
261-
# CHECK-NEXT: [6] - M7UnitShift2
262-
# CHECK-NEXT: [7] - M7UnitStore
263-
# CHECK-NEXT: [8] - M7UnitVFP
264-
# CHECK-NEXT: [9.0] - M7UnitVPort
265-
# CHECK-NEXT: [9.1] - M7UnitVPort
256+
# CHECK-NEXT: [2] - M7UnitLoadH
257+
# CHECK-NEXT: [3] - M7UnitLoadL
258+
# CHECK-NEXT: [4] - M7UnitMAC
259+
# CHECK-NEXT: [5] - M7UnitSIMD
260+
# CHECK-NEXT: [6] - M7UnitShift1
261+
# CHECK-NEXT: [7] - M7UnitShift2
262+
# CHECK-NEXT: [8] - M7UnitStore
263+
# CHECK-NEXT: [9] - M7UnitVFP
264+
# CHECK-NEXT: [10] - M7UnitVPortH
265+
# CHECK-NEXT: [11] - M7UnitVPortL
266266

267267
# CHECK: Resource pressure per iteration:
268-
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
269-
# CHECK-NEXT: - - - 1.00 1.00 - - - - 2.00 104.00 81.00 81.00
268+
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
269+
# CHECK-NEXT: - - - 1.50 1.50 - - - - 2.00 104.00 81.00 81.00
270270

271271
# CHECK: Resource pressure by instruction:
272-
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
272+
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
273273
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vabs.f32 s0, s2
274274
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vabs.f64 d0, d2
275275
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vadd.f32 s0, s2, s1
@@ -384,7 +384,7 @@ vstr.f32 s0, [r0]
384384
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsqrt.f64 d0, d2
385385
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vsub.f32 s0, s2, s1
386386
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsub.f64 d0, d2, d1
387-
# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 1.00 1.00 vldr d0, [r0]
387+
# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r0]
388388
# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.50 vldr s0, [r0]
389389
# CHECK-NEXT: - - - - - - - - - 1.00 - 1.00 1.00 vstr d0, [r0]
390390
# CHECK-NEXT: - - - - - - - - - 1.00 - 0.50 0.50 vstr s0, [r0]

llvm/test/tools/llvm-mca/ARM/m7-int.s

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -862,23 +862,23 @@ yield
862862
# CHECK-NEXT: [0.0] - M7UnitALU
863863
# CHECK-NEXT: [0.1] - M7UnitALU
864864
# CHECK-NEXT: [1] - M7UnitBranch
865-
# CHECK-NEXT: [2.0] - M7UnitLoad
866-
# CHECK-NEXT: [2.1] - M7UnitLoad
867-
# CHECK-NEXT: [3] - M7UnitMAC
868-
# CHECK-NEXT: [4] - M7UnitSIMD
869-
# CHECK-NEXT: [5] - M7UnitShift1
870-
# CHECK-NEXT: [6] - M7UnitShift2
871-
# CHECK-NEXT: [7] - M7UnitStore
872-
# CHECK-NEXT: [8] - M7UnitVFP
873-
# CHECK-NEXT: [9.0] - M7UnitVPort
874-
# CHECK-NEXT: [9.1] - M7UnitVPort
865+
# CHECK-NEXT: [2] - M7UnitLoadH
866+
# CHECK-NEXT: [3] - M7UnitLoadL
867+
# CHECK-NEXT: [4] - M7UnitMAC
868+
# CHECK-NEXT: [5] - M7UnitSIMD
869+
# CHECK-NEXT: [6] - M7UnitShift1
870+
# CHECK-NEXT: [7] - M7UnitShift2
871+
# CHECK-NEXT: [8] - M7UnitStore
872+
# CHECK-NEXT: [9] - M7UnitVFP
873+
# CHECK-NEXT: [10] - M7UnitVPortH
874+
# CHECK-NEXT: [11] - M7UnitVPortL
875875

876876
# CHECK: Resource pressure per iteration:
877-
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
877+
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
878878
# CHECK-NEXT: 125.00 125.00 - 35.00 35.00 43.00 90.00 88.00 2.00 45.00 - - -
879879

880880
# CHECK: Resource pressure by instruction:
881-
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
881+
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
882882
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adc r0, r1, #0
883883
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1, #0
884884
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1

llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,26 +34,26 @@ vldr d0, [r1]
3434
# CHECK-NEXT: [0.0] - M7UnitALU
3535
# CHECK-NEXT: [0.1] - M7UnitALU
3636
# CHECK-NEXT: [1] - M7UnitBranch
37-
# CHECK-NEXT: [2.0] - M7UnitLoad
38-
# CHECK-NEXT: [2.1] - M7UnitLoad
39-
# CHECK-NEXT: [3] - M7UnitMAC
40-
# CHECK-NEXT: [4] - M7UnitSIMD
41-
# CHECK-NEXT: [5] - M7UnitShift1
42-
# CHECK-NEXT: [6] - M7UnitShift2
43-
# CHECK-NEXT: [7] - M7UnitStore
44-
# CHECK-NEXT: [8] - M7UnitVFP
45-
# CHECK-NEXT: [9.0] - M7UnitVPort
46-
# CHECK-NEXT: [9.1] - M7UnitVPort
37+
# CHECK-NEXT: [2] - M7UnitLoadH
38+
# CHECK-NEXT: [3] - M7UnitLoadL
39+
# CHECK-NEXT: [4] - M7UnitMAC
40+
# CHECK-NEXT: [5] - M7UnitSIMD
41+
# CHECK-NEXT: [6] - M7UnitShift1
42+
# CHECK-NEXT: [7] - M7UnitShift2
43+
# CHECK-NEXT: [8] - M7UnitStore
44+
# CHECK-NEXT: [9] - M7UnitVFP
45+
# CHECK-NEXT: [10] - M7UnitVPortH
46+
# CHECK-NEXT: [11] - M7UnitVPortL
4747

4848
# CHECK: Resource pressure per iteration:
49-
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
50-
# CHECK-NEXT: 1.00 1.00 - - 1.00 - - - - - - - 2.00
49+
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
50+
# CHECK-NEXT: 1.00 1.00 - 1.00 1.00 - - - - - - 1.00 1.00
5151

5252
# CHECK: Resource pressure by instruction:
53-
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
53+
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
5454
# CHECK-NEXT: - 1.00 - - - - - - - - - - - add.w r1, r1, #1
5555
# CHECK-NEXT: 1.00 - - - - - - - - - - - - add.w r1, r1, #2
56-
# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1]
56+
# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r1]
5757

5858
# CHECK: Timeline view:
5959
# CHECK-NEXT: Index 012345

0 commit comments

Comments
 (0)