@@ -19,6 +19,8 @@ def CortexM7Model : SchedMachineModel {
19
19
let CompleteModel = 0;
20
20
}
21
21
22
+ let SchedModel = CortexM7Model in {
23
+
22
24
//===--------------------------------------------------------------------===//
23
25
// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
24
26
// pipe. The stages relevant to scheduling are as follows:
@@ -33,22 +35,24 @@ def CortexM7Model : SchedMachineModel {
33
35
// for scheduling, so simple ALU operations executing in EX2 will have
34
36
// ReadAdvance<0> (the default) for their source operands and Latency = 1.
35
37
36
- def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
38
+ def M7UnitLoadL : ProcResource<1> { let BufferSize = 0; }
39
+ def M7UnitLoadH : ProcResource<1> { let BufferSize = 0; }
40
+ def M7UnitLoad : ProcResGroup<[M7UnitLoadL,M7UnitLoadH]> { let BufferSize = 0; }
37
41
def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
38
42
def M7UnitALU : ProcResource<2>;
39
43
def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
40
44
def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
41
45
def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
42
46
def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
43
47
def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
44
- def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
48
+ def M7UnitVPortL : ProcResource<1> { let BufferSize = 0; }
49
+ def M7UnitVPortH : ProcResource<1> { let BufferSize = 0; }
50
+ def M7UnitVPort : ProcResGroup<[M7UnitVPortL,M7UnitVPortH]> { let BufferSize = 0; }
45
51
def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
46
52
47
53
//===---------------------------------------------------------------------===//
48
54
// Subtarget-specific SchedWrite types which map ProcResources and set latency.
49
55
50
- let SchedModel = CortexM7Model in {
51
-
52
56
def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
53
57
54
58
// Basic ALU with shifts.
@@ -105,39 +109,42 @@ def : WriteRes<WriteNoop, []> { let Latency = 0; }
105
109
// Floating point conversions.
106
110
def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
107
111
def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
112
+ def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]> {
113
+ let Latency = 3;
114
+ }
108
115
109
116
// The FP pipeline has a latency of 3 cycles.
110
117
// ALU operations (32/64-bit). These go down the FP pipeline.
111
118
def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
112
- def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort ]> {
119
+ def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH ]> {
113
120
let Latency = 4;
114
121
let BeginGroup = 1;
115
122
}
116
123
117
124
// Multiplication
118
125
def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
119
- def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort ]> {
126
+ def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH ]> {
120
127
let Latency = 7;
121
128
let BeginGroup = 1;
122
129
}
123
130
124
131
// Multiply-accumulate. FPMAC goes down the FP Pipeline.
125
132
def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
126
- def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort ]> {
133
+ def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH ]> {
127
134
let Latency = 11;
128
135
let BeginGroup = 1;
129
136
}
130
137
131
138
// Division. Effective scheduling latency is 3, though real latency is larger
132
139
def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
133
- def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort ]> {
140
+ def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH ]> {
134
141
let Latency = 30;
135
142
let BeginGroup = 1;
136
143
}
137
144
138
145
// Square-root. Effective scheduling latency is 3; real latency is larger
139
146
def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
140
- def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort ]> {
147
+ def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH ]> {
141
148
let Latency = 30;
142
149
let BeginGroup = 1;
143
150
}
@@ -283,12 +290,12 @@ def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
283
290
// VFP loads and stores
284
291
285
292
def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
286
- def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort ]> {
293
+ def M7LoadDP : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH, M7UnitVPortL, M7UnitVPortH ]> {
287
294
let Latency = 2;
288
295
let SingleIssue = 1;
289
296
}
290
297
def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
291
- def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort ]> {
298
+ def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH ]> {
292
299
let SingleIssue = 1;
293
300
}
294
301
0 commit comments