@@ -52,7 +52,7 @@ def Znver3Model : SchedMachineModel {
52
52
int VecLoadLatency = 7;
53
53
// Latency of a simple store operation.
54
54
int StoreLatency = 1;
55
- // FIXME
55
+ // FIXME:
56
56
let HighLatency = 25; // FIXME: any better choice?
57
57
// AMD SOG 19h, 2.8 Optimizing Branching
58
58
// The branch misprediction penalty is in the range from 11 to 18 cycles,
@@ -193,83 +193,83 @@ def Zn3Int : ProcResGroup<[Zn3ALU0, Zn3AGU0, Zn3BRU0, // scheduler 0
193
193
// <...>, and six FPU pipes.
194
194
// Agner, 22.10 Floating point execution pipes
195
195
// There are six floating point/vector execution pipes,
196
- def Zn3FPP0 : ProcResource<1>;
197
- def Zn3FPP1 : ProcResource<1>;
198
- def Zn3FPP2 : ProcResource<1>;
199
- def Zn3FPP3 : ProcResource<1>;
200
- def Zn3FPP45 : ProcResource<2>;
196
+ def Zn3FP0 : ProcResource<1>;
197
+ def Zn3FP1 : ProcResource<1>;
198
+ def Zn3FP2 : ProcResource<1>;
199
+ def Zn3FP3 : ProcResource<1>;
200
+ def Zn3FP45 : ProcResource<2>;
201
201
202
202
//
203
203
// Execution Units
204
204
//===----------------------------------------------------------------------===//
205
205
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
206
206
207
207
// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
208
- defvar Zn3FPFMul0 = Zn3FPP0 ;
209
- defvar Zn3FPFMul1 = Zn3FPP1 ;
208
+ defvar Zn3FPFMul0 = Zn3FP0 ;
209
+ defvar Zn3FPFMul1 = Zn3FP1 ;
210
210
211
211
// (v)FADD*
212
- defvar Zn3FPFAdd0 = Zn3FPP2 ;
213
- defvar Zn3FPFAdd1 = Zn3FPP3 ;
212
+ defvar Zn3FPFAdd0 = Zn3FP2 ;
213
+ defvar Zn3FPFAdd1 = Zn3FP3 ;
214
214
215
215
// All convert operations except pack/unpack
216
- defvar Zn3FPFCvt0 = Zn3FPP2 ;
217
- defvar Zn3FPFCvt1 = Zn3FPP3 ;
216
+ defvar Zn3FPFCvt0 = Zn3FP2 ;
217
+ defvar Zn3FPFCvt1 = Zn3FP3 ;
218
218
219
219
// All Divide and Square Root except Reciprocal Approximation
220
220
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
221
221
// FDIV unit can support 2 simultaneous operations in flight
222
222
// even though it occupies a single pipe.
223
223
// FIXME: BufferSize=2 ?
224
- defvar Zn3FPFDiv = Zn3FPP1 ;
224
+ defvar Zn3FPFDiv = Zn3FP1 ;
225
225
226
226
// Moves and Logical operations on Floating Point Data Types
227
- defvar Zn3FPFMisc0 = Zn3FPP0 ;
228
- defvar Zn3FPFMisc1 = Zn3FPP1 ;
229
- defvar Zn3FPFMisc2 = Zn3FPP2 ;
230
- defvar Zn3FPFMisc3 = Zn3FPP3 ;
227
+ defvar Zn3FPFMisc0 = Zn3FP0 ;
228
+ defvar Zn3FPFMisc1 = Zn3FP1 ;
229
+ defvar Zn3FPFMisc2 = Zn3FP2 ;
230
+ defvar Zn3FPFMisc3 = Zn3FP3 ;
231
231
232
232
// Integer Adds, Subtracts, and Compares
233
233
// Some complex VADD operations are not available in all pipes.
234
- defvar Zn3FPVAdd0 = Zn3FPP0 ;
235
- defvar Zn3FPVAdd1 = Zn3FPP1 ;
236
- defvar Zn3FPVAdd2 = Zn3FPP2 ;
237
- defvar Zn3FPVAdd3 = Zn3FPP3 ;
234
+ defvar Zn3FPVAdd0 = Zn3FP0 ;
235
+ defvar Zn3FPVAdd1 = Zn3FP1 ;
236
+ defvar Zn3FPVAdd2 = Zn3FP2 ;
237
+ defvar Zn3FPVAdd3 = Zn3FP3 ;
238
238
239
239
// Integer Multiplies, SAD, Blendvb
240
- defvar Zn3FPVMul0 = Zn3FPP0 ;
241
- defvar Zn3FPVMul1 = Zn3FPP3 ;
240
+ defvar Zn3FPVMul0 = Zn3FP0 ;
241
+ defvar Zn3FPVMul1 = Zn3FP3 ;
242
242
243
243
// Data Shuffles, Packs, Unpacks, Permute
244
244
// Some complex shuffle operations are only available in pipe1.
245
- defvar Zn3FPVShuf = Zn3FPP1 ;
246
- defvar Zn3FPVShufAux = Zn3FPP2 ;
245
+ defvar Zn3FPVShuf = Zn3FP1 ;
246
+ defvar Zn3FPVShufAux = Zn3FP2 ;
247
247
248
248
// Bit Shift Left/Right operations
249
- defvar Zn3FPVShift0 = Zn3FPP1 ;
250
- defvar Zn3FPVShift1 = Zn3FPP2 ;
249
+ defvar Zn3FPVShift0 = Zn3FP1 ;
250
+ defvar Zn3FPVShift1 = Zn3FP2 ;
251
251
252
252
// Moves and Logical operations on Packed Integer Data Types
253
- defvar Zn3FPVMisc0 = Zn3FPP0 ;
254
- defvar Zn3FPVMisc1 = Zn3FPP1 ;
255
- defvar Zn3FPVMisc2 = Zn3FPP2 ;
256
- defvar Zn3FPVMisc3 = Zn3FPP3 ;
253
+ defvar Zn3FPVMisc0 = Zn3FP0 ;
254
+ defvar Zn3FPVMisc1 = Zn3FP1 ;
255
+ defvar Zn3FPVMisc2 = Zn3FP2 ;
256
+ defvar Zn3FPVMisc3 = Zn3FP3 ;
257
257
258
258
// *AES*
259
- defvar Zn3FPAES0 = Zn3FPP0 ;
260
- defvar Zn3FPAES1 = Zn3FPP1 ;
259
+ defvar Zn3FPAES0 = Zn3FP0 ;
260
+ defvar Zn3FPAES1 = Zn3FP1 ;
261
261
262
262
// *CLM*
263
- defvar Zn3FPCLM0 = Zn3FPP0 ;
264
- defvar Zn3FPCLM1 = Zn3FPP1 ;
263
+ defvar Zn3FPCLM0 = Zn3FP0 ;
264
+ defvar Zn3FPCLM1 = Zn3FP1 ;
265
265
266
266
// Execution pipeline grouping
267
267
//===----------------------------------------------------------------------===//
268
268
269
269
// AMD SOG 19h, 2.11 Floating-Point Unit
270
270
// Stores and floating point to general purpose register transfer
271
271
// have 2 dedicated pipelines (pipe 5 and 6).
272
- def Zn3FPU0123 : ProcResGroup<[Zn3FPP0, Zn3FPP1, Zn3FPP2, Zn3FPP3 ]>;
272
+ def Zn3FPU0123 : ProcResGroup<[Zn3FP0, Zn3FP1, Zn3FP2, Zn3FP3 ]>;
273
273
274
274
// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
275
275
def Zn3FPFMul01 : ProcResGroup<[Zn3FPFMul0, Zn3FPFMul1]>;
@@ -293,12 +293,12 @@ def Zn3FPFMisc12 : ProcResGroup<[Zn3FPFMisc1, Zn3FPFMisc2]>;
293
293
// AMD SOG 19h, 2.11 Floating-Point Unit
294
294
// Stores and floating point to general purpose register transfer
295
295
// have 2 dedicated pipelines (pipe 5 and 6).
296
- defvar Zn3FPLd01 = Zn3FPP45 ;
296
+ defvar Zn3FPLd01 = Zn3FP45 ;
297
297
298
298
// AMD SOG 19h, 2.11 Floating-Point Unit
299
299
// Note that FP stores are supported on two pipelines,
300
300
// but throughput is limited to one per cycle.
301
- let Super = Zn3FPP45 in
301
+ let Super = Zn3FP45 in
302
302
def Zn3FPSt : ProcResource<1>;
303
303
304
304
// Integer Adds, Subtracts, and Compares
@@ -345,8 +345,8 @@ def Zn3FpPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 1], [0, 1, 1],
345
345
// AMD SOG 19h, 2.11 Floating-Point Unit
346
346
// <...> the scheduler can issue 1 micro op per cycle for each pipe.
347
347
// FIXME: those are two separate schedulers, not a single big one.
348
- def Zn3FP : ProcResGroup<[Zn3FPP0, Zn3FPP2 , /*Zn3FPP4 ,*/ // scheduler 0
349
- Zn3FPP1, Zn3FPP3, Zn3FPP45 /*Zn3FPP5 */ // scheduler 1
348
+ def Zn3FP : ProcResGroup<[Zn3FP0, Zn3FP2 , /*Zn3FP4 ,*/ // scheduler 0
349
+ Zn3FP1, Zn3FP3, Zn3FP45 /*Zn3FP5 */ // scheduler 1
350
350
]> {
351
351
let BufferSize = !mul(2, 32);
352
352
}
@@ -838,9 +838,9 @@ defm : Zn3WriteResInt<WriteZero, [Zn3ALU0123], 0, [0], 1>;
838
838
defm : Zn3WriteResIntPair<WriteJump, [Zn3BRU01], 1, [1], 1>; // FIXME: not from llvm-exegesis
839
839
840
840
// Floating point. This covers both scalar and vector operations.
841
- defm : Zn3WriteResInt<WriteFLD0, [Zn3FPLd01, Zn3Load, Zn3FPP1 ], !add(Znver3Model.LoadLatency, 4), [1, 1, 1], 1>;
842
- defm : Zn3WriteResInt<WriteFLD1, [Zn3FPLd01, Zn3Load, Zn3FPP1 ], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
843
- defm : Zn3WriteResInt<WriteFLDC, [Zn3FPLd01, Zn3Load, Zn3FPP1 ], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
841
+ defm : Zn3WriteResInt<WriteFLD0, [Zn3FPLd01, Zn3Load, Zn3FP1 ], !add(Znver3Model.LoadLatency, 4), [1, 1, 1], 1>;
842
+ defm : Zn3WriteResInt<WriteFLD1, [Zn3FPLd01, Zn3Load, Zn3FP1 ], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
843
+ defm : Zn3WriteResInt<WriteFLDC, [Zn3FPLd01, Zn3Load, Zn3FP1 ], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
844
844
defm : Zn3WriteResXMM<WriteFLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
845
845
defm : Zn3WriteResXMM<WriteFLoadX, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
846
846
defm : Zn3WriteResYMM<WriteFLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
0 commit comments