Skip to content

Commit 09fc178

Browse files
asi-scdnpetrov-scaaa-scel-sc
authored
[RISCV] Add scheduling model for Syntacore SCR7 (#108814)
Syntacore SCR7 is rv64imafdcv_zba_zbb_zbc_zbs_zkn. Scheduling model for RVV will be added later. Overview: https://syntacore.com/products/scr7 --------- Co-authored-by: Dmitrii Petrov <[email protected]> Co-authored-by: Anton Afanasyev <[email protected]> Co-authored-by: Elena Lepilkina <[email protected]>
1 parent 594579b commit 09fc178

File tree

7 files changed

+649
-1
lines changed

7 files changed

+649
-1
lines changed

llvm/lib/Target/RISCV/RISCV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ include "RISCVSchedSiFiveP400.td"
5252
include "RISCVSchedSiFiveP600.td"
5353
include "RISCVSchedSyntacoreSCR1.td"
5454
include "RISCVSchedSyntacoreSCR345.td"
55+
include "RISCVSchedSyntacoreSCR7.td"
5556
include "RISCVSchedXiangShanNanHu.td"
5657

5758
//===----------------------------------------------------------------------===//

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ def SYNTACORE_SCR5_RV64 : RISCVProcessorModel<"syntacore-scr5-rv64",
384384
[TuneNoDefaultUnroll, FeaturePostRAScheduler]>;
385385

386386
def SYNTACORE_SCR7 : RISCVProcessorModel<"syntacore-scr7",
387-
NoSchedModel,
387+
SyntacoreSCR7Model,
388388
[Feature64Bit,
389389
FeatureStdExtI,
390390
FeatureStdExtZicsr,
Lines changed: 379 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,379 @@
1+
//==- RISCVSchedSyntacoreSCR7.td - Syntacore SCR7 Sched Defs -*- tablegen -*-=//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
//===----------------------------------------------------------------------===//
10+
11+
// This file covers scheduling model for rv64imafdcv_zba_zbb_zbc_zbs
12+
// configuration of Syntacore SCR7 processor.
13+
// Overview: https://syntacore.com/products/scr7
14+
15+
// SCR7 is an out-of-order superscalar dual-issue core.
16+
// FIXME: add V and Zkn extensions scheduling model
17+
// Top-level machine model for Syntacore SCR7. The resources, latencies and
// read advances defined later in this file attach to it through the
// `let SchedModel = SyntacoreSCR7Model in { ... }` block.
def SyntacoreSCR7Model : SchedMachineModel {
18+
let MicroOpBufferSize = 36; // > 1, consistent with the out-of-order core described above
19+
let IssueWidth = 2; // dual-issue
20+
let MispredictPenalty = 9;
21+
let LoadLatency = 3; // same value as the Latency given to loads in SCR7_BasicMemory
22+
let CompleteModel = 0; // model is partial: see the FIXME about V and Zkn above
23+
let UnsupportedFeatures = [HasStdExtZksed, HasStdExtZksh, HasStdExtZkr, // predicates excluded from this model
24+
HasStdExtZcmt, HasVInstructions];
25+
}
26+
27+
// Branching
28+
// Binds the control-transfer writes (WriteJmp, WriteJal, WriteJalr) to the
// single resource passed as BRU. No Latency or ReleaseAtCycles is set here,
// so the WriteRes defaults apply.
multiclass SCR7_Branching<ProcResourceKind BRU> {
29+
def : WriteRes<WriteJmp, [BRU]>;
30+
def : WriteRes<WriteJal, [BRU]>;
31+
def : WriteRes<WriteJalr, [BRU]>;
32+
}
33+
34+
// Single-cycle integer arithmetic and logic
35+
// Binds the basic integer ALU and shift writes (64-bit and *32 variants) to
// the resource passed as ALU. No Latency is overridden, so the WriteRes
// default applies to every entry.
multiclass SCR7_IntALU<ProcResourceKind ALU> {
36+
def : WriteRes<WriteIALU, [ALU]>;
37+
def : WriteRes<WriteIALU32, [ALU]>;
38+
def : WriteRes<WriteShiftImm, [ALU]>; // shifts by immediate
39+
def : WriteRes<WriteShiftImm32, [ALU]>;
40+
def : WriteRes<WriteShiftReg, [ALU]>; // shifts by register
41+
def : WriteRes<WriteShiftReg32, [ALU]>;
42+
}
43+
44+
// Pipelined integer multiplication
45+
// Binds WriteIMul and WriteIMul32 to the resource list `Resources` with a
// result latency of 3 cycles. ReleaseAtCycles is left at its default, i.e. no
// extended occupancy of any resource is modelled for multiplies.
multiclass SCR7_IntMul<list<ProcResourceKind> Resources> {
46+
let Latency = 3 in {
47+
def : WriteRes<WriteIMul, Resources>;
48+
def : WriteRes<WriteIMul32, Resources>;
49+
}
50+
}
51+
52+
// Common implementation for WriteIDiv and WriteIDiv32 sched writes.
53+
// Shared helper for the divide/remainder writes.
//   Resources     - resources consumed by both writes.
//   ReleaseCycles - per-resource occupancy, forwarded to ReleaseAtCycles
//                   (one entry per element of Resources).
//   DivLatency    - result latency applied to both writes.
//   DivWrite      - SchedWrite used for the divide.
//   RemWrite      - SchedWrite used for the remainder; it receives exactly the
//                   same latency and occupancy as DivWrite.
multiclass SCR7_IntDivImpl<list<ProcResourceKind> Resources,
54+
list<int> ReleaseCycles, int DivLatency,
55+
SchedWrite DivWrite, SchedWrite RemWrite> {
56+
let Latency = DivLatency, ReleaseAtCycles = ReleaseCycles in {
57+
def : WriteRes<DivWrite, Resources>;
58+
def : WriteRes<RemWrite, Resources>;
59+
}
60+
}
61+
62+
// Non-pipelined integer division
63+
// Full-width integer divide/remainder: forwards its three parameters to
// SCR7_IntDivImpl and fixes the write pair to WriteIDiv / WriteIRem.
multiclass SCR7_IntDiv<list<ProcResourceKind> Resources,
64+
list<int> ReleaseCycles,
65+
int DivLatency> {
66+
defm : SCR7_IntDivImpl<Resources,
67+
ReleaseCycles,
68+
DivLatency,
69+
WriteIDiv,
70+
WriteIRem>;
71+
}
72+
73+
// 32-bit counterpart of SCR7_IntDiv: forwards its three parameters to
// SCR7_IntDivImpl and fixes the write pair to WriteIDiv32 / WriteIRem32.
multiclass SCR7_IntDiv32<list<ProcResourceKind> Resources,
74+
list<int> ReleaseCycles,
75+
int DivLatency> {
76+
defm : SCR7_IntDivImpl<Resources,
77+
ReleaseCycles,
78+
DivLatency,
79+
WriteIDiv32,
80+
WriteIRem32>;
81+
}
82+
83+
// Binds every bit-manipulation write (Zba, Zbb, Zbs, Zbc) to the single
// resource passed as BMU, all with an explicit latency of 1 cycle.
multiclass SCR7_Bitmanip<ProcResourceKind BMU> {
84+
let Latency = 1 in {
85+
// Zba
86+
def : WriteRes<WriteSHXADD, [BMU]>;
87+
def : WriteRes<WriteSHXADD32, [BMU]>;
88+
// Zbb
89+
def : WriteRes<WriteRotateImm, [BMU]>;
90+
def : WriteRes<WriteRotateImm32, [BMU]>;
91+
def : WriteRes<WriteRotateReg, [BMU]>;
92+
def : WriteRes<WriteRotateReg32, [BMU]>;
93+
def : WriteRes<WriteCLZ, [BMU]>;
94+
def : WriteRes<WriteCLZ32, [BMU]>;
95+
def : WriteRes<WriteCTZ, [BMU]>;
96+
def : WriteRes<WriteCTZ32, [BMU]>;
97+
def : WriteRes<WriteCPOP, [BMU]>;
98+
def : WriteRes<WriteCPOP32, [BMU]>;
99+
def : WriteRes<WriteREV8, [BMU]>;
100+
def : WriteRes<WriteORCB, [BMU]>;
101+
def : WriteRes<WriteIMinMax, [BMU]>;
102+
// Zbs
103+
def : WriteRes<WriteSingleBit, [BMU]>;
104+
def : WriteRes<WriteSingleBitImm, [BMU]>;
105+
// Zbc (carry-less multiply)
106+
def : WriteRes<WriteCLMUL, [BMU]>;
107+
def : WriteRes<WriteBEXT, [BMU]>; // NOTE(review): bit extract looks like a Zbs write, not Zbc - confirm grouping
108+
def : WriteRes<WriteBEXTI, [BMU]>; // NOTE(review): same as above (immediate form)
109+
}
110+
}
111+
112+
// Binds the scalar-crypto bit-manipulation writes (Zbkb and Zbkx groups) to
// the single resource passed as SCU, all with an explicit latency of 1 cycle.
// Only these five writes are covered here; the file-level FIXME notes that
// Zkn is not modelled yet.
multiclass SCR7_ScalarCrypto<ProcResourceKind SCU> {
113+
let Latency = 1 in {
114+
// Zbkb
115+
def : WriteRes<WriteBREV8, [SCU]>;
116+
def : WriteRes<WritePACK, [SCU]>;
117+
def : WriteRes<WritePACK32, [SCU]>;
118+
def : WriteRes<WriteZIP, [SCU]>;
119+
// Zbkx
120+
def : WriteRes<WriteXPERM, [SCU]>;
121+
}
122+
}
123+
124+
// Assembles the whole integer side of the model from the multiclasses above.
//   ALU_Any    - resource used for branches, bitmanip, scalar crypto and plain
//                ALU writes.
//   ALU_DIV_IS - first resource consumed by divides/remainders (held 1 cycle).
//   DIV        - divider unit; held for the full divide duration.
//   ALU_MUL_IS - first resource consumed by multiplies.
//   MUL        - multiplier unit.
multiclass SCR7_IntPipeline<ProcResourceKind ALU_Any,
125+
ProcResourceKind ALU_DIV_IS,
126+
ProcResourceKind DIV,
127+
ProcResourceKind ALU_MUL_IS,
128+
ProcResourceKind MUL> {
129+
defm : SCR7_Branching<ALU_Any>;
130+
defm : SCR7_Bitmanip<ALU_Any>;
131+
defm : SCR7_ScalarCrypto<ALU_Any>;
132+
defm : SCR7_IntALU<ALU_Any>;
133+
defm : SCR7_IntMul<[ALU_MUL_IS, MUL]>;
134+
defm : SCR7_IntDiv<[ALU_DIV_IS, DIV], // 64-bit div/rem: DIV is occupied for all 35 cycles
135+
/* ReleaseAtCycles */[1, 35],
136+
/* Latency */ 35>;
137+
defm : SCR7_IntDiv32<[ALU_DIV_IS, DIV], // 32-bit div/rem: DIV is occupied for all 19 cycles
138+
/* ReleaseAtCycles */[1, 19],
139+
/* Latency */ 19>;
140+
}
141+
142+
// Load/store instructions
143+
// Binds all ordinary integer and floating-point load/store writes to the
// resource passed as LSU. Loads and stores alike are given a latency of
// 3 cycles, the same value as LoadLatency in SyntacoreSCR7Model.
multiclass SCR7_BasicMemory<ProcResourceKind LSU> {
144+
let Latency = 3 in {
145+
def : WriteRes<WriteSTB, [LSU]>; // integer stores
146+
def : WriteRes<WriteSTH, [LSU]>;
147+
def : WriteRes<WriteSTW, [LSU]>;
148+
def : WriteRes<WriteSTD, [LSU]>;
149+
def : WriteRes<WriteLDB, [LSU]>; // integer loads
150+
def : WriteRes<WriteLDH, [LSU]>;
151+
def : WriteRes<WriteLDW, [LSU]>;
152+
def : WriteRes<WriteLDD, [LSU]>;
153+
def : WriteRes<WriteFST32, [LSU]>; // FP stores
154+
def : WriteRes<WriteFST64, [LSU]>;
155+
def : WriteRes<WriteFLD32, [LSU]>; // FP loads
156+
def : WriteRes<WriteFLD64, [LSU]>;
157+
}
158+
}
159+
160+
// Atomic memory
161+
// Binds the atomic memory writes to the resource passed as LSU, in two
// latency groups: 19 cycles for WriteAtomicLDW/LDD and 21 cycles for
// WriteAtomicW/D and WriteAtomicSTW/STD.
// NOTE(review): presumably LDW/LDD are LR, STW/STD are SC and W/D are AMOs -
// confirm against the RISC-V schedule definitions.
multiclass SCR7_AtomicMemory<ProcResourceKind LSU> {
162+
let Latency = 19 in {
163+
def : WriteRes<WriteAtomicLDW, [LSU]>;
164+
def : WriteRes<WriteAtomicLDD, [LSU]>;
165+
}
166+
let Latency = 21 in {
167+
def : WriteRes<WriteAtomicW, [LSU]>;
168+
def : WriteRes<WriteAtomicD, [LSU]>;
169+
def : WriteRes<WriteAtomicSTW, [LSU]>;
170+
def : WriteRes<WriteAtomicSTD, [LSU]>;
171+
}
172+
}
173+
// Floating-point side of the model. Every write consumes FPU_IS plus one
// execution unit:
//   FPU_IS   - first resource of every FP write.
//   FALU     - add, sign-inject, min/max, conversions, classify, compare and
//              FP<->integer moves; latency 4.
//   FMA      - multiply and fused multiply-add; latency 6.
//   FDIVSQRT - divide and square root; per-write Latency and ReleaseAtCycles
//              (the second ReleaseAtCycles entry is the FDIVSQRT occupancy).
multiclass SCR7_FPU<ProcResourceKind FPU_IS, ProcResourceKind FALU,
174+
ProcResourceKind FMA, ProcResourceKind FDIVSQRT> {
175+
// FALU operations
176+
let Latency = 4 in {
177+
def : WriteRes<WriteFAdd32, [FPU_IS, FALU]>;
178+
def : WriteRes<WriteFAdd64, [FPU_IS, FALU]>;
179+
def : WriteRes<WriteFSGNJ32, [FPU_IS, FALU]>;
180+
def : WriteRes<WriteFSGNJ64, [FPU_IS, FALU]>;
181+
def : WriteRes<WriteFMinMax32, [FPU_IS, FALU]>;
182+
def : WriteRes<WriteFMinMax64, [FPU_IS, FALU]>;
183+
184+
def : WriteRes<WriteFCvtI32ToF32, [FPU_IS, FALU]>; // conversions
185+
def : WriteRes<WriteFCvtI32ToF64, [FPU_IS, FALU]>;
186+
def : WriteRes<WriteFCvtI64ToF32, [FPU_IS, FALU]>;
187+
def : WriteRes<WriteFCvtI64ToF64, [FPU_IS, FALU]>;
188+
def : WriteRes<WriteFCvtF32ToF64, [FPU_IS, FALU]>;
189+
def : WriteRes<WriteFCvtF64ToF32, [FPU_IS, FALU]>;
190+
def : WriteRes<WriteFCvtF32ToI32, [FPU_IS, FALU]>;
191+
def : WriteRes<WriteFCvtF32ToI64, [FPU_IS, FALU]>;
192+
def : WriteRes<WriteFCvtF64ToI32, [FPU_IS, FALU]>;
193+
def : WriteRes<WriteFCvtF64ToI64, [FPU_IS, FALU]>;
194+
195+
def : WriteRes<WriteFClass32, [FPU_IS, FALU]>; // classify
196+
def : WriteRes<WriteFClass64, [FPU_IS, FALU]>;
197+
198+
def : WriteRes<WriteFCmp32, [FPU_IS, FALU]>; // compare
199+
def : WriteRes<WriteFCmp64, [FPU_IS, FALU]>;
200+
201+
def : WriteRes<WriteFMovI32ToF32, [FPU_IS, FALU]>; // FP <-> integer register moves
202+
def : WriteRes<WriteFMovF32ToI32, [FPU_IS, FALU]>;
203+
def : WriteRes<WriteFMovI64ToF64, [FPU_IS, FALU]>;
204+
def : WriteRes<WriteFMovF64ToI64, [FPU_IS, FALU]>;
205+
}
206+
207+
// FMA operations
208+
let Latency = 6 in {
209+
def : WriteRes<WriteFMul32, [FPU_IS, FMA]>;
210+
def : WriteRes<WriteFMul64, [FPU_IS, FMA]>;
211+
def : WriteRes<WriteFMA32, [FPU_IS, FMA]>;
212+
def : WriteRes<WriteFMA64, [FPU_IS, FMA]>;
213+
}
214+
215+
def : WriteRes<WriteFDiv32, [FPU_IS, FDIVSQRT]> { // FDIV/FSQRT: explicit FDIVSQRT occupancy
216+
let Latency = 16;
217+
let ReleaseAtCycles = [1, 15]; // FDIVSQRT held for Latency - 1 cycles
218+
}
219+
def : WriteRes<WriteFDiv64, [FPU_IS, FDIVSQRT]> {
220+
let Latency = 30;
221+
let ReleaseAtCycles = [1, 29]; // FDIVSQRT held for Latency - 1 cycles
222+
}
223+
224+
def : WriteRes<WriteFSqrt32, [FPU_IS, FDIVSQRT]> {
225+
let Latency = 18;
226+
let ReleaseAtCycles = [1, 16]; // NOTE(review): Latency - 2 here, but Latency - 1 for FDIV - confirm
227+
}
228+
def : WriteRes<WriteFSqrt64, [FPU_IS, FDIVSQRT]> {
229+
let Latency = 32;
230+
let ReleaseAtCycles = [1, 30]; // NOTE(review): Latency - 2 here, but Latency - 1 for FDIV - confirm
231+
}
232+
}
233+
234+
// Others
235+
// Miscellaneous entries: WriteCSR and WriteNop are defined with an empty
// resource list, and the COPY pseudo-instruction is mapped to WriteIALU.
multiclass SCR7_Other {
236+
def : WriteRes<WriteCSR, []>; // consumes no modelled resource
237+
def : WriteRes<WriteNop, []>; // consumes no modelled resource
238+
239+
def : InstRW<[WriteIALU], (instrs COPY)>; // schedule COPY like a plain ALU op
240+
}
241+
242+
// Unsupported scheduling classes for SCR7.
243+
// Instantiates the UnsupportedSched* multiclasses (defined outside this file)
// for SFB, V, Xsfvcp, Zabha, Zfa, Zfh and Zvk.
// NOTE(review): presumably each one marks the corresponding scheduling
// classes as unsupported for this model - confirm against their definitions.
multiclass SCR7_Unsupported {
244+
defm : UnsupportedSchedSFB;
245+
defm : UnsupportedSchedV;
246+
defm : UnsupportedSchedXsfvcp;
247+
defm : UnsupportedSchedZabha;
248+
defm : UnsupportedSchedZfa;
249+
defm : UnsupportedSchedZfh;
250+
defm : UnsupportedSchedZvk;
251+
}
252+
253+
254+
// Bypasses (none)
255+
// Defines a ReadAdvance of 0 cycles for every SchedRead listed below, i.e.
// no operand is modelled as being read later than issue (no bypasses).
// The list is grouped as: control flow/CSR, memory operands, integer ALU,
// div/mul, atomics, bitmanip and scalar crypto, then floating point.
multiclass SCR7_NoReadAdvances {
256+
def : ReadAdvance<ReadJmp, 0>; // control flow and CSR
257+
def : ReadAdvance<ReadJalr, 0>;
258+
def : ReadAdvance<ReadCSR, 0>;
259+
def : ReadAdvance<ReadStoreData, 0>; // integer memory operands
260+
def : ReadAdvance<ReadMemBase, 0>;
261+
def : ReadAdvance<ReadIALU, 0>; // integer ALU and shifts
262+
def : ReadAdvance<ReadIALU32, 0>;
263+
def : ReadAdvance<ReadShiftImm, 0>;
264+
def : ReadAdvance<ReadShiftImm32, 0>;
265+
def : ReadAdvance<ReadShiftReg, 0>;
266+
def : ReadAdvance<ReadShiftReg32, 0>;
267+
def : ReadAdvance<ReadIDiv, 0>; // integer divide, remainder, multiply
268+
def : ReadAdvance<ReadIDiv32, 0>;
269+
def : ReadAdvance<ReadIRem, 0>;
270+
def : ReadAdvance<ReadIRem32, 0>;
271+
def : ReadAdvance<ReadIMul, 0>;
272+
def : ReadAdvance<ReadIMul32, 0>;
273+
def : ReadAdvance<ReadAtomicWA, 0>; // atomics
274+
def : ReadAdvance<ReadAtomicWD, 0>;
275+
def : ReadAdvance<ReadAtomicDA, 0>;
276+
def : ReadAdvance<ReadAtomicDD, 0>;
277+
def : ReadAdvance<ReadAtomicLDW, 0>;
278+
def : ReadAdvance<ReadAtomicLDD, 0>;
279+
def : ReadAdvance<ReadAtomicSTW, 0>;
280+
def : ReadAdvance<ReadAtomicSTD, 0>;
281+
def : ReadAdvance<ReadSHXADD, 0>; // bitmanip and scalar crypto
282+
def : ReadAdvance<ReadSHXADD32, 0>;
283+
def : ReadAdvance<ReadRotateImm, 0>;
284+
def : ReadAdvance<ReadRotateImm32, 0>;
285+
def : ReadAdvance<ReadRotateReg, 0>;
286+
def : ReadAdvance<ReadRotateReg32, 0>;
287+
def : ReadAdvance<ReadCLZ, 0>;
288+
def : ReadAdvance<ReadCLZ32, 0>;
289+
def : ReadAdvance<ReadCTZ, 0>;
290+
def : ReadAdvance<ReadCTZ32, 0>;
291+
def : ReadAdvance<ReadCPOP, 0>;
292+
def : ReadAdvance<ReadCPOP32, 0>;
293+
def : ReadAdvance<ReadREV8, 0>;
294+
def : ReadAdvance<ReadORCB, 0>;
295+
def : ReadAdvance<ReadIMinMax, 0>;
296+
def : ReadAdvance<ReadCLMUL, 0>;
297+
def : ReadAdvance<ReadBREV8, 0>;
298+
def : ReadAdvance<ReadPACK, 0>;
299+
def : ReadAdvance<ReadPACK32, 0>;
300+
def : ReadAdvance<ReadZIP, 0>;
301+
def : ReadAdvance<ReadXPERM, 0>;
302+
def : ReadAdvance<ReadSingleBit, 0>;
303+
def : ReadAdvance<ReadSingleBitImm, 0>;
304+
def : ReadAdvance<ReadFStoreData, 0>; // floating point
305+
def : ReadAdvance<ReadFMemBase, 0>;
306+
def : ReadAdvance<ReadFAdd32, 0>;
307+
def : ReadAdvance<ReadFAdd64, 0>;
308+
def : ReadAdvance<ReadFMul32, 0>;
309+
def : ReadAdvance<ReadFMul64, 0>;
310+
def : ReadAdvance<ReadFMA32, 0>;
311+
def : ReadAdvance<ReadFMA32Addend, 0>;
312+
def : ReadAdvance<ReadFMA64, 0>;
313+
def : ReadAdvance<ReadFMA64Addend, 0>;
314+
def : ReadAdvance<ReadFDiv32, 0>;
315+
def : ReadAdvance<ReadFDiv64, 0>;
316+
def : ReadAdvance<ReadFSqrt32, 0>;
317+
def : ReadAdvance<ReadFSqrt64, 0>;
318+
def : ReadAdvance<ReadFCmp32, 0>;
319+
def : ReadAdvance<ReadFCmp64, 0>;
320+
def : ReadAdvance<ReadFSGNJ32, 0>;
321+
def : ReadAdvance<ReadFSGNJ64, 0>;
322+
def : ReadAdvance<ReadFMinMax32, 0>;
323+
def : ReadAdvance<ReadFMinMax64, 0>;
324+
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
325+
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
326+
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
327+
def : ReadAdvance<ReadFCvtF64ToI64, 0>;
328+
def : ReadAdvance<ReadFCvtI32ToF32, 0>;
329+
def : ReadAdvance<ReadFCvtI32ToF64, 0>;
330+
def : ReadAdvance<ReadFCvtI64ToF32, 0>;
331+
def : ReadAdvance<ReadFCvtI64ToF64, 0>;
332+
def : ReadAdvance<ReadFCvtF32ToF64, 0>;
333+
def : ReadAdvance<ReadFCvtF64ToF32, 0>;
334+
def : ReadAdvance<ReadFMovF32ToI32, 0>;
335+
def : ReadAdvance<ReadFMovI32ToF32, 0>;
336+
def : ReadAdvance<ReadFMovF64ToI64, 0>;
337+
def : ReadAdvance<ReadFMovI64ToF64, 0>;
338+
def : ReadAdvance<ReadFClass32, 0>;
339+
def : ReadAdvance<ReadFClass64, 0>;
340+
}
341+
342+
// Everything in this block is attached to SyntacoreSCR7Model: it declares the
// processor resources and instantiates the multiclasses defined above with
// them.
let SchedModel = SyntacoreSCR7Model in {
343+
// The integer pipeline has two reservation stations with a single issue
344+
// port each. Every station has eight entries:
345+
// First station:
346+
// - ALU (+ bitmanip and scalar crypto)
347+
// - Pipelined multiplier (3 stages)
348+
// Second station:
349+
// - ALU (+ bitmanip and scalar crypto)
350+
// - Non-pipelined divider (other units are not blocked)
351+
def SCR7_ALU_MUL_IS : ProcResource<1> { let BufferSize = 8; } // first station (ALU + multiplier)
352+
def SCR7_ALU_DIV_IS : ProcResource<1> { let BufferSize = 8; } // second station (ALU + divider)
353+
def SCR7_ALU_Any : ProcResGroup<[SCR7_ALU_MUL_IS, SCR7_ALU_DIV_IS]>; // group of both stations
354+
def SCR7_MUL : ProcResource<1> { let BufferSize = 1; }
355+
def SCR7_DIV : ProcResource<1> { let BufferSize = 1; }
356+
357+
defm : SCR7_IntPipeline<SCR7_ALU_Any, // argument order: ALU_Any, ALU_DIV_IS, DIV, ALU_MUL_IS, MUL
358+
SCR7_ALU_DIV_IS, SCR7_DIV,
359+
SCR7_ALU_MUL_IS, SCR7_MUL>;
360+
361+
// SCR7 has a single-issue LSU with sixteen entries.
362+
def SCR7_LSU : ProcResource<1> { let BufferSize = 16; }
363+
defm : SCR7_BasicMemory<SCR7_LSU>;
364+
defm : SCR7_AtomicMemory<SCR7_LSU>; // atomics share the same LSU resource
365+
366+
// The FPU has one issue slot with eight entries, feeding:
367+
// - FP ALU
368+
// - FMA
369+
// - Non-pipelined FDIV/FSQRT
370+
def SCR7_FPU_IS : ProcResource<1> { let BufferSize = 8; }
371+
def SCR7_FALU : ProcResource<1> { let BufferSize = 1; }
372+
def SCR7_FMA : ProcResource<1> { let BufferSize = 1; }
373+
def SCR7_FDIVSQRT : ProcResource<1> { let BufferSize = 1; }
374+
defm : SCR7_FPU<SCR7_FPU_IS, SCR7_FALU, SCR7_FMA, SCR7_FDIVSQRT>;
375+
376+
defm : SCR7_Other; // CSR, NOP and COPY handling
377+
defm : SCR7_Unsupported; // UnsupportedSched* instantiations
378+
defm : SCR7_NoReadAdvances; // all ReadAdvance entries set to 0
379+
}

0 commit comments

Comments
 (0)