Skip to content

Commit 841a7f0

Browse files
[RISCV][NFC] Factor out VLEN in the SiFive7 scheduling model (#143629)
In preparation for reusing SiFive7Model for sifive-x390, which has a VLEN of 1024, it's better (and less chaotic) to first factor out the VLEN parameter from the various places where it is hard-coded. The plan is to do a major overhaul of this file in which all the `WriteRes` are going to be encapsulated in a big `multiclass`, where VLEN is one of its template arguments, so that we can instantiate different scheduling models with different VLENs. Until that happens, this patch uses a placeholder defvar, `SiFive7VLEN`, instead. NFC. Co-authored-by: Michael Maitland <[email protected]>
1 parent 67ff66e commit 841a7f0

File tree

1 file changed

+30
-34
lines changed

1 file changed

+30
-34
lines changed

llvm/lib/Target/RISCV/RISCVSchedSiFive7.td

Lines changed: 30 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,8 @@ class SiFive7GetCyclesSegmentedSeg2<string mx> {
8888

8989
// Cycles for segmented loads and stores are calculated using the
9090
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
91-
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
92-
defvar VLEN = 512;
93-
defvar DLEN = 256;
91+
class SiFive7GetCyclesSegmented<string mx, int sew, int nf, int VLEN> {
92+
defvar DLEN = !div(VLEN, 2);
9493
// (VLEN * LMUL) / SEW
9594
defvar VLUpperBound = !cond(
9695
!eq(mx, "M1") : !div(VLEN, sew),
@@ -107,23 +106,20 @@ class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
107106
int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
108107
}
109108

110-
class SiFive7GetCyclesOnePerElement<string mx, int sew> {
111-
// FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
112-
// to use a different VLEN, this model will not make scheduling decisions
113-
// based on the user specified VLEN.
109+
class SiFive7GetCyclesOnePerElement<string mx, int sew, int VLEN> {
114110
// c = ceil(VLEN / SEW) * LMUL
115111
// Note: c >= 1 since the smallest VLEN / SEW is 512 / 64 = 8, and the
116112
// largest division performed on that quotient is in MF8 case with division
117113
// by 8. Therefore, there is no need to ceil the result.
118-
int VLEN = !div(512, sew);
114+
int numElements = !div(VLEN, sew);
119115
int c = !cond(
120-
!eq(mx, "M1") : VLEN,
121-
!eq(mx, "M2") : !mul(VLEN, 2),
122-
!eq(mx, "M4") : !mul(VLEN, 4),
123-
!eq(mx, "M8") : !mul(VLEN, 8),
124-
!eq(mx, "MF2") : !div(VLEN, 2),
125-
!eq(mx, "MF4") : !div(VLEN, 4),
126-
!eq(mx, "MF8") : !div(VLEN, 8)
116+
!eq(mx, "M1") : numElements,
117+
!eq(mx, "M2") : !mul(numElements, 2),
118+
!eq(mx, "M4") : !mul(numElements, 4),
119+
!eq(mx, "M8") : !mul(numElements, 8),
120+
!eq(mx, "MF2") : !div(numElements, 2),
121+
!eq(mx, "MF4") : !div(numElements, 4),
122+
!eq(mx, "MF8") : !div(numElements, 8)
127123
);
128124
}
129125

@@ -139,10 +135,9 @@ class SiFive7GetDivOrSqrtFactor<int sew> {
139135

140136
/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
141137
/// cycles.
142-
class SiFive7GetReductionCycles<string mx, int sew> {
138+
class SiFive7GetReductionCycles<string mx, int sew, int VLEN> {
143139
// VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
144140
// VLUpperBound=(VLEN*LMUL)/SEW.
145-
defvar VLEN = 512;
146141
defvar DLEN = !div(VLEN, 2);
147142
defvar TwoTimesLMUL = !cond(
148143
!eq(mx, "M1") : 2,
@@ -160,8 +155,7 @@ class SiFive7GetReductionCycles<string mx, int sew> {
160155
}
161156

162157
/// Cycles for ordered reductions take approximately 6*VL cycles
163-
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
164-
defvar VLEN = 512;
158+
class SiFive7GetOrderedReductionCycles<string mx, int sew, int VLEN> {
165159
// (VLEN * LMUL) / SEW
166160
defvar VLUpperBound = !cond(
167161
!eq(mx, "M1") : !div(VLEN, sew),
@@ -234,6 +228,8 @@ def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
234228

235229
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
236230

231+
defvar SiFive7VLEN = 512;
232+
237233
// Branching
238234
let Latency = 3 in {
239235
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
@@ -481,7 +477,7 @@ foreach mx = SchedMxList in {
481477

482478
foreach mx = SchedMxList in {
483479
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
484-
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
480+
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, SiFive7VLEN>.c;
485481
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
486482
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
487483
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -501,7 +497,7 @@ foreach mx = SchedMxList in {
501497
// since LMUL >= 16/64.
502498
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
503499
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
504-
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
500+
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, SiFive7VLEN>.c;
505501
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
506502
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
507503
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -518,7 +514,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
518514
}
519515
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
520516
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
521-
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
517+
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, SiFive7VLEN>.c;
522518
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
523519
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
524520
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -535,7 +531,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
535531
}
536532
foreach mx = ["M1", "M2", "M4", "M8"] in {
537533
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
538-
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
534+
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, SiFive7VLEN>.c;
539535
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
540536
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
541537
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -588,7 +584,7 @@ foreach mx = SchedMxList in {
588584
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
589585
defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
590586
foreach nf=3-8 in {
591-
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
587+
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN>.c;
592588
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
593589
// Does not chain so set latency high
594590
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -603,7 +599,7 @@ foreach mx = SchedMxList in {
603599
foreach mx = SchedMxList in {
604600
foreach nf=2-8 in {
605601
foreach eew = [8, 16, 32, 64] in {
606-
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
602+
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN>.c;
607603
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
608604
// Does not chain so set latency high
609605
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -669,7 +665,7 @@ foreach mx = SchedMxList in {
669665
foreach mx = SchedMxList in {
670666
foreach sew = SchedSEWSet<mx>.val in {
671667
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
672-
!div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
668+
!div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN>.c, 4));
673669
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
674670
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
675671
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -774,7 +770,7 @@ foreach mx = SchedMxList in {
774770
foreach mx = SchedMxListF in {
775771
foreach sew = SchedSEWSet<mx, isF=1>.val in {
776772
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
777-
!div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
773+
!div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN>.c, 4));
778774
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
779775
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
780776
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -834,7 +830,7 @@ foreach mx = SchedMxListFW in {
834830
// 14. Vector Reduction Operations
835831
foreach mx = SchedMxList in {
836832
foreach sew = SchedSEWSet<mx>.val in {
837-
defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
833+
defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
838834
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
839835
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
840836
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -847,7 +843,7 @@ foreach mx = SchedMxList in {
847843

848844
foreach mx = SchedMxListWRed in {
849845
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
850-
defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
846+
defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
851847
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
852848
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
853849
defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -857,15 +853,15 @@ foreach mx = SchedMxListWRed in {
857853

858854
foreach mx = SchedMxListF in {
859855
foreach sew = SchedSEWSet<mx, 1>.val in {
860-
defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
856+
defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
861857
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
862858
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
863859
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
864860
mx, sew, IsWorstCase>;
865861
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
866862
mx, sew, IsWorstCase>;
867863
}
868-
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
864+
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN>.c;
869865
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
870866
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
871867
mx, sew, IsWorstCase>;
@@ -874,12 +870,12 @@ foreach mx = SchedMxListF in {
874870

875871
foreach mx = SchedMxListFWRed in {
876872
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
877-
defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
873+
defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
878874
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
879875
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
880876
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
881877
mx, sew, IsWorstCase>;
882-
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
878+
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN>.c;
883879
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
884880
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
885881
mx, sew, IsWorstCase>;
@@ -924,7 +920,7 @@ foreach mx = SchedMxList in {
924920

925921
foreach mx = SchedMxList in {
926922
foreach sew = SchedSEWSet<mx>.val in {
927-
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
923+
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN>.c;
928924
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
929925
let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
930926
defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;

0 commit comments

Comments
 (0)