@@ -88,9 +88,8 @@ class SiFive7GetCyclesSegmentedSeg2<string mx> {
88
88
89
89
// Cycles for segmented loads and stores are calculated using the
90
90
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
91
- class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
92
- defvar VLEN = 512;
93
- defvar DLEN = 256;
91
+ class SiFive7GetCyclesSegmented<string mx, int sew, int nf, int VLEN> {
92
+ defvar DLEN = !div(VLEN, 2);
94
93
// (VLEN * LMUL) / SEW
95
94
defvar VLUpperBound = !cond(
96
95
!eq(mx, "M1") : !div(VLEN, sew),
@@ -107,23 +106,20 @@ class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
107
106
int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
108
107
}
109
108
110
- class SiFive7GetCyclesOnePerElement<string mx, int sew> {
111
- // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
112
- // to use a different VLEN, this model will not make scheduling decisions
113
- // based on the user specified VLEN.
109
+ class SiFive7GetCyclesOnePerElement<string mx, int sew, int VLEN> {
114
110
// c = ceil(VLEN / SEW) * LMUL
115
111
// Note: c >= 1 as long as VLEN / SEW >= 8 (e.g. VLEN = 512 with SEW = 64
116
112
// gives 512 / 64 = 8), since the largest division performed on the element
117
113
// count is in MF8 case with division by 8. Then no ceil is needed.
118
- int VLEN = !div(512 , sew);
114
+ int numElements = !div(VLEN , sew);
119
115
int c = !cond(
120
- !eq(mx, "M1") : VLEN ,
121
- !eq(mx, "M2") : !mul(VLEN , 2),
122
- !eq(mx, "M4") : !mul(VLEN , 4),
123
- !eq(mx, "M8") : !mul(VLEN , 8),
124
- !eq(mx, "MF2") : !div(VLEN , 2),
125
- !eq(mx, "MF4") : !div(VLEN , 4),
126
- !eq(mx, "MF8") : !div(VLEN , 8)
116
+ !eq(mx, "M1") : numElements ,
117
+ !eq(mx, "M2") : !mul(numElements , 2),
118
+ !eq(mx, "M4") : !mul(numElements , 4),
119
+ !eq(mx, "M8") : !mul(numElements , 8),
120
+ !eq(mx, "MF2") : !div(numElements , 2),
121
+ !eq(mx, "MF4") : !div(numElements , 4),
122
+ !eq(mx, "MF8") : !div(numElements , 8)
127
123
);
128
124
}
129
125
@@ -139,10 +135,9 @@ class SiFive7GetDivOrSqrtFactor<int sew> {
139
135
140
136
/// Cycles for reductions take approximately VL*SEW/DLEN + 5*(4 + log(DLEN/SEW))
141
137
/// cycles.
142
- class SiFive7GetReductionCycles<string mx, int sew> {
138
+ class SiFive7GetReductionCycles<string mx, int sew, int VLEN > {
143
139
// VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
144
140
// VLUpperBound=(VLEN*LMUL)/SEW.
145
- defvar VLEN = 512;
146
141
defvar DLEN = !div(VLEN, 2);
147
142
defvar TwoTimesLMUL = !cond(
148
143
!eq(mx, "M1") : 2,
@@ -160,8 +155,7 @@ class SiFive7GetReductionCycles<string mx, int sew> {
160
155
}
161
156
162
157
/// Cycles for ordered reductions take approximately 6*VL cycles
163
- class SiFive7GetOrderedReductionCycles<string mx, int sew> {
164
- defvar VLEN = 512;
158
+ class SiFive7GetOrderedReductionCycles<string mx, int sew, int VLEN> {
165
159
// (VLEN * LMUL) / SEW
166
160
defvar VLUpperBound = !cond(
167
161
!eq(mx, "M1") : !div(VLEN, sew),
@@ -234,6 +228,8 @@ def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
234
228
235
229
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
236
230
231
+ defvar SiFive7VLEN = 512;
232
+
237
233
// Branching
238
234
let Latency = 3 in {
239
235
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
@@ -481,7 +477,7 @@ foreach mx = SchedMxList in {
481
477
482
478
foreach mx = SchedMxList in {
483
479
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
484
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
480
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, SiFive7VLEN >.c;
485
481
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
486
482
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
487
483
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -501,7 +497,7 @@ foreach mx = SchedMxList in {
501
497
// since LMUL >= 16/64.
502
498
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
503
499
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
504
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
500
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, SiFive7VLEN >.c;
505
501
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
506
502
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
507
503
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -518,7 +514,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
518
514
}
519
515
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
520
516
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
521
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
517
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, SiFive7VLEN >.c;
522
518
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
523
519
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
524
520
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -535,7 +531,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
535
531
}
536
532
foreach mx = ["M1", "M2", "M4", "M8"] in {
537
533
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
538
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
534
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, SiFive7VLEN >.c;
539
535
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
540
536
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
541
537
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -588,7 +584,7 @@ foreach mx = SchedMxList in {
588
584
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
589
585
defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
590
586
foreach nf=3-8 in {
591
- defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
587
+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN >.c;
592
588
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
593
589
// Does not chain so set latency high
594
590
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -603,7 +599,7 @@ foreach mx = SchedMxList in {
603
599
foreach mx = SchedMxList in {
604
600
foreach nf=2-8 in {
605
601
foreach eew = [8, 16, 32, 64] in {
606
- defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
602
+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN >.c;
607
603
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
608
604
// Does not chain so set latency high
609
605
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -669,7 +665,7 @@ foreach mx = SchedMxList in {
669
665
foreach mx = SchedMxList in {
670
666
foreach sew = SchedSEWSet<mx>.val in {
671
667
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
672
- !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
668
+ !div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN >.c, 4));
673
669
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
674
670
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
675
671
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -774,7 +770,7 @@ foreach mx = SchedMxList in {
774
770
foreach mx = SchedMxListF in {
775
771
foreach sew = SchedSEWSet<mx, isF=1>.val in {
776
772
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
777
- !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
773
+ !div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN >.c, 4));
778
774
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
779
775
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
780
776
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -834,7 +830,7 @@ foreach mx = SchedMxListFW in {
834
830
// 14. Vector Reduction Operations
835
831
foreach mx = SchedMxList in {
836
832
foreach sew = SchedSEWSet<mx>.val in {
837
- defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
833
+ defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
838
834
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
839
835
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
840
836
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -847,7 +843,7 @@ foreach mx = SchedMxList in {
847
843
848
844
foreach mx = SchedMxListWRed in {
849
845
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
850
- defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
846
+ defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
851
847
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
852
848
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
853
849
defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -857,15 +853,15 @@ foreach mx = SchedMxListWRed in {
857
853
858
854
foreach mx = SchedMxListF in {
859
855
foreach sew = SchedSEWSet<mx, 1>.val in {
860
- defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
856
+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
861
857
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
862
858
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
863
859
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
864
860
mx, sew, IsWorstCase>;
865
861
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
866
862
mx, sew, IsWorstCase>;
867
863
}
868
- defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
864
+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN >.c;
869
865
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
870
866
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
871
867
mx, sew, IsWorstCase>;
@@ -874,12 +870,12 @@ foreach mx = SchedMxListF in {
874
870
875
871
foreach mx = SchedMxListFWRed in {
876
872
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
877
- defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
873
+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN >.c;
878
874
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
879
875
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
880
876
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
881
877
mx, sew, IsWorstCase>;
882
- defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
878
+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN >.c;
883
879
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
884
880
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
885
881
mx, sew, IsWorstCase>;
@@ -924,7 +920,7 @@ foreach mx = SchedMxList in {
924
920
925
921
foreach mx = SchedMxList in {
926
922
foreach sew = SchedSEWSet<mx>.val in {
927
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
923
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN >.c;
928
924
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
929
925
let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
930
926
defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
0 commit comments