@@ -160,6 +160,44 @@ class SiFive7GetDivOrSqrtFactor<int sew> {
160
160
);
161
161
}
162
162
163
/// Approximate cycle count for a (non-ordered) vector reduction:
///   VL*SEW/DLEN + 5*(4 + log2(DLEN/SEW))
/// evaluated at the upper bound of VL for the given LMUL (`mx`) and SEW
/// (`sew`). The result is exposed as the field `c`.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW and DLEN=VLEN/2.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  // 2*LMUL per LMUL setting. MF4 and MF8 would be fractional (0.5, 0.25);
  // the table uses 1 for them.
  defvar TwoTimesLMUL = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
  // TwoTimesLMUL already *is* the VL*SEW/DLEN term, so it is added as-is.
  // Dividing it by DLEN again would truncate the term to 0 for every LMUL
  // (integer division of at most 16 by 256).
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}
184
+
185
/// Ordered reductions take approximately 5*VL cycles. VL is taken at its
/// upper bound for the given LMUL (`mx`) and SEW (`sew`); the result is
/// exposed as the field `c`.
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // VLEN * LMUL (fractional LMULs divide VLEN instead of multiplying it).
  defvar ScaledVLEN = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
  // VLUpperBound = (VLEN * LMUL) / SEW
  defvar VLUpperBound = !div(ScaledVLEN, sew);
  int c = !mul(5, VLUpperBound);
}
200
+
163
201
// SiFive7 machine model for scheduling and other instruction cost heuristics.
164
202
def SiFive7Model : SchedMachineModel {
165
203
let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
@@ -730,14 +768,55 @@ foreach mx = SchedMxListFW in {
730
768
}
731
769
732
770
// 14. Vector Reduction Operations
733
- let Latency = 32 in {
734
- defm "" : LMULSEWWriteRes<"WriteVIRedV_From", [SiFive7VA]>;
735
- defm "" : LMULSEWWriteRes<"WriteVIWRedV_From", [SiFive7VA]>;
736
- defm "" : LMULSEWWriteRes<"WriteVFRedV_From", [SiFive7VA]>;
737
- defm "" : LMULSEWWriteRes<"WriteVFRedOV_From", [SiFive7VA]>;
738
- defm "" : LMULSEWWriteResF<"WriteVFRedMinMaxV_From", [SiFive7VA]>;
739
- defm "" : LMULSEWWriteResFWRed<"WriteVFWRedV_From", [SiFive7VA]>;
740
- defm "" : LMULSEWWriteResFWRed<"WriteVFWRedOV_From", [SiFive7VA]>;
771
// WriteVIRedV_From: one write resource per (LMUL, SEW) pair. Latency and
// ResourceCycles are both set to the SiFive7GetReductionCycles estimate.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = RedCycles, ResourceCycles = [RedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
780
+
781
// WriteVIWRedV_From: iterates the SchedMxListWRed LMULs and the
// SchedSEWSet<mx, 1> SEW values. Latency and ResourceCycles are both set to
// the SiFive7GetReductionCycles estimate.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = RedCycles, ResourceCycles = [RedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
790
+
791
// WriteVFRedV_From / WriteVFRedMinMaxV_From use the SiFive7GetReductionCycles
// estimate; WriteVFRedOV_From uses the SiFive7GetOrderedReductionCycles
// estimate. In each case Latency and ResourceCycles get the same value.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSetF<mx>.val in {
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;

    let Latency = UnorderedCycles, ResourceCycles = [UnorderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }

    let Latency = OrderedCycles, ResourceCycles = [OrderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
807
+
808
// WriteVFWRedV_From uses the SiFive7GetReductionCycles estimate;
// WriteVFWRedOV_From uses the SiFive7GetOrderedReductionCycles estimate.
// In each case Latency and ResourceCycles get the same value.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSetF<mx, 1>.val in {
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;

    let Latency = UnorderedCycles, ResourceCycles = [UnorderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }

    let Latency = OrderedCycles, ResourceCycles = [OrderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
742
821
743
822
// 15. Vector Mask Instructions
0 commit comments