Skip to content

Commit 208fc34

Browse files
[RISCV] Improve SiFive7 for reductions and ordered reductions
Since the scheduling resources for reductions and ordered reductions now account for LMUL and SEW, we can modify the Latency and ResourceCycles for these resources. * Most reductions take a total of approx `vl*SEW/DLEN + 5*(4 + log2(DLEN/SEW))` cycles. * Ordered floating-point reductions take a total of approx `5*vl` cycles. Differential Revision: https://reviews.llvm.org/D153474
1 parent ecef87b commit 208fc34

File tree

1 file changed

+87
-8
lines changed

1 file changed

+87
-8
lines changed

llvm/lib/Target/RISCV/RISCVSchedSiFive7.td

Lines changed: 87 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,44 @@ class SiFive7GetDivOrSqrtFactor<int sew> {
160160
);
161161
}
162162

163+
/// Cycles for reductions take approximately
/// VL*SEW/DLEN + 5*(4 + log2(DLEN/SEW)) cycles.
///
/// `mx` is the LMUL name ("MF8" .. "M8") and `sew` is the element width in
/// bits. The computed cycle count is exposed as the field `c`.
class SiFive7GetReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW and DLEN=VLEN/2. For MF4 and MF8 the exact
  // value of 2*LMUL is below 1, so it is rounded up to 1.
  defvar TwoTimesLMUL = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
  // TwoTimesLMUL already *is* the VL*SEW/DLEN term, so it is added as-is.
  // Dividing it by DLEN a second time would truncate to 0 for every LMUL
  // (16 / 256 == 0) and drop the LMUL-dependent part of the formula.
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}
184+
185+
/// Ordered reductions take approximately 5*VL cycles, with VL taken at its
/// upper bound (VLEN * LMUL) / SEW for the given `mx` and `sew`.
/// The computed cycle count is exposed as the field `c`.
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // VLEN * LMUL: whole LMULs multiply VLEN, fractional LMULs divide it.
  defvar VLENTimesLMUL = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !div(VLENTimesLMUL, sew);
  int c = !mul(5, VLUpperBound);
}
200+
163201
// SiFive7 machine model for scheduling and other instruction cost heuristics.
164202
def SiFive7Model : SchedMachineModel {
165203
let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
@@ -730,14 +768,55 @@ foreach mx = SchedMxListFW in {
730768
}
731769

732770
// 14. Vector Reduction Operations
733-
let Latency = 32 in {
734-
defm "" : LMULSEWWriteRes<"WriteVIRedV_From", [SiFive7VA]>;
735-
defm "" : LMULSEWWriteRes<"WriteVIWRedV_From", [SiFive7VA]>;
736-
defm "" : LMULSEWWriteRes<"WriteVFRedV_From", [SiFive7VA]>;
737-
defm "" : LMULSEWWriteRes<"WriteVFRedOV_From", [SiFive7VA]>;
738-
defm "" : LMULSEWWriteResF<"WriteVFRedMinMaxV_From", [SiFive7VA]>;
739-
defm "" : LMULSEWWriteResFWRed<"WriteVFWRedV_From", [SiFive7VA]>;
740-
defm "" : LMULSEWWriteResFWRed<"WriteVFWRedOV_From", [SiFive7VA]>;
771+
// WriteVIRedV_From: for every (mx, sew) pair, use the value of
// SiFive7GetReductionCycles as both the latency and the number of cycles the
// SiFive7VA resource is held.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = RedCycles, ResourceCycles = [RedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
780+
781+
// WriteVIWRedV_From: iterate SchedMxListWRed with the SEW set selected by
// SchedSEWSet<mx, 1>; latency and SiFive7VA occupancy both come from
// SiFive7GetReductionCycles.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = RedCycles, ResourceCycles = [RedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
790+
791+
// WriteVFRedV_From / WriteVFRedMinMaxV_From use SiFive7GetReductionCycles,
// while WriteVFRedOV_From uses SiFive7GetOrderedReductionCycles. In each case
// the same value is used for the latency and for the SiFive7VA occupancy.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSetF<mx>.val in {
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;

    let Latency = UnorderedCycles, ResourceCycles = [UnorderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }

    let Latency = OrderedCycles, ResourceCycles = [OrderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
807+
808+
// WriteVFWRedV_From uses SiFive7GetReductionCycles and WriteVFWRedOV_From
// uses SiFive7GetOrderedReductionCycles, iterating SchedMxListFWRed with the
// SEW set selected by SchedSEWSetF<mx, 1>. In each case the same value is
// used for the latency and for the SiFive7VA occupancy.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSetF<mx, 1>.val in {
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;

    let Latency = UnorderedCycles, ResourceCycles = [UnorderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }

    let Latency = OrderedCycles, ResourceCycles = [OrderedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
742821

743822
// 15. Vector Mask Instructions

0 commit comments

Comments
 (0)