-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV][NFC] Factor out VLEN in the SiFive7 scheduling model #143629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
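@llvm/pr-subscribers-backend-risc-v Author: Min-Yih Hsu (mshockwave) Changes: In preparation for reusing SiFive7Model for sifive-x390, which has a VLEN of 1024, it's better (and less chaotic) to factor out the VLEN parameter from various places first: the plan is to do a major overhaul on this file in which all the `WriteRes` are going to be encapsulated in a big `multiclass`, where VLEN is one of its template arguments, such that we can instantiate different scheduling models with different VLEN. Before that happens, a placeholder defvar `SiFive7VLEN` is used instead in this patch. NFC. Full diff: https://github.com/llvm/llvm-project/pull/143629.diff 1 Files Affected: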
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index af64a871a9292..c1d7cd4a716e7 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -88,9 +88,8 @@ class SiFive7GetCyclesSegmentedSeg2<string mx> {
// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
-class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
- defvar VLEN = 512;
- defvar DLEN = 256;
+class SiFive7GetCyclesSegmented<string mx, int sew, int nf, int VLEN> {
+ defvar DLEN = !div(VLEN, 2);
// (VLEN * LMUL) / SEW
defvar VLUpperBound = !cond(
!eq(mx, "M1") : !div(VLEN, sew),
@@ -107,23 +106,20 @@ class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
}
-class SiFive7GetCyclesOnePerElement<string mx, int sew> {
- // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
- // to use a different VLEN, this model will not make scheduling decisions
- // based on the user specified VLEN.
+class SiFive7GetCyclesOnePerElement<string mx, int sew, int VLEN> {
// c = ceil(VLEN / SEW) * LMUL
// Note: c >= 1 since the smallest VLEN is 512 / 8 = 8, and the
// largest division performed on VLEN is in MF8 case with division
// by 8. Therefore, there is no need to ceil the result.
- int VLEN = !div(512, sew);
+ int numElements = !div(VLEN, sew);
int c = !cond(
- !eq(mx, "M1") : VLEN,
- !eq(mx, "M2") : !mul(VLEN, 2),
- !eq(mx, "M4") : !mul(VLEN, 4),
- !eq(mx, "M8") : !mul(VLEN, 8),
- !eq(mx, "MF2") : !div(VLEN, 2),
- !eq(mx, "MF4") : !div(VLEN, 4),
- !eq(mx, "MF8") : !div(VLEN, 8)
+ !eq(mx, "M1") : numElements,
+ !eq(mx, "M2") : !mul(numElements, 2),
+ !eq(mx, "M4") : !mul(numElements, 4),
+ !eq(mx, "M8") : !mul(numElements, 8),
+ !eq(mx, "MF2") : !div(numElements, 2),
+ !eq(mx, "MF4") : !div(numElements, 4),
+ !eq(mx, "MF8") : !div(numElements, 8)
);
}
@@ -139,10 +135,9 @@ class SiFive7GetDivOrSqrtFactor<int sew> {
/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
-class SiFive7GetReductionCycles<string mx, int sew> {
+class SiFive7GetReductionCycles<string mx, int sew, int VLEN> {
// VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
// VLUpperBound=(VLEN*LMUL)/SEW.
- defvar VLEN = 512;
defvar DLEN = !div(VLEN, 2);
defvar TwoTimesLMUL = !cond(
!eq(mx, "M1") : 2,
@@ -160,8 +155,7 @@ class SiFive7GetReductionCycles<string mx, int sew> {
}
/// Cycles for ordered reductions take approximately 6*VL cycles
-class SiFive7GetOrderedReductionCycles<string mx, int sew> {
- defvar VLEN = 512;
+class SiFive7GetOrderedReductionCycles<string mx, int sew, int VLEN> {
// (VLEN * LMUL) / SEW
defvar VLUpperBound = !cond(
!eq(mx, "M1") : !div(VLEN, sew),
@@ -234,6 +228,8 @@ def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
+defvar SiFive7VLEN = 512;
+
// Branching
let Latency = 3 in {
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
@@ -481,7 +477,7 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -501,7 +497,7 @@ foreach mx = SchedMxList in {
// since LMUL >= 16/64.
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -518,7 +514,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
}
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -535,7 +531,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
}
foreach mx = ["M1", "M2", "M4", "M8"] in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
@@ -588,7 +584,7 @@ foreach mx = SchedMxList in {
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
foreach nf=3-8 in {
- defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -603,7 +599,7 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
foreach nf=2-8 in {
foreach eew = [8, 16, 32, 64] in {
- defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
@@ -669,7 +665,7 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
- !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
+ !div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN>.c, 4));
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -774,7 +770,7 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
- !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
+ !div(SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN>.c, 4));
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
@@ -834,7 +830,7 @@ foreach mx = SchedMxListFW in {
// 14. Vector Reduction Operations
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
- defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -847,7 +843,7 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListWRed in {
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
- defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar Cycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -857,7 +853,7 @@ foreach mx = SchedMxListWRed in {
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
- defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
@@ -865,7 +861,7 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
- defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN>.c;
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
@@ -874,12 +870,12 @@ foreach mx = SchedMxListF in {
foreach mx = SchedMxListFWRed in {
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
- defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
- defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew, SiFive7VLEN>.c;
let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
@@ -924,7 +920,7 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew, SiFive7VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
|
Co-Authored-By: Michael Maitland <[email protected]>
65dfc29
to
ceea613
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…3629) In preparation of reusing SiFive7Model for sifive-x390, which has a VLEN of 1024, it's better (and less chaotic) to factor out the VLEN parameter from various of places first: the plan is to do a major overhaul on this file in which all the `WriteRes` are going to be encapsulated in a big `multiclass`, where VLEN is one of its template arguments. Such that we can instantiate different scheduling models with different VLEN. Before that happens, a placeholder defvar `SiFive7VLEN` is used instead in this patch. NFC. Co-authored-by: Michael Maitland <[email protected]>
…3629) In preparation of reusing SiFive7Model for sifive-x390, which has a VLEN of 1024, it's better (and less chaotic) to factor out the VLEN parameter from various of places first: the plan is to do a major overhaul on this file in which all the `WriteRes` are going to be encapsulated in a big `multiclass`, where VLEN is one of its template arguments. Such that we can instantiate different scheduling models with different VLEN. Before that happens, a placeholder defvar `SiFive7VLEN` is used instead in this patch. NFC. Co-authored-by: Michael Maitland <[email protected]>
…3629) In preparation of reusing SiFive7Model for sifive-x390, which has a VLEN of 1024, it's better (and less chaotic) to factor out the VLEN parameter from various of places first: the plan is to do a major overhaul on this file in which all the `WriteRes` are going to be encapsulated in a big `multiclass`, where VLEN is one of its template arguments. Such that we can instantiate different scheduling models with different VLEN. Before that happens, a placeholder defvar `SiFive7VLEN` is used instead in this patch. NFC. Co-authored-by: Michael Maitland <[email protected]>
In preparation for reusing SiFive7Model for sifive-x390, which has a VLEN of 1024, it's better (and less chaotic) to factor out the VLEN parameter from various places first: the plan is to do a major overhaul on this file in which all the `WriteRes` are going to be encapsulated in a big `multiclass`, where VLEN is one of its template arguments, such that we can instantiate different scheduling models with different VLEN. Before that happens, a placeholder defvar `SiFive7VLEN` is used instead in this patch. NFC.