-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Add scheduling model for SiFive P800 processors #139316
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Add scheduling model for SiFive P800 processors #139316
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Min-Yih Hsu (mshockwave) Changes: The scheduling model for SiFive P800 series cores. They have 6 integer pipes, 2 floating point pipes, and 2 vector pipes. The tests are meant to have the same coverage as its P600 counterpart. Patch is 1.10 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139316.diff 21 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 7f96c6718ffa9..e322ae340349c 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -58,6 +58,7 @@ include "RISCVSchedSiFive7.td"
include "RISCVSchedSiFiveP400.td"
include "RISCVSchedSiFiveP500.td"
include "RISCVSchedSiFiveP600.td"
+include "RISCVSchedSiFiveP800.td"
include "RISCVSchedSpacemitX60.td"
include "RISCVSchedSyntacoreSCR1.td"
include "RISCVSchedSyntacoreSCR345.td"
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index db57f5c4da24e..735997de94e81 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -365,7 +365,7 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
TuneVXRMPipelineFlush,
TunePostRAScheduler]>;
-def SIFIVE_P870 : RISCVProcessorModel<"sifive-p870", NoSchedModel,
+def SIFIVE_P870 : RISCVProcessorModel<"sifive-p870", SiFiveP800Model,
!listconcat(RVA23U64Features,
[FeatureStdExtZama16b,
FeatureStdExtZfh,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP800.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP800.td
new file mode 100644
index 0000000000000..f226f1f683f5e
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP800.td
@@ -0,0 +1,1184 @@
+//==- RISCVSchedSiFiveP800.td - SiFiveP800 Scheduling Defs ---*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+/// c is true if mx has the worst case behavior compared to LMULs in MxList.
+/// On the SiFiveP800, the worst case LMUL is the Largest LMUL
+/// and the worst case sew is the smallest SEW for that LMUL.
+class SiFiveP800IsWorstCaseMX<string mx, list<string> MxList> { // LMUL-only variant; SEW is not consulted here.
+ string LLMUL = LargestLMUL<MxList>.r; // Largest LMUL in MxList, as computed by the LargestLMUL helper.
+ bit c = !eq(mx, LLMUL); // Set exactly when mx is that largest LMUL.
+}
+
+class SiFiveP800IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> { // SEW-aware worst-case test; isF defaults to 0 and is only forwarded to SmallestSEW.
+ string LLMUL = LargestLMUL<MxList>.r; // Largest LMUL in MxList, as computed by the LargestLMUL helper.
+ int SSEW = SmallestSEW<mx, isF>.r; // Smallest SEW reported by the SmallestSEW helper for this mx/isF pair.
+ bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); // True only when mx is the largest LMUL and sew is that smallest SEW.
+}
+
+// 1 micro-op per cycle: c is the cycle count for the LMUL named by mx.
+class SiFiveP800GetLMulCycles<string mx> {
+ int c = !cond( // Integer LMULs map to their own value.
+ !eq(mx, "M1") : 1,
+ !eq(mx, "M2") : 2,
+ !eq(mx, "M4") : 4,
+ !eq(mx, "M8") : 8,
+ !eq(mx, "MF2") : 1, // Fractional LMULs cost the same as M1.
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ ); // No catch-all case: only the seven names above are handled.
+}
+
+// Latency for segmented loads and stores is calculated as vl: c = (VLEN * LMUL) / sew.
+class SiFiveP800GetCyclesSegmented<string mx, int sew> {
+ defvar VLEN = 128; // Fixed VLEN assumed by this formula (presumably in bits, matching sew).
+ int c = !cond(
+ !eq(mx, "M1") : !div(VLEN, sew),
+ !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
+ !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
+ !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
+ !eq(mx, "MF2") : !div(!div(VLEN, 2), sew), // Fractional LMUL: divide VLEN by the fraction first, then by sew.
+ !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
+ !eq(mx, "MF8") : !div(!div(VLEN, 8), sew), // NOTE(review): integer division makes c 0 here for sew 32/64 (and for MF4 at sew 64).
+ );
+}
+
+class SiFiveP800VSM3CCycles<string mx> { // Cycle count keyed on the LMUL name mx.
+ // c = ceil(LMUL / 2)
+ int c = !cond(!eq(mx, "M2") : 1,
+ !eq(mx, "M4") : 2,
+ !eq(mx, "M8") : 4,
+ true : 1); // Catch-all: every other mx value yields 1.
+}
+
+// SiFiveP800 machine model for scheduling and other instruction cost heuristics.
+def SiFiveP800Model : SchedMachineModel {
+ let IssueWidth = 6; // 6 micro-ops are dispatched per cycle.
+ let MicroOpBufferSize = 288; // Max micro-ops that can be buffered.
+ let LoadLatency = 4; // Cycles for loads to access the cache; same value as the scalar integer load writes below.
+ let MispredictPenalty = 9; // Extra cycles for a mispredicted branch.
+ let PostRAScheduler = true; // Enable scheduling after register allocation as well.
+ let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, // Predicates this model declares unsupported:
+ HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, // the Zbk*/Zkn*/Zks*/Zkr extension predicates
+ HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
+ HasVendorXSfvqmaccqoq, HasVendorXSfvqmaccdod]; // and two SiFive vendor-extension predicates.
+ let CompleteModel = false; // The model is not declared complete.
+}
+
+let SchedModel = SiFiveP800Model in {
+
+def SiFiveP800IEXQ0 : ProcResource<1>; // Six single-unit IEXQ resources (0-5); the groups and aliases further down select subsets of them.
+def SiFiveP800IEXQ1 : ProcResource<1>;
+def SiFiveP800IEXQ2 : ProcResource<1>;
+def SiFiveP800IEXQ3 : ProcResource<1>;
+def SiFiveP800IEXQ4 : ProcResource<1>;
+def SiFiveP800IEXQ5 : ProcResource<1>;
+def SiFiveP800FEXQ0 : ProcResource<1>; // Two single-unit FEXQ resources, grouped further down as SiFiveP800FloatArith.
+def SiFiveP800FEXQ1 : ProcResource<1>;
+
+// Two Load/Store ports that can issue either two loads, two stores, or one load
+// and one store.
+def SiFiveP800LDST : ProcResource<2>;
+// One additional port that can only handle loads.
+def SiFiveP800LD : ProcResource<1>;
+def SiFiveP800Load : ProcResGroup<[SiFiveP800LDST, SiFiveP800LD]>; // Loads bind to this group (all three ports); stores bind to SiFiveP800LDST only.
+
+// 6-wide pipeline with 4 ALU pipes.
+def SiFiveP800IntArith : ProcResGroup<[SiFiveP800IEXQ0, SiFiveP800IEXQ1, SiFiveP800IEXQ2, SiFiveP800IEXQ3]>; // General ALU writes may use any of IEXQ0-IEXQ3.
+defvar SiFiveP800SYS = SiFiveP800IEXQ1; // Alias for IEXQ1; used by the vsetvl* writes.
+defvar SiFiveP800CMOV = SiFiveP800IEXQ3; // Alias for IEXQ3; second resource of P800WriteCMOV.
+defvar SiFiveP800I2F = SiFiveP800IEXQ3; // Alias for IEXQ3; used by int-to-FP conversions, moves and FLI.
+def SiFiveP800Mul : ProcResGroup<[SiFiveP800IEXQ1, SiFiveP800IEXQ3]>; // Multiply and cpop may use IEXQ1 or IEXQ3.
+def SiFiveP800Branch : ProcResGroup<[SiFiveP800IEXQ4, SiFiveP800IEXQ5]>; // Jumps may use IEXQ4 or IEXQ5.
+def SiFiveP800Div : ProcResource<1>; // Separate divider resource, consumed together with IEXQ2 by div/rem.
+
+def SiFiveP800FloatArith : ProcResGroup<[SiFiveP800FEXQ0, SiFiveP800FEXQ1]>; // General FP writes may use either FP queue.
+defvar SiFiveP800F2I = SiFiveP800FEXQ0; // Alias for FEXQ0; used by FP-to-int conversions, moves, compares and classify.
+def SiFiveP800FloatDiv : ProcResource<1>; // Separate FP divider resource, consumed together with FEXQ1 by fdiv/fsqrt.
+
+// Vector pipeline
+// VEXQ0 handles mask and simple slide instructions;
+// VEXQ1 handles complex slide, permutation, reduction, and divide instructions.
+// Other vector instructions can be done in either VEXQ0 or VEXQ1.
+def SiFiveP800VEXQ0 : ProcResource<1>;
+def SiFiveP800VEXQ1 : ProcResource<1>;
+def SiFiveP800VectorArith : ProcResGroup<[SiFiveP800VEXQ0, SiFiveP800VEXQ1]>; // Either vector queue.
+
+def SiFiveP800VLD : ProcResource<1>; // Single resource used by every vector load write below.
+def SiFiveP800VST : ProcResource<1>; // Single resource used by every vector store write below.
+def SiFiveP800VDiv : ProcResource<1>; // Not referenced in the visible part of this file.
+def SiFiveP800VFloatDiv : ProcResource<1>; // Not referenced in the visible part of this file.
+
+// Integer arithmetic and logic
+def : WriteRes<WriteIALU, [SiFiveP800IntArith]>; // No Latency override in this section, so the WriteRes default applies.
+def : WriteRes<WriteIALU32, [SiFiveP800IntArith]>;
+def : WriteRes<WriteShiftImm, [SiFiveP800IntArith]>;
+def : WriteRes<WriteShiftImm32, [SiFiveP800IntArith]>;
+def : WriteRes<WriteShiftReg, [SiFiveP800IntArith]>;
+def : WriteRes<WriteShiftReg32, [SiFiveP800IntArith]>;
+// Branching
+def : WriteRes<WriteJmp, [SiFiveP800Branch]>; // Jump writes are restricted to the two-pipe branch group.
+def : WriteRes<WriteJal, [SiFiveP800Branch]>;
+def : WriteRes<WriteJalr, [SiFiveP800Branch]>;
+
+// CMOV
+def P800WriteCMOV : SchedWriteRes<[SiFiveP800Branch, SiFiveP800CMOV]> { // Consumes a branch pipe and the CMOV pipe (IEXQ3).
+ let Latency = 2;
+ let NumMicroOps = 2; // Two micro-ops; presumably one per resource listed above.
+}
+def : InstRW<[P800WriteCMOV], (instrs PseudoCCMOVGPRNoX0)>; // Bound to this single pseudo-instruction only.
+
+let Latency = 2 in { // Applies to all four writes in this group.
+// Integer multiplication
+def : WriteRes<WriteIMul, [SiFiveP800Mul]>;
+def : WriteRes<WriteIMul32, [SiFiveP800Mul]>;
+// cpop[w] looks exactly like multiply: same latency and same pipes.
+def : WriteRes<WriteCPOP, [SiFiveP800Mul]>;
+def : WriteRes<WriteCPOP32, [SiFiveP800Mul]>;
+}
+
+// Integer division
+def : WriteRes<WriteIDiv, [SiFiveP800IEXQ2, SiFiveP800Div]> {
+ let Latency = 35;
+ let ReleaseAtCycles = [1, 34]; // One entry per resource: IEXQ2 -> 1, divider -> Latency - 1.
+}
+def : WriteRes<WriteIDiv32, [SiFiveP800IEXQ2, SiFiveP800Div]> {
+ let Latency = 20;
+ let ReleaseAtCycles = [1, 19]; // Same pattern for the 32-bit form.
+}
+
+// Integer remainder
+def : WriteRes<WriteIRem, [SiFiveP800IEXQ2, SiFiveP800Div]> { // Remainder uses the same numbers as division.
+ let Latency = 35;
+ let ReleaseAtCycles = [1, 34];
+}
+def : WriteRes<WriteIRem32, [SiFiveP800IEXQ2, SiFiveP800Div]> {
+ let Latency = 20;
+ let ReleaseAtCycles = [1, 19];
+}
+
+// Bitmanip
+def : WriteRes<WriteRotateImm, [SiFiveP800IntArith]>; // Every write in this section uses the ALU group with no Latency override.
+def : WriteRes<WriteRotateImm32, [SiFiveP800IntArith]>;
+def : WriteRes<WriteRotateReg, [SiFiveP800IntArith]>;
+def : WriteRes<WriteRotateReg32, [SiFiveP800IntArith]>;
+
+def : WriteRes<WriteCLZ, [SiFiveP800IntArith]>; // Unlike cpop, clz/ctz are not routed to the multiply group.
+def : WriteRes<WriteCLZ32, [SiFiveP800IntArith]>;
+def : WriteRes<WriteCTZ, [SiFiveP800IntArith]>;
+def : WriteRes<WriteCTZ32, [SiFiveP800IntArith]>;
+
+def : WriteRes<WriteORCB, [SiFiveP800IntArith]>;
+def : WriteRes<WriteIMinMax, [SiFiveP800IntArith]>;
+
+def : WriteRes<WriteREV8, [SiFiveP800IntArith]>;
+
+def : WriteRes<WriteSHXADD, [SiFiveP800IntArith]>;
+def : WriteRes<WriteSHXADD32, [SiFiveP800IntArith]>;
+
+def : WriteRes<WriteSingleBit, [SiFiveP800IntArith]>;
+def : WriteRes<WriteSingleBitImm, [SiFiveP800IntArith]>;
+def : WriteRes<WriteBEXT, [SiFiveP800IntArith]>;
+def : WriteRes<WriteBEXTI, [SiFiveP800IntArith]>;
+
+// Memory
+def : WriteRes<WriteSTB, [SiFiveP800LDST]>; // Integer and FP stores use the two shared load/store ports, with no Latency override.
+def : WriteRes<WriteSTH, [SiFiveP800LDST]>;
+def : WriteRes<WriteSTW, [SiFiveP800LDST]>;
+def : WriteRes<WriteSTD, [SiFiveP800LDST]>;
+def : WriteRes<WriteFST16, [SiFiveP800LDST]>;
+def : WriteRes<WriteFST32, [SiFiveP800LDST]>;
+def : WriteRes<WriteFST64, [SiFiveP800LDST]>;
+
+let Latency = 4 in { // Byte/halfword integer loads; may use any of the three load-capable ports.
+def : WriteRes<WriteLDB, [SiFiveP800Load]>;
+def : WriteRes<WriteLDH, [SiFiveP800Load]>;
+}
+let Latency = 4 in { // Word/doubleword integer loads; same latency as the group above, kept as a separate group.
+def : WriteRes<WriteLDW, [SiFiveP800Load]>;
+def : WriteRes<WriteLDD, [SiFiveP800Load]>;
+}
+
+let Latency = 5 in { // FP loads are modelled one cycle longer than integer loads.
+def : WriteRes<WriteFLD16, [SiFiveP800Load]>;
+def : WriteRes<WriteFLD32, [SiFiveP800Load]>;
+def : WriteRes<WriteFLD64, [SiFiveP800Load]>;
+}
+
+// Atomic memory
+let Latency = 3 in { // NOTE(review): atomic loads get latency 3 here while plain integer loads use 4 - confirm this is intended.
+def : WriteRes<WriteAtomicSTW, [SiFiveP800LDST]>;
+def : WriteRes<WriteAtomicSTD, [SiFiveP800LDST]>;
+def : WriteRes<WriteAtomicW, [SiFiveP800LDST]>; // AMO writes use the shared load/store ports, like stores.
+def : WriteRes<WriteAtomicD, [SiFiveP800LDST]>;
+def : WriteRes<WriteAtomicLDW, [SiFiveP800Load]>; // Atomic loads may also use the load-only port.
+def : WriteRes<WriteAtomicLDD, [SiFiveP800Load]>;
+}
+
+// Floating point
+let Latency = 2 in { // FP add: 2 cycles for all three precisions.
+def : WriteRes<WriteFAdd16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFAdd32, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFAdd64, [SiFiveP800FloatArith]>;
+}
+let Latency = 3 in { // FP multiply: 3 cycles.
+def : WriteRes<WriteFMul16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFMul32, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFMul64, [SiFiveP800FloatArith]>;
+}
+let Latency = 4 in { // Fused multiply-add: 4 cycles.
+def : WriteRes<WriteFMA16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFMA32, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFMA64, [SiFiveP800FloatArith]>;
+}
+
+let Latency = 2 in { // Sign-injection and min/max: 2 cycles, on either FP queue.
+def : WriteRes<WriteFSGNJ16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFSGNJ32, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFSGNJ64, [SiFiveP800FloatArith]>;
+
+def : WriteRes<WriteFMinMax16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFMinMax32, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFMinMax64, [SiFiveP800FloatArith]>;
+}
+
+// Half precision.
+def : WriteRes<WriteFDiv16, [SiFiveP800FEXQ1, SiFiveP800FloatDiv]> { // fdiv/fsqrt issue on FEXQ1 only and also consume the FP divider.
+ let Latency = 4;
+ let ReleaseAtCycles = [1, 4]; // NOTE(review): divider entry equals Latency for fdiv, but Latency - 1 for fsqrt and integer div - confirm.
+}
+def : WriteRes<WriteFSqrt16, [SiFiveP800FEXQ1, SiFiveP800FloatDiv]> {
+ let Latency = 18;
+ let ReleaseAtCycles = [1, 17]; // FEXQ1 -> 1, divider -> Latency - 1.
+}
+
+// Single precision.
+def : WriteRes<WriteFDiv32, [SiFiveP800FEXQ1, SiFiveP800FloatDiv]> {
+ let Latency = 6;
+ let ReleaseAtCycles = [1, 6]; // Divider entry equals Latency (same pattern as the half-precision fdiv).
+}
+def : WriteRes<WriteFSqrt32, [SiFiveP800FEXQ1, SiFiveP800FloatDiv]> {
+ let Latency = 18; // Same numbers as the half-precision square root.
+ let ReleaseAtCycles = [1, 17];
+}
+
+// Double precision
+def : WriteRes<WriteFDiv64, [SiFiveP800FEXQ1, SiFiveP800FloatDiv]> {
+ let Latency = 11;
+ let ReleaseAtCycles = [1, 11]; // Divider entry equals Latency (same pattern as the other fdiv writes).
+}
+def : WriteRes<WriteFSqrt64, [SiFiveP800FEXQ1, SiFiveP800FloatDiv]> {
+ let Latency = 33;
+ let ReleaseAtCycles = [1, 32]; // FEXQ1 -> 1, divider -> Latency - 1.
+}
+
+// Conversions
+let Latency = 2 in { // Every conversion, classify, compare, move and FLI write below has latency 2.
+def : WriteRes<WriteFCvtI32ToF16, [SiFiveP800I2F]>; // Int-to-FP conversions use the I2F alias (IEXQ3).
+def : WriteRes<WriteFCvtI32ToF32, [SiFiveP800I2F]>;
+def : WriteRes<WriteFCvtI32ToF64, [SiFiveP800I2F]>;
+def : WriteRes<WriteFCvtI64ToF16, [SiFiveP800I2F]>;
+def : WriteRes<WriteFCvtI64ToF32, [SiFiveP800I2F]>;
+def : WriteRes<WriteFCvtI64ToF64, [SiFiveP800I2F]>;
+def : WriteRes<WriteFCvtF16ToI32, [SiFiveP800F2I]>; // FP-to-int conversions use the F2I alias (FEXQ0).
+def : WriteRes<WriteFCvtF16ToI64, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCvtF16ToF32, [SiFiveP800FloatArith]>; // FP-to-FP conversions may use either FP queue.
+def : WriteRes<WriteFCvtF16ToF64, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFCvtF32ToI32, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCvtF32ToI64, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCvtF32ToF16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFCvtF32ToF64, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFCvtF64ToI32, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCvtF64ToI64, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCvtF64ToF16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFCvtF64ToF32, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFRoundF16, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFRoundF32, [SiFiveP800FloatArith]>;
+def : WriteRes<WriteFRoundF64, [SiFiveP800FloatArith]>;
+
+def : WriteRes<WriteFClass16, [SiFiveP800F2I]>; // Classify and compare are routed through F2I (FEXQ0).
+def : WriteRes<WriteFClass32, [SiFiveP800F2I]>;
+def : WriteRes<WriteFClass64, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCmp16, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCmp32, [SiFiveP800F2I]>;
+def : WriteRes<WriteFCmp64, [SiFiveP800F2I]>;
+def : WriteRes<WriteFMovI16ToF16, [SiFiveP800I2F]>; // Register moves alternate between I2F and F2I by direction.
+def : WriteRes<WriteFMovF16ToI16, [SiFiveP800F2I]>;
+def : WriteRes<WriteFMovI32ToF32, [SiFiveP800I2F]>;
+def : WriteRes<WriteFMovF32ToI32, [SiFiveP800F2I]>;
+def : WriteRes<WriteFMovI64ToF64, [SiFiveP800I2F]>;
+def : WriteRes<WriteFMovF64ToI64, [SiFiveP800F2I]>;
+def : WriteRes<WriteFLI16, [SiFiveP800I2F]>; // FLI writes use the I2F alias as well.
+def : WriteRes<WriteFLI32, [SiFiveP800I2F]>;
+def : WriteRes<WriteFLI64, [SiFiveP800I2F]>;
+}
+
+// 6. Configuration-Setting Instructions
+def : WriteRes<WriteVSETVLI, [SiFiveP800SYS]>; // All three forms use the SYS alias (IEXQ1) with no Latency override.
+def : WriteRes<WriteVSETIVLI, [SiFiveP800SYS]>;
+def : WriteRes<WriteVSETVL, [SiFiveP800SYS]>;
+
+// 7. Vector Loads and Stores
+// FIXME: This unit is still being improved, currently
+// it is based on stage numbers. Estimates are optimistic,
+// latency may be longer.
+foreach mx = SchedMxList in { // One set of load writes per LMUL in SchedMxList.
+ defvar LMulLat = SiFiveP800GetLMulCycles<mx>.c; // Cycles the VLD resource is held; scales with LMUL.
+ defvar IsWorstCase = SiFiveP800IsWorstCaseMX<mx, SchedMxList>.c; // Set for the largest LMUL only.
+ let Latency = 8, ReleaseAtCycles = [LMulLat] in { // Unit-stride, mask and fault-only-first loads.
+ defm "" : LMULWriteResMX<"WriteVLDE", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDM", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [SiFiveP800VLD], mx, IsWorstCase>;
+ }
+ let Latency = 12, ReleaseAtCycles = [LMulLat] in { // Strided loads; latency does not depend on element width.
+ defm "" : LMULWriteResMX<"WriteVLDS8", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDS16", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDS32", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDS64", [SiFiveP800VLD], mx, IsWorstCase>;
+ }
+ let Latency = 12, ReleaseAtCycles = [LMulLat] in { // Indexed loads (unordered and ordered); same numbers as strided.
+ defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFiveP800VLD], mx, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxList in { // Store counterpart of the load loop: same latencies, bound to the VST resource.
+ defvar LMulLat = SiFiveP800GetLMulCycles<mx>.c; // Cycles the VST resource is held; scales with LMUL.
+ defvar IsWorstCase = SiFiveP800IsWorstCaseMX<mx, SchedMxList>.c; // Set for the largest LMUL only.
+ let Latency = 8, ReleaseAtCycles = [LMulLat] in { // Unit-stride and mask stores.
+ defm "" : LMULWriteResMX<"WriteVSTE", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTM", [SiFiveP800VST], mx, IsWorstCase>;
+ }
+ let Latency = 12, ReleaseAtCycles = [LMulLat] in { // Strided stores.
+ defm "" : LMULWriteResMX<"WriteVSTS8", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS16", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS32", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS64", [SiFiveP800VST], mx, IsWorstCase>;
+ }
+ let Latency = 12, ReleaseAtCycles = [LMulLat] in { // Indexed stores (unordered and ordered); same numbers as strided.
+ defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFiveP800VST], mx, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxList in { // Segmented loads/stores: one write per (LMUL, nf, eew) combination.
+ foreach nf=2-8 in { // nf only selects the write name; it does not enter the cycle computation below.
+ foreach eew = [8, 16, 32, 64] in {
+ defvar LMulLat = SiFiveP800GetCyclesSegmented<mx, eew>.c; // vl-based cycle count from mx and eew.
+ defvar IsWorstCase = SiFiveP800IsWorstCaseMX<mx, SchedMxList>.c; // Largest LMUL only; eew is not considered.
+ let Latency = !add(12, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in { // Loads: VLD is held for the full latency.
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFiveP800VLD], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFiveP800VLD], mx, IsWorstCase>;
+ }
+ let Latency = !add(1, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in { // Stores: NOTE(review) the release cycle (12 + c) exceeds Latency (1 + c) - confirm intended.
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFiveP800VST], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFiveP800VST], mx, IsWorstCase>;
+ }
+ }
+ }
+}
+
+// Whole register move/load/store
+foreach LMul = [1, 2, 4, 8] in { // LMul is spliced into the write name and doubles as the resource-hold cycle count.
+ let Latency = 8, ReleaseAtCycles = [LMul] in { // Same latency as the unit-stride vector loads/stores.
+ def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SiFiveP800VLD]>;
+ def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SiFiveP800VST]>;
+ }
+ let Latency = 2, ReleaseAtCycles = [LMul] in { // Whole-register moves may use either vector queue.
+ def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SiFiveP800VectorArith]>;
+ }
+}
+
+// 11. Vector Integer Arithmetic Instructions
+foreach mx = SchedMxList in {
+ defvar LMulLat = SiFiveP800GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP800IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVExtV", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP800VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP800VectorArith], mx, IsWorstCa...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/22785 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/18058 Here is the relevant piece of the build log for the reference
|
Fixed by b3c3297 |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/8/builds/15579 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/18637 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/18875 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/32328 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/16808 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/27949 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/19313 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/26335 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/92/builds/19111 Here is the relevant piece of the build log for the reference
|
The scheduling model for SiFive P800 series cores. They have 6 integer pipes, 2 floating point pipes, and 2 vector pipes. https://chipsandcheese.com/p/hot-chips-2023-sifives-p870-takes-risc-v-further The tests are meant to have the same coverage as its P600 counterpart.
The scheduling model for SiFive P800 series cores. They have 6 integer pipes, 2 floating point pipes, and 2 vector pipes. https://chipsandcheese.com/p/hot-chips-2023-sifives-p870-takes-risc-v-further The tests are meant to have the same coverage as its P600 counterpart.
The scheduling model for SiFive P800 series cores. They have 6 integer pipes, 2 floating point pipes, and 2 vector pipes. https://chipsandcheese.com/p/hot-chips-2023-sifives-p870-takes-risc-v-further The tests are meant to have the same coverage as its P600 counterpart.
The scheduling model for SiFive P800 series cores. They have 6 integer pipes, 2 floating point pipes, and 2 vector pipes.
https://chipsandcheese.com/p/hot-chips-2023-sifives-p870-takes-risc-v-further
The tests are meant to have the same coverage as its P600 counterpart.