Skip to content

[RISCV] Refine vector division latencies in SiFive P600's scheduling model #115038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
Original file line number Diff line number Diff line change
Expand Up @@ -443,12 +443,16 @@ foreach mx = SchedMxListW in {
}
}

// Worst case needs 64 cycles if SEW is equal to 64.
// Worst case needs 51/45/42/72 * LMUL cycles for i8/16/32/64.
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
let Latency = 64, ReleaseAtCycles = [LMulLat, !mul(63, LMulLat)] in {
defvar DivMicroOpLat =
!cond(!eq(sew, 8): 51, !eq(sew, 16): 45, !eq(sew, 32): 42,
/* SEW=64 */ true: 72);
defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
}
Expand Down Expand Up @@ -583,12 +587,15 @@ foreach mx = SchedMxListFW in {
}
}

// Worst case needs 76 cycles if SEW is equal to 64.
// Worst case needs around 29/25/37 * LMUL cycles for f16/32/64.
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
let Latency = 76, ReleaseAtCycles = [LMulLat, !mul(76, LMulLat)] in {
defvar DivMicroOpLat =
!cond(!eq(sew, 16): 29, !eq(sew, 32): 25, /* SEW=64 */ true: 37);
defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
Expand Down
Loading
Loading