Skip to content

Commit 7ef7c0d

Browse files
mshockwave and Yeting Kuo authored
[RISCV] Refine vector division latencies in SiFive P600's scheduling model (#115038)
For both vector integer and floating point divisions. Co-authored-by: Yeting Kuo <[email protected]>
1 parent a6637ae commit 7ef7c0d

File tree

2 files changed

+1023
-4
lines changed

2 files changed

+1023
-4
lines changed

llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -443,12 +443,16 @@ foreach mx = SchedMxListW in {
443443
}
444444
}
445445

446-
// Worst case needs 64 cycles if SEW is equal to 64.
446+
// Worst case needs 51/45/42/72 * LMUL cycles for i8/16/32/64.
447447
foreach mx = SchedMxList in {
448448
foreach sew = SchedSEWSet<mx>.val in {
449449
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
450450
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
451-
let Latency = 64, ReleaseAtCycles = [LMulLat, !mul(63, LMulLat)] in {
451+
defvar DivMicroOpLat =
452+
!cond(!eq(sew, 8): 51, !eq(sew, 16): 45, !eq(sew, 32): 42,
453+
/* SEW=64 */ true: 72);
454+
defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
455+
let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
452456
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
453457
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
454458
}
@@ -583,12 +587,15 @@ foreach mx = SchedMxListFW in {
583587
}
584588
}
585589

586-
// Worst case needs 76 cycles if SEW is equal to 64.
590+
// Worst case needs around 29/25/37 * LMUL cycles for f16/32/64.
587591
foreach mx = SchedMxListF in {
588592
foreach sew = SchedSEWSet<mx, 1>.val in {
589593
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
590594
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
591-
let Latency = 76, ReleaseAtCycles = [LMulLat, !mul(76, LMulLat)] in {
595+
defvar DivMicroOpLat =
596+
!cond(!eq(sew, 16): 29, !eq(sew, 32): 25, /* SEW=64 */ true: 37);
597+
defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
598+
let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
592599
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
593600
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
594601
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;

0 commit comments

Comments
 (0)