Commit 6b2e511

[RISCV] Implement isHighLatencyDef() (#127476)

isHighLatencyDef() returns true for div/rem/sqrt/... operations. This serves as an alternative when the subtarget does not provide a generic scheduling model.

1 parent 6ba34f9 commit 6b2e511
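For context, TargetInstrInfo declares isHighLatencyDef() as a virtual hook whose default answer is "not high latency", so a target override like the one in this patch is what lets scheduling heuristics treat these opcodes as slow. The sketch below is illustrative only: TargetInstrInfoSketch and pickNext are hypothetical names, not LLVM code, and the consumer merely shows one way such a hook could steer a heuristic when no scheduling model is available.

#include <cstddef>
#include <vector>

// Minimal sketch of the hook's shape. The real hook lives on
// llvm::TargetInstrInfo and defaults to false; targets such as RISC-V
// override it to flag div/rem/sqrt-like definitions as long latency.
struct TargetInstrInfoSketch {
  virtual ~TargetInstrInfoSketch() = default;
  // Return true if this opcode has high latency to its result.
  virtual bool isHighLatencyDef(int Opc) const { return false; }
};

// Hypothetical consumer (not LLVM's scheduler): from a non-empty ready list,
// prefer issuing a high-latency def first so independent instructions can
// execute while its result is still in flight.
static std::size_t pickNext(const TargetInstrInfoSketch &TII,
                            const std::vector<int> &ReadyOpcodes) {
  for (std::size_t I = 0; I < ReadyOpcodes.size(); ++I)
    if (TII.isHighLatencyDef(ReadyOpcodes[I]))
      return I;
  return 0; // no high-latency def is ready; fall back to the first entry
}

In the override added below, note that RISCV::getRVVMCOpcode() is first used to map RVV pseudo opcodes back to their MC opcodes, which is what lets vector pseudo instructions match the VDIV/VREM/VFDIV/VFSQRT cases.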

File tree

8 files changed, +161 / -121 lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 50 additions & 0 deletions

@@ -4336,3 +4336,53 @@ RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
 
   return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);
 }
+
+// FIXME: We should remove this if we have a default generic scheduling model.
+bool RISCVInstrInfo::isHighLatencyDef(int Opc) const {
+  unsigned RVVMCOpcode = RISCV::getRVVMCOpcode(Opc);
+  Opc = RVVMCOpcode ? RVVMCOpcode : Opc;
+  switch (Opc) {
+  default:
+    return false;
+  // Integer div/rem.
+  case RISCV::DIV:
+  case RISCV::DIVW:
+  case RISCV::DIVU:
+  case RISCV::DIVUW:
+  case RISCV::REM:
+  case RISCV::REMW:
+  case RISCV::REMU:
+  case RISCV::REMUW:
+  // Floating-point div/sqrt.
+  case RISCV::FDIV_H:
+  case RISCV::FDIV_S:
+  case RISCV::FDIV_D:
+  case RISCV::FDIV_H_INX:
+  case RISCV::FDIV_S_INX:
+  case RISCV::FDIV_D_INX:
+  case RISCV::FDIV_D_IN32X:
+  case RISCV::FSQRT_H:
+  case RISCV::FSQRT_S:
+  case RISCV::FSQRT_D:
+  case RISCV::FSQRT_H_INX:
+  case RISCV::FSQRT_S_INX:
+  case RISCV::FSQRT_D_INX:
+  case RISCV::FSQRT_D_IN32X:
+  // Vector integer div/rem
+  case RISCV::VDIV_VV:
+  case RISCV::VDIV_VX:
+  case RISCV::VDIVU_VV:
+  case RISCV::VDIVU_VX:
+  case RISCV::VREM_VV:
+  case RISCV::VREM_VX:
+  case RISCV::VREMU_VV:
+  case RISCV::VREMU_VX:
+  // Vector floating-point div/sqrt.
+  case RISCV::VFDIV_VV:
+  case RISCV::VFDIV_VF:
+  case RISCV::VFRDIV_VF:
+  case RISCV::VFSQRT_V:
+  case RISCV::VFRSQRT7_V:
+    return true;
+  }
+}

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 0 deletions

@@ -300,6 +300,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
   analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
 
+  bool isHighLatencyDef(int Opc) const override;
+
 protected:
   const RISCVSubtarget &STI;
 

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll

Lines changed: 11 additions & 11 deletions

@@ -894,18 +894,18 @@ define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
 ; CHECK-LABEL: vwmul_v2i16_multiuse:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vle8.v v10, (a2)
-; CHECK-NEXT: vle8.v v11, (a3)
-; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle8.v v9, (a2)
+; CHECK-NEXT: vsext.vf2 v10, v8
 ; CHECK-NEXT: vsext.vf2 v8, v9
-; CHECK-NEXT: vsext.vf2 v9, v10
-; CHECK-NEXT: vsext.vf2 v10, v11
-; CHECK-NEXT: vmul.vv v11, v12, v10
-; CHECK-NEXT: vmul.vv v10, v8, v10
-; CHECK-NEXT: vdivu.vv v8, v8, v9
-; CHECK-NEXT: vor.vv v9, v11, v10
+; CHECK-NEXT: vdivu.vv v8, v10, v8
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v11, (a3)
+; CHECK-NEXT: vsext.vf2 v12, v9
+; CHECK-NEXT: vsext.vf2 v9, v11
+; CHECK-NEXT: vmul.vv v11, v12, v9
+; CHECK-NEXT: vmul.vv v9, v10, v9
+; CHECK-NEXT: vor.vv v9, v11, v9
 ; CHECK-NEXT: vor.vv v8, v9, v8
 ; CHECK-NEXT: ret
   %a = load <2 x i8>, ptr %x

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 6 additions & 6 deletions

@@ -1564,8 +1564,8 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT: .LBB27_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vl1re32.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
+; CHECK-NEXT: sub a6, a6, a3
 ; CHECK-NEXT: vs1r.v v8, (a5)
 ; CHECK-NEXT: add a5, a5, a1
 ; CHECK-NEXT: bnez a6, .LBB27_3
@@ -1654,8 +1654,8 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT: .LBB28_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vl1re32.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
+; CHECK-NEXT: sub a6, a6, a3
 ; CHECK-NEXT: vs1r.v v8, (a5)
 ; CHECK-NEXT: add a5, a5, a1
 ; CHECK-NEXT: bnez a6, .LBB28_3
@@ -2504,8 +2504,8 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT: .LBB42_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vl2re32.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vdivu.vx v8, v8, a1
+; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vs2r.v v8, (a6)
 ; CHECK-NEXT: add a6, a6, a5
 ; CHECK-NEXT: bnez a7, .LBB42_3
@@ -2595,8 +2595,8 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT: .LBB43_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vl2re32.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vdiv.vx v8, v8, a1
+; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vs2r.v v8, (a6)
 ; CHECK-NEXT: add a6, a6, a5
 ; CHECK-NEXT: bnez a7, .LBB43_3
@@ -2686,8 +2686,8 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT: .LBB44_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vl2re32.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vremu.vx v8, v8, a1
+; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vs2r.v v8, (a6)
 ; CHECK-NEXT: add a6, a6, a5
 ; CHECK-NEXT: bnez a7, .LBB44_3
@@ -2777,8 +2777,8 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT: .LBB45_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vl2re32.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vrem.vx v8, v8, a1
+; CHECK-NEXT: sub a7, a7, a3
 ; CHECK-NEXT: vs2r.v v8, (a6)
 ; CHECK-NEXT: add a6, a6, a5
 ; CHECK-NEXT: bnez a7, .LBB45_3

llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll

Lines changed: 54 additions & 34 deletions

@@ -221,16 +221,16 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v0, v0, v8
+; CHECK-NEXT: vfdiv.vv v16, v0, v16
 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vfdiv.vv v24, v0, v24
 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
 ; CHECK-NEXT: add sp, sp, a0
@@ -249,32 +249,42 @@ define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; CHECK-NEXT: fmv.x.h a0, fa0
 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT: addi a1, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.v.x v16, a0
 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v0, v8, v0
+; CHECK-NEXT: vfdiv.vv v24, v16, v0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v24, v16
+; CHECK-NEXT: vfdiv.vv v16, v0, v8
 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: .cfi_def_cfa sp, 16
 ; CHECK-NEXT: addi sp, sp, 16
@@ -573,16 +583,16 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v0, v0, v8
+; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v16
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfdiv.vv v24, v0, v24
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
 ; ZVFHMIN-NEXT: csrr a0, vlenb
 ; ZVFHMIN-NEXT: slli a0, a0, 3
 ; ZVFHMIN-NEXT: add sp, sp, a0
@@ -607,32 +617,42 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT: addi sp, sp, -16
 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
 ; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; ZVFHMIN-NEXT: fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT: addi a1, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v8, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v0, v8, v0
+; ZVFHMIN-NEXT: vfdiv.vv v24, v16, v0
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16
+; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
 ; ZVFHMIN-NEXT: add sp, sp, a0
 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT: addi sp, sp, 16
