Skip to content

Commit 8f81c60

Browse files
authored
[RISCV] Remove custom instruction selection for VFCVT_RM and friends (#72540)
We already have the pseudos for lowering these as MI nodes with rounding mode operands, and the generic FRM insertion pass. Doing the insertion later in the backend allows SSA-level passes to avoid reasoning about physical register copies, and happens to produce better code in practice. The latter is mostly an accident of our insertion order; we happen to place the frm write after the vsetvli, and it's very common for a register to be killed at the vsetvli. The end result is that we get slightly better scalar register allocation. I'm a bit unclear on the history here. I was surprised to find this code in ISEL lowering at all, but am also surprised once I found it that all the patterns and pseudos seem to already exist. My best guess is that maybe we didn't do all the possible cleanup after introducing the HasRoundMode mechanism?
1 parent f501659 commit 8f81c60

21 files changed

+491
-571
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -16252,47 +16252,6 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
1625216252
return TailMBB;
1625316253
}
1625416254

16255-
static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
16256-
unsigned Opcode) {
16257-
DebugLoc DL = MI.getDebugLoc();
16258-
16259-
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16260-
16261-
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16262-
Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16263-
16264-
assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
16265-
unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
16266-
16267-
// Update FRM and save the old value.
16268-
BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
16269-
.addImm(MI.getOperand(FRMIdx).getImm());
16270-
16271-
// Emit an VFCVT with the FRM == DYN
16272-
auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
16273-
16274-
for (unsigned I = 0; I < MI.getNumOperands(); I++)
16275-
if (I != FRMIdx)
16276-
MIB = MIB.add(MI.getOperand(I));
16277-
else
16278-
MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
16279-
16280-
MIB.add(MachineOperand::CreateReg(RISCV::FRM,
16281-
/*IsDef*/ false,
16282-
/*IsImp*/ true));
16283-
16284-
if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16285-
MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16286-
16287-
// Restore FRM.
16288-
BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
16289-
.addReg(SavedFRM, RegState::Kill);
16290-
16291-
// Erase the pseudoinstruction.
16292-
MI.eraseFromParent();
16293-
return BB;
16294-
}
16295-
1629616255
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
1629716256
MachineBasicBlock *BB,
1629816257
unsigned CVTXOpc,
@@ -16537,43 +16496,6 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1653716496
return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
1653816497
Subtarget);
1653916498

16540-
#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \
16541-
case RISCV::RMOpc##_##LMUL: \
16542-
return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \
16543-
case RISCV::RMOpc##_##LMUL##_MASK: \
16544-
return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
16545-
16546-
#define PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16547-
PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \
16548-
PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \
16549-
PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \
16550-
PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \
16551-
PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
16552-
16553-
#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \
16554-
PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16555-
PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
16556-
16557-
#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \
16558-
PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16559-
PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
16560-
16561-
// VFCVT
16562-
PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
16563-
PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
16564-
PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
16565-
PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
16566-
16567-
// VFWCVT
16568-
PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
16569-
PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
16570-
16571-
// VFNCVT
16572-
PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
16573-
PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
16574-
PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
16575-
PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
16576-
1657716499
case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
1657816500
return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
1657916501
RISCV::PseudoVFCVT_F_X_V_M1_MASK);

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,7 +1127,6 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass,
11271127
let HasSEWOp = 1;
11281128
let HasVecPolicyOp = 1;
11291129
let HasRoundModeOp = 1;
1130-
let usesCustomInserter = 1;
11311130
}
11321131

11331132
class VPseudoUnaryMask_FRM<VReg RetClass,
@@ -1147,7 +1146,6 @@ class VPseudoUnaryMask_FRM<VReg RetClass,
11471146
let HasVecPolicyOp = 1;
11481147
let UsesMaskPolicy = 1;
11491148
let HasRoundModeOp = 1;
1150-
let usesCustomInserter = 1;
11511149
}
11521150

11531151
class VPseudoUnaryNoMaskGPROut :

llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
1515
; CHECK-NEXT: vfabs.v v9, v8, v0.t
1616
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
1717
; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
18-
; CHECK-NEXT: fsrmi a0, 3
1918
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
19+
; CHECK-NEXT: fsrmi a0, 3
2020
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
2121
; CHECK-NEXT: fsrm a0
2222
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -59,8 +59,8 @@ define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
5959
; CHECK-NEXT: vfabs.v v9, v8, v0.t
6060
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
6161
; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
62-
; CHECK-NEXT: fsrmi a0, 3
6362
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
63+
; CHECK-NEXT: fsrmi a0, 3
6464
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
6565
; CHECK-NEXT: fsrm a0
6666
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -103,8 +103,8 @@ define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
103103
; CHECK-NEXT: vfabs.v v9, v8, v0.t
104104
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
105105
; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
106-
; CHECK-NEXT: fsrmi a0, 3
107106
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
107+
; CHECK-NEXT: fsrmi a0, 3
108108
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
109109
; CHECK-NEXT: fsrm a0
110110
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -148,8 +148,8 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
148148
; CHECK-NEXT: vfabs.v v12, v8, v0.t
149149
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
150150
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
151-
; CHECK-NEXT: fsrmi a0, 3
152151
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
152+
; CHECK-NEXT: fsrmi a0, 3
153153
; CHECK-NEXT: vmv1r.v v0, v10
154154
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
155155
; CHECK-NEXT: fsrm a0
@@ -194,8 +194,8 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
194194
; CHECK-NEXT: vfabs.v v16, v8, v0.t
195195
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
196196
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
197-
; CHECK-NEXT: fsrmi a0, 3
198197
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
198+
; CHECK-NEXT: fsrmi a0, 3
199199
; CHECK-NEXT: vmv1r.v v0, v12
200200
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
201201
; CHECK-NEXT: fsrm a0
@@ -240,8 +240,8 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
240240
; CHECK-NEXT: vfabs.v v24, v8, v0.t
241241
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
242242
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
243-
; CHECK-NEXT: fsrmi a0, 3
244243
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
244+
; CHECK-NEXT: fsrmi a0, 3
245245
; CHECK-NEXT: vmv1r.v v0, v16
246246
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
247247
; CHECK-NEXT: fsrm a0
@@ -285,8 +285,8 @@ define <vscale x 1 x float> @vp_ceil_vv_nxv1f32(<vscale x 1 x float> %va, <vscal
285285
; CHECK-NEXT: fmv.w.x fa5, a0
286286
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
287287
; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
288-
; CHECK-NEXT: fsrmi a0, 3
289288
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
289+
; CHECK-NEXT: fsrmi a0, 3
290290
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
291291
; CHECK-NEXT: fsrm a0
292292
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -329,8 +329,8 @@ define <vscale x 2 x float> @vp_ceil_vv_nxv2f32(<vscale x 2 x float> %va, <vscal
329329
; CHECK-NEXT: fmv.w.x fa5, a0
330330
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
331331
; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
332-
; CHECK-NEXT: fsrmi a0, 3
333332
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
333+
; CHECK-NEXT: fsrmi a0, 3
334334
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
335335
; CHECK-NEXT: fsrm a0
336336
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -374,8 +374,8 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
374374
; CHECK-NEXT: fmv.w.x fa5, a0
375375
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
376376
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
377-
; CHECK-NEXT: fsrmi a0, 3
378377
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
378+
; CHECK-NEXT: fsrmi a0, 3
379379
; CHECK-NEXT: vmv1r.v v0, v10
380380
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
381381
; CHECK-NEXT: fsrm a0
@@ -420,8 +420,8 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
420420
; CHECK-NEXT: fmv.w.x fa5, a0
421421
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
422422
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
423-
; CHECK-NEXT: fsrmi a0, 3
424423
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
424+
; CHECK-NEXT: fsrmi a0, 3
425425
; CHECK-NEXT: vmv1r.v v0, v12
426426
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
427427
; CHECK-NEXT: fsrm a0
@@ -466,8 +466,8 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vs
466466
; CHECK-NEXT: fmv.w.x fa5, a0
467467
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
468468
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
469-
; CHECK-NEXT: fsrmi a0, 3
470469
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
470+
; CHECK-NEXT: fsrmi a0, 3
471471
; CHECK-NEXT: vmv1r.v v0, v16
472472
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
473473
; CHECK-NEXT: fsrm a0
@@ -511,8 +511,8 @@ define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vsc
511511
; CHECK-NEXT: vfabs.v v9, v8, v0.t
512512
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
513513
; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
514-
; CHECK-NEXT: fsrmi a0, 3
515514
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
515+
; CHECK-NEXT: fsrmi a0, 3
516516
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
517517
; CHECK-NEXT: fsrm a0
518518
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -556,8 +556,8 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vsc
556556
; CHECK-NEXT: vfabs.v v12, v8, v0.t
557557
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
558558
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
559-
; CHECK-NEXT: fsrmi a0, 3
560559
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
560+
; CHECK-NEXT: fsrmi a0, 3
561561
; CHECK-NEXT: vmv1r.v v0, v10
562562
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
563563
; CHECK-NEXT: fsrm a0
@@ -602,8 +602,8 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vsc
602602
; CHECK-NEXT: vfabs.v v16, v8, v0.t
603603
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
604604
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
605-
; CHECK-NEXT: fsrmi a0, 3
606605
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
606+
; CHECK-NEXT: fsrmi a0, 3
607607
; CHECK-NEXT: vmv1r.v v0, v12
608608
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
609609
; CHECK-NEXT: fsrm a0
@@ -648,8 +648,8 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vsc
648648
; CHECK-NEXT: vfabs.v v24, v8, v0.t
649649
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
650650
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
651-
; CHECK-NEXT: fsrmi a0, 3
652651
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
652+
; CHECK-NEXT: fsrmi a0, 3
653653
; CHECK-NEXT: vmv1r.v v0, v16
654654
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
655655
; CHECK-NEXT: fsrm a0
@@ -694,8 +694,8 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vsc
694694
; CHECK-NEXT: vfabs.v v24, v8, v0.t
695695
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
696696
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
697-
; CHECK-NEXT: fsrmi a0, 3
698697
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
698+
; CHECK-NEXT: fsrmi a0, 3
699699
; CHECK-NEXT: vmv1r.v v0, v16
700700
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
701701
; CHECK-NEXT: fsrm a0
@@ -758,8 +758,8 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
758758
; CHECK-NEXT: vfabs.v v8, v16, v0.t
759759
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
760760
; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t
761-
; CHECK-NEXT: fsrmi a2, 3
762761
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
762+
; CHECK-NEXT: fsrmi a2, 3
763763
; CHECK-NEXT: vmv1r.v v0, v25
764764
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
765765
; CHECK-NEXT: fsrm a2
@@ -782,8 +782,8 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
782782
; CHECK-NEXT: vfabs.v v16, v8, v0.t
783783
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
784784
; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t
785-
; CHECK-NEXT: fsrmi a0, 3
786785
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
786+
; CHECK-NEXT: fsrmi a0, 3
787787
; CHECK-NEXT: vmv1r.v v0, v24
788788
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
789789
; CHECK-NEXT: fsrm a0

0 commit comments

Comments
 (0)