
Commit ac20135

[RISCV] Rematerialize vid.v (#97520)
This adds initial support for rematerializing vector instructions, starting with vid.v since it is simple and has the fewest operands. It has one passthru operand, which we need to check is undefined. It also has an AVL operand, but rematerializing with it is fine because the AVL is scalar and register allocation is split between vector and scalar registers. RISCVInsertVSETVLI can still run before vector regalloc if -riscv-vsetvl-after-rvv-regalloc is false, so this makes sure we only rematerialize when vsetvli insertion happens after regalloc, by checking for the implicit vl/vtype uses that the pass adds.
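
As a rough illustration of the effect (distilled from the remat.ll test added below; the register choice, the elided vsetvli, and the surrounding code are illustrative, not verbatim llc output), rematerializing vid.v avoids stack traffic across a high-pressure region:

    # Without rematerialization: keeping the vid.v result live across the
    # high-pressure region forces something else to be spilled and reloaded.
    vid.v    v8
    vs8r.v   v16, (a1)          # spill to a stack slot created in the prologue
    ...                         # all of v8-v31 are needed here
    vl8r.v   v16, (a1)          # reload
    vs8r.v   v8, (a0)           # the vid.v result was kept in v8 throughout

    # With rematerialization: the result is simply recomputed where it is
    # needed, so no stack slot is required.
    vid.v    v8
    ...                         # all of v8-v31 are needed here
    vid.v    v8                 # rematerialized
    vs8r.v   v8, (a0)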

4 files changed: +126 -0 lines changed


llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 12 additions & 0 deletions
@@ -166,6 +166,18 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   return 0;
 }
 
+bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
+    const MachineInstr &MI) const {
+  if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
+      MI.getOperand(1).isUndef() &&
+      /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl and
+         vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
+         i.e. -riscv-vsetvl-after-rvv-regalloc=true */
+      !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
+    return true;
+  return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
+}
+
 static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                         unsigned NumRegs) {
   return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 0 deletions
@@ -76,6 +76,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
                               unsigned &MemBytes) const override;
 
+  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
+
   void copyPhysRegVector(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                          MCRegister DstReg, MCRegister SrcReg, bool KillSrc,

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 1 addition & 0 deletions
@@ -6629,6 +6629,7 @@ defm PseudoVIOTA_M: VPseudoVIOTA_M;
 //===----------------------------------------------------------------------===//
 // 15.9. Vector Element Index Instruction
 //===----------------------------------------------------------------------===//
+let isReMaterializable = 1 in
 defm PseudoVID : VPseudoVID_V;
 } // Predicates = [HasVInstructions]
 
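
For context on how the two pieces interact: the isReMaterializable bit only marks PseudoVID as a candidate, and the generic rematerialization machinery then asks the target hook added in RISCVInstrInfo.cpp for the final answer. A simplified paraphrase of that gating (a sketch, not the verbatim TargetInstrInfo code):

    // Simplified sketch of the generic gating logic, not the upstream
    // implementation: both the TableGen flag and the target hook must agree.
    bool isTriviallyReMaterializable(const MachineInstr &MI) const {
      // The MCInstrDesc bit set by `let isReMaterializable = 1` above.
      if (!MI.getDesc().isRematerializable())
        return false;
      // The target hook, which for PseudoVID requires an undef passthru and
      // no implicit VTYPE use (i.e. RISCVInsertVSETVLI has not run yet).
      return isReallyTriviallyReMaterializable(MI);
    }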

llvm/test/CodeGen/RISCV/rvv/remat.ll

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,POSTRA
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,PRERA
+
+define void @vid(ptr %p) {
+; POSTRA-LABEL: vid:
+; POSTRA:       # %bb.0:
+; POSTRA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; POSTRA-NEXT:    vid.v v8
+; POSTRA-NEXT:    vs8r.v v8, (a0)
+; POSTRA-NEXT:    vl8re64.v v16, (a0)
+; POSTRA-NEXT:    vl8re64.v v24, (a0)
+; POSTRA-NEXT:    vl8re64.v v0, (a0)
+; POSTRA-NEXT:    vl8re64.v v8, (a0)
+; POSTRA-NEXT:    vs8r.v v8, (a0)
+; POSTRA-NEXT:    vs8r.v v0, (a0)
+; POSTRA-NEXT:    vs8r.v v24, (a0)
+; POSTRA-NEXT:    vs8r.v v16, (a0)
+; POSTRA-NEXT:    vid.v v8
+; POSTRA-NEXT:    vs8r.v v8, (a0)
+; POSTRA-NEXT:    ret
+;
+; PRERA-LABEL: vid:
+; PRERA:       # %bb.0:
+; PRERA-NEXT:    addi sp, sp, -16
+; PRERA-NEXT:    .cfi_def_cfa_offset 16
+; PRERA-NEXT:    csrr a1, vlenb
+; PRERA-NEXT:    slli a1, a1, 3
+; PRERA-NEXT:    sub sp, sp, a1
+; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; PRERA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; PRERA-NEXT:    vid.v v8
+; PRERA-NEXT:    vs8r.v v8, (a0)
+; PRERA-NEXT:    vl8re64.v v16, (a0)
+; PRERA-NEXT:    addi a1, sp, 16
+; PRERA-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; PRERA-NEXT:    vl8re64.v v24, (a0)
+; PRERA-NEXT:    vl8re64.v v0, (a0)
+; PRERA-NEXT:    vl8re64.v v16, (a0)
+; PRERA-NEXT:    vs8r.v v16, (a0)
+; PRERA-NEXT:    vs8r.v v0, (a0)
+; PRERA-NEXT:    vs8r.v v24, (a0)
+; PRERA-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; PRERA-NEXT:    vs8r.v v16, (a0)
+; PRERA-NEXT:    vs8r.v v8, (a0)
+; PRERA-NEXT:    csrr a0, vlenb
+; PRERA-NEXT:    slli a0, a0, 3
+; PRERA-NEXT:    add sp, sp, a0
+; PRERA-NEXT:    addi sp, sp, 16
+; PRERA-NEXT:    ret
+  %vid = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(<vscale x 8 x i64> poison, i64 -1)
+  store volatile <vscale x 8 x i64> %vid, ptr %p
+
+  %a = load volatile <vscale x 8 x i64>, ptr %p
+  %b = load volatile <vscale x 8 x i64>, ptr %p
+  %c = load volatile <vscale x 8 x i64>, ptr %p
+  %d = load volatile <vscale x 8 x i64>, ptr %p
+  store volatile <vscale x 8 x i64> %d, ptr %p
+  store volatile <vscale x 8 x i64> %c, ptr %p
+  store volatile <vscale x 8 x i64> %b, ptr %p
+  store volatile <vscale x 8 x i64> %a, ptr %p
+
+  store volatile <vscale x 8 x i64> %vid, ptr %p
+  ret void
+}
+
+
+define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
+; CHECK-LABEL: vid_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    vsetivli zero, 1, e64, m8, tu, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %vid = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(<vscale x 8 x i64> %v, i64 1)
+  store volatile <vscale x 8 x i64> %vid, ptr %p
+
+  %a = load volatile <vscale x 8 x i64>, ptr %p
+  %b = load volatile <vscale x 8 x i64>, ptr %p
+  %c = load volatile <vscale x 8 x i64>, ptr %p
+  %d = load volatile <vscale x 8 x i64>, ptr %p
+  store volatile <vscale x 8 x i64> %d, ptr %p
+  store volatile <vscale x 8 x i64> %c, ptr %p
+  store volatile <vscale x 8 x i64> %b, ptr %p
+  store volatile <vscale x 8 x i64> %a, ptr %p
+
+  store volatile <vscale x 8 x i64> %vid, ptr %p
+  ret void
+}
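
To reproduce the two configurations outside of lit, the RUN lines above can be invoked directly (a sketch; it assumes a built llc is on PATH and the test has been saved as remat.ll):

    llc < remat.ll -mtriple=riscv64 -mattr=+v -verify-machineinstrs
    llc < remat.ll -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs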
