Skip to content

Commit 5f94992

Browse files
authored
[RISCV] isLoadFromStackSlot and isStoreToStackSlot for vector spill/fill (#132296)
This is an adapted version of arsenm's #120524. The intention of the change is to enable dead stack slot copy elimination in StackSlotColoring for vector loads and stores. In terms of testing, see stack-slot-coloring.mir. This has little impact on in-tree tests otherwise. This change has a different and smaller set of test diffs than @arsenm's patch because I'm using scalable sizes for the LMULs, not a single signal value. His patch allowed vector load/store pairs of different width to be deleted, mine does not. There's also simply been a lot of churn in regalloc behavior on these particular tests recently, so that may explain some of the diff as well.
1 parent f60bec9 commit 5f94992

File tree

3 files changed

+59
-38
lines changed

3 files changed

+59
-38
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,37 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
9999
return isLoadFromStackSlot(MI, FrameIndex, Dummy);
100100
}
101101

102+
static std::optional<unsigned> getLMULForRVVWholeLoadStore(unsigned Opcode) {
103+
switch (Opcode) {
104+
default:
105+
return std::nullopt;
106+
case RISCV::VS1R_V:
107+
case RISCV::VL1RE8_V:
108+
case RISCV::VL1RE16_V:
109+
case RISCV::VL1RE32_V:
110+
case RISCV::VL1RE64_V:
111+
return 1;
112+
case RISCV::VS2R_V:
113+
case RISCV::VL2RE8_V:
114+
case RISCV::VL2RE16_V:
115+
case RISCV::VL2RE32_V:
116+
case RISCV::VL2RE64_V:
117+
return 2;
118+
case RISCV::VS4R_V:
119+
case RISCV::VL4RE8_V:
120+
case RISCV::VL4RE16_V:
121+
case RISCV::VL4RE32_V:
122+
case RISCV::VL4RE64_V:
123+
return 4;
124+
case RISCV::VS8R_V:
125+
case RISCV::VL8RE8_V:
126+
case RISCV::VL8RE16_V:
127+
case RISCV::VL8RE32_V:
128+
case RISCV::VL8RE64_V:
129+
return 8;
130+
}
131+
}
132+
102133
Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
103134
int &FrameIndex,
104135
TypeSize &MemBytes) const {
@@ -125,6 +156,17 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
125156
case RISCV::FLD:
126157
MemBytes = TypeSize::getFixed(8);
127158
break;
159+
case RISCV::VL1RE8_V:
160+
case RISCV::VL2RE8_V:
161+
case RISCV::VL4RE8_V:
162+
case RISCV::VL8RE8_V:
163+
if (!MI.getOperand(1).isFI())
164+
return Register();
165+
FrameIndex = MI.getOperand(1).getIndex();
166+
unsigned BytesPerBlock = RISCV::RVVBitsPerBlock / 8;
167+
unsigned LMUL = *getLMULForRVVWholeLoadStore(MI.getOpcode());
168+
MemBytes = TypeSize::getScalable(BytesPerBlock * LMUL);
169+
return MI.getOperand(0).getReg();
128170
}
129171

130172
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
@@ -165,6 +207,17 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
165207
case RISCV::FSD:
166208
MemBytes = TypeSize::getFixed(8);
167209
break;
210+
case RISCV::VS1R_V:
211+
case RISCV::VS2R_V:
212+
case RISCV::VS4R_V:
213+
case RISCV::VS8R_V:
214+
if (!MI.getOperand(1).isFI())
215+
return Register();
216+
FrameIndex = MI.getOperand(1).getIndex();
217+
unsigned BytesPerBlock = RISCV::RVVBitsPerBlock / 8;
218+
unsigned LMUL = *getLMULForRVVWholeLoadStore(MI.getOpcode());
219+
MemBytes = TypeSize::getScalable(BytesPerBlock * LMUL);
220+
return MI.getOperand(0).getReg();
168221
}
169222

170223
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
@@ -4071,40 +4124,12 @@ bool RISCV::isZEXT_B(const MachineInstr &MI) {
40714124
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
40724125
}
40734126

4074-
static bool isRVVWholeLoadStore(unsigned Opcode) {
4075-
switch (Opcode) {
4076-
default:
4077-
return false;
4078-
case RISCV::VS1R_V:
4079-
case RISCV::VS2R_V:
4080-
case RISCV::VS4R_V:
4081-
case RISCV::VS8R_V:
4082-
case RISCV::VL1RE8_V:
4083-
case RISCV::VL2RE8_V:
4084-
case RISCV::VL4RE8_V:
4085-
case RISCV::VL8RE8_V:
4086-
case RISCV::VL1RE16_V:
4087-
case RISCV::VL2RE16_V:
4088-
case RISCV::VL4RE16_V:
4089-
case RISCV::VL8RE16_V:
4090-
case RISCV::VL1RE32_V:
4091-
case RISCV::VL2RE32_V:
4092-
case RISCV::VL4RE32_V:
4093-
case RISCV::VL8RE32_V:
4094-
case RISCV::VL1RE64_V:
4095-
case RISCV::VL2RE64_V:
4096-
case RISCV::VL4RE64_V:
4097-
case RISCV::VL8RE64_V:
4098-
return true;
4099-
}
4100-
}
4101-
41024127
bool RISCV::isRVVSpill(const MachineInstr &MI) {
41034128
// RVV lacks any support for immediate addressing for stack addresses, so be
41044129
// conservative.
41054130
unsigned Opcode = MI.getOpcode();
41064131
if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
4107-
!isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
4132+
!getLMULForRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
41084133
return false;
41094134
return true;
41104135
}

llvm/test/CodeGen/RISCV/rvv/expandload.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,16 +273,16 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8
273273
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, mu
274274
; CHECK-RV32-NEXT: viota.m v24, v0
275275
; CHECK-RV32-NEXT: csrr a0, vlenb
276-
; CHECK-RV32-NEXT: li a1, 24
277-
; CHECK-RV32-NEXT: mul a0, a0, a1
276+
; CHECK-RV32-NEXT: slli a0, a0, 4
278277
; CHECK-RV32-NEXT: add a0, sp, a0
279278
; CHECK-RV32-NEXT: addi a0, a0, 16
280-
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
279+
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
281280
; CHECK-RV32-NEXT: csrr a0, vlenb
282-
; CHECK-RV32-NEXT: slli a0, a0, 4
281+
; CHECK-RV32-NEXT: li a1, 24
282+
; CHECK-RV32-NEXT: mul a0, a0, a1
283283
; CHECK-RV32-NEXT: add a0, sp, a0
284284
; CHECK-RV32-NEXT: addi a0, a0, 16
285-
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
285+
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
286286
; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
287287
; CHECK-RV32-NEXT: csrr a0, vlenb
288288
; CHECK-RV32-NEXT: li a1, 24

llvm/test/CodeGen/RISCV/rvv/stack-slot-coloring.mir

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,6 @@ body: |
5151
; CHECK-NEXT: VS1R_V killed renamable $v31, %stack.1 :: (store unknown-size into %stack.1, align 8)
5252
; CHECK-NEXT: renamable $v31 = VL1RE8_V %stack.0 :: (volatile load unknown-size, align 1)
5353
; CHECK-NEXT: VS1R_V killed renamable $v31, %stack.0 :: (volatile store unknown-size, align 1)
54-
; CHECK-NEXT: renamable $v31 = VL1RE8_V %stack.1 :: (load unknown-size from %stack.1, align 8)
55-
; CHECK-NEXT: VS1R_V killed renamable $v31, %stack.1 :: (store unknown-size into %stack.1, align 8)
5654
; CHECK-NEXT: renamable $v31 = VL1RE8_V %stack.0 :: (volatile load unknown-size, align 1)
5755
; CHECK-NEXT: VS1R_V killed renamable $v31, %stack.0 :: (volatile store unknown-size, align 1)
5856
; CHECK-NEXT: renamable $v31 = VL1RE8_V %stack.1 :: (load unknown-size from %stack.1, align 8)
@@ -214,8 +212,6 @@ body: |
214212
; CHECK-NEXT: VS2R_V killed renamable $v30m2, %stack.1 :: (store unknown-size into %stack.1, align 8)
215213
; CHECK-NEXT: renamable $v30m2 = VL2RE8_V %stack.0 :: (volatile load unknown-size, align 1)
216214
; CHECK-NEXT: VS2R_V killed renamable $v30m2, %stack.0 :: (volatile store unknown-size, align 1)
217-
; CHECK-NEXT: renamable $v30m2 = VL2RE8_V %stack.1 :: (load unknown-size from %stack.1, align 8)
218-
; CHECK-NEXT: VS2R_V killed renamable $v30m2, %stack.1 :: (store unknown-size into %stack.1, align 8)
219215
; CHECK-NEXT: renamable $v30m2 = VL2RE8_V %stack.0 :: (volatile load unknown-size, align 1)
220216
; CHECK-NEXT: VS2R_V killed renamable $v30m2, %stack.0 :: (volatile store unknown-size, align 1)
221217
; CHECK-NEXT: renamable $v30m2 = VL2RE8_V %stack.1 :: (load unknown-size from %stack.1, align 8)

0 commit comments

Comments
 (0)