Skip to content

Commit 91299d4

Browse files
committed
[RISCV] Enable load clustering in SelectionDAG scheduler
1 parent 1a46b84 commit 91299d4

30 files changed

+1388
-1296
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2194,6 +2194,71 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
21942194
.setMIFlags(MemI.getFlags());
21952195
}
21962196

2197+
/// Decide whether two selected load nodes read from the same base pointer.
///
/// Both nodes must already be machine nodes whose opcode is one of the scalar
/// integer / floating-point loads accepted by IsLoadOpcode below. Operand 0 is
/// compared as the base address, operand 1 is read as the offset and operand 2
/// is compared as the chain.
///
/// \param Load1    First candidate node.
/// \param Load2    Second candidate node.
/// \param Offset1  Receives the sign-extended constant offset of \p Load1.
///                 Written only when the function returns true.
/// \param Offset2  Receives the sign-extended constant offset of \p Load2.
///                 Written only when the function returns true.
/// \returns true if both nodes are supported loads with identical base and
///          chain operands and constant offsets; false otherwise.
bool RISCVInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                             int64_t &Offset1,
                                             int64_t &Offset2) const {
  // Only nodes that have already been selected to machine instructions carry
  // a machine opcode that can be inspected below.
  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  // Recognise the scalar load opcodes this hook is willing to reason about.
  auto IsLoadOpcode = [](unsigned Opcode) {
    switch (Opcode) {
    case RISCV::LB:
    case RISCV::LBU:
    case RISCV::LH:
    case RISCV::LHU:
    case RISCV::FLH:
    case RISCV::LW:
    case RISCV::LWU:
    case RISCV::FLW:
    case RISCV::LD:
    case RISCV::FLD:
      return true;
    default:
      return false;
    }
  };

  if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
      !IsLoadOpcode(Load2->getMachineOpcode()))
    return false;

  // Check if base address and chain operands match. This must be a single
  // combined test: a chain mismatch has to be rejected even when the base
  // address operands are identical.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(2) != Load2->getOperand(2))
    return false;

  // Determine the offsets. Only constant offsets can be compared by callers,
  // so anything else is reported as "not the same base pointer".
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}
2241+
2242+
/// Heuristic used when clustering loads in the SelectionDAG scheduler: decide
/// whether \p Load2 should be scheduled next to \p Load1.
///
/// \param Load1     First load node (lower offset).
/// \param Load2     Second load node (higher offset).
/// \param Offset1   Offset of \p Load1; must be smaller than \p Offset2.
/// \param Offset2   Offset of \p Load2.
/// \param NumLoads  Count of loads supplied by the caller; clustering is
///                  refused once it reaches 3.
/// \returns true only if the offsets are close enough, both nodes share the
///          same machine opcode, and \p NumLoads is below the cut-off.
bool RISCVInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                             int64_t Offset1, int64_t Offset2,
                                             unsigned NumLoads) const {
  assert(Offset2 > Offset1);

  // Loads whose offsets are too far apart are not clustered.
  const int64_t Distance = Offset2 - Offset1;
  const bool WithinRange = Distance / 8 <= 64;

  // The remaining conditions are evaluated left to right and short-circuit,
  // so the opcodes are only queried once the distance test has passed:
  //  * the machine opcodes must be identical (FIXME: overly conservative?);
  //  * four loads in a row should be sufficient, so stop at NumLoads >= 3.
  return WithinRange &&
         Load1->getMachineOpcode() == Load2->getMachineOpcode() &&
         NumLoads < 3;
}
2261+
21972262
bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
21982263
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
21992264
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,20 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
157157
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
158158
const TargetRegisterInfo *TRI) const override;
159159

160+
// Reports whether Load1 and Load2 are supported RISC-V load machine nodes
// that use the same base address operand and have constant offsets. On a true
// result the sign-extended offsets are written to Offset1 and Offset2; on a
// false result the output parameters are left untouched.
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1,
161+
int64_t &Offset2) const override;
162+
160163
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
161164
int64_t Offset1, bool OffsetIsScalable1,
162165
ArrayRef<const MachineOperand *> BaseOps2,
163166
int64_t Offset2, bool OffsetIsScalable2,
164167
unsigned ClusterSize,
165168
unsigned NumBytes) const override;
166169

170+
// Load-clustering heuristic: returns true when Load1 and Load2 have the same
// machine opcode, their offsets are close together, and NumLoads is still
// below the cut-off. The definition asserts that Offset2 > Offset1.
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1,
171+
int64_t Offset2,
172+
unsigned NumLoads) const override;
173+
167174
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
168175
const MachineOperand *&BaseOp,
169176
int64_t &Offset, unsigned &Width,

llvm/test/CodeGen/RISCV/byval.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ define void @caller() nounwind {
2626
; RV32I-NEXT: sw a1, 12(sp)
2727
; RV32I-NEXT: addi a0, a0, %lo(foo)
2828
; RV32I-NEXT: lw a1, 12(a0)
29-
; RV32I-NEXT: sw a1, 24(sp)
30-
; RV32I-NEXT: lw a1, 8(a0)
31-
; RV32I-NEXT: sw a1, 20(sp)
29+
; RV32I-NEXT: lw a2, 8(a0)
3230
; RV32I-NEXT: lw a0, 4(a0)
31+
; RV32I-NEXT: sw a1, 24(sp)
32+
; RV32I-NEXT: sw a2, 20(sp)
3333
; RV32I-NEXT: sw a0, 16(sp)
3434
; RV32I-NEXT: addi a0, sp, 12
3535
; RV32I-NEXT: call callee@plt

llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll

Lines changed: 50 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -190,21 +190,21 @@ define i32 @caller_many_scalars() nounwind {
190190
define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
191191
; RV32I-FPELIM-LABEL: callee_large_scalars:
192192
; RV32I-FPELIM: # %bb.0:
193-
; RV32I-FPELIM-NEXT: lw a2, 0(a0)
194-
; RV32I-FPELIM-NEXT: lw a3, 4(a0)
195-
; RV32I-FPELIM-NEXT: lw a4, 12(a1)
193+
; RV32I-FPELIM-NEXT: lw a2, 0(a1)
194+
; RV32I-FPELIM-NEXT: lw a3, 4(a1)
195+
; RV32I-FPELIM-NEXT: lw a4, 8(a1)
196+
; RV32I-FPELIM-NEXT: lw a1, 12(a1)
196197
; RV32I-FPELIM-NEXT: lw a5, 12(a0)
197-
; RV32I-FPELIM-NEXT: lw a6, 0(a1)
198-
; RV32I-FPELIM-NEXT: lw a7, 4(a1)
199-
; RV32I-FPELIM-NEXT: lw a1, 8(a1)
198+
; RV32I-FPELIM-NEXT: lw a6, 4(a0)
199+
; RV32I-FPELIM-NEXT: lw a7, 0(a0)
200200
; RV32I-FPELIM-NEXT: lw a0, 8(a0)
201-
; RV32I-FPELIM-NEXT: xor a4, a5, a4
202-
; RV32I-FPELIM-NEXT: xor a3, a3, a7
203-
; RV32I-FPELIM-NEXT: or a3, a3, a4
204-
; RV32I-FPELIM-NEXT: xor a0, a0, a1
205-
; RV32I-FPELIM-NEXT: xor a1, a2, a6
206-
; RV32I-FPELIM-NEXT: or a0, a1, a0
207-
; RV32I-FPELIM-NEXT: or a0, a0, a3
201+
; RV32I-FPELIM-NEXT: xor a1, a5, a1
202+
; RV32I-FPELIM-NEXT: xor a3, a6, a3
203+
; RV32I-FPELIM-NEXT: or a1, a3, a1
204+
; RV32I-FPELIM-NEXT: xor a0, a0, a4
205+
; RV32I-FPELIM-NEXT: xor a2, a7, a2
206+
; RV32I-FPELIM-NEXT: or a0, a2, a0
207+
; RV32I-FPELIM-NEXT: or a0, a0, a1
208208
; RV32I-FPELIM-NEXT: seqz a0, a0
209209
; RV32I-FPELIM-NEXT: ret
210210
;
@@ -214,21 +214,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
214214
; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
215215
; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
216216
; RV32I-WITHFP-NEXT: addi s0, sp, 16
217-
; RV32I-WITHFP-NEXT: lw a2, 0(a0)
218-
; RV32I-WITHFP-NEXT: lw a3, 4(a0)
219-
; RV32I-WITHFP-NEXT: lw a4, 12(a1)
217+
; RV32I-WITHFP-NEXT: lw a2, 0(a1)
218+
; RV32I-WITHFP-NEXT: lw a3, 4(a1)
219+
; RV32I-WITHFP-NEXT: lw a4, 8(a1)
220+
; RV32I-WITHFP-NEXT: lw a1, 12(a1)
220221
; RV32I-WITHFP-NEXT: lw a5, 12(a0)
221-
; RV32I-WITHFP-NEXT: lw a6, 0(a1)
222-
; RV32I-WITHFP-NEXT: lw a7, 4(a1)
223-
; RV32I-WITHFP-NEXT: lw a1, 8(a1)
222+
; RV32I-WITHFP-NEXT: lw a6, 4(a0)
223+
; RV32I-WITHFP-NEXT: lw a7, 0(a0)
224224
; RV32I-WITHFP-NEXT: lw a0, 8(a0)
225-
; RV32I-WITHFP-NEXT: xor a4, a5, a4
226-
; RV32I-WITHFP-NEXT: xor a3, a3, a7
227-
; RV32I-WITHFP-NEXT: or a3, a3, a4
228-
; RV32I-WITHFP-NEXT: xor a0, a0, a1
229-
; RV32I-WITHFP-NEXT: xor a1, a2, a6
230-
; RV32I-WITHFP-NEXT: or a0, a1, a0
231-
; RV32I-WITHFP-NEXT: or a0, a0, a3
225+
; RV32I-WITHFP-NEXT: xor a1, a5, a1
226+
; RV32I-WITHFP-NEXT: xor a3, a6, a3
227+
; RV32I-WITHFP-NEXT: or a1, a3, a1
228+
; RV32I-WITHFP-NEXT: xor a0, a0, a4
229+
; RV32I-WITHFP-NEXT: xor a2, a7, a2
230+
; RV32I-WITHFP-NEXT: or a0, a2, a0
231+
; RV32I-WITHFP-NEXT: or a0, a0, a1
232232
; RV32I-WITHFP-NEXT: seqz a0, a0
233233
; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
234234
; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -297,21 +297,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
297297
; RV32I-FPELIM-LABEL: callee_large_scalars_exhausted_regs:
298298
; RV32I-FPELIM: # %bb.0:
299299
; RV32I-FPELIM-NEXT: lw a0, 4(sp)
300-
; RV32I-FPELIM-NEXT: lw a1, 0(a7)
301-
; RV32I-FPELIM-NEXT: lw a2, 4(a7)
302-
; RV32I-FPELIM-NEXT: lw a3, 12(a0)
300+
; RV32I-FPELIM-NEXT: lw a1, 0(a0)
301+
; RV32I-FPELIM-NEXT: lw a2, 4(a0)
302+
; RV32I-FPELIM-NEXT: lw a3, 8(a0)
303+
; RV32I-FPELIM-NEXT: lw a0, 12(a0)
303304
; RV32I-FPELIM-NEXT: lw a4, 12(a7)
304-
; RV32I-FPELIM-NEXT: lw a5, 0(a0)
305-
; RV32I-FPELIM-NEXT: lw a6, 4(a0)
306-
; RV32I-FPELIM-NEXT: lw a0, 8(a0)
305+
; RV32I-FPELIM-NEXT: lw a5, 4(a7)
306+
; RV32I-FPELIM-NEXT: lw a6, 0(a7)
307307
; RV32I-FPELIM-NEXT: lw a7, 8(a7)
308-
; RV32I-FPELIM-NEXT: xor a3, a4, a3
309-
; RV32I-FPELIM-NEXT: xor a2, a2, a6
310-
; RV32I-FPELIM-NEXT: or a2, a2, a3
311-
; RV32I-FPELIM-NEXT: xor a0, a7, a0
312-
; RV32I-FPELIM-NEXT: xor a1, a1, a5
308+
; RV32I-FPELIM-NEXT: xor a0, a4, a0
309+
; RV32I-FPELIM-NEXT: xor a2, a5, a2
310+
; RV32I-FPELIM-NEXT: or a0, a2, a0
311+
; RV32I-FPELIM-NEXT: xor a2, a7, a3
312+
; RV32I-FPELIM-NEXT: xor a1, a6, a1
313+
; RV32I-FPELIM-NEXT: or a1, a1, a2
313314
; RV32I-FPELIM-NEXT: or a0, a1, a0
314-
; RV32I-FPELIM-NEXT: or a0, a0, a2
315315
; RV32I-FPELIM-NEXT: seqz a0, a0
316316
; RV32I-FPELIM-NEXT: ret
317317
;
@@ -322,21 +322,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
322322
; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
323323
; RV32I-WITHFP-NEXT: addi s0, sp, 16
324324
; RV32I-WITHFP-NEXT: lw a0, 4(s0)
325-
; RV32I-WITHFP-NEXT: lw a1, 0(a7)
326-
; RV32I-WITHFP-NEXT: lw a2, 4(a7)
327-
; RV32I-WITHFP-NEXT: lw a3, 12(a0)
325+
; RV32I-WITHFP-NEXT: lw a1, 0(a0)
326+
; RV32I-WITHFP-NEXT: lw a2, 4(a0)
327+
; RV32I-WITHFP-NEXT: lw a3, 8(a0)
328+
; RV32I-WITHFP-NEXT: lw a0, 12(a0)
328329
; RV32I-WITHFP-NEXT: lw a4, 12(a7)
329-
; RV32I-WITHFP-NEXT: lw a5, 0(a0)
330-
; RV32I-WITHFP-NEXT: lw a6, 4(a0)
331-
; RV32I-WITHFP-NEXT: lw a0, 8(a0)
330+
; RV32I-WITHFP-NEXT: lw a5, 4(a7)
331+
; RV32I-WITHFP-NEXT: lw a6, 0(a7)
332332
; RV32I-WITHFP-NEXT: lw a7, 8(a7)
333-
; RV32I-WITHFP-NEXT: xor a3, a4, a3
334-
; RV32I-WITHFP-NEXT: xor a2, a2, a6
335-
; RV32I-WITHFP-NEXT: or a2, a2, a3
336-
; RV32I-WITHFP-NEXT: xor a0, a7, a0
337-
; RV32I-WITHFP-NEXT: xor a1, a1, a5
333+
; RV32I-WITHFP-NEXT: xor a0, a4, a0
334+
; RV32I-WITHFP-NEXT: xor a2, a5, a2
335+
; RV32I-WITHFP-NEXT: or a0, a2, a0
336+
; RV32I-WITHFP-NEXT: xor a2, a7, a3
337+
; RV32I-WITHFP-NEXT: xor a1, a6, a1
338+
; RV32I-WITHFP-NEXT: or a1, a1, a2
338339
; RV32I-WITHFP-NEXT: or a0, a1, a0
339-
; RV32I-WITHFP-NEXT: or a0, a0, a2
340340
; RV32I-WITHFP-NEXT: seqz a0, a0
341341
; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
342342
; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload

llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -106,21 +106,21 @@ define i32 @caller_many_scalars() nounwind {
106106
define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
107107
; RV64I-LABEL: callee_large_scalars:
108108
; RV64I: # %bb.0:
109-
; RV64I-NEXT: ld a2, 0(a0)
110-
; RV64I-NEXT: ld a3, 8(a0)
111-
; RV64I-NEXT: ld a4, 24(a1)
109+
; RV64I-NEXT: ld a2, 0(a1)
110+
; RV64I-NEXT: ld a3, 8(a1)
111+
; RV64I-NEXT: ld a4, 16(a1)
112+
; RV64I-NEXT: ld a1, 24(a1)
112113
; RV64I-NEXT: ld a5, 24(a0)
113-
; RV64I-NEXT: ld a6, 0(a1)
114-
; RV64I-NEXT: ld a7, 8(a1)
115-
; RV64I-NEXT: ld a1, 16(a1)
114+
; RV64I-NEXT: ld a6, 8(a0)
115+
; RV64I-NEXT: ld a7, 0(a0)
116116
; RV64I-NEXT: ld a0, 16(a0)
117-
; RV64I-NEXT: xor a4, a5, a4
118-
; RV64I-NEXT: xor a3, a3, a7
119-
; RV64I-NEXT: or a3, a3, a4
120-
; RV64I-NEXT: xor a0, a0, a1
121-
; RV64I-NEXT: xor a1, a2, a6
122-
; RV64I-NEXT: or a0, a1, a0
123-
; RV64I-NEXT: or a0, a0, a3
117+
; RV64I-NEXT: xor a1, a5, a1
118+
; RV64I-NEXT: xor a3, a6, a3
119+
; RV64I-NEXT: or a1, a3, a1
120+
; RV64I-NEXT: xor a0, a0, a4
121+
; RV64I-NEXT: xor a2, a7, a2
122+
; RV64I-NEXT: or a0, a2, a0
123+
; RV64I-NEXT: or a0, a0, a1
124124
; RV64I-NEXT: seqz a0, a0
125125
; RV64I-NEXT: ret
126126
%1 = icmp eq i256 %a, %b
@@ -161,21 +161,21 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
161161
; RV64I-LABEL: callee_large_scalars_exhausted_regs:
162162
; RV64I: # %bb.0:
163163
; RV64I-NEXT: ld a0, 8(sp)
164-
; RV64I-NEXT: ld a1, 0(a7)
165-
; RV64I-NEXT: ld a2, 8(a7)
166-
; RV64I-NEXT: ld a3, 24(a0)
164+
; RV64I-NEXT: ld a1, 0(a0)
165+
; RV64I-NEXT: ld a2, 8(a0)
166+
; RV64I-NEXT: ld a3, 16(a0)
167+
; RV64I-NEXT: ld a0, 24(a0)
167168
; RV64I-NEXT: ld a4, 24(a7)
168-
; RV64I-NEXT: ld a5, 0(a0)
169-
; RV64I-NEXT: ld a6, 8(a0)
170-
; RV64I-NEXT: ld a0, 16(a0)
169+
; RV64I-NEXT: ld a5, 8(a7)
170+
; RV64I-NEXT: ld a6, 0(a7)
171171
; RV64I-NEXT: ld a7, 16(a7)
172-
; RV64I-NEXT: xor a3, a4, a3
173-
; RV64I-NEXT: xor a2, a2, a6
174-
; RV64I-NEXT: or a2, a2, a3
175-
; RV64I-NEXT: xor a0, a7, a0
176-
; RV64I-NEXT: xor a1, a1, a5
172+
; RV64I-NEXT: xor a0, a4, a0
173+
; RV64I-NEXT: xor a2, a5, a2
174+
; RV64I-NEXT: or a0, a2, a0
175+
; RV64I-NEXT: xor a2, a7, a3
176+
; RV64I-NEXT: xor a1, a6, a1
177+
; RV64I-NEXT: or a1, a1, a2
177178
; RV64I-NEXT: or a0, a1, a0
178-
; RV64I-NEXT: or a0, a0, a2
179179
; RV64I-NEXT: seqz a0, a0
180180
; RV64I-NEXT: ret
181181
%1 = icmp eq i256 %h, %j

0 commit comments

Comments
 (0)