Skip to content

Commit c9ca21b

Browse files
committed
Add parameter to allow reordering due to memop clustering
Reordering based on the sort order of the MemOpInfo array was disabled in <https://reviews.llvm.org/D72706>, but it is not clear that this is desirable for all targets. Disabling it also makes it more difficult to compare the incremental benefit of enabling load clustering in the SelectionDAG scheduler as well as in the MachineScheduler, because the SelectionDAG scheduler does appear to allow this reordering.
1 parent ceb5e02 commit c9ca21b

39 files changed

+4120
-4113
lines changed

llvm/include/llvm/CodeGen/MachineScheduler.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,11 +1349,13 @@ ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);
13491349

13501350
std::unique_ptr<ScheduleDAGMutation>
13511351
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1352-
const TargetRegisterInfo *TRI);
1352+
const TargetRegisterInfo *TRI,
1353+
bool ReorderWhileClustering = false);
13531354

13541355
std::unique_ptr<ScheduleDAGMutation>
13551356
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1356-
const TargetRegisterInfo *TRI);
1357+
const TargetRegisterInfo *TRI,
1358+
bool ReorderWhileClustering = false);
13571359

13581360
std::unique_ptr<ScheduleDAGMutation>
13591361
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,

llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,11 +1743,14 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
17431743
const TargetInstrInfo *TII;
17441744
const TargetRegisterInfo *TRI;
17451745
bool IsLoad;
1746+
bool ReorderWhileClustering;
17461747

17471748
public:
17481749
BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1749-
const TargetRegisterInfo *tri, bool IsLoad)
1750-
: TII(tii), TRI(tri), IsLoad(IsLoad) {}
1750+
const TargetRegisterInfo *tri, bool IsLoad,
1751+
bool ReorderWhileClustering)
1752+
: TII(tii), TRI(tri), IsLoad(IsLoad),
1753+
ReorderWhileClustering(ReorderWhileClustering) {}
17511754

17521755
void apply(ScheduleDAGInstrs *DAGInstrs) override;
17531756

@@ -1763,14 +1766,16 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
17631766
class StoreClusterMutation : public BaseMemOpClusterMutation {
17641767
public:
17651768
StoreClusterMutation(const TargetInstrInfo *tii,
1766-
const TargetRegisterInfo *tri)
1767-
: BaseMemOpClusterMutation(tii, tri, false) {}
1769+
const TargetRegisterInfo *tri,
1770+
bool ReorderWhileClustering)
1771+
: BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
17681772
};
17691773

17701774
class LoadClusterMutation : public BaseMemOpClusterMutation {
17711775
public:
1772-
LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1773-
: BaseMemOpClusterMutation(tii, tri, true) {}
1776+
LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1777+
bool ReorderWhileClustering)
1778+
: BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
17741779
};
17751780

17761781
} // end anonymous namespace
@@ -1779,15 +1784,19 @@ namespace llvm {
17791784

17801785
std::unique_ptr<ScheduleDAGMutation>
17811786
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1782-
const TargetRegisterInfo *TRI) {
1783-
return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1787+
const TargetRegisterInfo *TRI,
1788+
bool ReorderWhileClustering) {
1789+
return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1790+
TII, TRI, ReorderWhileClustering)
17841791
: nullptr;
17851792
}
17861793

17871794
std::unique_ptr<ScheduleDAGMutation>
17881795
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1789-
const TargetRegisterInfo *TRI) {
1790-
return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1796+
const TargetRegisterInfo *TRI,
1797+
bool ReorderWhileClustering) {
1798+
return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1799+
TII, TRI, ReorderWhileClustering)
17911800
: nullptr;
17921801
}
17931802

@@ -1840,7 +1849,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
18401849

18411850
SUnit *SUa = MemOpa.SU;
18421851
SUnit *SUb = MemOpb.SU;
1843-
if (SUa->NodeNum > SUb->NodeNum)
1852+
if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
18441853
std::swap(SUa, SUb);
18451854

18461855
// FIXME: Is this check really required?

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ class RISCVPassConfig : public TargetPassConfig {
346346
createMachineScheduler(MachineSchedContext *C) const override {
347347
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
348348
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
349-
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
349+
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI, true));
350350
if (ST.hasMacroFusion())
351351
DAG->addMutation(createRISCVMacroFusionDAGMutation());
352352
return DAG;

llvm/test/CodeGen/RISCV/add-before-shl.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -167,17 +167,17 @@ define i128 @add_wide_operand(i128 %a) nounwind {
167167
; RV32I: # %bb.0:
168168
; RV32I-NEXT: lw a2, 0(a1)
169169
; RV32I-NEXT: lw a3, 4(a1)
170-
; RV32I-NEXT: lw a4, 12(a1)
171-
; RV32I-NEXT: lw a1, 8(a1)
170+
; RV32I-NEXT: lw a4, 8(a1)
171+
; RV32I-NEXT: lw a1, 12(a1)
172172
; RV32I-NEXT: srli a5, a2, 29
173173
; RV32I-NEXT: slli a6, a3, 3
174174
; RV32I-NEXT: or a5, a6, a5
175175
; RV32I-NEXT: srli a3, a3, 29
176-
; RV32I-NEXT: slli a6, a1, 3
176+
; RV32I-NEXT: slli a6, a4, 3
177177
; RV32I-NEXT: or a3, a6, a3
178-
; RV32I-NEXT: srli a1, a1, 29
179-
; RV32I-NEXT: slli a4, a4, 3
180-
; RV32I-NEXT: or a1, a4, a1
178+
; RV32I-NEXT: srli a4, a4, 29
179+
; RV32I-NEXT: slli a1, a1, 3
180+
; RV32I-NEXT: or a1, a1, a4
181181
; RV32I-NEXT: slli a2, a2, 3
182182
; RV32I-NEXT: lui a4, 128
183183
; RV32I-NEXT: add a1, a1, a4
@@ -200,26 +200,26 @@ define i128 @add_wide_operand(i128 %a) nounwind {
200200
;
201201
; RV32C-LABEL: add_wide_operand:
202202
; RV32C: # %bb.0:
203-
; RV32C-NEXT: lw a6, 8(a1)
204-
; RV32C-NEXT: c.lw a3, 12(a1)
205-
; RV32C-NEXT: c.lw a2, 4(a1)
206-
; RV32C-NEXT: c.lw a1, 0(a1)
203+
; RV32C-NEXT: c.lw a2, 12(a1)
204+
; RV32C-NEXT: lw a6, 0(a1)
205+
; RV32C-NEXT: c.lw a3, 4(a1)
206+
; RV32C-NEXT: c.lw a1, 8(a1)
207207
; RV32C-NEXT: c.lui a5, 16
208-
; RV32C-NEXT: c.add a3, a5
209-
; RV32C-NEXT: c.slli a3, 3
210-
; RV32C-NEXT: srli a5, a6, 29
211-
; RV32C-NEXT: c.or a3, a5
208+
; RV32C-NEXT: c.add a2, a5
209+
; RV32C-NEXT: c.slli a2, 3
212210
; RV32C-NEXT: srli a5, a1, 29
213-
; RV32C-NEXT: slli a4, a2, 3
211+
; RV32C-NEXT: c.or a2, a5
212+
; RV32C-NEXT: srli a5, a6, 29
213+
; RV32C-NEXT: slli a4, a3, 3
214214
; RV32C-NEXT: c.or a4, a5
215-
; RV32C-NEXT: c.srli a2, 29
216-
; RV32C-NEXT: c.slli a6, 3
217-
; RV32C-NEXT: or a2, a6, a2
215+
; RV32C-NEXT: c.srli a3, 29
218216
; RV32C-NEXT: c.slli a1, 3
219-
; RV32C-NEXT: c.sw a1, 0(a0)
220-
; RV32C-NEXT: c.sw a2, 8(a0)
217+
; RV32C-NEXT: c.or a1, a3
218+
; RV32C-NEXT: c.slli a6, 3
219+
; RV32C-NEXT: sw a6, 0(a0)
220+
; RV32C-NEXT: c.sw a1, 8(a0)
221221
; RV32C-NEXT: c.sw a4, 4(a0)
222-
; RV32C-NEXT: c.sw a3, 12(a0)
222+
; RV32C-NEXT: c.sw a2, 12(a0)
223223
; RV32C-NEXT: c.jr ra
224224
;
225225
; RV64C-LABEL: add_wide_operand:

llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,8 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
192192
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
193193
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
194194
; RV32-NEXT: mv s0, a0
195-
; RV32-NEXT: lw a4, 4(a0)
196195
; RV32-NEXT: lw a5, 0(a0)
196+
; RV32-NEXT: lw a4, 4(a0)
197197
; RV32-NEXT: mv s1, a2
198198
; RV32-NEXT: mv s2, a1
199199
; RV32-NEXT: j .LBB11_2
@@ -268,8 +268,8 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
268268
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
269269
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
270270
; RV32-NEXT: mv s0, a0
271-
; RV32-NEXT: lw a4, 4(a0)
272271
; RV32-NEXT: lw a5, 0(a0)
272+
; RV32-NEXT: lw a4, 4(a0)
273273
; RV32-NEXT: mv s1, a2
274274
; RV32-NEXT: mv s2, a1
275275
; RV32-NEXT: j .LBB13_2
@@ -344,8 +344,8 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
344344
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
345345
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
346346
; RV32-NEXT: mv s0, a0
347-
; RV32-NEXT: lw a4, 4(a0)
348347
; RV32-NEXT: lw a5, 0(a0)
348+
; RV32-NEXT: lw a4, 4(a0)
349349
; RV32-NEXT: mv s1, a2
350350
; RV32-NEXT: mv s2, a1
351351
; RV32-NEXT: j .LBB15_2
@@ -420,8 +420,8 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
420420
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
421421
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
422422
; RV32-NEXT: mv s0, a0
423-
; RV32-NEXT: lw a4, 4(a0)
424423
; RV32-NEXT: lw a5, 0(a0)
424+
; RV32-NEXT: lw a4, 4(a0)
425425
; RV32-NEXT: mv s1, a2
426426
; RV32-NEXT: mv s2, a1
427427
; RV32-NEXT: j .LBB17_2

0 commit comments

Comments
 (0)