Skip to content

Commit 7d9af03

Browse files
committed
[Scheduling][ARM] Consistently enable PostRA Machine scheduling
In the ARM backend, for historical reasons we have only some targets using Machine Scheduling. The rest use the old list scheduler as they are using itineraries and the list scheduler seems to produce better code (and not crash running out of registers on v6m codes). So whether to use the MIScheduler or not is checked at runtime from the subtarget features. This is fine, except for post-ra scheduling. Whether to use the old post-ra list scheduler or the post-ra machine scheduler is decided as the pass manager is set up, in ARM's case from a newly constructed subtarget. Under some situations, like LTO, this won't include the correct cpu so it can pick the wrong option. This can have a surprising effect on performance. To fix that, this patch overrides targetSchedulesPostRAScheduling and addPreSched2 in the ARM backend, adding _both_ post-ra schedulers and picking at runtime which to execute. To pick between the two I've had to add an enablePostRAMachineScheduler() method that normally returns enableMachineScheduler() && enablePostRAScheduler(), which can be overridden to enable just one of PostRAMachineScheduler vs PostRAScheduler. Thanks to David Penry for identifying this problem. Differential Revision: https://reviews.llvm.org/D69775
1 parent f71e35d commit 7d9af03

17 files changed

+72
-18
lines changed

llvm/include/llvm/CodeGen/TargetSubtargetInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
206206
/// which is the preferred way to influence this.
207207
virtual bool enablePostRAScheduler() const;
208208

209+
/// True if the subtarget should run a machine scheduler after register
210+
/// allocation.
211+
virtual bool enablePostRAMachineScheduler() const;
212+
209213
/// True if the subtarget should run the atomic expansion pass.
210214
virtual bool enableAtomicExpand() const;
211215

llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
402402
if (EnablePostRAMachineSched.getNumOccurrences()) {
403403
if (!EnablePostRAMachineSched)
404404
return false;
405-
} else if (!mf.getSubtarget().enablePostRAScheduler()) {
405+
} else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
406406
LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
407407
return false;
408408
}

llvm/lib/CodeGen/TargetSubtargetInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
5454
return getSchedModel().PostRAScheduler;
5555
}
5656

57+
bool TargetSubtargetInfo::enablePostRAMachineScheduler() const {
58+
return enableMachineScheduler() && enablePostRAScheduler();
59+
}
60+
5761
bool TargetSubtargetInfo::useAA() const {
5862
return false;
5963
}

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,9 +381,19 @@ bool ARMSubtarget::enableMachineScheduler() const {
381381

382382
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
383383
bool ARMSubtarget::enablePostRAScheduler() const {
384+
if (enableMachineScheduler())
385+
return false;
386+
if (disablePostRAScheduler())
387+
return false;
388+
// Thumb1 cores will generally not benefit from post-ra scheduling
389+
return !isThumb1Only();
390+
}
391+
392+
bool ARMSubtarget::enablePostRAMachineScheduler() const {
393+
if (!enableMachineScheduler())
394+
return false;
384395
if (disablePostRAScheduler())
385396
return false;
386-
// Don't reschedule potential IT blocks.
387397
return !isThumb1Only();
388398
}
389399

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
806806
/// True for some subtargets at > -O0.
807807
bool enablePostRAScheduler() const override;
808808

809+
/// True for some subtargets at > -O0.
810+
bool enablePostRAMachineScheduler() const override;
811+
809812
/// Enable use of alias analysis during code generation (during MI
810813
/// scheduling, DAGCombine, etc.).
811814
bool useAA() const override { return UseAA; }

llvm/lib/Target/ARM/ARMTargetMachine.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -322,14 +322,7 @@ namespace {
322322
class ARMPassConfig : public TargetPassConfig {
323323
public:
324324
ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
325-
: TargetPassConfig(TM, PM) {
326-
if (TM.getOptLevel() != CodeGenOpt::None) {
327-
ARMGenSubtargetInfo STI(TM.getTargetTriple(), TM.getTargetCPU(),
328-
TM.getTargetFeatureString());
329-
if (STI.hasFeature(ARM::FeatureUseMISched))
330-
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
331-
}
332-
}
325+
: TargetPassConfig(TM, PM) {}
333326

334327
ARMBaseTargetMachine &getARMTargetMachine() const {
335328
return getTM<ARMBaseTargetMachine>();
@@ -523,6 +516,13 @@ void ARMPassConfig::addPreSched2() {
523516
}
524517
addPass(createMVEVPTBlockPass());
525518
addPass(createThumb2ITBlockPass());
519+
520+
// Add both scheduling passes to give the subtarget an opportunity to pick
521+
// between them.
522+
if (getOptLevel() != CodeGenOpt::None) {
523+
addPass(&PostMachineSchedulerID);
524+
addPass(&PostRASchedulerID);
525+
}
526526
}
527527

528528
void ARMPassConfig::addPreEmitPass() {

llvm/lib/Target/ARM/ARMTargetMachine.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ class ARMBaseTargetMachine : public LLVMTargetMachine {
7070
TargetTriple.isOSWindows() ||
7171
TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
7272
}
73+
74+
bool targetSchedulesPostRAScheduling() const override { return true; };
7375
};
7476

7577
/// ARM/Thumb little endian target machine.

llvm/test/CodeGen/ARM/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@
141141
; CHECK-NEXT: Thumb IT blocks insertion pass
142142
; CHECK-NEXT: MachineDominator Tree Construction
143143
; CHECK-NEXT: Machine Natural Loop Construction
144+
; CHECK-NEXT: PostRA Machine Instruction Scheduler
144145
; CHECK-NEXT: Post RA top-down list latency scheduler
145146
; CHECK-NEXT: Analyze Machine Code For Garbage Collection
146147
; CHECK-NEXT: Machine Block Frequency Analysis

llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33
;
44

55
@a = global i32 0, align 4

llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33

44
; CHECK: ********** MI Scheduling **********
55
; We need second, post-ra scheduling to have LDM instruction combined from single-loads

llvm/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33
; N=3 STMIA_UPD should have latency 2cyc and writeback latency 1cyc
44

55
; CHECK: ********** MI Scheduling **********

llvm/test/CodeGen/ARM/cortex-a57-misched-stm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33
; N=3 STMIB should have latency 2cyc
44

55
; CHECK: ********** MI Scheduling **********

llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33
;
44

55
@a = global double 0.0, align 4

llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33

44
; CHECK: ********** MI Scheduling **********
55
; We need second, post-ra scheduling to have VLDM instruction combined from single-loads

llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33

44
; CHECK: ********** MI Scheduling **********
55
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores

llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
33

44
; CHECK: ********** MI Scheduling **********
55
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores

llvm/test/CodeGen/ARM/postrasched.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; REQUIRES: asserts
2+
; RUN: llc < %s -mtriple=thumbv8m.main-none-eabi -debug-only=machine-scheduler,post-RA-sched -print-before=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
3+
4+
; CHECK-LABEL: test_misched
5+
; Pre and post ra machine scheduling
6+
; CHECK: ********** MI Scheduling **********
7+
; CHECK: t2LDRi12
8+
; CHECK: Latency : 2
9+
; CHECK: ********** MI Scheduling **********
10+
; CHECK: t2LDRi12
11+
; CHECK: Latency : 2
12+
13+
define i32 @test_misched(i32* %ptr) "target-cpu"="cortex-m33" {
14+
entry:
15+
%l = load i32, i32* %ptr
16+
store i32 0, i32* %ptr
17+
ret i32 %l
18+
}
19+
20+
; CHECK-LABEL: test_rasched
21+
; CHECK: Subtarget disables post-MI-sched.
22+
; CHECK: ********** List Scheduling **********
23+
24+
define i32 @test_rasched(i32* %ptr) {
25+
entry:
26+
%l = load i32, i32* %ptr
27+
store i32 0, i32* %ptr
28+
ret i32 %l
29+
}
30+

0 commit comments

Comments
 (0)