Commit b25b51e

[InlineSpiller] Check rematerialization before folding operand (#134015)
The current implementation tries to fold the operand before rematerialization because folding can save one register. But if a physical register is available, we can still rematerialize without causing high register pressure. This patch adds that check to find the better choice. We can then produce

    xorps %xmm1, %xmm1
    ucomiss %xmm1, %xmm0

instead of

    ucomiss LCPI0_1(%rip), %xmm0
1 parent 2b57ebb commit b25b51e
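
For orientation, the heuristic can be modeled as a small standalone C++ sketch. This is a hedged illustration only: RematCandidate and shouldFoldInsteadOfRemat are hypothetical names, not LLVM APIs; the condition simply mirrors the guarded fold check added in InlineSpiller.cpp below.

    // Standalone model of the commit's heuristic (not LLVM code).
    // Fold the reload into the using instruction only when the rematerialized
    // instruction would read memory anyway, or when no physical register in
    // the allocation order is free of interference.
    #include <iostream>

    struct RematCandidate {
      bool CanFoldAsLoad; // the target can fold the reload into the user
      bool RematMayLoad;  // the rematerialized instruction reads memory anyway
      bool PhysRegFree;   // some register in the allocation order is interference-free
    };

    // Mirrors: canFoldAsLoad() && (mayLoad() || !hasPhysRegAvailable(MI))
    bool shouldFoldInsteadOfRemat(const RematCandidate &C) {
      return C.CanFoldAsLoad && (C.RematMayLoad || !C.PhysRegFree);
    }

    int main() {
      // The xorps/ucomiss case from the commit message: the rematerialized
      // zeroing idiom needs no memory operand and a scratch register is free,
      // so rematerialization is preferred over folding a constant-pool load.
      RematCandidate ZeroCompare{true, false, true};
      std::cout << (shouldFoldInsteadOfRemat(ZeroCompare) ? "fold" : "rematerialize")
                << "\n";
      return 0;
    }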

File tree

10 files changed, +185 -98 lines changed


llvm/include/llvm/CodeGen/Spiller.h

Lines changed: 4 additions & 2 deletions
@@ -23,6 +23,7 @@ class LiveIntervals;
 class LiveStacks;
 class MachineDominatorTree;
 class MachineBlockFrequencyInfo;
+class AllocationOrder;
 
 /// Spiller interface.
 ///
@@ -35,7 +36,7 @@ class Spiller {
   virtual ~Spiller() = 0;
 
   /// spill - Spill the LRE.getParent() live interval.
-  virtual void spill(LiveRangeEdit &LRE) = 0;
+  virtual void spill(LiveRangeEdit &LRE, AllocationOrder *Order = nullptr) = 0;
 
   /// Return the registers that were spilled.
   virtual ArrayRef<Register> getSpilledRegs() = 0;
@@ -58,7 +59,8 @@ class Spiller {
 /// of deferring though VirtRegMap.
 Spiller *createInlineSpiller(const Spiller::RequiredAnalyses &Analyses,
                              MachineFunction &MF, VirtRegMap &VRM,
-                             VirtRegAuxInfo &VRAI);
+                             VirtRegAuxInfo &VRAI,
+                             LiveRegMatrix *Matrix = nullptr);
 
 } // end namespace llvm

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 31 additions & 7 deletions
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AllocationOrder.h"
 #include "SplitKit.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -23,6 +24,7 @@
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -149,12 +151,14 @@ class InlineSpiller : public Spiller {
   MachineRegisterInfo &MRI;
   const TargetInstrInfo &TII;
   const TargetRegisterInfo &TRI;
+  LiveRegMatrix *Matrix = nullptr;
 
   // Variables that are valid during spill(), but used by multiple methods.
   LiveRangeEdit *Edit = nullptr;
   LiveInterval *StackInt = nullptr;
   int StackSlot;
   Register Original;
+  AllocationOrder *Order = nullptr;
 
   // All registers to spill to StackSlot, including the main register.
   SmallVector<Register, 8> RegsToSpill;
@@ -184,13 +188,13 @@ class InlineSpiller : public Spiller {
 
 public:
   InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF,
-                VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
+                VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix)
       : MF(MF), LIS(Analyses.LIS), LSS(Analyses.LSS), VRM(VRM),
         MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
-        TRI(*MF.getSubtarget().getRegisterInfo()), HSpiller(Analyses, MF, VRM),
-        VRAI(VRAI) {}
+        TRI(*MF.getSubtarget().getRegisterInfo()), Matrix(Matrix),
+        HSpiller(Analyses, MF, VRM), VRAI(VRAI) {}
 
-  void spill(LiveRangeEdit &) override;
+  void spill(LiveRangeEdit &, AllocationOrder *Order = nullptr) override;
   ArrayRef<Register> getSpilledRegs() override { return RegsToSpill; }
   ArrayRef<Register> getReplacedRegs() override { return RegsReplaced; }
   void postOptimization() override;
@@ -207,6 +211,7 @@ class InlineSpiller : public Spiller {
 
   void markValueUsed(LiveInterval*, VNInfo*);
   bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI);
+  bool hasPhysRegAvailable(const MachineInstr &MI);
   bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
   void reMaterializeAll();
 
@@ -229,8 +234,8 @@ void Spiller::anchor() {}
 Spiller *
 llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses,
                           MachineFunction &MF, VirtRegMap &VRM,
-                          VirtRegAuxInfo &VRAI) {
-  return new InlineSpiller(Analyses, MF, VRM, VRAI);
+                          VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix) {
+  return new InlineSpiller(Analyses, MF, VRM, VRAI, Matrix);
 }
 
 //===----------------------------------------------------------------------===//
@@ -615,6 +620,23 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
   return true;
 }
 
+/// hasPhysRegAvailable - Check if there is an available physical register for
+/// rematerialization.
+bool InlineSpiller::hasPhysRegAvailable(const MachineInstr &MI) {
+  if (!Order || !Matrix)
+    return false;
+
+  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
+  SlotIndex PrevIdx = UseIdx.getPrevSlot();
+
+  for (MCPhysReg PhysReg : *Order) {
+    if (!Matrix->checkInterference(PrevIdx, UseIdx, PhysReg))
+      return true;
+  }
+
+  return false;
+}
+
 /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
 bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
   // Analyze instruction
@@ -661,6 +683,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
   // Before rematerializing into a register for a single instruction, try to
   // fold a load into the instruction. That avoids allocating a new register.
   if (RM.OrigMI->canFoldAsLoad() &&
+      (RM.OrigMI->mayLoad() || !hasPhysRegAvailable(MI)) &&
       foldMemoryOperand(Ops, RM.OrigMI)) {
     Edit->markRematerialized(RM.ParentVNI);
     ++NumFoldedLoads;
@@ -1282,9 +1305,10 @@ void InlineSpiller::spillAll() {
     Edit->eraseVirtReg(Reg);
 }
 
-void InlineSpiller::spill(LiveRangeEdit &edit) {
+void InlineSpiller::spill(LiveRangeEdit &edit, AllocationOrder *order) {
   ++NumSpilledRanges;
   Edit = &edit;
+  Order = order;
   assert(!edit.getReg().isStack() && "Trying to spill a stack slot.");
   // Share a stack slot among all descendants of Original.
   Original = VRM.getOriginal(edit.getReg());
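
The hasPhysRegAvailable hunk above walks the allocation order and asks the live-register matrix whether any candidate register is interference-free over the short range just before the use. A generic, self-contained C++ sketch of that scan (hypothetical names; the interference test is abstracted behind a callback rather than LLVM's LiveRegMatrix):

    #include <functional>
    #include <iostream>
    #include <vector>

    using PhysReg = unsigned;

    // Return true if any register in the allocation order has no interference
    // over the queried range, i.e. the rematerialized value could live there.
    bool anyPhysRegFree(const std::vector<PhysReg> &Order,
                        const std::function<bool(PhysReg)> &HasInterference) {
      for (PhysReg R : Order)
        if (!HasInterference(R))
          return true;
      return false;
    }

    int main() {
      std::vector<PhysReg> Order = {0, 1, 2, 3};
      // Pretend registers 0 and 1 are busy over the range; 2 and 3 are free.
      auto Busy = [](PhysReg R) { return R < 2; };
      std::cout << (anyPhysRegFree(Order, Busy) ? "remat possible" : "must fold")
                << "\n";
      return 0;
    }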

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 3 additions & 3 deletions
@@ -2664,7 +2664,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
     NamedRegionTimer T("spill", "Spiller", TimerGroupName,
                        TimerGroupDescription, TimePassesIsEnabled);
     LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
-    spiller().spill(LRE);
+    spiller().spill(LRE, &Order);
     ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
 
     // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
@@ -2908,8 +2908,8 @@ bool RAGreedy::run(MachineFunction &mf) {
   PriorityAdvisor = PriorityProvider->getAdvisor(*MF, *this, *Indexes);
 
   VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
-  SpillerInstance.reset(
-      createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF, *VRM, *VRAI));
+  SpillerInstance.reset(createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF,
+                                            *VRM, *VRAI, Matrix));
 
   VRAI->calculateSpillWeightsAndHints();
 

llvm/test/CodeGen/X86/avx-cmp.ll

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,8 @@ define void @render(double %a0) nounwind {
 ; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
 ; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vucomisd %xmm1, %xmm0
 ; CHECK-NEXT:    jne .LBB2_4
 ; CHECK-NEXT:    jnp .LBB2_2
 ; CHECK-NEXT:  .LBB2_4: # %if.then

llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll

Lines changed: 2 additions & 1 deletion
@@ -111,7 +111,8 @@ define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) {
 ; AVX512-NEXT:    callq use.v4.i32@PLT
 ; AVX512-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
 ; AVX512-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
-; AVX512-NEXT:    vmovdqa32 {{.*#+}} xmm0 {%k1} {z} = [4294967295,4294967295,4294967295,4294967295]
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    addq $24, %rsp
 ; AVX512-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512-NEXT:    retq

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 2 additions & 1 deletion
@@ -195,7 +195,8 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-SSE-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; CHECK-SSE-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
+; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
+; CHECK-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; CHECK-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-SSE-NEXT:    callq __extendhfsf2@PLT

llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll

Lines changed: 14 additions & 7 deletions
@@ -567,7 +567,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
 ; CHECK-NEXT:    cvttss2si %xmm0, %eax
 ; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    cmovbl %ebp, %eax
-; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    ucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    cmoval %ebx, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
 ; CHECK-NEXT:    cmovpl %ebx, %eax
@@ -581,7 +582,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
 ; CHECK-NEXT:    cvttss2si %xmm0, %eax
 ; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    cmovbl %ebp, %eax
-; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    ucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    cmoval %ebx, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
 ; CHECK-NEXT:    cmovpl %ebx, %eax
@@ -593,7 +595,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
 ; CHECK-NEXT:    cvttss2si %xmm0, %eax
 ; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    cmovbl %ebp, %eax
-; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    ucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    cmoval %ebx, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
 ; CHECK-NEXT:    cmovpl %ebx, %eax
@@ -609,7 +612,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
 ; CHECK-NEXT:    cvttss2si %xmm0, %eax
 ; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    cmovbl %ebp, %eax
-; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    ucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    cmoval %ebx, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
 ; CHECK-NEXT:    cmovpl %ebx, %eax
@@ -621,7 +625,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
 ; CHECK-NEXT:    cvttss2si %xmm0, %eax
 ; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    cmovbl %ebp, %eax
-; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    ucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    cmoval %ebx, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
 ; CHECK-NEXT:    cmovpl %ebx, %eax
@@ -634,7 +639,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
 ; CHECK-NEXT:    cvttss2si %xmm0, %eax
 ; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    cmovbl %ebp, %eax
-; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    ucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    cmoval %ebx, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
 ; CHECK-NEXT:    cmovpl %ebx, %eax
@@ -646,7 +652,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
 ; CHECK-NEXT:    cvttss2si %xmm0, %eax
 ; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    cmovbl %ebp, %eax
-; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    ucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    cmoval %ebx, %eax
 ; CHECK-NEXT:    ucomiss %xmm0, %xmm0
 ; CHECK-NEXT:    cmovpl %ebx, %eax
