Revert "[InlineSpiller] Check rematerialization before folding operand (#134015)" #137801

Conversation
@llvm/pr-subscribers-llvm-regalloc @llvm/pr-subscribers-backend-x86

Author: Matt Arsenault (arsenm)

Changes

This reverts commit b25b51e. The InlineSpiller should conceptually not be aware of the allocation order.

Patch is 42.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137801.diff

10 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/Spiller.h b/llvm/include/llvm/CodeGen/Spiller.h
index 84fc872a07606..3132cefeb6c68 100644
--- a/llvm/include/llvm/CodeGen/Spiller.h
+++ b/llvm/include/llvm/CodeGen/Spiller.h
@@ -23,7 +23,6 @@ class LiveIntervals;
class LiveStacks;
class MachineDominatorTree;
class MachineBlockFrequencyInfo;
-class AllocationOrder;
/// Spiller interface.
///
@@ -36,7 +35,7 @@ class Spiller {
virtual ~Spiller() = 0;
/// spill - Spill the LRE.getParent() live interval.
- virtual void spill(LiveRangeEdit &LRE, AllocationOrder *Order = nullptr) = 0;
+ virtual void spill(LiveRangeEdit &LRE) = 0;
/// Return the registers that were spilled.
virtual ArrayRef<Register> getSpilledRegs() = 0;
@@ -59,8 +58,7 @@ class Spiller {
/// of deferring though VirtRegMap.
Spiller *createInlineSpiller(const Spiller::RequiredAnalyses &Analyses,
MachineFunction &MF, VirtRegMap &VRM,
- VirtRegAuxInfo &VRAI,
- LiveRegMatrix *Matrix = nullptr);
+ VirtRegAuxInfo &VRAI);
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index f384740be2e33..920873c739f46 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "AllocationOrder.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -24,7 +23,6 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -151,14 +149,12 @@ class InlineSpiller : public Spiller {
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
- LiveRegMatrix *Matrix = nullptr;
// Variables that are valid during spill(), but used by multiple methods.
LiveRangeEdit *Edit = nullptr;
LiveInterval *StackInt = nullptr;
int StackSlot;
Register Original;
- AllocationOrder *Order = nullptr;
// All registers to spill to StackSlot, including the main register.
SmallVector<Register, 8> RegsToSpill;
@@ -188,13 +184,13 @@ class InlineSpiller : public Spiller {
public:
InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF,
- VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix)
+ VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
: MF(MF), LIS(Analyses.LIS), LSS(Analyses.LSS), VRM(VRM),
MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
- TRI(*MF.getSubtarget().getRegisterInfo()), Matrix(Matrix),
- HSpiller(Analyses, MF, VRM), VRAI(VRAI) {}
+ TRI(*MF.getSubtarget().getRegisterInfo()), HSpiller(Analyses, MF, VRM),
+ VRAI(VRAI) {}
- void spill(LiveRangeEdit &, AllocationOrder *Order = nullptr) override;
+ void spill(LiveRangeEdit &) override;
ArrayRef<Register> getSpilledRegs() override { return RegsToSpill; }
ArrayRef<Register> getReplacedRegs() override { return RegsReplaced; }
void postOptimization() override;
@@ -211,7 +207,6 @@ class InlineSpiller : public Spiller {
void markValueUsed(LiveInterval*, VNInfo*);
bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI);
- bool hasPhysRegAvailable(const MachineInstr &MI);
bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
void reMaterializeAll();
@@ -234,8 +229,8 @@ void Spiller::anchor() {}
Spiller *
llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses,
MachineFunction &MF, VirtRegMap &VRM,
- VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix) {
- return new InlineSpiller(Analyses, MF, VRM, VRAI, Matrix);
+ VirtRegAuxInfo &VRAI) {
+ return new InlineSpiller(Analyses, MF, VRM, VRAI);
}
//===----------------------------------------------------------------------===//
@@ -620,23 +615,6 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
return true;
}
-/// hasPhysRegAvailable - Check if there is an available physical register for
-/// rematerialization.
-bool InlineSpiller::hasPhysRegAvailable(const MachineInstr &MI) {
- if (!Order || !Matrix)
- return false;
-
- SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
- SlotIndex PrevIdx = UseIdx.getPrevSlot();
-
- for (MCPhysReg PhysReg : *Order) {
- if (!Matrix->checkInterference(PrevIdx, UseIdx, PhysReg))
- return true;
- }
-
- return false;
-}
-
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
@@ -683,7 +661,6 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
if (RM.OrigMI->canFoldAsLoad() &&
- (RM.OrigMI->mayLoad() || !hasPhysRegAvailable(MI)) &&
foldMemoryOperand(Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
@@ -1305,10 +1282,9 @@ void InlineSpiller::spillAll() {
Edit->eraseVirtReg(Reg);
}
-void InlineSpiller::spill(LiveRangeEdit &edit, AllocationOrder *order) {
+void InlineSpiller::spill(LiveRangeEdit &edit) {
++NumSpilledRanges;
Edit = &edit;
- Order = order;
assert(!edit.getReg().isStack() && "Trying to spill a stack slot.");
// Share a stack slot among all descendants of Original.
Original = VRM.getOriginal(edit.getReg());
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 250776e8bf7b1..56d3bd953f57d 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2664,7 +2664,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
NamedRegionTimer T("spill", "Spiller", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
- spiller().spill(LRE, &Order);
+ spiller().spill(LRE);
ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
// Tell LiveDebugVariables about the new ranges. Ranges not being covered by
@@ -2908,8 +2908,8 @@ bool RAGreedy::run(MachineFunction &mf) {
PriorityAdvisor = PriorityProvider->getAdvisor(*MF, *this, *Indexes);
VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
- SpillerInstance.reset(createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF,
- *VRM, *VRAI, Matrix));
+ SpillerInstance.reset(
+ createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF, *VRM, *VRAI));
VRAI->calculateSpillWeightsAndHints();
diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll
index 3ced4f71bad8c..d31107bfeb7bb 100644
--- a/llvm/test/CodeGen/X86/avx-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx-cmp.ll
@@ -43,8 +43,7 @@ define void @render(double %a0) nounwind {
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: jne .LBB2_4
; CHECK-NEXT: jnp .LBB2_2
; CHECK-NEXT: .LBB2_4: # %if.then
diff --git a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
index 3243d950740ca..95a7a10d50f59 100644
--- a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
+++ b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
@@ -111,8 +111,7 @@ define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) {
; AVX512-NEXT: callq use.v4.i32@PLT
; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovdqa32 {{.*#+}} xmm0 {%k1} {z} = [4294967295,4294967295,4294967295,4294967295]
; AVX512-NEXT: addq $24, %rsp
; AVX512-NEXT: .cfi_def_cfa_offset 8
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index 59a61722927de..67c9e7cc22236 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -195,8 +195,7 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
-; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index 7f6d64c21724a..536a1ae3b918d 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -567,8 +567,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -582,8 +581,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -595,8 +593,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -612,8 +609,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -625,8 +621,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -639,8 +634,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -652,8 +646,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
index ffbdd66529f5c..4305886168abe 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
@@ -280,12 +280,11 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: callq __fixunssfti@PLT
; CHECK-NEXT: movq %rax, %r12
; CHECK-NEXT: movq %rdx, %r13
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r14, %r13
; CHECK-NEXT: cmovbq %r14, %r12
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %rbp, %r12
; CHECK-NEXT: cmovaq %rbp, %r13
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
@@ -294,21 +293,19 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: callq __fixunssfti@PLT
; CHECK-NEXT: movq %rax, %rbp
; CHECK-NEXT: movq %rdx, %r14
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: cmovbq %rax, %r14
; CHECK-NEXT: cmovbq %rax, %rbp
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovaq %rax, %rbp
; CHECK-NEXT: cmovaq %rax, %r14
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __fixunssfti@PLT
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %ecx
; CHECK-NEXT: cmovbq %rcx, %rdx
; CHECK-NEXT: cmovbq %rcx, %rax
@@ -509,8 +506,7 @@ define <2 x i128> @test_unsigned_v2i128_v2f64(<2 x double> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __fixunsdfti@PLT
; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: xorpd %xmm1, %xmm1
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -566,8 +562,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -579,8 +574,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -590,8 +584,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -605,8 +598,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -616,8 +608,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -628,8 +619,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -639,8 +629,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -684,8 +673,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -694,8 +682,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomis...
[truncated]
You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions h,cpp -- llvm/include/llvm/CodeGen/Spiller.h llvm/lib/CodeGen/InlineSpiller.cpp llvm/lib/CodeGen/RegAllocGreedy.cpp

View the diff from clang-format here:

diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 920873c73..3fbac25c3 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -660,8 +660,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
- if (RM.OrigMI->canFoldAsLoad() &&
- foldMemoryOperand(Ops, RM.OrigMI)) {
+ if (RM.OrigMI->canFoldAsLoad() && foldMemoryOperand(Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
return true;
Can I pass RegClassInfo into InlineSpiller and call RegClassInfo.getOrder() to get similar information?
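A minimal sketch of that suggestion, purely illustrative and not part of this patch: it assumes the spiller keeps the LiveRegMatrix pointer added by the reverted commit and gains a RegisterClassInfo member (here called RegClassInfo).

// Hypothetical variant of the removed hasPhysRegAvailable(): walk the
// class's allocation order from RegisterClassInfo instead of taking an
// AllocationOrder from the register allocator.
bool InlineSpiller::hasPhysRegAvailable(const MachineInstr &MI,
                                        const LiveInterval &VirtReg) {
  if (!Matrix)
    return false;

  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
  SlotIndex PrevIdx = UseIdx.getPrevSlot();

  const TargetRegisterClass *RC = MRI.getRegClass(VirtReg.reg());
  // getOrder() yields the allocatable physregs of RC in preferred order.
  for (MCPhysReg PhysReg : RegClassInfo.getOrder(RC)) {
    if (!Matrix->checkInterference(PrevIdx, UseIdx, PhysReg))
      return true;
  }
  return false;
}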
FYI I fixed some build breakage in 1f56afa while you all discuss the revert here; you'll probably want to revert that as well. |
b25b51e adds a reference to LiveRegMatrix in Spiller.h without including the header or adding a declaration. That breaks modules builds. It looks like it might get reverted in llvm#137801, but let's fix the breakage in the meantime.
I'm not sure that's much better; it's still the inline spiller trying to perform an assignment.
The spiller doesn't need to do actual register assignment; it only needs to check whether an assignment is possible. That check is required to decide whether rematerialization or a folded load is better. How about doing this check in the register allocator, so the spiller can call it through a RegAllocBase pointer?
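A rough sketch of that idea. No such hook exists in-tree; the name hasAvailablePhysReg and its signature are made up for illustration.

// Hypothetical query hook on the allocator side; the spiller would hold a
// RegAllocBase pointer and stay unaware of allocation order itself.
class RegAllocBase {
public:
  // Return true if some physreg in VirtReg's class has no interference
  // over [PrevIdx, UseIdx), i.e. a rematerialized value could be assigned.
  virtual bool hasAvailablePhysReg(const LiveInterval &VirtReg,
                                   SlotIndex PrevIdx, SlotIndex UseIdx) = 0;
  // ...existing RegAllocBase members...
};

// The fold-vs-remat decision in InlineSpiller::reMaterializeFor() would
// then read roughly like the reverted check:
//   if (RM.OrigMI->canFoldAsLoad() &&
//       (RM.OrigMI->mayLoad() ||
//        !RA->hasAvailablePhysReg(VirtReg, PrevIdx, UseIdx)) &&
//       foldMemoryOperand(Ops, RM.OrigMI))
//     ... fold the load ...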
This reverts commit b25b51e.
The InlineSpiller should conceptually not be aware of the allocation order.