Skip to content

Commit d8127b2

Browse files
committed
InlineSpiller: Consider if all subranges are the same when avoiding redundant spills
This avoids some redundant spills of subranges, and avoids a compile failure. This greatly reduces the number of spills in a loop. The main range is not informative when multiple instructions are needed to fully define a register. A common scenario is a lowered reg_sequence where every subregister is sequentially defined, but each def changes the main range's value number. If we look at specific lanes at the use index, we can see the value is actually the same. In this testcase, there are a large number of materialized 64-bit constant defs which are hoisted outside of the loop by MachineLICM. These feed REG_SEQUENCEs, which are not considered rematerializable inside the loop. After coalescing, the split constant defs produce main ranges with an apparent phi def. There's no phi def if you look at each individual subrange, and only half of the register is really redefined to a constant. Fixes: SWDEV-380865 https://reviews.llvm.org/D147079
1 parent 7252787 commit d8127b2

File tree

7 files changed

+299
-342
lines changed

7 files changed

+299
-342
lines changed

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,31 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
495495
return true;
496496
}
497497

498+
/// Check if all subranges in \p LI and \p SLI have the same value number at \p Idx. Subranges are paired by exact LaneMask equality; a differing VNInfo or a partial lane overlap returns false.
499+
/// Subranges whose lanes do not overlap are ignored, so the result is true when nothing overlaps. \p MI, \p MRI and \p TRI are not referenced by the body.
500+
static bool allSubRangeValNoSame(const LiveInterval &LI,
501+
const LiveInterval &SLI,
502+
const MachineInstr &MI,
503+
const MachineRegisterInfo &MRI,
504+
const TargetRegisterInfo &TRI, SlotIndex Idx) {
505+
for (auto &SR : SLI.subranges()) {
506+
VNInfo *SubVNI = SR.getVNInfoAt(Idx);
507+
508+
for (auto &SubLI : LI.subranges()) {
509+
if (SubLI.LaneMask == SR.LaneMask) {
510+
if (SubVNI != SubLI.getVNInfoAt(Idx))
511+
return false;
512+
} else if ((SubLI.LaneMask & SR.LaneMask).any()) {
513+
// TODO: Check non-exact, overlapping subranges if they share the same
514+
// def instruction
515+
return false;
516+
}
517+
}
518+
}
519+
520+
return true;
521+
}
522+
498523
/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
499524
/// redundant spills of this value in SLI.reg and sibling copies.
500525
void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
@@ -524,7 +549,13 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
524549
if (!MI.mayStore() && !TII.isCopyInstr(MI))
525550
continue;
526551
SlotIndex Idx = LIS.getInstructionIndex(MI);
527-
if (LI->getVNInfoAt(Idx) != VNI)
552+
553+
// The main range value numbers will differ if multiple instructions are
554+
// used to define its various subregisters. Check the subregister value
555+
// numbers as a fallback.
556+
if (LI->getVNInfoAt(Idx) != VNI &&
557+
(!SLI.hasSubRanges() ||
558+
!allSubRangeValNoSame(*LI, SLI, MI, MRI, TRI, Idx)))
528559
continue;
529560

530561
// Follow sibling copies down the dominator tree.

llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ body: |
4747
; GCN-NEXT: {{ $}}
4848
; GCN-NEXT: bb.1:
4949
; GCN-NEXT: successors: %bb.2(0x80000000)
50-
; GCN-NEXT: liveins: $exec:0x000000000000000F, $sgpr30, $sgpr31, $vgpr0:0x0000000000000003, $vgpr1:0x0000000000000003, $vgpr2:0x0000000000000003, $vgpr3:0x0000000000000003, $vgpr4:0x0000000000000003, $vgpr5:0x0000000000000003, $vgpr6:0x0000000000000003, $vgpr7:0x0000000000000003, $vgpr8:0x0000000000000003, $vgpr9:0x0000000000000003, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
50+
; GCN-NEXT: liveins: $exec:0x000000000000000F, $sgpr30, $sgpr31, $vgpr0:0x0000000000000003, $vgpr1:0x0000000000000003, $vgpr2:0x0000000000000003, $vgpr3:0x0000000000000003, $vgpr4:0x0000000000000003, $vgpr5:0x0000000000000003, $vgpr6:0x0000000000000003, $vgpr7:0x0000000000000003, $vgpr8:0x0000000000000003, $vgpr9:0x0000000000000003, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
5151
; GCN-NEXT: {{ $}}
5252
; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec
5353
; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec
@@ -62,17 +62,15 @@ body: |
6262
; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
6363
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
6464
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
65-
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
66-
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
67-
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
68-
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: (store (s32) into %stack.2 + 4, addrspace 5)
65+
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.1, addrspace 5)
66+
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: (store (s32) into %stack.1 + 4, addrspace 5)
6967
; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
70-
; GCN-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1, addrspace 5)
71-
; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1 + 4, addrspace 5)
68+
; GCN-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.2, addrspace 5)
69+
; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.2 + 4, addrspace 5)
7270
; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
7371
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
74-
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2, addrspace 5)
75-
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2 + 4, addrspace 5)
72+
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5)
73+
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1 + 4, addrspace 5)
7674
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
7775
; GCN-NEXT: {{ $}}
7876
; GCN-NEXT: bb.2:

0 commit comments

Comments
 (0)