Skip to content

Commit 7252787

Browse files
committed
RegAllocGreedy: Fix detection of lanes read by a bundle
SplitKit creates questionably formed bundles of copies when it needs to copy a subset of live lanes and can't do it with a single subregister index. These are merely marked as part of a bundle, and don't start with a BUNDLE instruction. Queries for the slot index would give the first copy in the bundle, and we need to inspect the operands of all the other bundled copies. Also fix and simplify detection of read lane subsets. This causes some RISCV test regressions, but these look like accidentally beneficial splits. I don't see a subrange based reason to perform these splits. Avoids some really ugly regressions in a future patch. https://reviews.llvm.org/D146859
1 parent 5b7a7ec commit 7252787

File tree

6 files changed

+482
-356
lines changed

6 files changed

+482
-356
lines changed

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,16 +1329,20 @@ static unsigned getNumAllocatableRegsForConstraints(
13291329

13301330
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
13311331
const TargetRegisterInfo &TRI,
1332-
const MachineInstr &MI, Register Reg) {
1332+
const MachineInstr &FirstMI,
1333+
Register Reg) {
13331334
LaneBitmask Mask;
1334-
for (const MachineOperand &MO : MI.operands()) {
1335-
if (!MO.isReg() || MO.getReg() != Reg)
1336-
continue;
1335+
SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
1336+
(void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);
13371337

1338+
for (auto [MI, OpIdx] : Ops) {
1339+
const MachineOperand &MO = MI->getOperand(OpIdx);
1340+
assert(MO.isReg() && MO.getReg() == Reg);
13381341
unsigned SubReg = MO.getSubReg();
13391342
if (SubReg == 0 && MO.isUse()) {
1340-
Mask |= MRI.getMaxLaneMaskForVReg(Reg);
1341-
continue;
1343+
if (MO.isUndef())
1344+
continue;
1345+
return MRI.getMaxLaneMaskForVReg(Reg);
13421346
}
13431347

13441348
LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
@@ -1358,9 +1362,11 @@ static bool readsLaneSubset(const MachineRegisterInfo &MRI,
13581362
const MachineInstr *MI, const LiveInterval &VirtReg,
13591363
const TargetRegisterInfo *TRI, SlotIndex Use,
13601364
const TargetInstrInfo *TII) {
1361-
// Early check the common case.
1365+
// Early check the common case. Beware of the semi-formed bundles SplitKit
1366+
// creates by setting the bundle flag on copies without a matching BUNDLE.
1367+
13621368
auto DestSrc = TII->isCopyInstr(*MI);
1363-
if (DestSrc &&
1369+
if (DestSrc && !MI->isBundled() &&
13641370
DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
13651371
return false;
13661372

llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ body: |
8888
; RA-NEXT: S_NOP 0, csr_amdgpu, implicit [[DEF]], implicit [[DEF1]]
8989
; RA-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
9090
; RA-NEXT: S_BRANCH %bb.2
91+
;
9192
; VR-LABEL: name: splitkit_copy_bundle
9293
; VR: bb.0:
9394
; VR-NEXT: successors: %bb.1(0x80000000)
@@ -264,22 +265,36 @@ body: |
264265
; RA-NEXT: [[DEF2]].sub8:sgpr_512 = S_MOV_B32 -1
265266
; RA-NEXT: [[DEF2]].sub13:sgpr_512 = S_MOV_B32 -1
266267
; RA-NEXT: [[DEF2]].sub14:sgpr_512 = S_MOV_B32 -1
267-
; RA-NEXT: undef %15.sub4_sub5:sgpr_512 = COPY [[DEF2]].sub4_sub5 {
268-
; RA-NEXT: internal %15.sub10_sub11:sgpr_512 = COPY [[DEF2]].sub10_sub11
269-
; RA-NEXT: internal %15.sub7:sgpr_512 = COPY [[DEF2]].sub7
270-
; RA-NEXT: internal %15.sub8:sgpr_512 = COPY [[DEF2]].sub8
271-
; RA-NEXT: internal %15.sub13:sgpr_512 = COPY [[DEF2]].sub13
272-
; RA-NEXT: internal %15.sub14:sgpr_512 = COPY [[DEF2]].sub14
268+
; RA-NEXT: undef %16.sub4_sub5:sgpr_512 = COPY [[DEF2]].sub4_sub5 {
269+
; RA-NEXT: internal %16.sub10_sub11:sgpr_512 = COPY [[DEF2]].sub10_sub11
270+
; RA-NEXT: internal %16.sub7:sgpr_512 = COPY [[DEF2]].sub7
271+
; RA-NEXT: internal %16.sub8:sgpr_512 = COPY [[DEF2]].sub8
272+
; RA-NEXT: internal %16.sub13:sgpr_512 = COPY [[DEF2]].sub13
273+
; RA-NEXT: internal %16.sub14:sgpr_512 = COPY [[DEF2]].sub14
273274
; RA-NEXT: }
274-
; RA-NEXT: SI_SPILL_S512_SAVE %15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
275+
; RA-NEXT: undef %18.sub4_sub5:sgpr_512 = COPY %16.sub4_sub5 {
276+
; RA-NEXT: internal %18.sub10_sub11:sgpr_512 = COPY %16.sub10_sub11
277+
; RA-NEXT: internal %18.sub7:sgpr_512 = COPY %16.sub7
278+
; RA-NEXT: internal %18.sub8:sgpr_512 = COPY %16.sub8
279+
; RA-NEXT: internal %18.sub13:sgpr_512 = COPY %16.sub13
280+
; RA-NEXT: internal %18.sub14:sgpr_512 = COPY %16.sub14
281+
; RA-NEXT: }
282+
; RA-NEXT: SI_SPILL_S512_SAVE %18, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
275283
; RA-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
276284
; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
277-
; RA-NEXT: undef %14.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 {
278-
; RA-NEXT: internal %14.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11
279-
; RA-NEXT: internal %14.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7
280-
; RA-NEXT: internal %14.sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8
281-
; RA-NEXT: internal %14.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13
282-
; RA-NEXT: internal %14.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14
285+
; RA-NEXT: undef %17.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 {
286+
; RA-NEXT: internal %17.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11
287+
; RA-NEXT: internal %17.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7
288+
; RA-NEXT: internal %17.sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8
289+
; RA-NEXT: internal %17.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13
290+
; RA-NEXT: internal %17.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14
291+
; RA-NEXT: }
292+
; RA-NEXT: undef %14.sub4_sub5:sgpr_512 = COPY %17.sub4_sub5 {
293+
; RA-NEXT: internal %14.sub10_sub11:sgpr_512 = COPY %17.sub10_sub11
294+
; RA-NEXT: internal %14.sub7:sgpr_512 = COPY %17.sub7
295+
; RA-NEXT: internal %14.sub8:sgpr_512 = COPY %17.sub8
296+
; RA-NEXT: internal %14.sub13:sgpr_512 = COPY %17.sub13
297+
; RA-NEXT: internal %14.sub14:sgpr_512 = COPY %17.sub14
283298
; RA-NEXT: }
284299
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0 :: (dereferenceable invariant load (s32))
285300
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0 :: (dereferenceable invariant load (s32))
@@ -290,6 +305,7 @@ body: |
290305
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0 :: (dereferenceable invariant load (s32))
291306
; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0 :: (dereferenceable invariant load (s32))
292307
; RA-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]]
308+
;
293309
; VR-LABEL: name: splitkit_copy_unbundle_reorder
294310
; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
295311
; VR-NEXT: renamable $sgpr16 = S_MOV_B32 -1

0 commit comments

Comments
 (0)