Commit 88e5251

[AMDGPU] Compiler should synthesize private buffer resource descriptor from flat_scratch_init (#79586)
This change synthesizes the private buffer resource descriptor in the kernel prologue instead of using the preloaded kernel argument.
1 parent 74fc16a commit 88e5251

25 files changed: +494, -350 lines
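
The core of the change is in SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup: when the Flat Scratch Init user SGPR pair is available, the scratch resource descriptor is no longer taken from the preloaded PRIVATE_SEGMENT_BUFFER kernel argument but assembled in the prologue. The sketch below is condensed from the SIFrameLowering.cpp hunk further down (surrounding cases and the non-HSA paths are omitted); it is an excerpt for orientation, not a standalone compilable unit.

// Inside emitEntryFunctionScratchRsrcRegSetup, HSA/Mesa path (condensed from
// the hunk below):
if (FlatScratchInit) {
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  uint64_t Rsrc23 = TII->getScratchRsrcWords23();
  // Words 0-1 of the V#: the scratch base, copied from flat_scratch_init.
  I = BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY),
              TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1))
          .addReg(FlatScratchInit)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  // Words 2-3: known constants, materialized as immediates.
  BuildMI(MBB, I, DL, SMovB32, TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2))
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, SMovB32, TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3))
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  return;
}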

llvm/docs/AMDGPUUsage.rst

Lines changed: 7 additions & 3 deletions
@@ -5530,9 +5530,13 @@ If the *Target Properties* column of :ref:`amdgpu-processor-table` specifies
 Instead the flat SCRATCH instructions are used.
 
 Otherwise, Private Segment Buffer SGPR register is used to initialize 4 SGPRs
-that are used as a V# to access scratch. CP uses the value provided by the
-runtime. It is used, together with Scratch Wavefront Offset as an offset, to
-access the private memory space using a segment address. See
+that are used as a V# to access scratch.
+The compiler synthesizes the initialization value for the Private Segment
+Buffer in the kernel prologue, using the Flat Scratch Init to initialize low
+64-bit and a known constant for the high ones. If the Flat Scratch Init is not
+available, CP uses the value provided by the runtime. It is used, together with
+Scratch Wavefront Offset as an offset, to access the private memory space using
+a segment address. See
 :ref:`amdgpu-amdhsa-initial-kernel-execution-state`.
 
 The scratch V# is a four-aligned SGPR and always selected for the kernel as
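
To make the new wording concrete: the scratch V# is four 32-bit words, and only its low 64 bits need a run-time value. The stand-alone sketch below is illustrative only and not part of the patch; the struct, helper name, and example values are invented here, mirroring the constants that appear in the updated tests further down.

#include <cstdint>
#include <cstdio>

// Illustrative sketch (not LLVM code): words 0-1 of the scratch V# take the
// flat scratch base at run time; words 2-3 are a per-subtarget compile-time
// constant. Word 2 is NUM_RECORDS (all ones, hence "s_mov_b32 s2, -1" in the
// updated tests); word 3 packs the format/stride bits, which is why the tests
// show different s3 immediates (0xe00000, 0x11e80000, 0x31c16000) per target.
struct ScratchRsrc {
  uint32_t Word[4];
};

ScratchRsrc synthesizeScratchRsrc(uint64_t FlatScratchBase, uint64_t Rsrc23) {
  return {{uint32_t(FlatScratchBase),        // s_mov_b64 s[0:1], flat_scratch
           uint32_t(FlatScratchBase >> 32),  //   (low 64 bits of the V#)
           uint32_t(Rsrc23),                 // s_mov_b32 s2, -1
           uint32_t(Rsrc23 >> 32)}};         // s_mov_b32 s3, <known constant>
}

int main() {
  // Example values: an arbitrary scratch base and a constant matching the
  // kernel_caller_stack test below (s2 = -1, s3 = 0xe00000).
  ScratchRsrc R = synthesizeScratchRsrc(0x0000004000000000ull,
                                        0x00e00000ffffffffull);
  std::printf("%08x %08x %08x %08x\n", R.Word[0], R.Word[1], R.Word[2],
              R.Word[3]);
}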

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 67 additions & 41 deletions
@@ -379,7 +379,8 @@ class PrologEpilogSGPRSpillBuilder {
 } // namespace llvm
 
 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
-void SIFrameLowering::emitEntryFunctionFlatScratchInit(
+// and return the FlatScratchInit Register used
+Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -399,6 +400,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
 
   Register FlatScrInitLo;
   Register FlatScrInitHi;
+  Register FlatScratchInitReg;
 
   if (ST.isAmdPalOS()) {
     // Extract the scratch offset from the descriptor in the GIT
@@ -408,7 +410,6 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
 
     // Find unused reg to load flat scratch init into
     MachineRegisterInfo &MRI = MF.getRegInfo();
-    Register FlatScrInit = AMDGPU::NoRegister;
     ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
     unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
     AllSGPR64s = AllSGPR64s.slice(
@@ -417,16 +418,28 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
     for (MCPhysReg Reg : AllSGPR64s) {
       if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
           MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
-        FlatScrInit = Reg;
+        FlatScratchInitReg = Reg;
         break;
       }
     }
-    assert(FlatScrInit && "Failed to find free register for scratch init");
 
-    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
-    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
+  } else {
+    FlatScratchInitReg =
+        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
+
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    MRI.addLiveIn(FlatScratchInitReg);
+    MBB.addLiveIn(FlatScratchInitReg);
+  }
+
+  assert(FlatScratchInitReg && "Failed to find free register for scratch init");
+
+  FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+  FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
+
+  if (ST.isAmdPalOS()) {
 
-    buildGitPtr(MBB, I, DL, TII, FlatScrInit);
+    buildGitPtr(MBB, I, DL, TII, FlatScratchInitReg);
 
     // We now have the GIT ptr - now get the scratch descriptor from the entry
     // at offset 0 (or offset 16 for a compute shader).
@@ -441,29 +454,18 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
         MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
     const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
     unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
-    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
-        .addReg(FlatScrInit)
+    BuildMI(MBB, I, DL, LoadDwordX2, FlatScratchInitReg)
+        .addReg(FlatScratchInitReg)
         .addImm(EncodedOffset) // offset
         .addImm(0) // cpol
         .addMemOperand(MMO);
 
     // Mask the offset in [47:0] of the descriptor
     const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
     auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
-        .addReg(FlatScrInitHi)
-        .addImm(0xffff);
+                   .addReg(FlatScrInitHi)
+                   .addImm(0xffff);
     And->getOperand(3).setIsDead(); // Mark SCC as dead.
-  } else {
-    Register FlatScratchInitReg =
-        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
-    assert(FlatScratchInitReg);
-
-    MachineRegisterInfo &MRI = MF.getRegInfo();
-    MRI.addLiveIn(FlatScratchInitReg);
-    MBB.addLiveIn(FlatScratchInitReg);
-
-    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
-    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
   }
 
   // Do a 64-bit pointer add.
@@ -486,20 +488,21 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
         addReg(FlatScrInitHi).
         addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                        (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
-      return;
+      return FlatScratchInitReg;
     }
 
-    // For GFX9.
+    assert(ST.getGeneration() == AMDGPUSubtarget::GFX9);
+
     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
-      .addReg(FlatScrInitLo)
-      .addReg(ScratchWaveOffsetReg);
+        .addReg(FlatScrInitLo)
+        .addReg(ScratchWaveOffsetReg);
     auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                         AMDGPU::FLAT_SCR_HI)
                     .addReg(FlatScrInitHi)
                     .addImm(0);
     Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
 
-    return;
+    return AMDGPU::FLAT_SCR;
   }
 
   assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
@@ -520,6 +523,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
                   .addReg(FlatScrInitLo, RegState::Kill)
                   .addImm(8);
   LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
+  return AMDGPU::FLAT_SCR;
 }
 
 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
@@ -611,11 +615,15 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const Function &F = MF.getFunction();
   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 
   assert(MFI->isEntryFunction());
 
+  bool NeedsFlatScratchInit =
+      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
+      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
+       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
+
   Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
 
@@ -641,7 +649,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   // Now that we have fixed the reserved SRSRC we need to locate the
   // (potentially) preloaded SRSRC.
   Register PreloadedScratchRsrcReg;
-  if (ST.isAmdHsaOrMesa(F)) {
+  if (ST.isAmdHsaOrMesa(MF.getFunction()) && !NeedsFlatScratchInit) {
     PreloadedScratchRsrcReg =
         MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
     if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
@@ -697,33 +705,30 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
   }
 
-  bool NeedsFlatScratchInit =
-      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
-      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
-       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
-
   if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
       PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
     MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
     MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
   }
 
+  Register FlatScratchInit;
   if (NeedsFlatScratchInit) {
-    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
+    FlatScratchInit =
+        emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
   }
 
   if (ScratchRsrcReg) {
-    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
-                                         PreloadedScratchRsrcReg,
-                                         ScratchRsrcReg, ScratchWaveOffsetReg);
+    emitEntryFunctionScratchRsrcRegSetup(
+        MF, MBB, I, DL, FlatScratchInit, ScratchRsrcReg,
+        PreloadedScratchRsrcReg, ScratchWaveOffsetReg);
   }
 }
 
 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
-    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
+    const DebugLoc &DL, Register FlatScratchInit, Register ScratchRsrcReg,
+    Register PreloadedScratchRsrcReg, Register ScratchWaveOffsetReg) const {
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -771,7 +776,8 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
           .addImm(21)
           .addReg(Rsrc03);
     }
-  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
+  } else if (ST.isMesaGfxShader(Fn) ||
+             (!FlatScratchInit.isValid() && !PreloadedScratchRsrcReg)) {
     assert(!ST.isAmdHsaOrMesa(Fn));
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
 
@@ -830,6 +836,26 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
         .addImm(Rsrc23 >> 32)
         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
   } else if (ST.isAmdHsaOrMesa(Fn)) {
+
+    if (FlatScratchInit) {
+      const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+      Register Lo_32 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+      Register Hi_32 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+      uint64_t Rsrc23 = TII->getScratchRsrcWords23();
+      I = BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY),
+                  TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1))
+              .addReg(FlatScratchInit)
+              .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      BuildMI(MBB, I, DL, SMovB32, Lo_32)
+          .addImm(Rsrc23 & 0xffffffff)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+      BuildMI(MBB, I, DL, SMovB32, Hi_32)
+          .addImm(Rsrc23 >> 32)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      return;
+    }
+
     assert(PreloadedScratchRsrcReg);
 
     if (ScratchRsrcReg != PreloadedScratchRsrcReg) {

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 7 additions & 7 deletions
@@ -67,19 +67,19 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
                             MachineBasicBlock::iterator MI) const override;
 
 private:
-  void emitEntryFunctionFlatScratchInit(MachineFunction &MF,
-                                        MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I,
-                                        const DebugLoc &DL,
-                                        Register ScratchWaveOffsetReg) const;
+  Register
+  emitEntryFunctionFlatScratchInit(MachineFunction &MF, MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I,
+                                   const DebugLoc &DL,
+                                   Register ScratchWaveOffsetReg) const;
 
   Register getEntryFunctionReservedScratchRsrcReg(MachineFunction &MF) const;
 
   void emitEntryFunctionScratchRsrcRegSetup(
       MachineFunction &MF, MachineBasicBlock &MBB,
       MachineBasicBlock::iterator I, const DebugLoc &DL,
-      Register PreloadedPrivateBufferReg, Register ScratchRsrcReg,
-      Register ScratchWaveOffsetReg) const;
+      Register FlatScratchInit, Register ScratchRsrcReg,
+      Register PreloadedScratchRsrcReg, Register ScratchWaveOffsetReg) const;
 
 public:
   bool hasFP(const MachineFunction &MF) const override;

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 6 additions & 4 deletions
@@ -13,10 +13,11 @@ define amdgpu_kernel void @kernel_caller_stack() {
 ; MUBUF-LABEL: kernel_caller_stack:
 ; MUBUF: ; %bb.0:
 ; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s4, s7
+; MUBUF-NEXT: s_mov_b32 s2, -1
 ; MUBUF-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; MUBUF-NEXT: s_add_u32 s0, s0, s7
+; MUBUF-NEXT: s_mov_b32 s3, 0xe00000
 ; MUBUF-NEXT: s_mov_b32 s32, 0
-; MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; MUBUF-NEXT: s_mov_b64 s[0:1], flat_scratch
 ; MUBUF-NEXT: v_mov_b32_e32 v0, 9
 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
 ; MUBUF-NEXT: v_mov_b32_e32 v0, 10
@@ -61,9 +62,10 @@ define amdgpu_kernel void @kernel_caller_byval() {
 ; MUBUF-LABEL: kernel_caller_byval:
 ; MUBUF: ; %bb.0:
 ; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s4, s7
+; MUBUF-NEXT: s_mov_b32 s2, -1
 ; MUBUF-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; MUBUF-NEXT: s_add_u32 s0, s0, s7
-; MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; MUBUF-NEXT: s_mov_b32 s3, 0xe00000
+; MUBUF-NEXT: s_mov_b64 s[0:1], flat_scratch
 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0
 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12

llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll

Lines changed: 10 additions & 8 deletions
@@ -48,19 +48,20 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
 ; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
 ; FIXEDABI-SDAG: ; %bb.0:
 ; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9
-; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
+; FIXEDABI-SDAG-NEXT: s_mov_b32 s2, -1
 ; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; FIXEDABI-SDAG-NEXT: s_add_u32 s0, s0, s9
+; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
+; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
+; FIXEDABI-SDAG-NEXT: s_mov_b32 s3, 0x11e80000
 ; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
 ; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
-; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[0:1], flat_scratch
 ; FIXEDABI-SDAG-NEXT: s_mov_b32 s14, s8
 ; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v31, v0, v2
 ; FIXEDABI-SDAG-NEXT: s_mov_b64 s[8:9], 0
 ; FIXEDABI-SDAG-NEXT: s_mov_b32 s12, s6
 ; FIXEDABI-SDAG-NEXT: s_mov_b32 s13, s7
 ; FIXEDABI-SDAG-NEXT: s_mov_b32 s32, 0
-; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
 ; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[4:5]
 ; FIXEDABI-SDAG-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
 ; FIXEDABI-SDAG-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
@@ -70,19 +71,20 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
 ; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
 ; FIXEDABI-GISEL: ; %bb.0:
 ; FIXEDABI-GISEL-NEXT: s_add_i32 s4, s4, s9
-; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
+; FIXEDABI-GISEL-NEXT: s_mov_b32 s2, -1
 ; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; FIXEDABI-GISEL-NEXT: s_add_u32 s0, s0, s9
+; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
+; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
+; FIXEDABI-GISEL-NEXT: s_mov_b32 s3, 0x11e80000
 ; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
 ; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
-; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[0:1], flat_scratch
 ; FIXEDABI-GISEL-NEXT: s_mov_b32 s14, s8
 ; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v31, v0, v1
 ; FIXEDABI-GISEL-NEXT: s_mov_b64 s[8:9], 0
 ; FIXEDABI-GISEL-NEXT: s_mov_b32 s12, s6
 ; FIXEDABI-GISEL-NEXT: s_mov_b32 s13, s7
 ; FIXEDABI-GISEL-NEXT: s_mov_b32 s32, 0
-; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
 ; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[4:5]
 ; FIXEDABI-GISEL-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
 ; FIXEDABI-GISEL-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12

llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll

Lines changed: 3 additions & 2 deletions
@@ -10,8 +10,9 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
 ; CHECK-NEXT: s_load_dwordx8 s[36:43], s[6:7], 0x0
-; CHECK-NEXT: s_add_u32 s0, s0, s15
-; CHECK-NEXT: s_addc_u32 s1, s1, 0
+; CHECK-NEXT: s_mov_b32 s2, -1
+; CHECK-NEXT: s_mov_b32 s3, 0x31c16000
+; CHECK-NEXT: s_mov_b64 s[0:1], s[10:11]
 ; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
 ; CHECK-NEXT: s_mov_b32 s8, 0
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 4 additions & 3 deletions
@@ -5,13 +5,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
 ; GFX90A-LABEL: name: f1
 ; GFX90A: bb.0.bb:
 ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
-; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr15, $sgpr10_sgpr11
+; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr15, $sgpr10_sgpr11
 ; GFX90A-NEXT: {{ $}}
 ; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0
 ; GFX90A-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr10, $sgpr15, implicit-def $scc
 ; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr11, 0, implicit-def dead $scc, implicit $scc
-; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr15, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX90A-NEXT: $sgpr2 = S_MOV_B32 4294967295, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX90A-NEXT: $sgpr3 = S_MOV_B32 14680064, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX90A-NEXT: $sgpr0_sgpr1 = COPY $flat_scr, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX90A-NEXT: renamable $sgpr10_sgpr11 = COPY $sgpr8_sgpr9
 ; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
 ; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr6_sgpr7, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
