Skip to content

Commit f661057

Browse files
authored
Revert "[AMDGPU] Compiler should synthesize private buffer resource descriptor from flat_scratch_init" (#81234)
Reverts #79586. This broke the AMDGPU OpenMP Offload buildbot. The typical error message was that the GPU attempted to read beyond the largest legal address. Error message: AMDGPU fatal error 1: Received error in queue 0x7f8363f22000: HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address.
1 parent 38b54c7 commit f661057

25 files changed

+350
-494
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5530,13 +5530,9 @@ If the *Target Properties* column of :ref:`amdgpu-processor-table` specifies
55305530
Instead the flat SCRATCH instructions are used.
55315531

55325532
Otherwise, Private Segment Buffer SGPR register is used to initialize 4 SGPRs
5533-
that are used as a V# to access scratch.
5534-
The compiler synthesizes the initialization value for the Private Segment
5535-
Buffer in the kernel prologue, using the Flat Scratch Init to initialize low
5536-
64-bit and a known constant for the high ones. If the Flat Scratch Init is not
5537-
available, CP uses the value provided by the runtime. It is used, together with
5538-
Scratch Wavefront Offset as an offset, to access the private memory space using
5539-
a segment address. See
5533+
that are used as a V# to access scratch. CP uses the value provided by the
5534+
runtime. It is used, together with Scratch Wavefront Offset as an offset, to
5535+
access the private memory space using a segment address. See
55405536
:ref:`amdgpu-amdhsa-initial-kernel-execution-state`.
55415537

55425538
The scratch V# is a four-aligned SGPR and always selected for the kernel as

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 41 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,7 @@ class PrologEpilogSGPRSpillBuilder {
379379
} // namespace llvm
380380

381381
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
382-
// and return the FlatScratchInit Register used
383-
Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
382+
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
384383
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
385384
const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
386385
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -400,7 +399,6 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
400399

401400
Register FlatScrInitLo;
402401
Register FlatScrInitHi;
403-
Register FlatScratchInitReg;
404402

405403
if (ST.isAmdPalOS()) {
406404
// Extract the scratch offset from the descriptor in the GIT
@@ -410,6 +408,7 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
410408

411409
// Find unused reg to load flat scratch init into
412410
MachineRegisterInfo &MRI = MF.getRegInfo();
411+
Register FlatScrInit = AMDGPU::NoRegister;
413412
ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
414413
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
415414
AllSGPR64s = AllSGPR64s.slice(
@@ -418,28 +417,16 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
418417
for (MCPhysReg Reg : AllSGPR64s) {
419418
if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
420419
MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
421-
FlatScratchInitReg = Reg;
420+
FlatScrInit = Reg;
422421
break;
423422
}
424423
}
424+
assert(FlatScrInit && "Failed to find free register for scratch init");
425425

426-
} else {
427-
FlatScratchInitReg =
428-
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
429-
430-
MachineRegisterInfo &MRI = MF.getRegInfo();
431-
MRI.addLiveIn(FlatScratchInitReg);
432-
MBB.addLiveIn(FlatScratchInitReg);
433-
}
434-
435-
assert(FlatScratchInitReg && "Failed to find free register for scratch init");
436-
437-
FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
438-
FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
439-
440-
if (ST.isAmdPalOS()) {
426+
FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
427+
FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
441428

442-
buildGitPtr(MBB, I, DL, TII, FlatScratchInitReg);
429+
buildGitPtr(MBB, I, DL, TII, FlatScrInit);
443430

444431
// We now have the GIT ptr - now get the scratch descriptor from the entry
445432
// at offset 0 (or offset 16 for a compute shader).
@@ -454,18 +441,29 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
454441
MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
455442
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
456443
unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
457-
BuildMI(MBB, I, DL, LoadDwordX2, FlatScratchInitReg)
458-
.addReg(FlatScratchInitReg)
444+
BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
445+
.addReg(FlatScrInit)
459446
.addImm(EncodedOffset) // offset
460447
.addImm(0) // cpol
461448
.addMemOperand(MMO);
462449

463450
// Mask the offset in [47:0] of the descriptor
464451
const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
465452
auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
466-
.addReg(FlatScrInitHi)
467-
.addImm(0xffff);
453+
.addReg(FlatScrInitHi)
454+
.addImm(0xffff);
468455
And->getOperand(3).setIsDead(); // Mark SCC as dead.
456+
} else {
457+
Register FlatScratchInitReg =
458+
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
459+
assert(FlatScratchInitReg);
460+
461+
MachineRegisterInfo &MRI = MF.getRegInfo();
462+
MRI.addLiveIn(FlatScratchInitReg);
463+
MBB.addLiveIn(FlatScratchInitReg);
464+
465+
FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
466+
FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
469467
}
470468

471469
// Do a 64-bit pointer add.
@@ -488,21 +486,20 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
488486
addReg(FlatScrInitHi).
489487
addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
490488
(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
491-
return FlatScratchInitReg;
489+
return;
492490
}
493491

494-
assert(ST.getGeneration() == AMDGPUSubtarget::GFX9);
495-
492+
// For GFX9.
496493
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
497-
.addReg(FlatScrInitLo)
498-
.addReg(ScratchWaveOffsetReg);
494+
.addReg(FlatScrInitLo)
495+
.addReg(ScratchWaveOffsetReg);
499496
auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
500497
AMDGPU::FLAT_SCR_HI)
501498
.addReg(FlatScrInitHi)
502499
.addImm(0);
503500
Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
504501

505-
return AMDGPU::FLAT_SCR;
502+
return;
506503
}
507504

508505
assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
@@ -523,7 +520,6 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
523520
.addReg(FlatScrInitLo, RegState::Kill)
524521
.addImm(8);
525522
LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
526-
return AMDGPU::FLAT_SCR;
527523
}
528524

529525
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
@@ -615,15 +611,11 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
615611
const SIInstrInfo *TII = ST.getInstrInfo();
616612
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
617613
MachineRegisterInfo &MRI = MF.getRegInfo();
614+
const Function &F = MF.getFunction();
618615
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
619616

620617
assert(MFI->isEntryFunction());
621618

622-
bool NeedsFlatScratchInit =
623-
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
624-
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
625-
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
626-
627619
Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
628620
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
629621

@@ -649,7 +641,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
649641
// Now that we have fixed the reserved SRSRC we need to locate the
650642
// (potentially) preloaded SRSRC.
651643
Register PreloadedScratchRsrcReg;
652-
if (ST.isAmdHsaOrMesa(MF.getFunction()) && !NeedsFlatScratchInit) {
644+
if (ST.isAmdHsaOrMesa(F)) {
653645
PreloadedScratchRsrcReg =
654646
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
655647
if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
@@ -705,30 +697,33 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
705697
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
706698
}
707699

700+
bool NeedsFlatScratchInit =
701+
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
702+
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
703+
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
704+
708705
if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
709706
PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
710707
MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
711708
MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
712709
}
713710

714-
Register FlatScratchInit;
715711
if (NeedsFlatScratchInit) {
716-
FlatScratchInit =
717-
emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
712+
emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
718713
}
719714

720715
if (ScratchRsrcReg) {
721-
emitEntryFunctionScratchRsrcRegSetup(
722-
MF, MBB, I, DL, FlatScratchInit, ScratchRsrcReg,
723-
PreloadedScratchRsrcReg, ScratchWaveOffsetReg);
716+
emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
717+
PreloadedScratchRsrcReg,
718+
ScratchRsrcReg, ScratchWaveOffsetReg);
724719
}
725720
}
726721

727722
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
728723
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
729724
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
730-
const DebugLoc &DL, Register FlatScratchInit, Register ScratchRsrcReg,
731-
Register PreloadedScratchRsrcReg, Register ScratchWaveOffsetReg) const {
725+
const DebugLoc &DL, Register PreloadedScratchRsrcReg,
726+
Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
732727

733728
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
734729
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -776,8 +771,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
776771
.addImm(21)
777772
.addReg(Rsrc03);
778773
}
779-
} else if (ST.isMesaGfxShader(Fn) ||
780-
(!FlatScratchInit.isValid() && !PreloadedScratchRsrcReg)) {
774+
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
781775
assert(!ST.isAmdHsaOrMesa(Fn));
782776
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
783777

@@ -836,26 +830,6 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
836830
.addImm(Rsrc23 >> 32)
837831
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
838832
} else if (ST.isAmdHsaOrMesa(Fn)) {
839-
840-
if (FlatScratchInit) {
841-
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
842-
Register Lo_32 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
843-
Register Hi_32 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
844-
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
845-
I = BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY),
846-
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1))
847-
.addReg(FlatScratchInit)
848-
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
849-
BuildMI(MBB, I, DL, SMovB32, Lo_32)
850-
.addImm(Rsrc23 & 0xffffffff)
851-
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
852-
853-
BuildMI(MBB, I, DL, SMovB32, Hi_32)
854-
.addImm(Rsrc23 >> 32)
855-
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
856-
return;
857-
}
858-
859833
assert(PreloadedScratchRsrcReg);
860834

861835
if (ScratchRsrcReg != PreloadedScratchRsrcReg) {

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,19 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
6767
MachineBasicBlock::iterator MI) const override;
6868

6969
private:
70-
Register
71-
emitEntryFunctionFlatScratchInit(MachineFunction &MF, MachineBasicBlock &MBB,
72-
MachineBasicBlock::iterator I,
73-
const DebugLoc &DL,
74-
Register ScratchWaveOffsetReg) const;
70+
void emitEntryFunctionFlatScratchInit(MachineFunction &MF,
71+
MachineBasicBlock &MBB,
72+
MachineBasicBlock::iterator I,
73+
const DebugLoc &DL,
74+
Register ScratchWaveOffsetReg) const;
7575

7676
Register getEntryFunctionReservedScratchRsrcReg(MachineFunction &MF) const;
7777

7878
void emitEntryFunctionScratchRsrcRegSetup(
7979
MachineFunction &MF, MachineBasicBlock &MBB,
8080
MachineBasicBlock::iterator I, const DebugLoc &DL,
81-
Register FlatScratchInit, Register ScratchRsrcReg,
82-
Register PreloadedScratchRsrcReg, Register ScratchWaveOffsetReg) const;
81+
Register PreloadedPrivateBufferReg, Register ScratchRsrcReg,
82+
Register ScratchWaveOffsetReg) const;
8383

8484
public:
8585
bool hasFP(const MachineFunction &MF) const override;

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ define amdgpu_kernel void @kernel_caller_stack() {
1313
; MUBUF-LABEL: kernel_caller_stack:
1414
; MUBUF: ; %bb.0:
1515
; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s4, s7
16-
; MUBUF-NEXT: s_mov_b32 s2, -1
1716
; MUBUF-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
18-
; MUBUF-NEXT: s_mov_b32 s3, 0xe00000
17+
; MUBUF-NEXT: s_add_u32 s0, s0, s7
1918
; MUBUF-NEXT: s_mov_b32 s32, 0
20-
; MUBUF-NEXT: s_mov_b64 s[0:1], flat_scratch
19+
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
2120
; MUBUF-NEXT: v_mov_b32_e32 v0, 9
2221
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
2322
; MUBUF-NEXT: v_mov_b32_e32 v0, 10
@@ -62,10 +61,9 @@ define amdgpu_kernel void @kernel_caller_byval() {
6261
; MUBUF-LABEL: kernel_caller_byval:
6362
; MUBUF: ; %bb.0:
6463
; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s4, s7
65-
; MUBUF-NEXT: s_mov_b32 s2, -1
6664
; MUBUF-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
67-
; MUBUF-NEXT: s_mov_b32 s3, 0xe00000
68-
; MUBUF-NEXT: s_mov_b64 s[0:1], flat_scratch
65+
; MUBUF-NEXT: s_add_u32 s0, s0, s7
66+
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
6967
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
7068
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0
7169
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4

llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,20 +48,19 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
4848
; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
4949
; FIXEDABI-SDAG: ; %bb.0:
5050
; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9
51-
; FIXEDABI-SDAG-NEXT: s_mov_b32 s2, -1
52-
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
53-
; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
5451
; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
55-
; FIXEDABI-SDAG-NEXT: s_mov_b32 s3, 0x11e80000
52+
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
53+
; FIXEDABI-SDAG-NEXT: s_add_u32 s0, s0, s9
5654
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
5755
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
58-
; FIXEDABI-SDAG-NEXT: s_mov_b64 s[0:1], flat_scratch
56+
; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
5957
; FIXEDABI-SDAG-NEXT: s_mov_b32 s14, s8
6058
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v31, v0, v2
6159
; FIXEDABI-SDAG-NEXT: s_mov_b64 s[8:9], 0
6260
; FIXEDABI-SDAG-NEXT: s_mov_b32 s12, s6
6361
; FIXEDABI-SDAG-NEXT: s_mov_b32 s13, s7
6462
; FIXEDABI-SDAG-NEXT: s_mov_b32 s32, 0
63+
; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
6564
; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[4:5]
6665
; FIXEDABI-SDAG-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
6766
; FIXEDABI-SDAG-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
@@ -71,20 +70,19 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
7170
; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
7271
; FIXEDABI-GISEL: ; %bb.0:
7372
; FIXEDABI-GISEL-NEXT: s_add_i32 s4, s4, s9
74-
; FIXEDABI-GISEL-NEXT: s_mov_b32 s2, -1
75-
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
76-
; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
7773
; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
78-
; FIXEDABI-GISEL-NEXT: s_mov_b32 s3, 0x11e80000
74+
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
75+
; FIXEDABI-GISEL-NEXT: s_add_u32 s0, s0, s9
7976
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
8077
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
81-
; FIXEDABI-GISEL-NEXT: s_mov_b64 s[0:1], flat_scratch
78+
; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
8279
; FIXEDABI-GISEL-NEXT: s_mov_b32 s14, s8
8380
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v31, v0, v1
8481
; FIXEDABI-GISEL-NEXT: s_mov_b64 s[8:9], 0
8582
; FIXEDABI-GISEL-NEXT: s_mov_b32 s12, s6
8683
; FIXEDABI-GISEL-NEXT: s_mov_b32 s13, s7
8784
; FIXEDABI-GISEL-NEXT: s_mov_b32 s32, 0
85+
; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
8886
; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[4:5]
8987
; FIXEDABI-GISEL-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
9088
; FIXEDABI-GISEL-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12

llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
1010
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
1111
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
1212
; CHECK-NEXT: s_load_dwordx8 s[36:43], s[6:7], 0x0
13-
; CHECK-NEXT: s_mov_b32 s2, -1
14-
; CHECK-NEXT: s_mov_b32 s3, 0x31c16000
15-
; CHECK-NEXT: s_mov_b64 s[0:1], s[10:11]
13+
; CHECK-NEXT: s_add_u32 s0, s0, s15
14+
; CHECK-NEXT: s_addc_u32 s1, s1, 0
1615
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
1716
; CHECK-NEXT: s_mov_b32 s8, 0
1817
; CHECK-NEXT: s_waitcnt lgkmcnt(0)

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
55
; GFX90A-LABEL: name: f1
66
; GFX90A: bb.0.bb:
77
; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
8-
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr15, $sgpr10_sgpr11
8+
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr15, $sgpr10_sgpr11
99
; GFX90A-NEXT: {{ $}}
1010
; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0
1111
; GFX90A-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr10, $sgpr15, implicit-def $scc
1212
; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr11, 0, implicit-def dead $scc, implicit $scc
13-
; GFX90A-NEXT: $sgpr2 = S_MOV_B32 4294967295, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
14-
; GFX90A-NEXT: $sgpr3 = S_MOV_B32 14680064, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
15-
; GFX90A-NEXT: $sgpr0_sgpr1 = COPY $flat_scr, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
13+
; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr15, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
14+
; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
1615
; GFX90A-NEXT: renamable $sgpr10_sgpr11 = COPY $sgpr8_sgpr9
1716
; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
1817
; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr6_sgpr7, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)

0 commit comments

Comments
 (0)