-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Lazily emit waitcnts on function entry #73122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -238,7 +238,9 @@ class WaitcntBrackets { | |
|
||
bool merge(const WaitcntBrackets &Other); | ||
|
||
RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII, | ||
RegInterval getRegInterval(Register Reg, const MachineRegisterInfo *MRI, | ||
const SIRegisterInfo *TRI) const; | ||
RegInterval getRegInterval(const MachineInstr *MI, | ||
const MachineRegisterInfo *MRI, | ||
const SIRegisterInfo *TRI, unsigned OpNo) const; | ||
|
||
|
@@ -252,6 +254,7 @@ class WaitcntBrackets { | |
const MachineRegisterInfo *MRI, WaitEventType E, | ||
MachineInstr &MI); | ||
|
||
void setPendingEvent(WaitEventType E) { PendingEvents |= 1 << E; } | ||
unsigned hasPendingEvent() const { return PendingEvents; } | ||
unsigned hasPendingEvent(WaitEventType E) const { | ||
return PendingEvents & (1 << E); | ||
|
@@ -292,6 +295,56 @@ class WaitcntBrackets { | |
VgprVmemTypes[GprNo] = 0; | ||
} | ||
|
||
void setNonEntryFunctionInitialState(const MachineFunction &MF) { | ||
const MachineRegisterInfo &MRI = MF.getRegInfo(); | ||
const SIRegisterInfo &TRI = ST->getInstrInfo()->getRegisterInfo(); | ||
|
||
// All counters are in unknown states. | ||
for (auto T : inst_counter_types()) | ||
setScoreUB(T, getWaitCountMax(T)); | ||
|
||
// There may be pending events of any type. | ||
if (ST->hasVscnt()) { | ||
setPendingEvent(VMEM_READ_ACCESS); | ||
setPendingEvent(VMEM_WRITE_ACCESS); | ||
setPendingEvent(SCRATCH_WRITE_ACCESS); | ||
} else { | ||
setPendingEvent(VMEM_ACCESS); | ||
} | ||
for (unsigned I = LDS_ACCESS; I < NUM_WAIT_EVENTS; ++I) | ||
setPendingEvent(WaitEventType(I)); | ||
|
||
auto SetStateForPhysReg = [&](MCRegister Reg) { | ||
RegInterval Interval = getRegInterval(Reg, &MRI, &TRI); | ||
if (Interval.first < NUM_ALL_VGPRS) { | ||
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { | ||
for (auto T : inst_counter_types()) | ||
setRegScore(RegNo, T, getWaitCountMax(T)); | ||
VgprVmemTypes[RegNo] = -1; | ||
} | ||
} else { | ||
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) | ||
setRegScore(RegNo, LGKM_CNT, getWaitCountMax(LGKM_CNT)); | ||
} | ||
}; | ||
|
||
// Live-in registers may depend on any counter. | ||
const MachineBasicBlock &EntryMBB = MF.front(); | ||
for (auto [Reg, Mask] : EntryMBB.liveins()) { | ||
// TODO: Use Mask to narrow the interval? | ||
SetStateForPhysReg(Reg); | ||
} | ||
|
||
// Reserved SGPRs (e.g. stack pointer or scratch descriptor) may depend on | ||
// any counter. | ||
// FIXME: Why are these not live-in to the function and/or the entry BB? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a good reason for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SP is always manually managed without memory operations, so I don't think there's any useful way it can depend on a counter. If the caller didn't have FP/BP and was using it as a general SGPR, it's maybe possible they were used with a memory operation we need to complete before initializing them in the prolog |
||
for (unsigned I = 0, E = ST->getMaxNumSGPRs(MF); I != E; ++I) { | ||
MCRegister Reg = AMDGPU::SGPR_32RegClass.getRegister(I); | ||
if (MRI.getReservedRegs()[Reg]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Maybe hoist the getReservedRegs call out of the loop. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a trivial accessor defined in the header so I don't think there's much point. |
||
SetStateForPhysReg(Reg); | ||
} | ||
} | ||
|
||
void print(raw_ostream &); | ||
void dump() { print(dbgs()); } | ||
|
||
|
@@ -490,54 +543,59 @@ class SIInsertWaitcnts : public MachineFunctionPass { | |
|
||
} // end anonymous namespace | ||
|
||
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI, | ||
const SIInstrInfo *TII, | ||
RegInterval WaitcntBrackets::getRegInterval(Register Reg, | ||
const MachineRegisterInfo *MRI, | ||
const SIRegisterInfo *TRI, | ||
unsigned OpNo) const { | ||
const MachineOperand &Op = MI->getOperand(OpNo); | ||
if (!TRI->isInAllocatableClass(Op.getReg())) | ||
return {-1, -1}; | ||
|
||
// A use via a PW operand does not need a waitcnt. | ||
// A partial write is not a WAW. | ||
assert(!Op.getSubReg() || !Op.isUndef()); | ||
|
||
const SIRegisterInfo *TRI) const { | ||
RegInterval Result; | ||
|
||
unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST)) & | ||
AMDGPU::HWEncoding::REG_IDX_MASK; | ||
unsigned RegNo = TRI->getEncodingValue(AMDGPU::getMCReg(Reg, *ST)) & | ||
AMDGPU::HWEncoding::REG_IDX_MASK; | ||
|
||
if (TRI->isVectorRegister(*MRI, Op.getReg())) { | ||
assert(Reg >= Encoding.VGPR0 && Reg <= Encoding.VGPRL); | ||
Result.first = Reg - Encoding.VGPR0; | ||
if (TRI->isAGPR(*MRI, Op.getReg())) | ||
if (TRI->isVectorRegister(*MRI, Reg)) { | ||
assert(RegNo >= Encoding.VGPR0 && RegNo <= Encoding.VGPRL); | ||
Result.first = RegNo - Encoding.VGPR0; | ||
if (TRI->isAGPR(*MRI, Reg)) | ||
Result.first += AGPR_OFFSET; | ||
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS); | ||
} else if (TRI->isSGPRReg(*MRI, Op.getReg())) { | ||
assert(Reg >= Encoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS); | ||
Result.first = Reg - Encoding.SGPR0 + NUM_ALL_VGPRS; | ||
} else if (TRI->isSGPRReg(*MRI, Reg)) { | ||
assert(RegNo >= Encoding.SGPR0 && RegNo < SQ_MAX_PGM_SGPRS); | ||
Result.first = RegNo - Encoding.SGPR0 + NUM_ALL_VGPRS; | ||
assert(Result.first >= NUM_ALL_VGPRS && | ||
Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS); | ||
} | ||
// TODO: Handle TTMP | ||
// else if (TRI->isTTMP(*MRI, Reg.getReg())) ... | ||
// else if (TRI->isTTMP(*MRI, Reg)) ... | ||
else | ||
return {-1, -1}; | ||
|
||
const TargetRegisterClass *RC = TII->getOpRegClass(*MI, OpNo); | ||
const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); | ||
unsigned Size = TRI->getRegSizeInBits(*RC); | ||
Result.second = Result.first + ((Size + 16) / 32); | ||
|
||
return Result; | ||
} | ||
|
||
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI, | ||
const MachineRegisterInfo *MRI, | ||
const SIRegisterInfo *TRI, | ||
unsigned OpNo) const { | ||
const MachineOperand &Op = MI->getOperand(OpNo); | ||
|
||
// A use via a PW operand does not need a waitcnt. | ||
// A partial write is not a WAW. | ||
assert(!Op.getSubReg() || !Op.isUndef()); | ||
if (!TRI->isInAllocatableClass(Op.getReg())) | ||
return {-1, -1}; | ||
|
||
return getRegInterval(Op.getReg(), MRI, TRI); | ||
} | ||
|
||
void WaitcntBrackets::setExpScore(const MachineInstr *MI, | ||
const SIInstrInfo *TII, | ||
const SIRegisterInfo *TRI, | ||
const MachineRegisterInfo *MRI, unsigned OpNo, | ||
unsigned Val) { | ||
RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo); | ||
RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo); | ||
assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg())); | ||
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { | ||
setRegScore(RegNo, EXP_CNT, Val); | ||
|
@@ -563,7 +621,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, | |
// PendingEvents and ScoreUB need to be update regardless if this event | ||
// changes the score of a register or not. | ||
// Examples including vm_cnt when buffer-store or lgkm_cnt when send-message. | ||
PendingEvents |= 1 << E; | ||
setPendingEvent(E); | ||
setScoreUB(T, CurrScore); | ||
|
||
if (T == EXP_CNT) { | ||
|
@@ -673,7 +731,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, | |
Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) { | ||
MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data); | ||
unsigned OpNo;//TODO: find the OpNo for this operand; | ||
RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo); | ||
RegInterval Interval = getRegInterval(&Inst, MRI, TRI, OpNo); | ||
for (int RegNo = Interval.first; RegNo < Interval.second; | ||
++RegNo) { | ||
setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore); | ||
|
@@ -685,7 +743,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, | |
auto &Op = Inst.getOperand(I); | ||
if (!Op.isReg() || !Op.isDef()) | ||
continue; | ||
RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I); | ||
RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I); | ||
if (T == VM_CNT) { | ||
if (Interval.first >= NUM_ALL_VGPRS) | ||
continue; | ||
|
@@ -1136,7 +1194,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, | |
|
||
if (MI.getOperand(CallAddrOpIdx).isReg()) { | ||
RegInterval CallAddrOpInterval = | ||
ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, CallAddrOpIdx); | ||
ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx); | ||
|
||
for (int RegNo = CallAddrOpInterval.first; | ||
RegNo < CallAddrOpInterval.second; ++RegNo) | ||
|
@@ -1146,7 +1204,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, | |
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); | ||
if (RtnAddrOpIdx != -1) { | ||
RegInterval RtnAddrOpInterval = | ||
ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, RtnAddrOpIdx); | ||
ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx); | ||
|
||
for (int RegNo = RtnAddrOpInterval.first; | ||
RegNo < RtnAddrOpInterval.second; ++RegNo) | ||
|
@@ -1198,8 +1256,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, | |
if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI)) | ||
continue; | ||
|
||
RegInterval Interval = | ||
ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I); | ||
RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I); | ||
|
||
const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg()); | ||
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { | ||
|
@@ -1775,7 +1832,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, | |
MachineOperand &Op = MI.getOperand(I); | ||
if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg())) | ||
continue; | ||
RegInterval Interval = Brackets.getRegInterval(&MI, TII, MRI, TRI, I); | ||
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I); | ||
// Vgpr use | ||
if (Op.isUse()) { | ||
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { | ||
|
@@ -1849,28 +1906,19 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { | |
BlockInfos.clear(); | ||
bool Modified = false; | ||
|
||
if (!MFI->isEntryFunction()) { | ||
// Wait for any outstanding memory operations that the input registers may | ||
// depend on. We can't track them and it's better to do the wait after the | ||
// costly call sequence. | ||
|
||
// TODO: Could insert earlier and schedule more liberally with operations | ||
// that only use caller preserved registers. | ||
MachineBasicBlock &EntryBB = MF.front(); | ||
MachineBasicBlock::iterator I = EntryBB.begin(); | ||
for (MachineBasicBlock::iterator E = EntryBB.end(); | ||
I != E && (I->isPHI() || I->isMetaInstruction()); ++I) | ||
; | ||
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0); | ||
|
||
Modified = true; | ||
} | ||
|
||
// Keep iterating over the blocks in reverse post order, inserting and | ||
// updating s_waitcnt where needed, until a fix point is reached. | ||
for (auto *MBB : ReversePostOrderTraversal<MachineFunction *>(&MF)) | ||
BlockInfos.insert({MBB, BlockInfo()}); | ||
|
||
if (!MFI->isEntryFunction()) { | ||
MachineBasicBlock &EntryMBB = MF.front(); | ||
BlockInfo &EntryBI = BlockInfos.find(&EntryMBB)->second; | ||
EntryBI.Incoming = std::make_unique<WaitcntBrackets>(ST, Limits, Encoding); | ||
WaitcntBrackets &Brackets = *EntryBI.Incoming; | ||
Brackets.setNonEntryFunctionInitialState(MF); | ||
} | ||
|
||
std::unique_ptr<WaitcntBrackets> Brackets; | ||
bool Repeat; | ||
do { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -171,15 +171,15 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) { | |
; | ||
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat: | ||
; GFX9: ; %bb.0: | ||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc0ffc0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to wait for expcnt before the v_mov? IIUC if there's an export or GDS instruction that reads the VGPRs in the caller, we're supposed to wait for EXPcnt before we write the VGPRs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I think you're right. I guess that's another fundamental problem with this patch :( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Stepping back a bit, it might make more sense to change the ABI to say that expcnt must be zero at a function call boundary. I suspect there aren't any realistic cases where it would be useful to make a function call with an un-completed export in flight. But I don't want to change the ABI in this patch. |
||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 | ||
; GFX9-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat: | ||
; GFX8: ; %bb.0: | ||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0 | ||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX8-NEXT: v_add_u16_e32 v2, 0xffc0, v0 | ||
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD | ||
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 | ||
|
@@ -204,15 +204,15 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) { | |
; | ||
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo: | ||
; GFX9: ; %bb.0: | ||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4ffc0 | ||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 | ||
; GFX9-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_lo: | ||
; GFX8: ; %bb.0: | ||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX8-NEXT: v_mov_b32_e32 v2, 4 | ||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX8-NEXT: v_add_u16_e32 v1, 0xffc0, v0 | ||
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD | ||
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 | ||
|
@@ -237,15 +237,15 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) { | |
; | ||
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi: | ||
; GFX9: ; %bb.0: | ||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc00004 | ||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 | ||
; GFX9-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi: | ||
; GFX8: ; %bb.0: | ||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0 | ||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX8-NEXT: v_add_u16_e32 v2, 4, v0 | ||
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD | ||
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,10 +6,12 @@ declare hidden ptr addrspace(1) @ext(ptr addrspace(1)) | |
define ptr addrspace(1) @call_assert_align() { | ||
; CHECK-LABEL: call_assert_align: | ||
; CHECK: ; %bb.0: ; %entry | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: s_waitcnt lgkmcnt(0) | ||
; CHECK-NEXT: s_mov_b32 s16, s33 | ||
; CHECK-NEXT: s_mov_b32 s33, s32 | ||
; CHECK-NEXT: s_waitcnt expcnt(0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we waiting for expcnt here? (as opposed to after changing EXEC) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Quoting from
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, thanks! Found it in the SPG too, it just wasn't in the table where I was originally looking. |
||
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) | ||
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill | ||
; CHECK-NEXT: s_mov_b64 exec, s[18:19] | ||
; CHECK-NEXT: v_writelane_b32 v40, s16, 2 | ||
|
@@ -44,7 +46,6 @@ entry: | |
define ptr addrspace(1) @tail_call_assert_align() { | ||
; CHECK-LABEL: tail_call_assert_align: | ||
; CHECK: ; %bb.0: ; %entry | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do these drop completely here, but in the next test ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function ends with a tail call, not a return. The ABI says that counters are undefined on function entry but must be 0 on return. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, I completely filtered out that it was a tail call. Makes sense now. |
||
; CHECK-NEXT: v_mov_b32_e32 v0, 0 | ||
; CHECK-NEXT: v_mov_b32_e32 v1, 0 | ||
; CHECK-NEXT: s_getpc_b64 s[16:17] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried reimplementing this part using
MRI.liveins
instead ofEntryMBB.liveins
but apparently VGPR function arguments are not included in the former. Is that expected?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MRI.liveins is way less reliable. For example, we're missing verification that everything in it is mirrored in the entry block live ins. Additionally GlobalISel doesn't even populate it, and it's apparently not an issue