Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 58abe5f

Browse files
committed
AMDGPU/SI: Implement a work-around for smrd corrupting vccz bit
Summary: We will hit this once we have enabled uniform branches. The smrd-vccz-bug.ll test will be added with the uniform branch commit. Reviewers: mareko, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D16725 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260137 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent da65ee3 commit 58abe5f

File tree

1 file changed

+55
-1
lines changed

1 file changed

+55
-1
lines changed

lib/Target/AMDGPU/SIInsertWaits.cpp

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ class SIInsertWaits : public MachineFunctionPass {
8888
/// \brief Whether the machine function returns void
8989
bool ReturnsVoid;
9090

91+
/// Whether the VCCZ bit is possibly corrupt
92+
bool VCCZCorrupt;
93+
9194
/// \brief Get increment/decrement amount for this instruction.
9295
Counters getHwCounts(MachineInstr &MI);
9396

@@ -116,14 +119,19 @@ class SIInsertWaits : public MachineFunctionPass {
116119
/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
117120
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
118121

122+
/// Return true if there are LGKM instructions that haven't been waited on
123+
/// yet.
124+
bool hasOutstandingLGKM() const;
125+
119126
public:
120127
static char ID;
121128

122129
SIInsertWaits() :
123130
MachineFunctionPass(ID),
124131
TII(nullptr),
125132
TRI(nullptr),
126-
ExpInstrTypesSeen(0) { }
133+
ExpInstrTypesSeen(0),
134+
VCCZCorrupt(false) { }
127135

128136
bool runOnMachineFunction(MachineFunction &MF) override;
129137

@@ -155,6 +163,13 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
155163
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
156164
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
157165

166+
static bool readsVCCZ(unsigned Opcode) {
167+
return Opcode == AMDGPU::S_CBRANCH_VCCNZ || Opcode == AMDGPU::S_CBRANCH_VCCNZ;
168+
}
169+
170+
bool SIInsertWaits::hasOutstandingLGKM() const {
171+
return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
172+
}
158173

159174
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
160175
uint64_t TSFlags = MI.getDesc().TSFlags;
@@ -475,6 +490,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
475490
TRI =
476491
static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
477492

493+
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
478494
MRI = &MF.getRegInfo();
479495

480496
WaitedOn = ZeroCounts;
@@ -493,6 +509,44 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
493509
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
494510
I != E; ++I) {
495511

512+
if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
513+
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
514+
// vccz bit, so when we detect that an instruction may read from a
515+
// corrupt vccz bit, we need to:
516+
// 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
517+
// complete.
518+
// 2. Restore the correct value of vccz by writing the current value
519+
// of vcc back to vcc.
520+
521+
if (TII->isSMRD(I->getOpcode())) {
522+
VCCZCorrupt = true;
523+
} else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
524+
// FIXME: We only care about SMRD instructions here, not LDS or GDS.
525+
// Whenever we store a value in vcc, the correct value of vccz is
526+
// restored.
527+
VCCZCorrupt = false;
528+
}
529+
530+
// Check if we need to apply the bug work-around
531+
if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
532+
DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
533+
534+
// Wait on everything, not just LGKM. vccz reads usually come from
535+
// terminators, and we always wait on everything at the end of the
536+
// block, so if we only wait on LGKM here, we might end up with
537+
// another s_waitcnt inserted right after this if there are non-LGKM
538+
// instructions still outstanding.
539+
insertWait(MBB, I, LastIssued);
540+
541+
// Restore the vccz bit. Any time a value is written to vcc, the vccz
542+
// bit is updated, so we can restore the bit by reading the value of
543+
// vcc and then writing it back to the register.
544+
BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
545+
AMDGPU::VCC)
546+
.addReg(AMDGPU::VCC);
547+
}
548+
}
549+
496550
// Wait for everything before a barrier.
497551
if (I->getOpcode() == AMDGPU::S_BARRIER)
498552
Changes |= insertWait(MBB, I, LastIssued);

0 commit comments

Comments
 (0)