@@ -88,6 +88,9 @@ class SIInsertWaits : public MachineFunctionPass {
88
88
// / \brief Whether the machine function returns void
89
89
bool ReturnsVoid;
90
90
91
+ // / Whether the VCCZ bit is possibly corrupt
92
+ bool VCCZCorrupt;
93
+
91
94
// / \brief Get increment/decrement amount for this instruction.
92
95
Counters getHwCounts (MachineInstr &MI);
93
96
@@ -116,14 +119,19 @@ class SIInsertWaits : public MachineFunctionPass {
116
119
// / \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
117
120
void handleSendMsg (MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
118
121
122
+ // / Return true if there are LGKM instructions that haven't been waited on
123
+ // / yet.
124
+ bool hasOutstandingLGKM () const ;
125
+
119
126
public:
120
127
static char ID;
121
128
122
129
SIInsertWaits () :
123
130
MachineFunctionPass (ID),
124
131
TII (nullptr ),
125
132
TRI (nullptr ),
126
- ExpInstrTypesSeen (0 ) { }
133
+ ExpInstrTypesSeen (0 ),
134
+ VCCZCorrupt (false ) { }
127
135
128
136
bool runOnMachineFunction (MachineFunction &MF) override ;
129
137
@@ -155,6 +163,13 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
155
163
const Counters SIInsertWaits::WaitCounts = { { 15 , 7 , 15 } };
156
164
const Counters SIInsertWaits::ZeroCounts = { { 0 , 0 , 0 } };
157
165
166
+ /// Return true if \p Opcode is a conditional branch that tests the vccz bit,
+ /// i.e. either S_CBRANCH_VCCZ or S_CBRANCH_VCCNZ.
+ ///
+ /// \param Opcode The machine opcode to classify.
+ /// \returns true when the opcode is one of the two vccz-dependent branches.
+ static bool readsVCCZ(unsigned Opcode) {
+   // Both branch polarities consume vccz, so both opcodes must be listed;
+   // checking only one of them would leave the other polarity unprotected.
+   return Opcode == AMDGPU::S_CBRANCH_VCCNZ ||
+          Opcode == AMDGPU::S_CBRANCH_VCCZ;
+ }
169
+
170
+ /// Report whether some LGKM operation has been issued but not yet waited on.
+ ///
+ /// \returns true when the LGKM count recorded in LastIssued differs from the
+ /// LGKM count recorded in WaitedOn.
+ bool SIInsertWaits::hasOutstandingLGKM() const {
+   const bool AllWaitedOn = LastIssued.Named.LGKM == WaitedOn.Named.LGKM;
+   return !AllWaitedOn;
+ }
158
173
159
174
Counters SIInsertWaits::getHwCounts (MachineInstr &MI) {
160
175
uint64_t TSFlags = MI.getDesc ().TSFlags ;
@@ -475,6 +490,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
475
490
TRI =
476
491
static_cast <const SIRegisterInfo *>(MF.getSubtarget ().getRegisterInfo ());
477
492
493
+ const AMDGPUSubtarget &ST = MF.getSubtarget <AMDGPUSubtarget>();
478
494
MRI = &MF.getRegInfo ();
479
495
480
496
WaitedOn = ZeroCounts;
@@ -493,6 +509,44 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
493
509
for (MachineBasicBlock::iterator I = MBB.begin (), E = MBB.end ();
494
510
I != E; ++I) {
495
511
512
+ if (ST.getGeneration () <= AMDGPUSubtarget::SEA_ISLANDS) {
513
+ // There is a hardware bug on CI/SI where SMRD instructions may corrupt the
514
+ // vccz bit, so when we detect that an instruction may read from a
515
+ // corrupt vccz bit, we need to:
516
+ // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
517
+ // complete.
518
+ // 2. Restore the correct value of vccz by writing the current value
519
+ // of vcc back to vcc.
520
+
521
+ if (TII->isSMRD (I->getOpcode ())) {
522
+ VCCZCorrupt = true ;
523
+ } else if (!hasOutstandingLGKM () && I->modifiesRegister (AMDGPU::VCC, TRI)) {
524
+ // FIXME: We only care about SMRD instructions here, not LDS or GDS.
525
+ // Whenever we store a value in vcc, the correct value of vccz is
526
+ // restored.
527
+ VCCZCorrupt = false ;
528
+ }
529
+
530
+ // Check if we need to apply the bug work-around
531
+ if (readsVCCZ (I->getOpcode ()) && VCCZCorrupt) {
532
+ DEBUG (dbgs () << " Inserting vccz bug work-around before: " << *I << ' \n ' );
533
+
534
+ // Wait on everything, not just LGKM. vccz reads usually come from
535
+ // terminators, and we always wait on everything at the end of the
536
+ // block, so if we only wait on LGKM here, we might end up with
537
+ // another s_waitcnt inserted right after this if there are non-LGKM
538
+ // instructions still outstanding.
539
+ insertWait (MBB, I, LastIssued);
540
+
541
+ // Restore the vccz bit. Any time a value is written to vcc, the vccz
542
+ // bit is updated, so we can restore the bit by reading the value of
543
+ // vcc and then writing it back to the register.
544
+ BuildMI (MBB, I, I->getDebugLoc (), TII->get (AMDGPU::S_MOV_B64),
545
+ AMDGPU::VCC)
546
+ .addReg (AMDGPU::VCC);
547
+ }
548
+ }
549
+
496
550
// Wait for everything before a barrier.
497
551
if (I->getOpcode () == AMDGPU::S_BARRIER)
498
552
Changes |= insertWait (MBB, I, LastIssued);
0 commit comments