@@ -11188,6 +11188,95 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
11188
11188
return Node;
11189
11189
}
11190
11190
11191
+ // Any MIMG instructions that use tfe or lwe require an initialization of the
11192
+ // result register that will be written in the case of a memory access failure.
11193
+ // The required code is also added to tie this init code to the result of the
11194
+ // img instruction.
11195
+ void SITargetLowering::AddIMGInit (MachineInstr &MI) const {
11196
+ const SIInstrInfo *TII = getSubtarget ()->getInstrInfo ();
11197
+ const SIRegisterInfo &TRI = TII->getRegisterInfo ();
11198
+ MachineRegisterInfo &MRI = MI.getMF ()->getRegInfo ();
11199
+ MachineBasicBlock &MBB = *MI.getParent ();
11200
+
11201
+ MachineOperand *TFE = TII->getNamedOperand (MI, AMDGPU::OpName::tfe);
11202
+ MachineOperand *LWE = TII->getNamedOperand (MI, AMDGPU::OpName::lwe);
11203
+ MachineOperand *D16 = TII->getNamedOperand (MI, AMDGPU::OpName::d16);
11204
+
11205
+ if (!TFE && !LWE) // intersect_ray
11206
+ return ;
11207
+
11208
+ unsigned TFEVal = TFE ? TFE->getImm () : 0 ;
11209
+ unsigned LWEVal = LWE->getImm ();
11210
+ unsigned D16Val = D16 ? D16->getImm () : 0 ;
11211
+
11212
+ if (!TFEVal && !LWEVal)
11213
+ return ;
11214
+
11215
+ // At least one of TFE or LWE are non-zero
11216
+ // We have to insert a suitable initialization of the result value and
11217
+ // tie this to the dest of the image instruction.
11218
+
11219
+ const DebugLoc &DL = MI.getDebugLoc ();
11220
+
11221
+ int DstIdx =
11222
+ AMDGPU::getNamedOperandIdx (MI.getOpcode (), AMDGPU::OpName::vdata);
11223
+
11224
+ // Calculate which dword we have to initialize to 0.
11225
+ MachineOperand *MO_Dmask = TII->getNamedOperand (MI, AMDGPU::OpName::dmask);
11226
+
11227
+ // check that dmask operand is found.
11228
+ assert (MO_Dmask && " Expected dmask operand in instruction" );
11229
+
11230
+ unsigned dmask = MO_Dmask->getImm ();
11231
+ // Determine the number of active lanes taking into account the
11232
+ // Gather4 special case
11233
+ unsigned ActiveLanes = TII->isGather4 (MI) ? 4 : countPopulation (dmask);
11234
+
11235
+ bool Packed = !Subtarget->hasUnpackedD16VMem ();
11236
+
11237
+ unsigned InitIdx =
11238
+ D16Val && Packed ? ((ActiveLanes + 1 ) >> 1 ) + 1 : ActiveLanes + 1 ;
11239
+
11240
+ // Abandon attempt if the dst size isn't large enough
11241
+ // - this is in fact an error but this is picked up elsewhere and
11242
+ // reported correctly.
11243
+ uint32_t DstSize = TRI.getRegSizeInBits (*TII->getOpRegClass (MI, DstIdx)) / 32 ;
11244
+ if (DstSize < InitIdx)
11245
+ return ;
11246
+
11247
+ // Create a register for the intialization value.
11248
+ Register PrevDst = MRI.createVirtualRegister (TII->getOpRegClass (MI, DstIdx));
11249
+ unsigned NewDst = 0 ; // Final initialized value will be in here
11250
+
11251
+ // If PRTStrictNull feature is enabled (the default) then initialize
11252
+ // all the result registers to 0, otherwise just the error indication
11253
+ // register (VGPRn+1)
11254
+ unsigned SizeLeft = Subtarget->usePRTStrictNull () ? InitIdx : 1 ;
11255
+ unsigned CurrIdx = Subtarget->usePRTStrictNull () ? 0 : (InitIdx - 1 );
11256
+
11257
+ BuildMI (MBB, MI, DL, TII->get (AMDGPU::IMPLICIT_DEF), PrevDst);
11258
+ for (; SizeLeft; SizeLeft--, CurrIdx++) {
11259
+ NewDst = MRI.createVirtualRegister (TII->getOpRegClass (MI, DstIdx));
11260
+ // Initialize dword
11261
+ Register SubReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
11262
+ BuildMI (MBB, MI, DL, TII->get (AMDGPU::V_MOV_B32_e32), SubReg)
11263
+ .addImm (0 );
11264
+ // Insert into the super-reg
11265
+ BuildMI (MBB, MI, DL, TII->get (TargetOpcode::INSERT_SUBREG), NewDst)
11266
+ .addReg (PrevDst)
11267
+ .addReg (SubReg)
11268
+ .addImm (SIRegisterInfo::getSubRegFromChannel (CurrIdx));
11269
+
11270
+ PrevDst = NewDst;
11271
+ }
11272
+
11273
+ // Add as an implicit operand
11274
+ MI.addOperand (MachineOperand::CreateReg (NewDst, false , true ));
11275
+
11276
+ // Tie the just added implicit operand to the dst
11277
+ MI.tieOperands (DstIdx, MI.getNumOperands () - 1 );
11278
+ }
11279
+
11191
11280
/// Assign the register class depending on the number of
11192
11281
/// bits set in the writemask
11193
11282
void SITargetLowering::AdjustInstrPostInstrSelection (MachineInstr &MI,
@@ -11271,6 +11360,9 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
11271
11360
}
11272
11361
return ;
11273
11362
}
11363
+
11364
+ if (TII->isMIMG (MI) && !MI.mayStore ())
11365
+ AddIMGInit (MI);
11274
11366
}
11275
11367
11276
11368
static SDValue buildSMovImm32 (SelectionDAG &DAG, const SDLoc &DL,
0 commit comments