Skip to content

Commit 4af6251

Browse files
committed
[AMDGPU][SDag] Add IMG init in AdjustInstrPostInstrSelection
Doing this in a post-isel hook avoids the cost of running SIAddIMGInit which is yet another pass over the MIR. Differential Revision: https://reviews.llvm.org/D99747
1 parent 96d8c6b commit 4af6251

File tree

3 files changed

+93
-1
lines changed

3 files changed

+93
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,6 @@ bool GCNPassConfig::addInstSelector() {
10971097
AMDGPUPassConfig::addInstSelector();
10981098
addPass(&SIFixSGPRCopiesID);
10991099
addPass(createSILowerI1CopiesPass());
1100-
addPass(createSIAddIMGInitPass());
11011100
return false;
11021101
}
11031102

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11188,6 +11188,95 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
1118811188
return Node;
1118911189
}
1119011190

11191+
// Any MIMG instructions that use tfe or lwe require an initialization of the
11192+
// result register that will be written in the case of a memory access failure.
11193+
// The required code is also added to tie this init code to the result of the
11194+
// img instruction.
11195+
void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
11196+
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
11197+
const SIRegisterInfo &TRI = TII->getRegisterInfo();
11198+
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
11199+
MachineBasicBlock &MBB = *MI.getParent();
11200+
11201+
MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
11202+
MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
11203+
MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
11204+
11205+
if (!TFE && !LWE) // intersect_ray
11206+
return;
11207+
11208+
unsigned TFEVal = TFE ? TFE->getImm() : 0;
11209+
unsigned LWEVal = LWE->getImm();
11210+
unsigned D16Val = D16 ? D16->getImm() : 0;
11211+
11212+
if (!TFEVal && !LWEVal)
11213+
return;
11214+
11215+
// At least one of TFE or LWE are non-zero
11216+
// We have to insert a suitable initialization of the result value and
11217+
// tie this to the dest of the image instruction.
11218+
11219+
const DebugLoc &DL = MI.getDebugLoc();
11220+
11221+
int DstIdx =
11222+
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
11223+
11224+
// Calculate which dword we have to initialize to 0.
11225+
MachineOperand *MO_Dmask = TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
11226+
11227+
// check that dmask operand is found.
11228+
assert(MO_Dmask && "Expected dmask operand in instruction");
11229+
11230+
unsigned dmask = MO_Dmask->getImm();
11231+
// Determine the number of active lanes taking into account the
11232+
// Gather4 special case
11233+
unsigned ActiveLanes = TII->isGather4(MI) ? 4 : countPopulation(dmask);
11234+
11235+
bool Packed = !Subtarget->hasUnpackedD16VMem();
11236+
11237+
unsigned InitIdx =
11238+
D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
11239+
11240+
// Abandon attempt if the dst size isn't large enough
11241+
// - this is in fact an error but this is picked up elsewhere and
11242+
// reported correctly.
11243+
uint32_t DstSize = TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
11244+
if (DstSize < InitIdx)
11245+
return;
11246+
11247+
// Create a register for the intialization value.
11248+
Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
11249+
unsigned NewDst = 0; // Final initialized value will be in here
11250+
11251+
// If PRTStrictNull feature is enabled (the default) then initialize
11252+
// all the result registers to 0, otherwise just the error indication
11253+
// register (VGPRn+1)
11254+
unsigned SizeLeft = Subtarget->usePRTStrictNull() ? InitIdx : 1;
11255+
unsigned CurrIdx = Subtarget->usePRTStrictNull() ? 0 : (InitIdx - 1);
11256+
11257+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
11258+
for (; SizeLeft; SizeLeft--, CurrIdx++) {
11259+
NewDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
11260+
// Initialize dword
11261+
Register SubReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
11262+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
11263+
.addImm(0);
11264+
// Insert into the super-reg
11265+
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
11266+
.addReg(PrevDst)
11267+
.addReg(SubReg)
11268+
.addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));
11269+
11270+
PrevDst = NewDst;
11271+
}
11272+
11273+
// Add as an implicit operand
11274+
MI.addOperand(MachineOperand::CreateReg(NewDst, false, true));
11275+
11276+
// Tie the just added implicit operand to the dst
11277+
MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
11278+
}
11279+
1119111280
/// Assign the register class depending on the number of
1119211281
/// bits set in the writemask
1119311282
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
@@ -11271,6 +11360,9 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1127111360
}
1127211361
return;
1127311362
}
11363+
11364+
if (TII->isMIMG(MI) && !MI.mayStore())
11365+
AddIMGInit(MI);
1127411366
}
1127511367

1127611368
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL,

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
397397

398398
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
399399
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
400+
void AddIMGInit(MachineInstr &MI) const;
400401
void AdjustInstrPostInstrSelection(MachineInstr &MI,
401402
SDNode *Node) const override;
402403

0 commit comments

Comments
 (0)