Skip to content

Commit 3d07a6d

Browse files
committed
[AMDGPU][GlobalISel] Add IMG init in selectImageIntrinsic
Doing this during instruction selection avoids the cost of running SIAddIMGInit, which is yet another pass over the MIR. Differential Revision: https://reviews.llvm.org/D99670
1 parent 4af6251 commit 3d07a6d

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,38 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
16891689
if (BaseOpcode->HasD16)
16901690
MIB.addImm(IsD16 ? -1 : 0);
16911691

1692+
if (IsTexFail) {
1693+
// An image load instruction with TFE/LWE only conditionally writes to its
1694+
// result registers. Initialize them to zero so that we always get well
1695+
// defined result values.
1696+
assert(VDataOut && !VDataIn);
1697+
Register Tied = MRI->cloneVirtualRegister(VDataOut);
1698+
Register Zero = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1699+
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::V_MOV_B32_e32), Zero)
1700+
.addImm(0);
1701+
auto Parts = TRI.getRegSplitParts(MRI->getRegClass(Tied), 4);
1702+
if (STI.usePRTStrictNull()) {
1703+
// With enable-prt-strict-null enabled, initialize all result registers to
1704+
// zero.
1705+
auto RegSeq =
1706+
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
1707+
for (auto Sub : Parts)
1708+
RegSeq.addReg(Zero).addImm(Sub);
1709+
} else {
1710+
// With enable-prt-strict-null disabled, only initialize the extra TFE/LWE
1711+
// result register.
1712+
Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1713+
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
1714+
auto RegSeq =
1715+
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
1716+
for (auto Sub : Parts.drop_back(1))
1717+
RegSeq.addReg(Undef).addImm(Sub);
1718+
RegSeq.addReg(Zero).addImm(Parts.back());
1719+
}
1720+
MIB.addReg(Tied, RegState::Implicit);
1721+
MIB->tieOperands(0, MIB->getNumOperands() - 1);
1722+
}
1723+
16921724
MI.eraseFromParent();
16931725
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
16941726
}

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,10 +1133,6 @@ void GCNPassConfig::addPreGlobalInstructionSelect() {
11331133

11341134
bool GCNPassConfig::addGlobalInstructionSelect() {
11351135
addPass(new InstructionSelect(getOptLevel()));
1136-
// TODO: Fix instruction selection to do the right thing for image
1137-
// instructions with tfe or lwe in the first place, instead of running a
1138-
// separate pass to fix them up?
1139-
addPass(createSIAddIMGInitPass());
11401136
return false;
11411137
}
11421138

0 commit comments

Comments
 (0)