Skip to content

Commit 62aa596

Browse files
authored
[AMDGPU] Add no return image_sample intrinsics and instructions (#97542)
An appropriately configured image resource descriptor can trigger image_sample instructions to store outputs directly to a linked memory location instead of returning to VGPRs. This is opaque to the backend as instruction encoding is unchanged; however, a mechanism is required to allow frontends to communicate that these instructions do not require destination VGPRs and store to memory. Flagging these as stores means they will not be optimized away.
1 parent bbd4af5 commit 62aa596

12 files changed

+746
-52
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -769,9 +769,10 @@ class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod,
769769

770770
class AMDGPUDimSampleProfile<string opmod,
771771
AMDGPUDimProps dim,
772-
AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
772+
AMDGPUSampleVariant sample,
773+
bit has_return = true> : AMDGPUDimProfile<opmod, dim> {
773774
let IsSample = true;
774-
let RetTypes = [llvm_any_ty];
775+
let RetTypes = !if(has_return, [llvm_any_ty], []);
775776
let ExtraAddrArgs = sample.ExtraAddrArgs;
776777
let Offset = sample.Offset;
777778
let Bias = sample.Bias;
@@ -780,6 +781,12 @@ class AMDGPUDimSampleProfile<string opmod,
780781
let LodClampMip = sample.LodOrClamp;
781782
}
782783

784+
class AMDGPUDimSampleNoReturnProfile<string opmod,
785+
AMDGPUDimProps dim,
786+
AMDGPUSampleVariant sample>
787+
: AMDGPUDimSampleProfile<opmod, dim, sample, false> {
788+
}
789+
783790
class AMDGPUDimNoSampleProfile<string opmod,
784791
AMDGPUDimProps dim,
785792
list<LLVMType> retty,
@@ -970,6 +977,21 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
970977
AMDGPUImageDMaskIntrinsic;
971978
}
972979

980+
multiclass AMDGPUImageDimSampleNoReturnDims<string opmod,
981+
AMDGPUSampleVariant sample> {
982+
foreach dim = AMDGPUDims.NoMsaa in {
983+
def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic<
984+
AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>,
985+
[IntrWillReturn], [SDNPMemOperand]>;
986+
}
987+
}
988+
foreach sample = AMDGPUSampleVariants in {
989+
defm int_amdgcn_image_sample # sample.LowerCaseMod
990+
: AMDGPUImageDimSampleNoReturnDims<
991+
"SAMPLE" # sample.UpperCaseMod # "_nortn", sample>,
992+
AMDGPUImageDMaskIntrinsic;
993+
}
994+
973995
defm int_amdgcn_image_getlod
974996
: AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
975997
AMDGPUImageDMaskIntrinsic;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,6 +1870,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
18701870
VDataIn = MI.getOperand(1).getReg();
18711871
VDataTy = MRI->getType(VDataIn);
18721872
NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;
1873+
} else if (BaseOpcode->NoReturn) {
1874+
NumVDataDwords = 0;
18731875
} else {
18741876
VDataOut = MI.getOperand(0).getReg();
18751877
VDataTy = MRI->getType(VDataOut);
@@ -3616,6 +3618,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
36163618
return selectG_INSERT_VECTOR_ELT(I);
36173619
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
36183620
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3621+
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
36193622
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
36203623
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
36213624
const AMDGPU::ImageDimIntrinsicInfo *Intr =

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6334,8 +6334,13 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
63346334
const LLT V2S16 = LLT::fixed_vector(2, 16);
63356335

63366336
unsigned DMask = 0;
6337-
Register VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg();
6338-
LLT Ty = MRI->getType(VData);
6337+
Register VData;
6338+
LLT Ty;
6339+
6340+
if (!BaseOpcode->NoReturn || BaseOpcode->Store) {
6341+
VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg();
6342+
Ty = MRI->getType(VData);
6343+
}
63396344

63406345
const bool IsAtomicPacked16Bit =
63416346
(BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
@@ -6373,7 +6378,11 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
63736378
: AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE;
63746379
const unsigned LoadOpcode = IsD16 ? AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16
63756380
: AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD;
6376-
unsigned NewOpcode = NumDefs == 0 ? StoreOpcode : LoadOpcode;
6381+
unsigned NewOpcode = LoadOpcode;
6382+
if (BaseOpcode->Store)
6383+
NewOpcode = StoreOpcode;
6384+
else if (BaseOpcode->NoReturn)
6385+
NewOpcode = AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET;
63776386

63786387
// Track that we legalized this
63796388
MI.setDesc(B.getTII().get(NewOpcode));
@@ -6503,7 +6512,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
65036512
Flags |= 2;
65046513
MI.addOperand(MachineOperand::CreateImm(Flags));
65056514

6506-
if (BaseOpcode->Store) { // No TFE for stores?
6515+
if (BaseOpcode->NoReturn) { // No TFE for stores?
65076516
// TODO: Handle dmask trim
65086517
if (!Ty.isVector() || !IsD16)
65096518
return true;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3172,6 +3172,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
31723172
}
31733173
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
31743174
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3175+
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
31753176
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
31763177
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
31773178
const AMDGPU::RsrcIntrinsic *RSrcIntrin =
@@ -4842,6 +4843,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48424843
}
48434844
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
48444845
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4846+
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
48454847
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
48464848
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
48474849
auto IntrID = AMDGPU::getIntrinsicID(MI);

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3868,7 +3868,8 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
38683868
int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
38693869
int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
38703870

3871-
assert(VDataIdx != -1);
3871+
if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3872+
return true;
38723873

38733874
if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
38743875
return true;

0 commit comments

Comments
 (0)