Skip to content

Commit a10c2ea

Browse files
OutOfCache authored and easyonaadit committed
[AMDGPU] Add intrinsics for atomic struct buffer loads (llvm#100140)
Mark these intrinsics as atomic loads within LLVM to prevent hoisting out of loops in cases where the load is considered invariant. Similar to llvm#97707, but for struct buffer loads.
1 parent b9da5c2 commit a10c2ea

File tree

6 files changed

+772
-3
lines changed

6 files changed

+772
-3
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1409,6 +1409,23 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
14091409
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
14101410
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
14111411

1412+
class AMDGPUStructAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
1413+
[data_ty],
1414+
[llvm_v4i32_ty, // rsrc(SGPR)
1415+
llvm_i32_ty, // vindex(VGPR)
1416+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
1417+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
1418+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1419+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1420+
// bit 3 = swz, bit 4 = scc (gfx90a)
1421+
// gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1422+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1423+
// bit 6 = swz
1424+
// all: volatile op (bit 31, stripped at lowering)
1425+
[ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1426+
AMDGPURsrcIntrinsic<0>;
1427+
def int_amdgcn_struct_atomic_buffer_load : AMDGPUStructAtomicBufferLoad;
1428+
14121429
class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
14131430
[data_ty],
14141431
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
@@ -1428,6 +1445,24 @@ class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIn
14281445
def int_amdgcn_struct_ptr_buffer_load_format : AMDGPUStructPtrBufferLoad;
14291446
def int_amdgcn_struct_ptr_buffer_load : AMDGPUStructPtrBufferLoad;
14301447

1448+
class AMDGPUStructPtrAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
1449+
[data_ty],
1450+
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
1451+
llvm_i32_ty, // vindex(VGPR)
1452+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
1453+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
1454+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1455+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1456+
// bit 3 = swz, bit 4 = scc (gfx90a)
1457+
// gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1458+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1459+
// bit 6 = swz
1460+
// all: volatile op (bit 31, stripped at lowering)
1461+
[IntrArgMemOnly, NoCapture<ArgIndex<0>>,
1462+
ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1463+
AMDGPURsrcIntrinsic<0>;
1464+
def int_amdgcn_struct_ptr_atomic_buffer_load : AMDGPUStructPtrAtomicBufferLoad;
1465+
14311466
class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
14321467
[],
14331468
[data_ty, // vdata(VGPR)

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7384,6 +7384,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73847384
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
73857385
case Intrinsic::amdgcn_struct_buffer_load:
73867386
case Intrinsic::amdgcn_struct_ptr_buffer_load:
7387+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
7388+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
73877389
return legalizeBufferLoad(MI, MRI, B, false, false);
73887390
case Intrinsic::amdgcn_raw_buffer_load_format:
73897391
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5134,7 +5134,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
51345134
case Intrinsic::amdgcn_struct_buffer_load:
51355135
case Intrinsic::amdgcn_struct_ptr_buffer_load:
51365136
case Intrinsic::amdgcn_struct_tbuffer_load:
5137-
case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
5137+
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5138+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
5139+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
51385140
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
51395141
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
51405142
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1301,7 +1301,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13011301
return true;
13021302
}
13031303
case Intrinsic::amdgcn_raw_atomic_buffer_load:
1304-
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load: {
1304+
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
1305+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
1306+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
13051307
Info.memVT =
13061308
memVTFromLoadIntrReturn(*this, MF.getDataLayout(), CI.getType(),
13071309
std::numeric_limits<unsigned>::max());
@@ -9013,7 +9015,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
90139015
case Intrinsic::amdgcn_struct_buffer_load:
90149016
case Intrinsic::amdgcn_struct_ptr_buffer_load:
90159017
case Intrinsic::amdgcn_struct_buffer_load_format:
9016-
case Intrinsic::amdgcn_struct_ptr_buffer_load_format: {
9018+
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
9019+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
9020+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
90179021
const bool IsFormat =
90189022
IntrID == Intrinsic::amdgcn_struct_buffer_load_format ||
90199023
IntrID == Intrinsic::amdgcn_struct_ptr_buffer_load_format;

0 commit comments

Comments
 (0)