Skip to content

Commit 198624c

Browse files
committed
AMDGPU/GlobalISel: Select llvm.amdgcn.raw.buffer.load.format
1 parent c98d98b commit 198624c

File tree

7 files changed

+498
-20
lines changed

7 files changed

+498
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
135135
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_UBYTE, SIbuffer_load_ubyte>;
136136
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SSHORT, SIbuffer_load_short>;
137137
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SBYTE, SIbuffer_load_byte>;
138+
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT, SIbuffer_load_format>;
139+
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT_D16, SIbuffer_load_format_d16>;
138140

139141
// FIXME: Check MMO is atomic
140142
def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 46 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2454,29 +2454,44 @@ bool AMDGPULegalizerInfo::legalizeRawBufferLoad(MachineInstr &MI,
24542454
unsigned ImmOffset;
24552455
unsigned TotalOffset;
24562456

2457+
LLT Ty = MRI.getType(Dst);
2458+
LLT EltTy = Ty.getScalarType();
2459+
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
2460+
const bool Unpacked = ST.hasUnpackedD16VMem();
2461+
24572462
std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
24582463
if (TotalOffset != 0)
24592464
MMO = B.getMF().getMachineMemOperand(MMO, TotalOffset, MemSize);
24602465

24612466
unsigned Opc;
2462-
switch (MemSize) {
2463-
case 1:
2464-
if (IsFormat)
2465-
return false;
2466-
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
2467-
break;
2468-
case 2:
2469-
if (IsFormat)
2470-
return false;
2471-
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
2472-
break;
2473-
default:
2474-
Opc = IsFormat ? -1/*TODO*/ : AMDGPU::G_AMDGPU_BUFFER_LOAD;
2475-
break;
2467+
if (IsFormat) {
2468+
Opc = IsD16 ? AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16 :
2469+
AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT;
2470+
} else {
2471+
switch (MemSize) {
2472+
case 1:
2473+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
2474+
break;
2475+
case 2:
2476+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
2477+
break;
2478+
default:
2479+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD;
2480+
break;
2481+
}
24762482
}
24772483

2478-
Register LoadDstReg = MemSize >= 4 ? Dst :
2479-
B.getMRI()->createGenericVirtualRegister(S32);
2484+
Register LoadDstReg;
2485+
2486+
bool IsExtLoad = (!IsD16 && MemSize < 4) || (IsD16 && !Ty.isVector());
2487+
LLT UnpackedTy = Ty.changeElementSize(32);
2488+
2489+
if (IsExtLoad)
2490+
LoadDstReg = B.getMRI()->createGenericVirtualRegister(S32);
2491+
else if (Unpacked && IsD16 && Ty.isVector())
2492+
LoadDstReg = B.getMRI()->createGenericVirtualRegister(UnpackedTy);
2493+
else
2494+
LoadDstReg = Dst;
24802495

24812496
Register VIndex = B.buildConstant(S32, 0).getReg(0);
24822497

@@ -2492,9 +2507,20 @@ bool AMDGPULegalizerInfo::legalizeRawBufferLoad(MachineInstr &MI,
24922507
.addMemOperand(MMO);
24932508

24942509
if (LoadDstReg != Dst) {
2495-
// Widen result for extending loads was widened.
24962510
B.setInsertPt(B.getMBB(), ++B.getInsertPt());
2497-
B.buildTrunc(Dst, LoadDstReg);
2511+
2512+
// The result of an extending load was widened to 32 bits; truncate it back.
2513+
if (IsExtLoad)
2514+
B.buildTrunc(Dst, LoadDstReg);
2515+
else {
2516+
// Repack to original 16-bit vector result
2517+
// FIXME: G_TRUNC should work, but legalization currently fails
2518+
auto Unmerge = B.buildUnmerge(S32, LoadDstReg);
2519+
SmallVector<Register, 4> Repack;
2520+
for (unsigned I = 0, N = Unmerge->getNumOperands() - 1; I != N; ++I)
2521+
Repack.push_back(B.buildTrunc(EltTy, Unmerge.getReg(I)).getReg(0));
2522+
B.buildMerge(Dst, Repack);
2523+
}
24982524
}
24992525

25002526
MI.eraseFromParent();
@@ -2637,6 +2663,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
26372663
return legalizeRawBufferStore(MI, MRI, B, true);
26382664
case Intrinsic::amdgcn_raw_buffer_load:
26392665
return legalizeRawBufferLoad(MI, MRI, B, false);
2666+
case Intrinsic::amdgcn_raw_buffer_load_format:
2667+
return legalizeRawBufferLoad(MI, MRI, B, true);
26402668
case Intrinsic::amdgcn_atomic_inc:
26412669
return legalizeAtomicIncDec(MI, B, true);
26422670
case Intrinsic::amdgcn_atomic_dec:

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2247,7 +2247,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
22472247
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
22482248
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
22492249
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
2250-
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE: {
2250+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
2251+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
2252+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16: {
22512253
applyDefaultMapping(OpdMapper);
22522254
executeInWaterfallLoop(MI, MRI, {1, 4});
22532255
return;
@@ -3071,7 +3073,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
30713073
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
30723074
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
30733075
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3074-
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT: {
3076+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3077+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3078+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16: {
30753079
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
30763080

30773081
// rsrc

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,13 +1221,15 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_X
12211221
let SubtargetPredicate = HasUnpackedD16VMem in {
12221222
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
12231223
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
1224+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
12241225
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
12251226
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
12261227
} // End HasUnpackedD16VMem.
12271228

12281229
let SubtargetPredicate = HasPackedD16VMem in {
12291230
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
12301231
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
1232+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X">;
12311233
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
12321234
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
12331235
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2160,6 +2160,8 @@ def G_AMDGPU_BUFFER_LOAD_SBYTE : BufferLoadGenericInstruction;
21602160
def G_AMDGPU_BUFFER_LOAD_USHORT : BufferLoadGenericInstruction;
21612161
def G_AMDGPU_BUFFER_LOAD_SSHORT : BufferLoadGenericInstruction;
21622162
def G_AMDGPU_BUFFER_LOAD : BufferLoadGenericInstruction;
2163+
def G_AMDGPU_BUFFER_LOAD_FORMAT : BufferLoadGenericInstruction;
2164+
def G_AMDGPU_BUFFER_LOAD_FORMAT_D16 : BufferLoadGenericInstruction;
21632165

21642166
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
21652167
// operand. Expects a MachineMemOperand in addition to explicit

0 commit comments

Comments
 (0)