Skip to content

Commit 96023b9

Browse files
committed
[AMDGPU] Legalize and select raw/struct_buffer_load with tfe
1 parent abc4c21 commit 96023b9

14 files changed: +4823 -22 lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,11 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
250250
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_UBYTE, SIbuffer_load_ubyte>;
251251
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SSHORT, SIbuffer_load_short>;
252252
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SBYTE, SIbuffer_load_byte>;
253+
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_TFE, SIbuffer_load_tfe>;
254+
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT_TFE, SIbuffer_load_ushort_tfe>;
255+
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, SIbuffer_load_ubyte_tfe>;
256+
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SSHORT_TFE, SIbuffer_load_short_tfe>;
257+
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SBYTE_TFE, SIbuffer_load_byte_tfe>;
253258
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT, SIbuffer_load_format>;
254259
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT_TFE, SIbuffer_load_format_tfe>;
255260
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT_D16, SIbuffer_load_format_d16>;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5529,6 +5529,11 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
55295529
NODE_NAME_CASE(BUFFER_LOAD_USHORT)
55305530
NODE_NAME_CASE(BUFFER_LOAD_BYTE)
55315531
NODE_NAME_CASE(BUFFER_LOAD_SHORT)
5532+
NODE_NAME_CASE(BUFFER_LOAD_TFE)
5533+
NODE_NAME_CASE(BUFFER_LOAD_UBYTE_TFE)
5534+
NODE_NAME_CASE(BUFFER_LOAD_USHORT_TFE)
5535+
NODE_NAME_CASE(BUFFER_LOAD_BYTE_TFE)
5536+
NODE_NAME_CASE(BUFFER_LOAD_SHORT_TFE)
55325537
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
55335538
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_TFE)
55345539
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -582,6 +582,11 @@ enum NodeType : unsigned {
582582
BUFFER_LOAD_USHORT,
583583
BUFFER_LOAD_BYTE,
584584
BUFFER_LOAD_SHORT,
585+
BUFFER_LOAD_TFE,
586+
BUFFER_LOAD_UBYTE_TFE,
587+
BUFFER_LOAD_USHORT_TFE,
588+
BUFFER_LOAD_BYTE_TFE,
589+
BUFFER_LOAD_SHORT_TFE,
585590
BUFFER_LOAD_FORMAT,
586591
BUFFER_LOAD_FORMAT_TFE,
587592
BUFFER_LOAD_FORMAT_D16,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5870,17 +5870,18 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
58705870
: AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT;
58715871
}
58725872
} else {
5873-
if (IsTFE)
5874-
return false;
58755873
switch (MemTy.getSizeInBits()) {
58765874
case 8:
5877-
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
5875+
Opc = IsTFE ? AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE
5876+
: AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
58785877
break;
58795878
case 16:
5880-
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
5879+
Opc = IsTFE ? AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE
5880+
: AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
58815881
break;
58825882
default:
5883-
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD;
5883+
Opc = IsTFE ? AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE
5884+
: AMDGPU::G_AMDGPU_BUFFER_LOAD;
58845885
break;
58855886
}
58865887
}
@@ -5892,7 +5893,11 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
58925893
Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(LoadTy);
58935894
buildBufferLoad(Opc, LoadDstReg, RSrc, VIndex, VOffset, SOffset, ImmOffset,
58945895
Format, AuxiliaryData, MMO, IsTyped, HasVIndex, B);
5895-
if (NumValueDWords == 1) {
5896+
if (MemTy.getSizeInBits() < 32) {
5897+
Register ExtDst = B.getMRI()->createGenericVirtualRegister(S32);
5898+
B.buildUnmerge({ExtDst, StatusDst}, LoadDstReg);
5899+
B.buildTrunc(Dst, ExtDst);
5900+
} else if (NumValueDWords == 1) {
58965901
B.buildUnmerge({Dst, StatusDst}, LoadDstReg);
58975902
} else {
58985903
SmallVector<Register, 5> LoadElts;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3041,6 +3041,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
30413041
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
30423042
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
30433043
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3044+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3045+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3046+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3047+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3048+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
30443049
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
30453050
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
30463051
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
@@ -4323,6 +4328,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
43234328
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
43244329
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
43254330
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4331+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4332+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4333+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4334+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4335+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
43264336
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
43274337
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
43284338
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1434,6 +1434,15 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_short, i32, "BUFFER_LOAD_SSHORT">;
14341434
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte, i32, "BUFFER_LOAD_UBYTE">;
14351435
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
14361436

1437+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_tfe, v2i32, "BUFFER_LOAD_DWORD_TFE">;
1438+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_tfe, v3i32, "BUFFER_LOAD_DWORDX2_TFE">;
1439+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_tfe, v4i32, "BUFFER_LOAD_DWORDX3_TFE">;
1440+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_tfe, v5i32, "BUFFER_LOAD_DWORDX4_TFE">;
1441+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_byte_tfe, v2i32, "BUFFER_LOAD_SBYTE_TFE">;
1442+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_short_tfe, v2i32, "BUFFER_LOAD_SSHORT_TFE">;
1443+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte_tfe, v2i32, "BUFFER_LOAD_UBYTE_TFE">;
1444+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort_tfe, v2i32, "BUFFER_LOAD_USHORT_TFE">;
1445+
14371446
multiclass MUBUF_StoreIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
14381447
string opcode, ValueType memoryVt = vt> {
14391448
defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_store<name, memoryVt>);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 29 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5962,24 +5962,19 @@ SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat,
59625962
assert(M->getNumValues() == 2 || M->getNumValues() == 3);
59635963
bool IsTFE = M->getNumValues() == 3;
59645964

5965-
unsigned Opc;
5966-
if (IsFormat) {
5967-
Opc = IsTFE ? AMDGPUISD::BUFFER_LOAD_FORMAT_TFE
5968-
: AMDGPUISD::BUFFER_LOAD_FORMAT;
5969-
} else {
5970-
// TODO: Support non-format TFE loads.
5971-
if (IsTFE)
5972-
return SDValue();
5973-
Opc = AMDGPUISD::BUFFER_LOAD;
5974-
}
5965+
unsigned Opc = IsFormat ? (IsTFE ? AMDGPUISD::BUFFER_LOAD_FORMAT_TFE
5966+
: AMDGPUISD::BUFFER_LOAD_FORMAT)
5967+
: IsTFE ? AMDGPUISD::BUFFER_LOAD_TFE
5968+
: AMDGPUISD::BUFFER_LOAD;
59755969

59765970
if (IsD16) {
59775971
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG, Ops);
59785972
}
59795973

59805974
// Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
59815975
if (!IsD16 && !LoadVT.isVector() && EltType.getSizeInBits() < 32)
5982-
return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M->getMemOperand());
5976+
return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M->getMemOperand(),
5977+
IsTFE);
59835978

59845979
if (isTypeLegal(LoadVT)) {
59855980
return getMemIntrinsicNode(Opc, DL, M->getVTList(), Ops, IntVT,
@@ -10172,11 +10167,30 @@ SDValue SITargetLowering::lowerPointerAsRsrcIntrin(SDNode *Op,
1017210167
}
1017310168

1017410169
// Handle 8 bit and 16 bit buffer loads
10175-
SDValue
10176-
SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT,
10177-
SDLoc DL, ArrayRef<SDValue> Ops,
10178-
MachineMemOperand *MMO) const {
10170+
SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
10171+
EVT LoadVT, SDLoc DL,
10172+
ArrayRef<SDValue> Ops,
10173+
MachineMemOperand *MMO,
10174+
bool IsTFE) const {
1017910175
EVT IntVT = LoadVT.changeTypeToInteger();
10176+
10177+
if (IsTFE) {
10178+
unsigned Opc = (LoadVT.getScalarType() == MVT::i8)
10179+
? AMDGPUISD::BUFFER_LOAD_UBYTE_TFE
10180+
: AMDGPUISD::BUFFER_LOAD_USHORT_TFE;
10181+
MachineFunction &MF = DAG.getMachineFunction();
10182+
MachineMemOperand *OpMMO = MF.getMachineMemOperand(MMO, 0, 8);
10183+
SDVTList VTs = DAG.getVTList(MVT::v2i32, MVT::Other);
10184+
SDValue Op = getMemIntrinsicNode(Opc, DL, VTs, Ops, MVT::v2i32, OpMMO, DAG);
10185+
SDValue Status = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Op,
10186+
DAG.getVectorIdxConstant(1, DL));
10187+
SDValue Data = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Op,
10188+
DAG.getVectorIdxConstant(0, DL));
10189+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Data);
10190+
SDValue Value = DAG.getNode(ISD::BITCAST, DL, LoadVT, Trunc);
10191+
return DAG.getMergeValues({Value, Status, SDValue(Op.getNode(), 1)}, DL);
10192+
}
10193+
1018010194
unsigned Opc = (LoadVT.getScalarType() == MVT::i8) ?
1018110195
AMDGPUISD::BUFFER_LOAD_UBYTE : AMDGPUISD::BUFFER_LOAD_USHORT;
1018210196

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -275,7 +275,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
275275
// Handle 8 bit and 16 bit buffer loads
276276
SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
277277
ArrayRef<SDValue> Ops,
278-
MachineMemOperand *MMO) const;
278+
MachineMemOperand *MMO,
279+
bool IsTFE = false) const;
279280

280281
// Handle 8 bit and 16 bit buffer stores
281282
SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType,

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,16 @@ def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
148148
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
149149
def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
150150
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
151+
def SIbuffer_load_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_TFE", SDTBufferLoad,
152+
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
153+
def SIbuffer_load_ubyte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE_TFE", SDTBufferLoad,
154+
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
155+
def SIbuffer_load_ushort_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT_TFE", SDTBufferLoad,
156+
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
157+
def SIbuffer_load_byte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE_TFE", SDTBufferLoad,
158+
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
159+
def SIbuffer_load_short_tfe: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT_TFE", SDTBufferLoad,
160+
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
151161
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
152162
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
153163
def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3762,6 +3762,11 @@ def G_AMDGPU_BUFFER_LOAD_SBYTE : BufferLoadGenericInstruction;
37623762
def G_AMDGPU_BUFFER_LOAD_USHORT : BufferLoadGenericInstruction;
37633763
def G_AMDGPU_BUFFER_LOAD_SSHORT : BufferLoadGenericInstruction;
37643764
def G_AMDGPU_BUFFER_LOAD : BufferLoadGenericInstruction;
3765+
def G_AMDGPU_BUFFER_LOAD_UBYTE_TFE : BufferLoadGenericInstruction;
3766+
def G_AMDGPU_BUFFER_LOAD_SBYTE_TFE : BufferLoadGenericInstruction;
3767+
def G_AMDGPU_BUFFER_LOAD_USHORT_TFE : BufferLoadGenericInstruction;
3768+
def G_AMDGPU_BUFFER_LOAD_SSHORT_TFE : BufferLoadGenericInstruction;
3769+
def G_AMDGPU_BUFFER_LOAD_TFE : BufferLoadGenericInstruction;
37653770
def G_AMDGPU_BUFFER_LOAD_FORMAT : BufferLoadGenericInstruction;
37663771
def G_AMDGPU_BUFFER_LOAD_FORMAT_TFE : BufferLoadGenericInstruction;
37673772
def G_AMDGPU_BUFFER_LOAD_FORMAT_D16 : BufferLoadGenericInstruction;

0 commit comments

Comments
 (0)