Skip to content

Commit 26b14ae

Browse files
authored
[AMDGPU] CodeGen for GFX12 VIMAGE and VSAMPLE instructions (#75488)
1 parent f643eec commit 26b14ae

File tree

57 files changed: +13333 additions, -72 deletions (lines changed)

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed: +13333 additions, -72 deletions (lines changed)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -845,7 +845,9 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
845845
!if(P_.IsSample, [llvm_v4i32_ty, // samp(SGPR)
846846
llvm_i1_ty], []), // unorm(imm)
847847
[llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
848-
llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
848+
llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc;
849+
// gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
850+
// TODO-GFX12: Update all other cachepolicy descriptions.
849851

850852
!listconcat(props,
851853
!if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1832,6 +1832,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
18321832
unsigned IntrOpcode = Intr->BaseOpcode;
18331833
const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
18341834
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
1835+
const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);
18351836

18361837
const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
18371838

@@ -1916,7 +1917,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
19161917
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
19171918
if (BaseOpcode->Atomic)
19181919
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
1919-
if (CPol & ~AMDGPU::CPol::ALL)
1920+
if (CPol & ~(IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12))
19201921
return false;
19211922

19221923
int NumVAddrRegs = 0;
@@ -1951,7 +1952,10 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
19511952
++NumVDataDwords;
19521953

19531954
int Opcode = -1;
1954-
if (IsGFX11Plus) {
1955+
if (IsGFX12Plus) {
1956+
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
1957+
NumVDataDwords, NumVAddrDwords);
1958+
} else if (IsGFX11Plus) {
19551959
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
19561960
UseNSA ? AMDGPU::MIMGEncGfx11NSA
19571961
: AMDGPU::MIMGEncGfx11Default,
@@ -2024,7 +2028,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
20242028

20252029
if (IsGFX10Plus)
20262030
MIB.addImm(DimInfo->Encoding);
2027-
MIB.addImm(Unorm);
2031+
if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::unorm))
2032+
MIB.addImm(Unorm);
20282033

20292034
MIB.addImm(CPol);
20302035
MIB.addImm(IsA16 && // a16 or r128
@@ -2039,7 +2044,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
20392044
return false;
20402045
}
20412046

2042-
MIB.addImm(LWE); // lwe
2047+
if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::lwe))
2048+
MIB.addImm(LWE); // lwe
20432049
if (!IsGFX10Plus)
20442050
MIB.addImm(DimInfo->DA ? -1 : 0);
20452051
if (BaseOpcode->HasD16)
@@ -5448,7 +5454,9 @@ void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
54485454
const MachineInstr &MI,
54495455
int OpIdx) const {
54505456
assert(OpIdx >= 0 && "expected to match an immediate operand");
5451-
MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
5457+
MIB.addImm(MI.getOperand(OpIdx).getImm() &
5458+
(AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
5459+
: AMDGPU::CPol::ALL_pregfx12));
54525460
}
54535461

54545462
void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6151,7 +6151,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
61516151
return false;
61526152
}
61536153

6154-
const unsigned NSAMaxSize = ST.getNSAMaxSize();
6154+
const unsigned NSAMaxSize = ST.getNSAMaxSize(BaseOpcode->Sampler);
61556155
const unsigned HasPartialNSA = ST.hasPartialNSAEncoding();
61566156

61576157
if (IsA16 || IsG16) {
@@ -6211,7 +6211,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
62116211
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
62126212
// allocation when possible.
62136213
//
6214-
// Partial NSA is allowed on GFX11 where the final register is a contiguous
6214+
// Partial NSA is allowed on GFX11+ where the final register is a contiguous
62156215
// set of the remaining addresses.
62166216
const bool UseNSA = ST.hasNSAEncoding() &&
62176217
CorrectedNumVAddrs >= ST.getNSAThreshold(MF) &&
@@ -6635,28 +6635,34 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
66356635
return false;
66366636
}
66376637

6638+
const bool IsGFX11 = AMDGPU::isGFX11(ST);
66386639
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(ST);
6640+
const bool IsGFX12Plus = AMDGPU::isGFX12Plus(ST);
66396641
const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
66406642
const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
66416643
const unsigned NumVDataDwords = 4;
66426644
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
66436645
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
6644-
const bool UseNSA = ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize();
6646+
const bool UseNSA =
6647+
IsGFX12Plus || (ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize());
6648+
66456649
const unsigned BaseOpcodes[2][2] = {
66466650
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
66476651
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
66486652
AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
66496653
int Opcode;
66506654
if (UseNSA) {
66516655
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
6652-
IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
6656+
IsGFX12Plus ? AMDGPU::MIMGEncGfx12
6657+
: IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
66536658
: AMDGPU::MIMGEncGfx10NSA,
66546659
NumVDataDwords, NumVAddrDwords);
66556660
} else {
6656-
Opcode = AMDGPU::getMIMGOpcode(
6657-
BaseOpcodes[Is64][IsA16],
6658-
IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default,
6659-
NumVDataDwords, NumVAddrDwords);
6661+
assert(!IsGFX12Plus);
6662+
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
6663+
IsGFX11 ? AMDGPU::MIMGEncGfx11Default
6664+
: AMDGPU::MIMGEncGfx10Default,
6665+
NumVDataDwords, NumVAddrDwords);
66606666
}
66616667
assert(Opcode != -1);
66626668

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 33 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -7129,6 +7129,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
71297129
unsigned IntrOpcode = Intr->BaseOpcode;
71307130
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
71317131
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
7132+
bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
71327133

71337134
SmallVector<EVT, 3> ResultTypes(Op->values());
71347135
SmallVector<EVT, 3> OrigResultTypes(Op->values());
@@ -7148,7 +7149,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
71487149
if (BaseOpcode->Atomic) {
71497150
VData = Op.getOperand(2);
71507151

7151-
bool Is64Bit = VData.getValueType() == MVT::i64;
7152+
bool Is64Bit = VData.getValueSizeInBits() == 64;
71527153
if (BaseOpcode->AtomicX2) {
71537154
SDValue VData2 = Op.getOperand(3);
71547155
VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, DL,
@@ -7308,9 +7309,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
73087309
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
73097310
// allocation when possible.
73107311
//
7311-
// Partial NSA is allowed on GFX11 where the final register is a contiguous
7312+
// Partial NSA is allowed on GFX11+ where the final register is a contiguous
73127313
// set of the remaining addresses.
7313-
const unsigned NSAMaxSize = ST->getNSAMaxSize();
7314+
const unsigned NSAMaxSize = ST->getNSAMaxSize(BaseOpcode->Sampler);
73147315
const bool HasPartialNSAEncoding = ST->hasPartialNSAEncoding();
73157316
const bool UseNSA = ST->hasNSAEncoding() &&
73167317
VAddrs.size() >= ST->getNSAThreshold(MF) &&
@@ -7387,7 +7388,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
73877388
Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
73887389
if (BaseOpcode->Atomic)
73897390
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
7390-
if (CPol & ~AMDGPU::CPol::ALL)
7391+
if (CPol & ~(IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12))
73917392
return Op;
73927393

73937394
SmallVector<SDValue, 26> Ops;
@@ -7407,7 +7408,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
74077408
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
74087409
if (IsGFX10Plus)
74097410
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
7410-
Ops.push_back(Unorm);
7411+
if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
7412+
Ops.push_back(Unorm);
74117413
Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
74127414
Ops.push_back(IsA16 && // r128, a16 for gfx9
74137415
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
@@ -7418,7 +7420,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
74187420
} else if (cast<ConstantSDNode>(TFE)->getZExtValue()) {
74197421
report_fatal_error("TFE is not supported on this GPU");
74207422
}
7421-
Ops.push_back(LWE); // lwe
7423+
if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
7424+
Ops.push_back(LWE); // lwe
74227425
if (!IsGFX10Plus)
74237426
Ops.push_back(DimInfo->DA ? True : False);
74247427
if (BaseOpcode->HasD16)
@@ -7430,7 +7433,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
74307433
UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
74317434
int Opcode = -1;
74327435

7433-
if (IsGFX11Plus) {
7436+
if (IsGFX12Plus) {
7437+
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
7438+
NumVDataDwords, NumVAddrDwords);
7439+
} else if (IsGFX11Plus) {
74347440
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
74357441
UseNSA ? AMDGPU::MIMGEncGfx11NSA
74367442
: AMDGPU::MIMGEncGfx11Default,
@@ -7748,7 +7754,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
77487754
SDLoc(Op), MVT::i32);
77497755
case Intrinsic::amdgcn_s_buffer_load: {
77507756
unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
7751-
if (CPol & ~AMDGPU::CPol::ALL)
7757+
if (CPol & ~((Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12)
7758+
? AMDGPU::CPol::ALL
7759+
: AMDGPU::CPol::ALL_pregfx12))
77527760
return Op;
77537761
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
77547762
DAG);
@@ -8518,30 +8526,34 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
85188526
return SDValue();
85198527
}
85208528

8529+
const bool IsGFX11 = AMDGPU::isGFX11(*Subtarget);
85218530
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
8531+
const bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
85228532
const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
85238533
const bool Is64 = NodePtr.getValueType() == MVT::i64;
85248534
const unsigned NumVDataDwords = 4;
85258535
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
85268536
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
8527-
const bool UseNSA =
8528-
Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize();
8537+
const bool UseNSA = (Subtarget->hasNSAEncoding() &&
8538+
NumVAddrs <= Subtarget->getNSAMaxSize()) ||
8539+
IsGFX12Plus;
85298540
const unsigned BaseOpcodes[2][2] = {
85308541
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
85318542
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
85328543
AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
85338544
int Opcode;
85348545
if (UseNSA) {
85358546
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
8536-
IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
8547+
IsGFX12Plus ? AMDGPU::MIMGEncGfx12
8548+
: IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
85378549
: AMDGPU::MIMGEncGfx10NSA,
85388550
NumVDataDwords, NumVAddrDwords);
85398551
} else {
8540-
Opcode =
8541-
AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
8542-
IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
8543-
: AMDGPU::MIMGEncGfx10Default,
8544-
NumVDataDwords, NumVAddrDwords);
8552+
assert(!IsGFX12Plus);
8553+
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
8554+
IsGFX11 ? AMDGPU::MIMGEncGfx11Default
8555+
: AMDGPU::MIMGEncGfx10Default,
8556+
NumVDataDwords, NumVAddrDwords);
85458557
}
85468558
assert(Opcode != -1);
85478559

@@ -14042,7 +14054,7 @@ static unsigned SubIdx2Lane(unsigned Idx) {
1404214054
}
1404314055
}
1404414056

14045-
/// Adjust the writemask of MIMG instructions
14057+
/// Adjust the writemask of MIMG, VIMAGE or VSAMPLE instructions
1404614058
SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
1404714059
SelectionDAG &DAG) const {
1404814060
unsigned Opcode = Node->getMachineOpcode();
@@ -14060,7 +14072,7 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
1406014072
unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
1406114073
unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
1406214074
bool UsesTFC = ((int(TFEIdx) >= 0 && Node->getConstantOperandVal(TFEIdx)) ||
14063-
Node->getConstantOperandVal(LWEIdx))
14075+
(int(LWEIdx) >= 0 && Node->getConstantOperandVal(LWEIdx)))
1406414076
? true
1406514077
: false;
1406614078
unsigned TFCLane = 0;
@@ -14272,7 +14284,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
1427214284
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
1427314285
unsigned Opcode = Node->getMachineOpcode();
1427414286

14275-
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
14287+
if (TII->isImage(Opcode) && !TII->get(Opcode).mayStore() &&
1427614288
!TII->isGather4(Opcode) &&
1427714289
AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::dmask)) {
1427814290
return adjustWritemask(Node, DAG);
@@ -14359,7 +14371,7 @@ void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
1435914371
return;
1436014372

1436114373
unsigned TFEVal = TFE ? TFE->getImm() : 0;
14362-
unsigned LWEVal = LWE->getImm();
14374+
unsigned LWEVal = LWE ? LWE->getImm() : 0;
1436314375
unsigned D16Val = D16 ? D16->getImm() : 0;
1436414376

1436514377
if (!TFEVal && !LWEVal)
@@ -14496,7 +14508,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1449614508
return;
1449714509
}
1449814510

14499-
if (TII->isMIMG(MI)) {
14511+
if (TII->isImage(MI)) {
1450014512
if (!MI.mayStore())
1450114513
AddIMGInit(MI);
1450214514
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr);

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include "GCNHazardRecognizer.h"
1818
#include "GCNSubtarget.h"
1919
#include "SIMachineFunctionInfo.h"
20+
#include "Utils/AMDGPUBaseInfo.h"
2021
#include "llvm/Analysis/ValueTracking.h"
2122
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
2223
#include "llvm/CodeGen/LiveIntervals.h"
@@ -4515,8 +4516,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
45154516
return true;
45164517
}
45174518

4518-
if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
4519-
ErrInfo = "missing memory operand from MIMG instruction.";
4519+
if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
4520+
ErrInfo = "missing memory operand from image instruction.";
45204521
return false;
45214522
}
45224523

@@ -4708,8 +4709,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
47084709
}
47094710
}
47104711

4711-
// Verify MIMG
4712-
if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
4712+
// Verify MIMG / VIMAGE / VSAMPLE
4713+
if (isImage(MI.getOpcode()) && !MI.mayStore()) {
47134714
// Ensure that the return type used is large enough for all the options
47144715
// being used TFE/LWE require an extra result register.
47154716
const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
@@ -4973,12 +4974,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
49734974
}
49744975
}
49754976

4976-
if (isMIMG(MI)) {
4977+
if (isImage(MI)) {
49774978
const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
49784979
if (DimOp) {
49794980
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
49804981
AMDGPU::OpName::vaddr0);
4981-
int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
4982+
int RSrcOpName =
4983+
isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
4984+
int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
49824985
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode);
49834986
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
49844987
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
@@ -4999,16 +5002,17 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
49995002
IsA16 = A16->getImm() != 0;
50005003
}
50015004

5002-
bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
5005+
bool IsNSA = RsrcIdx - VAddr0Idx > 1;
50035006

50045007
unsigned AddrWords =
50055008
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16());
50065009

50075010
unsigned VAddrWords;
50085011
if (IsNSA) {
5009-
VAddrWords = SRsrcIdx - VAddr0Idx;
5010-
if (ST.hasPartialNSAEncoding() && AddrWords > ST.getNSAMaxSize()) {
5011-
unsigned LastVAddrIdx = SRsrcIdx - 1;
5012+
VAddrWords = RsrcIdx - VAddr0Idx;
5013+
if (ST.hasPartialNSAEncoding() &&
5014+
AddrWords > ST.getNSAMaxSize(isVSAMPLE(MI))) {
5015+
unsigned LastVAddrIdx = RsrcIdx - 1;
50125016
VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
50135017
}
50145018
} else {
@@ -6528,18 +6532,21 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
65286532
return CreatedBB;
65296533
}
65306534

6531-
// Legalize MIMG and MUBUF/MTBUF for shaders.
6535+
// Legalize MIMG/VIMAGE/VSAMPLE and MUBUF/MTBUF for shaders.
65326536
//
65336537
// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
65346538
// scratch memory access. In both cases, the legalization never involves
65356539
// conversion to the addr64 form.
6536-
if (isMIMG(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) &&
6537-
(isMUBUF(MI) || isMTBUF(MI)))) {
6538-
MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
6540+
if (isImage(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) &&
6541+
(isMUBUF(MI) || isMTBUF(MI)))) {
6542+
int RSrcOpName = (isVIMAGE(MI) || isVSAMPLE(MI)) ? AMDGPU::OpName::rsrc
6543+
: AMDGPU::OpName::srsrc;
6544+
MachineOperand *SRsrc = getNamedOperand(MI, RSrcOpName);
65396545
if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
65406546
CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SRsrc}, MDT);
65416547

6542-
MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
6548+
int SampOpName = isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6549+
MachineOperand *SSamp = getNamedOperand(MI, SampOpName);
65436550
if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
65446551
CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SSamp}, MDT);
65456552

0 commit comments

Comments
 (0)