Skip to content

Commit 0880df2

Browse files
committed
[AMDGPU] Fix negative immediate offset for unbuffered smem loads
For unbuffered smem loads, it is illegal and undefined for the immediate offset to be negative if the resulting IOFFSET + (SGPR[Offset] or M0 or zero) is negative. As a workaround for this issue, if there is no SGPR[Offset] and the immediate offset is negative, subtract the absolute value of the immediate offset from the base address, then set the immediate offset to 0.
1 parent df5e431 commit 0880df2

10 files changed

+319
-27
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ def gi_smrd_sgpr_imm :
116116
GIComplexOperandMatcher<s64, "selectSmrdSgprImm">,
117117
GIComplexPatternEquiv<SMRDSgprImm>;
118118

119+
def gi_smrd_prefetch_imm :
120+
GIComplexOperandMatcher<s64, "selectSmrdPrefetchImm">,
121+
GIComplexPatternEquiv<SMRDPrefetchImm>;
122+
119123
def gi_flat_offset :
120124
GIComplexOperandMatcher<s64, "selectFlatOffset">,
121125
GIComplexPatternEquiv<FlatOffset>;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2072,13 +2072,16 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
20722072
// true, match only 32-bit immediate offsets available on CI.
20732073
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
20742074
SDValue *SOffset, SDValue *Offset,
2075-
bool Imm32Only,
2076-
bool IsBuffer) const {
2075+
bool Imm32Only, bool IsBuffer,
2076+
bool IsPrefetch,
2077+
bool HasSOffset) const {
20772078
if (SOffset && Offset) {
20782079
assert(!Imm32Only && !IsBuffer);
20792080
SDValue B;
2080-
return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
2081-
SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
2081+
return SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false,
2082+
IsPrefetch, true) &&
2083+
SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false,
2084+
IsPrefetch, true);
20822085
}
20832086

20842087
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -2097,21 +2100,48 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
20972100
}
20982101
if (!N0 || !N1)
20992102
return false;
2103+
2104+
bool Selected = false;
21002105
if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
21012106
SBase = N0;
2102-
return true;
2107+
Selected = true;
21032108
}
2109+
21042110
if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
21052111
SBase = N1;
2112+
Selected = true;
2113+
}
2114+
2115+
if (Selected) {
2116+
// For unbuffered smem loads, it is illegal and undefined for the Immediate
2117+
// Offset to be negative if the resulting (Offset + (M0 or SOffset or zero))
2118+
// is negative. Handle the case where the Immediate Offset is negative and
2119+
// there is no SOffset.
2120+
//
2121+
// FIXME: Also handle M0 or SOffset case?
2122+
if (Offset && !HasSOffset && !IsBuffer && !IsPrefetch &&
2123+
Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11) {
2124+
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset)) {
2125+
if (C->getSExtValue() < 0) {
2126+
SDLoc SL(SBase);
2127+
*Offset = CurDAG->getTargetConstant(std::abs(C->getSExtValue()), SL,
2128+
MVT::i32);
2129+
const SDValue Ops[] = {SBase, *Offset};
2130+
SBase = SDValue(
2131+
CurDAG->getMachineNode(AMDGPU::S_SUB_U64, SL, MVT::i64, Ops), 0);
2132+
*Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
2133+
}
2134+
}
2135+
}
21062136
return true;
21072137
}
21082138
return false;
21092139
}
21102140

21112141
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
21122142
SDValue *SOffset, SDValue *Offset,
2113-
bool Imm32Only) const {
2114-
if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2143+
bool Imm32Only, bool IsPrefetch) const {
2144+
if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only, IsPrefetch)) {
21152145
SBase = Expand32BitAddress(SBase);
21162146
return true;
21172147
}
@@ -2170,6 +2200,11 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
21702200
/* IsBuffer */ true);
21712201
}
21722202

2203+
bool AMDGPUDAGToDAGISel::SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase,
2204+
SDValue &Offset) const {
2205+
return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset, false, true);
2206+
}
2207+
21732208
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
21742209
SDValue &Base,
21752210
SDValue &Offset) const {

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
194194
SDValue *Offset, bool Imm32Only = false,
195195
bool IsBuffer = false) const;
196196
SDValue Expand32BitAddress(SDValue Addr) const;
197-
bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
198-
SDValue *Offset, bool Imm32Only = false,
199-
bool IsBuffer = false) const;
200-
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
201-
SDValue *Offset, bool Imm32Only = false) const;
197+
bool SelectSMRDBaseOffset(SDValue Addr, SDValue & SBase, SDValue * SOffset,
198+
SDValue * Offset, bool Imm32Only = false,
199+
bool IsBuffer = false, bool IsPrefetch = false,
200+
bool HasSOffset = false) const;
201+
bool SelectSMRD(SDValue Addr, SDValue & SBase, SDValue * SOffset,
202+
SDValue * Offset, bool Imm32Only = false,
203+
bool IsPrefetch = false) const;
202204
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
203205
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
204206
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
@@ -208,6 +210,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
208210
bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
209211
bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
210212
SDValue &Offset) const;
213+
bool SelectSMRDPrefetchImm(SDValue Addr, SDValue & SBase, SDValue & Offset)
214+
const;
211215
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
212216

213217
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4221,7 +4221,8 @@ AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
42214221
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
42224222
Register &Base,
42234223
Register *SOffset,
4224-
int64_t *Offset) const {
4224+
int64_t *Offset,
4225+
bool IsPrefetch) const {
42254226
MachineInstr *MI = Root.getParent();
42264227
MachineBasicBlock *MBB = MI->getParent();
42274228

@@ -4257,6 +4258,27 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
42574258
if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
42584259
Base = GEPI.SgprParts[0];
42594260
*Offset = *EncodedImm;
4261+
// For unbuffered smem loads, it is illegal and undefined for the Immediate
4262+
// Offset to be negative if the resulting (Offset + (M0 or SOffset or zero))
4263+
// is negative. Handle the case where the Immediate Offset is negative and
4264+
// there is no SOffset.
4265+
//
4266+
// FIXME: Also handle M0 or SOffset case?
4267+
if (!IsPrefetch && *Offset < 0 &&
4268+
STI.getGeneration() >= AMDGPUSubtarget::GFX11) {
4269+
// Subtract the absolute value of the offset from the base register and
4270+
// set the immediate offset to 0.
4271+
Register SubtractReg =
4272+
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
4273+
4274+
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_SUB_U64),
4275+
SubtractReg)
4276+
.addReg(Base)
4277+
.addImm(std::abs(*Offset));
4278+
Base = SubtractReg;
4279+
*Offset = 0;
4280+
}
4281+
42604282
return true;
42614283
}
42624284

@@ -4339,6 +4361,17 @@ AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
43394361
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
43404362
}
43414363

4364+
InstructionSelector::ComplexRendererFns
4365+
AMDGPUInstructionSelector::selectSmrdPrefetchImm(MachineOperand &Root) const {
4366+
Register Base;
4367+
int64_t Offset;
4368+
if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset, true))
4369+
return std::nullopt;
4370+
4371+
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
4372+
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
4373+
}
4374+
43424375
std::pair<Register, int>
43434376
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
43444377
uint64_t FlatVariant) const {

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,10 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
220220
InstructionSelector::ComplexRendererFns
221221
selectVINTERPModsHi(MachineOperand &Root) const;
222222

223-
bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset,
224-
int64_t *Offset) const;
223+
bool selectSmrdOffset(MachineOperand & Root, Register & Base,
224+
Register * SOffset, int64_t * Offset,
225+
bool IsPrefetch = false) const;
226+
225227
InstructionSelector::ComplexRendererFns
226228
selectSmrdImm(MachineOperand &Root) const;
227229
InstructionSelector::ComplexRendererFns
@@ -230,6 +232,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
230232
selectSmrdSgpr(MachineOperand &Root) const;
231233
InstructionSelector::ComplexRendererFns
232234
selectSmrdSgprImm(MachineOperand &Root) const;
235+
InstructionSelector::ComplexRendererFns
236+
selectSmrdPrefetchImm(MachineOperand &Root) const;
233237

234238
std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root,
235239
uint64_t FlatVariant) const;

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,7 @@ def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
859859
def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
860860
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
861861
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
862+
def SMRDPrefetchImm : ComplexPattern<iPTR, 2, "SelectSMRDPrefetchImm">;
862863

863864
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
864865

@@ -1080,7 +1081,7 @@ def i32imm_one : TImmLeaf <i32, [{
10801081

10811082
multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
10821083
def : GCNPat <
1083-
(smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
1084+
(smrd_prefetch (SMRDPrefetchImm i64:$sbase, i32:$offset), timm, timm, cache_type),
10841085
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
10851086
>;
10861087

0 commit comments

Comments
 (0)