Skip to content

Commit e697c99

Browse files
authored
[AMDGPU] Add custom MachineValueType entries for buffer fat pointers (#127692)
The old hack of returning v5/v6i32 for the fat and strided buffer pointers was causing issues during vectorization queries that expected to be able to construct a VectorType from the return value of `MVT getPointerTy()`. One example is in the test attached to this PR, which used to crash. Now, we define the custom MVT entries, the 160-bit amdgpuBufferFatPointer and 192-bit amdgpuBufferStridedPointer, which are used to represent ptr addrspace(7) and ptr addrspace(9) respectively. Neither of these types will be present at the time of lowering to a SelectionDAG or other MIR - MVT::amdgpuBufferFatPointer is eliminated by the LowerBufferFatPointers pass and MVT::amdgpuBufferStridedPointer is not currently used outside of the SPIR-V translator (which does its own lowering). An alternative solution would be to add MVT::i160 and MVT::i192. We elect not to do this now as it would require changes to unrelated code and runs the risk of breaking any SelectionDAG code that assumes that the MVT series are all powers of two (and so can be split apart and merged back together) in ways that wouldn't be obvious if someone tried to use MVT::i160 in codegen. If i160 and i192 are added at some future point, these custom types can be retired.
1 parent fa072bd commit e697c99

File tree

5 files changed

+65
-11
lines changed

5 files changed

+65
-11
lines changed

llvm/include/llvm/CodeGen/ValueTypes.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,12 @@ def i64x8 : ValueType<512, 231>; // 8 Consecutive GPRs (AArch64)
331331
def aarch64svcount
332332
: ValueType<16, 232>; // AArch64 predicate-as-counter
333333
def spirvbuiltin : ValueType<0, 233>; // SPIR-V's builtin type
334+
// AMDGPU buffer fat pointer, buffer rsrc + offset, rewritten before MIR translation.
335+
// FIXME: Remove this and the getPointerTy() override if MVT::i160 is added.
336+
def amdgpuBufferFatPointer : ValueType<160, 234>;
337+
// AMDGPU buffer strided pointer, buffer rsrc + index + offset, doesn't reach MIR.
338+
// FIXME: Remove this and the getPointerTy() override if MVT::i192 is added.
339+
def amdgpuBufferStridedPointer : ValueType<192, 235>;
334340

335341
let isNormalValueType = false in {
336342
def token : ValueType<0, 504>; // TokenTy

llvm/lib/CodeGen/ValueTypes.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ std::string EVT::getEVTString() const {
194194
return "aarch64svcount";
195195
case MVT::spirvbuiltin:
196196
return "spirvbuiltin";
197+
case MVT::amdgpuBufferFatPointer:
198+
return "amdgpuBufferFatPointer";
199+
case MVT::amdgpuBufferStridedPointer:
200+
return "amdgpuBufferStridedPointer";
197201
}
198202
}
199203

@@ -219,6 +223,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
219223
return TargetExtType::get(Context, "aarch64.svcount");
220224
case MVT::x86amx: return Type::getX86_AMXTy(Context);
221225
case MVT::i64x8: return IntegerType::get(Context, 512);
226+
case MVT::amdgpuBufferFatPointer: return IntegerType::get(Context, 160);
227+
case MVT::amdgpuBufferStridedPointer: return IntegerType::get(Context, 192);
222228
case MVT::externref: return Type::getWasm_ExternrefTy(Context);
223229
case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
224230
case MVT::Metadata: return Type::getMetadataTy(Context);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,19 +1180,20 @@ static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI,
11801180
return memVTFromLoadIntrData(TLI, DL, ST->getContainedType(0), MaxNumLanes);
11811181
}
11821182

1183-
/// Map address space 7 to MVT::v5i32 because that's its in-memory
1184-
/// representation. This return value is vector-typed because there is no
1185-
/// MVT::i160 and it is not clear if one can be added. While this could
1186-
/// cause issues during codegen, these address space 7 pointers will be
1187-
/// rewritten away by then. Therefore, we can return MVT::v5i32 in order
1188-
/// to allow pre-codegen passes that query TargetTransformInfo, often for cost
1189-
/// modeling, to work.
1183+
/// Map address space 7 to MVT::amdgpuBufferFatPointer because that's its
1184+
/// in-memory representation. This return value is a custom type because there
1185+
/// is no MVT::i160 and adding one breaks integer promotion logic. While this
1186+
/// could cause issues during codegen, these address space 7 pointers will be
1187+
/// rewritten away by then. Therefore, we can return MVT::amdgpuBufferFatPointer
1188+
/// in order to allow pre-codegen passes that query TargetTransformInfo, often
1189+
/// for cost modeling, to work. (This also sets us up decently for doing the
1190+
/// buffer lowering in GlobalISel if SelectionDAG ever goes away.)
11901191
MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
11911192
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
1192-
return MVT::v5i32;
1193+
return MVT::amdgpuBufferFatPointer;
11931194
if (AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
11941195
DL.getPointerSizeInBits(AS) == 192)
1195-
return MVT::v6i32;
1196+
return MVT::amdgpuBufferStridedPointer;
11961197
return AMDGPUTargetLowering::getPointerTy(DL, AS);
11971198
}
11981199
/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
303303
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
304304

305305
// While address space 7 should never make it to codegen, it still needs to
306-
// have a MVT to prevent some analyses that query this function from breaking,
307-
// so, to work around the lack of i160, map it to v5i32.
306+
// have a MVT to prevent some analyses that query this function from breaking.
307+
// We use the custom MVT::amdgpuBufferFatPointer and
308+
// MVT::amdgpuBufferStridedPointer for this, though we use v8i32 for the
309+
// memory type (which is probably unused).
308310
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
309311
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
310312

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=loop-vectorize -S < %s | FileCheck %s
3+
4+
; Reduced from a crash, variables added to make things more realistic.
5+
; This is a roundabout test for TargetLowering::getValueType() returning
6+
; a reasonable value for <N x p7> instead of asserting.
7+
define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %v) {
8+
; CHECK-LABEL: define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(
9+
; CHECK-SAME: ptr addrspace(1) [[DOTPTR:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[_LR_PH5:.*]]:
11+
; CHECK-NEXT: [[DOTRSRC:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[DOTPTR]], i16 0, i32 -2147483648, i32 159744)
12+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(8) [[DOTRSRC]] to ptr addrspace(7)
13+
; CHECK-NEXT: br label %[[LOOP:.*]]
14+
; CHECK: [[LOOP]]:
15+
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, %[[_LR_PH5]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
16+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP1]], i32 0
17+
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
18+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP3]], [[V]]
19+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[__CRIT_EDGE_LOOPEXIT:.*]], label %[[LOOP]]
20+
; CHECK: [[__CRIT_EDGE_LOOPEXIT]]:
21+
; CHECK-NEXT: ret void
22+
;
23+
entry:
24+
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
25+
%fat = addrspacecast ptr addrspace(8) %rsrc to ptr addrspace(7)
26+
br label %loop
27+
28+
loop: ; preds = %loop, %entry
29+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
30+
%ptr = getelementptr i32, ptr addrspace(7) %fat, i32 0
31+
%iv.next = add i64 %iv, 1
32+
%exitcond.not = icmp eq i64 %iv, %v
33+
br i1 %exitcond.not, label %exit, label %loop
34+
35+
exit: ; preds = %loop
36+
ret void
37+
}
38+
39+
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) readnone, i16, i32, i32)

0 commit comments

Comments
 (0)