Skip to content

Commit f338a84

Browse files
committed
[AMDGPU] Add custom MachineValueType entries for buffer fat pointers
The old hack of returning v5/v6i32 for the fat and strided buffer pointers was causing issues during vectorization queries that expected to be able to construct a VectorType from the return value of `MVT getPointerTy()`. One example is in the test attached to this PR, which used to crash. Now, we define custom MVT entries, the 160-bit amdgpuBufferFatPointer and the 192-bit amdgpuBufferStridedPointer, which are used to represent ptr addrspace(7) and ptr addrspace(9) respectively. Neither of these types will be present at the time of lowering to a SelectionDAG or other MIR - MVT::amdgpuBufferFatPointer is eliminated by the LowerBufferFatPointers pass and MVT::amdgpuBufferStridedPointer is not currently used outside of the SPIR-V translator (which does its own lowering).
1 parent 6720465 commit f338a84

File tree

5 files changed

+63
-11
lines changed

5 files changed

+63
-11
lines changed

llvm/include/llvm/CodeGen/ValueTypes.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,10 @@ def i64x8 : ValueType<512, 231>; // 8 Consecutive GPRs (AArch64)
331331
def aarch64svcount
332332
: ValueType<16, 232>; // AArch64 predicate-as-counter
333333
def spirvbuiltin : ValueType<0, 233>; // SPIR-V's builtin type
334+
// AMDGPU buffer fat pointer, buffer rsrc + offset, rewritten before MIR translation
335+
def amdgpuBufferFatPointer : ValueType<160, 234>;
336+
// AMDGPU buffer strided pointer, buffer rsrc + index + offset, doesn't reach MIR
337+
def amdgpuBufferStridedPointer : ValueType<192, 235>;
334338

335339
let isNormalValueType = false in {
336340
def token : ValueType<0, 504>; // TokenTy

llvm/lib/CodeGen/ValueTypes.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ std::string EVT::getEVTString() const {
194194
return "aarch64svcount";
195195
case MVT::spirvbuiltin:
196196
return "spirvbuiltin";
197+
case MVT::amdgpuBufferFatPointer:
198+
return "amdgpuBufferFatPointer";
199+
case MVT::amdgpuBufferStridedPointer:
200+
return "amdgpuBufferStridedPointer";
197201
}
198202
}
199203

@@ -219,6 +223,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
219223
return TargetExtType::get(Context, "aarch64.svcount");
220224
case MVT::x86amx: return Type::getX86_AMXTy(Context);
221225
case MVT::i64x8: return IntegerType::get(Context, 512);
226+
case MVT::amdgpuBufferFatPointer: return IntegerType::get(Context, 160);
227+
case MVT::amdgpuBufferStridedPointer: return IntegerType::get(Context, 192);
222228
case MVT::externref: return Type::getWasm_ExternrefTy(Context);
223229
case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
224230
case MVT::Metadata: return Type::getMetadataTy(Context);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,19 +1180,20 @@ static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI,
11801180
return memVTFromLoadIntrData(TLI, DL, ST->getContainedType(0), MaxNumLanes);
11811181
}
11821182

1183-
/// Map address space 7 to MVT::v5i32 because that's its in-memory
1184-
/// representation. This return value is vector-typed because there is no
1185-
/// MVT::i160 and it is not clear if one can be added. While this could
1186-
/// cause issues during codegen, these address space 7 pointers will be
1187-
/// rewritten away by then. Therefore, we can return MVT::v5i32 in order
1188-
/// to allow pre-codegen passes that query TargetTransformInfo, often for cost
1189-
/// modeling, to work.
1183+
/// Map address space 7 to MVT::amdgpuBufferFatPointer because that's its
1184+
/// in-memory representation. This return value is a custom type because there
1185+
/// is no MVT::i160 and adding one breaks integer promotion logic. While this
1186+
/// could cause issues during codegen, these address space 7 pointers will be
1187+
/// rewritten away by then. Therefore, we can return MVT::amdgpuBufferFatPointer
1188+
/// in order to allow pre-codegen passes that query TargetTransformInfo, often
1189+
/// for cost modeling, to work. (This also sets us up decently for doing the
1190+
/// buffer lowering in GlobalISel if SelectionDAG ever goes away.)
11901191
MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
11911192
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
1192-
return MVT::v5i32;
1193+
return MVT::amdgpuBufferFatPointer;
11931194
if (AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
11941195
DL.getPointerSizeInBits(AS) == 192)
1195-
return MVT::v6i32;
1196+
return MVT::amdgpuBufferStridedPointer;
11961197
return AMDGPUTargetLowering::getPointerTy(DL, AS);
11971198
}
11981199
/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
303303
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
304304

305305
// While address space 7 should never make it to codegen, it still needs to
306-
// have a MVT to prevent some analyses that query this function from breaking,
307-
// so, to work around the lack of i160, map it to v5i32.
306+
// have a MVT to prevent some analyses that query this function from breaking.
307+
// We use the custom MVT::amdgpuBufferFatPointer and
308+
// MVT::amdgpuBufferStridedPointer for this, though we use v8i32 for the
309+
// memory type (which is probably unused).
308310
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
309311
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
310312

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=loop-vectorize -S < %s | FileCheck %s
3+
4+
; Reduced from a crash, variables added to make things more realistic.
5+
; This is a roundabout test for TargetLowering::getValueType() returning
6+
; a reasonable value for <N x p7> instead of asserting.
7+
define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %v) {
8+
; CHECK-LABEL: define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(
9+
; CHECK-SAME: ptr addrspace(1) [[DOTPTR:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[_LR_PH5:.*:]]
11+
; CHECK-NEXT: [[DOTRSRC:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[DOTPTR]], i16 0, i32 -2147483648, i32 159744)
12+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(8) [[DOTRSRC]] to ptr addrspace(7)
13+
; CHECK-NEXT: br label %[[BB2:.*]]
14+
; CHECK: [[BB2]]:
15+
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[DOTLR_PH5:%.*]] ], [ [[TMP5:%.*]], %[[BB2]] ]
16+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP1]], i32 0
17+
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
18+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP3]], [[TMP0]]
19+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], [[DOT_CRIT_EDGE_LOOPEXIT:label %.*]], label %[[BB2]]
20+
; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
21+
; CHECK-NEXT: ret void
22+
;
23+
entry:
24+
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
25+
%fat = addrspacecast ptr addrspace(8) %rsrc to ptr addrspace(7)
26+
br label %loop
27+
28+
loop: ; preds = %loop, %entry
29+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
30+
%ptr = getelementptr i32, ptr addrspace(7) %fat, i32 0
31+
%iv.next = add i64 %iv, 1
32+
%exitcond.not = icmp eq i64 %iv, %v
33+
br i1 %exitcond.not, label %exit, label %loop
34+
35+
exit: ; preds = %loop
36+
ret void
37+
}
38+
39+
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) readnone, i16, i32, i32)

0 commit comments

Comments
 (0)