Skip to content

[AMDGPU] Add custom MachineValueType entries for buffer fat pointers #127692

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/ValueTypes.td
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,12 @@ def i64x8 : ValueType<512, 231>; // 8 Consecutive GPRs (AArch64)
def aarch64svcount
: ValueType<16, 232>; // AArch64 predicate-as-counter
def spirvbuiltin : ValueType<0, 233>; // SPIR-V's builtin type
// AMDGPU buffer fat pointer, buffer rsrc + offset, rewritten before MIR translation.
// FIXME: Remove this and the getPointerType() override if MVT::i160 is added.
def amdgpuBufferFatPointer : ValueType<160, 234>;
// AMDGPU buffer strided pointer, buffer rsrc + index + offset, doesn't reach MIR.
// FIXME: Remove this and the getPointerType() override if MVT::i82 is added.
def amdgpuBufferStridedPointer : ValueType<192, 235>;

let isNormalValueType = false in {
def token : ValueType<0, 504>; // TokenTy
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/ValueTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ std::string EVT::getEVTString() const {
return "aarch64svcount";
case MVT::spirvbuiltin:
return "spirvbuiltin";
case MVT::amdgpuBufferFatPointer:
return "amdgpuBufferFatPointer";
case MVT::amdgpuBufferStridedPointer:
return "amdgpuBufferStridedPointer";
}
}

Expand All @@ -219,6 +223,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return TargetExtType::get(Context, "aarch64.svcount");
case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::i64x8: return IntegerType::get(Context, 512);
case MVT::amdgpuBufferFatPointer: return IntegerType::get(Context, 160);
case MVT::amdgpuBufferStridedPointer: return IntegerType::get(Context, 192);
case MVT::externref: return Type::getWasm_ExternrefTy(Context);
case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
case MVT::Metadata: return Type::getMetadataTy(Context);
Expand Down
19 changes: 10 additions & 9 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1180,19 +1180,20 @@ static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI,
return memVTFromLoadIntrData(TLI, DL, ST->getContainedType(0), MaxNumLanes);
}

/// Map address space 7 to MVT::v5i32 because that's its in-memory
/// representation. This return value is vector-typed because there is no
/// MVT::i160 and it is not clear if one can be added. While this could
/// cause issues during codegen, these address space 7 pointers will be
/// rewritten away by then. Therefore, we can return MVT::v5i32 in order
/// to allow pre-codegen passes that query TargetTransformInfo, often for cost
/// modeling, to work.
/// Map address space 7 to MVT::amdgpuBufferFatPointer because that's its
/// in-memory representation. This return value is a custom type because there
/// is no MVT::i160 and adding one breaks integer promotion logic. While this
/// could cause issues during codegen, these address space 7 pointers will be
/// rewritten away by then. Therefore, we can return MVT::amdgpuBufferFatPointer
/// in order to allow pre-codegen passes that query TargetTransformInfo, often
/// for cost modeling, to work. (This also sets us up decently for doing the
/// buffer lowering in GlobalISel if SelectionDAG ever goes away.)
MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
return MVT::v5i32;
return MVT::amdgpuBufferFatPointer;
if (AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
DL.getPointerSizeInBits(AS) == 192)
return MVT::v6i32;
return MVT::amdgpuBufferStridedPointer;
return AMDGPUTargetLowering::getPointerTy(DL, AS);
}
/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;

// While address space 7 should never make it to codegen, it still needs to
// have a MVT to prevent some analyses that query this function from breaking,
// so, to work around the lack of i160, map it to v5i32.
// have a MVT to prevent some analyses that query this function from breaking.
// We use the custom MVT::amdgpuBufferFatPointer and
// MVT::amdgpuBufferStridedPointer for this, though we use v8i32 for the
// memory type (which is probably unused).
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;

Expand Down
39 changes: 39 additions & 0 deletions llvm/test/Transforms/LoopVectorize/AMDGPU/buffer-fat-pointer.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=loop-vectorize -S < %s | FileCheck %s

; Reduced from a crash, variables added to make things more realistic.
; This is a roundabout test for TargetLowering::getValueType() returning
; a reasonable value for <N x p7> instead of asserting.
define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %v) {
; CHECK-LABEL: define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(
; CHECK-SAME: ptr addrspace(1) [[DOTPTR:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[_LR_PH5:.*]]:
; CHECK-NEXT: [[DOTRSRC:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[DOTPTR]], i16 0, i32 -2147483648, i32 159744)
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(8) [[DOTRSRC]] to ptr addrspace(7)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, %[[_LR_PH5]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP3]], [[V]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[__CRIT_EDGE_LOOPEXIT:.*]], label %[[LOOP]]
; CHECK: [[__CRIT_EDGE_LOOPEXIT]]:
; CHECK-NEXT: ret void
;
entry:
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
%fat = addrspacecast ptr addrspace(8) %rsrc to ptr addrspace(7)
br label %loop

loop: ; preds = %loop, %entry
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%ptr = getelementptr i32, ptr addrspace(7) %fat, i32 0
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv, %v
br i1 %exitcond.not, label %exit, label %loop

exit: ; preds = %loop
ret void
}

declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) readnone, i16, i32, i32)