Skip to content

Commit f7d0370

Browse files
authored
[AMDGPU] Generalize amdgcn.make.buffer.rsrc to fat pointers (llvm#126828)
Attempting to pass a `ptr addrspace(7)` to functions that take `ptr` arguments produces undesirable `addrspacecast(addrspacecast(p8 x to p7) to p0) => addrspacecast(p8 x to p0)` folds. This results in illegal GEP operations on buffer resources, which can't be GEP'd. (However, note that, while unimplemneted, addressspacecast from ptr addrspace(7) to ptr is legal - it's just an effective address computation) To resolve this problem, and thus prevent illegal `getelementptr T, ptr addrspace(8) %x, ...` s from being produces, this commit extends amdgcn.make.buffer.rsrc to also be variadic in its result type, auto-upgrading old manglings. The logic for handling a make.buffer.rsrc in instruction selection remains untouched and expects the output type to be a ptr addrspace(8), as does the Clang lowering for its builtin (the pointer-to-pointer version might want a different name in clang). LowerBufferFatPointers has been updated to lower amdgcn.make.buffer.rsrc.p7.p* to amdgcn.make.buffer.rsrc.p8.p* . This'll also make exposing buffer fat pointers in Clang easier, since you don't have to cast between a `__amdgcn_rsrc_t` and a pointer.
1 parent 3a00c42 commit f7d0370

File tree

15 files changed

+217
-58
lines changed

15 files changed

+217
-58
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20723,9 +20723,19 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
2072320723
case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
2072420724
return emitBuiltinWithOneOverloadedType<4>(*this, E,
2072520725
Intrinsic::amdgcn_bitop3);
20726-
case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20727-
return emitBuiltinWithOneOverloadedType<4>(
20728-
*this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20726+
case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
20727+
// TODO: LLVM has this overloaded to allow for fat pointers, but since
20728+
// those haven't been plumbed through to Clang yet, default to creating the
20729+
// resource type.
20730+
SmallVector<Value *, 4> Args;
20731+
for (unsigned I = 0; I < 4; ++I)
20732+
Args.push_back(EmitScalarExpr(E->getArg(I)));
20733+
llvm::PointerType *RetTy = llvm::PointerType::get(
20734+
Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
20735+
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
20736+
{RetTy, Args[0]->getType()});
20737+
return Builder.CreateCall(F, Args);
20738+
}
2072920739
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
2073020740
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
2073120741
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:

clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
2626
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
2727
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
28-
// CHECK-NEXT: [[TMP4:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]])
28+
// CHECK-NEXT: [[TMP4:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]])
2929
// CHECK-NEXT: ret ptr addrspace(8) [[TMP4]]
3030
//
3131
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, int num, int flags) {
@@ -49,7 +49,7 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short
4949
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
5050
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
5151
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
52-
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[TMP0]], i16 4, i32 [[TMP1]], i32 [[TMP2]])
52+
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 4, i32 [[TMP1]], i32 [[TMP2]])
5353
// CHECK-NEXT: ret ptr addrspace(8) [[TMP3]]
5454
//
5555
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p, int num, int flags) {
@@ -73,7 +73,7 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constan
7373
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
7474
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
7575
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
76-
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[TMP0]], i16 [[TMP1]], i32 1234, i32 [[TMP2]])
76+
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 1234, i32 [[TMP2]])
7777
// CHECK-NEXT: ret ptr addrspace(8) [[TMP3]]
7878
//
7979
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, short stride, int flags) {
@@ -97,7 +97,7 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(v
9797
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
9898
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
9999
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
100-
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 5678)
100+
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 5678)
101101
// CHECK-NEXT: ret ptr addrspace(8) [[TMP3]]
102102
//
103103
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, short stride, int num) {

clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0(
66
// CHECK-NEXT: entry:
7-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
7+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
88
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
99
//
1010
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, int num, int flags) {
@@ -13,7 +13,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, in
1313

1414
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_stride_constant(
1515
// CHECK-NEXT: entry:
16-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
16+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
1717
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
1818
//
1919
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p, int num, int flags) {
@@ -22,7 +22,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p,
2222

2323
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_num_constant(
2424
// CHECK-NEXT: entry:
25-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
25+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
2626
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
2727
//
2828
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, short stride, int flags) {
@@ -31,7 +31,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, sho
3131

3232
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_flags_constant(
3333
// CHECK-NEXT: entry:
34-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
34+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
3535
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
3636
//
3737
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, short stride, int num) {
@@ -40,7 +40,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, s
4040

4141
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1(
4242
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
4444
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
4545
//
4646
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1(global void *p, short stride, int num, int flags) {
@@ -49,7 +49,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1(global void *p, short str
4949

5050
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_stride_constant(
5151
// CHECK-NEXT: entry:
52-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
52+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
5353
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
5454
//
5555
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_stride_constant(global void *p, int num, int flags) {
@@ -58,7 +58,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_stride_constant(global vo
5858

5959
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_num_constant(
6060
// CHECK-NEXT: entry:
61-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
61+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
6262
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
6363
//
6464
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_num_constant(global void *p, short stride, int flags) {
@@ -67,7 +67,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_num_constant(global void
6767

6868
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_flags_constant(
6969
// CHECK-NEXT: entry:
70-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
70+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
7171
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
7272
//
7373
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global void *p, short stride, int num) {
@@ -76,7 +76,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global voi
7676

7777
// CHECK-LABEL: @test_amdgcn_make_buffer_p0_nullptr(
7878
// CHECK-NEXT: entry:
79-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
79+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
8080
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
8181
//
8282
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num, int flags) {
@@ -85,7 +85,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num,
8585

8686
// CHECK-LABEL: @test_amdgcn_make_buffer_p1_nullptr(
8787
// CHECK-NEXT: entry:
88-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
88+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
8989
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
9090
//
9191
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p1_nullptr(short stride, int num, int flags) {

llvm/docs/AMDGPUUsage.rst

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,12 @@ supported for the ``amdgcn`` target.
990990
the stride must be 0, the "add tid" flag must be 0, the swizzle enable bits
991991
must be off, and the extent must be measured in bytes. (On subtargets where
992992
bounds checking may be disabled, buffer fat pointers may choose to enable
993-
it or not).
993+
it or not). The cache swizzle support introduced in gfx942 may be used.
994+
995+
These pointers can be created by `addrspacecast` from a buffer resource
996+
(`ptr addrspace(8)`) or by using `llvm.amdgcn.make.buffer.rsrc` to produce a
997+
`ptr addrspace(7)` directly, which produces a buffer fat pointer with an initial
998+
offset of 0 and prevents the address space cast from being rewritten away.
994999

9951000
**Buffer Resource**
9961001
The buffer resource pointer, in address space 8, is the newer form
@@ -1027,6 +1032,12 @@ supported for the ``amdgcn`` target.
10271032
the stride is the size of a structured element, the "add tid" flag must be 0,
10281033
and the swizzle enable bits must be off.
10291034

1035+
These pointers can be created by `addrspacecast` from a buffer resource
1036+
(`ptr addrspace(8)`) or by using `llvm.amdgcn.make.buffer.rsrc` to produce a
1037+
`ptr addrspace(9)` directly, which produces a buffer strided pointer whose initial
1038+
index and offset values are both 0. This prevents the address space cast from
1039+
being rewritten away.
1040+
10301041
**Streamout Registers**
10311042
Dedicated registers used by the GS NGG Streamout Instructions. The register
10321043
file is modelled as a memory in a distinct address space because it is indexed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1284,11 +1284,24 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
12841284
// Data type for buffer resources (V#). Maybe, in the future, we can create a
12851285
// similar one for textures (T#).
12861286
def AMDGPUBufferRsrcTy : LLVMQualPointerType<8>;
1287+
// Data type for buffer fat pointers, which are a buffer resource (V#) followed by
1288+
// a 32-bit offset. These don't exist in hardware and are a compiler-internal
1289+
// convenience.
1290+
def AMDGPUBufferFatPointerTy : LLVMQualPointerType<7>;
12871291

12881292
let TargetPrefix = "amdgcn" in {
12891293

1294+
// Create a buffer resource wrapping `base` with the specified `stride`
1295+
// `numrecords`, and `flags`. All of these values will need to be
1296+
// wave-uniform when the buffer instructions are invoked, so non-uniform
1297+
// inputs to this intrinsic will trigger waterfall loops.
1298+
//
1299+
// In addition to creating ptr addrspace(8), whe representation of buffer
1300+
// resources, it can create the fat pointers ptr addrspace(7) and ptr addrspace(9),
1301+
// which carry additional offset bits. When this intrinsic is used to create
1302+
// these fat pointers, their offset and index fields (if applicable) are zero.
12901303
def int_amdgcn_make_buffer_rsrc : DefaultAttrsIntrinsic <
1291-
[AMDGPUBufferRsrcTy],
1304+
[llvm_anyptr_ty],
12921305
[llvm_anyptr_ty, // base
12931306
llvm_i16_ty, // stride (and swizzle control)
12941307
llvm_i32_ty, // NumRecords / extent

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,6 +2078,7 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
20782078
switch (IID) {
20792079
default:
20802080
return false;
2081+
case Intrinsic::amdgcn_make_buffer_rsrc:
20812082
case Intrinsic::ptrmask:
20822083
case Intrinsic::invariant_start:
20832084
case Intrinsic::invariant_end:
@@ -2092,6 +2093,25 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
20922093
switch (IID) {
20932094
default:
20942095
break;
2096+
case Intrinsic::amdgcn_make_buffer_rsrc: {
2097+
if (!isSplitFatPtr(I.getType()))
2098+
return {nullptr, nullptr};
2099+
Value *Base = I.getArgOperand(0);
2100+
Value *Stride = I.getArgOperand(1);
2101+
Value *NumRecords = I.getArgOperand(2);
2102+
Value *Flags = I.getArgOperand(3);
2103+
auto *SplitType = cast<StructType>(I.getType());
2104+
Type *RsrcType = SplitType->getElementType(0);
2105+
Type *OffType = SplitType->getElementType(1);
2106+
IRB.SetInsertPoint(&I);
2107+
Value *Rsrc = IRB.CreateIntrinsic(IID, {RsrcType, Base->getType()},
2108+
{Base, Stride, NumRecords, Flags});
2109+
copyMetadata(Rsrc, &I);
2110+
Rsrc->takeName(&I);
2111+
Value *Zero = Constant::getNullValue(OffType);
2112+
SplitUsers.insert(&I);
2113+
return {Rsrc, Zero};
2114+
}
20952115
case Intrinsic::ptrmask: {
20962116
Value *Ptr = I.getArgOperand(0);
20972117
if (!isSplitFatPtr(Ptr->getType()))

0 commit comments

Comments
 (0)