Skip to content

Commit 6da31fa

Browse files
committed
InstCombine: Fix infinite loop in copy-constant-to-alloca transform
This was broken by 16295d5, when the transform started handling instructions and not just constant expressions. The transform was re-inserting a bitcast equivalent to the original memcpy operand, which made a non-functional IR change on every iteration. This also fixes a secondary problem where it was inserting addrspacecasts which may not have been legal (i.e. it changed the source address space). Start visiting all pointer users and fail out if we can't process them. Also start handling the relevant memory intrinsic users. The cases involving address space casts can be dealt with by running InferAddressSpaces separately.
1 parent 0728b67 commit 6da31fa

File tree

4 files changed

+182
-42
lines changed

4 files changed

+182
-42
lines changed

llvm/include/llvm/IR/IRBuilder.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -573,12 +573,22 @@ class IRBuilderBase {
573573
NoAliasTag);
574574
}
575575

576+
CallInst *CreateMemTransferInst(
577+
Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src,
578+
MaybeAlign SrcAlign, Value *Size, bool isVolatile = false,
579+
MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr,
580+
MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr);
581+
576582
CallInst *CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src,
577583
MaybeAlign SrcAlign, Value *Size,
578584
bool isVolatile = false, MDNode *TBAATag = nullptr,
579585
MDNode *TBAAStructTag = nullptr,
580586
MDNode *ScopeTag = nullptr,
581-
MDNode *NoAliasTag = nullptr);
587+
MDNode *NoAliasTag = nullptr) {
588+
return CreateMemTransferInst(Intrinsic::memcpy, Dst, DstAlign, Src,
589+
SrcAlign, Size, isVolatile, TBAATag,
590+
TBAAStructTag, ScopeTag, NoAliasTag);
591+
}
582592

583593
CallInst *CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign, Value *Src,
584594
MaybeAlign SrcAlign, Value *Size);

llvm/lib/IR/IRBuilder.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -136,22 +136,21 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
136136
return CI;
137137
}
138138

139-
CallInst *IRBuilderBase::CreateMemCpy(Value *Dst, MaybeAlign DstAlign,
140-
Value *Src, MaybeAlign SrcAlign,
141-
Value *Size, bool isVolatile,
142-
MDNode *TBAATag, MDNode *TBAAStructTag,
143-
MDNode *ScopeTag, MDNode *NoAliasTag) {
139+
CallInst *IRBuilderBase::CreateMemTransferInst(
140+
Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src,
141+
MaybeAlign SrcAlign, Value *Size, bool isVolatile, MDNode *TBAATag,
142+
MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
144143
Dst = getCastedInt8PtrValue(Dst);
145144
Src = getCastedInt8PtrValue(Src);
146145

147146
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
148147
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
149148
Module *M = BB->getParent()->getParent();
150-
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
149+
Function *TheFn = Intrinsic::getDeclaration(M, IntrID, Tys);
151150

152151
CallInst *CI = createCallHelper(TheFn, Ops, this);
153152

154-
auto* MCI = cast<MemCpyInst>(CI);
153+
auto* MCI = cast<MemTransferInst>(CI);
155154
if (DstAlign)
156155
MCI->setDestAlignment(*DstAlign);
157156
if (SrcAlign)

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -237,37 +237,40 @@ namespace {
237237
class PointerReplacer {
238238
public:
239239
PointerReplacer(InstCombinerImpl &IC) : IC(IC) {}
240+
241+
bool collectUsers(Instruction &I);
240242
void replacePointer(Instruction &I, Value *V);
241243

242244
private:
243-
void findLoadAndReplace(Instruction &I);
244245
void replace(Instruction *I);
245246
Value *getReplacement(Value *I);
246247

247-
SmallVector<Instruction *, 4> Path;
248+
SmallSetVector<Instruction *, 4> Worklist;
248249
MapVector<Value *, Value *> WorkMap;
249250
InstCombinerImpl &IC;
250251
};
251252
} // end anonymous namespace
252253

253-
void PointerReplacer::findLoadAndReplace(Instruction &I) {
254+
bool PointerReplacer::collectUsers(Instruction &I) {
254255
for (auto U : I.users()) {
255-
auto *Inst = dyn_cast<Instruction>(&*U);
256-
if (!Inst)
257-
return;
258-
LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
259-
if (isa<LoadInst>(Inst)) {
260-
for (auto P : Path)
261-
replace(P);
262-
replace(Inst);
256+
Instruction *Inst = cast<Instruction>(&*U);
257+
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
258+
if (Load->isVolatile())
259+
return false;
260+
Worklist.insert(Load);
263261
} else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
264-
Path.push_back(Inst);
265-
findLoadAndReplace(*Inst);
266-
Path.pop_back();
262+
Worklist.insert(Inst);
263+
if (!collectUsers(*Inst))
264+
return false;
265+
} else if (isa<MemTransferInst>(Inst)) {
266+
Worklist.insert(Inst);
267267
} else {
268-
return;
268+
LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
269+
return false;
269270
}
270271
}
272+
273+
return true;
271274
}
272275

273276
Value *PointerReplacer::getReplacement(Value *V) {
@@ -309,6 +312,28 @@ void PointerReplacer::replace(Instruction *I) {
309312
IC.InsertNewInstWith(NewI, *BC);
310313
NewI->takeName(BC);
311314
WorkMap[BC] = NewI;
315+
} else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
316+
auto *SrcV = getReplacement(MemCpy->getRawSource());
317+
// The pointer may appear in the destination of a copy, but we don't want to
318+
// replace it.
319+
if (!SrcV) {
320+
assert(getReplacement(MemCpy->getRawDest()) &&
321+
"destination not in replace list");
322+
return;
323+
}
324+
325+
IC.Builder.SetInsertPoint(MemCpy);
326+
auto *NewI = IC.Builder.CreateMemTransferInst(
327+
MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
328+
SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
329+
MemCpy->isVolatile());
330+
AAMDNodes AAMD;
331+
MemCpy->getAAMetadata(AAMD);
332+
if (AAMD)
333+
NewI->setAAMetadata(AAMD);
334+
335+
IC.eraseInstFromFunction(*MemCpy);
336+
WorkMap[MemCpy] = NewI;
312337
} else {
313338
llvm_unreachable("should never reach here");
314339
}
@@ -322,7 +347,9 @@ void PointerReplacer::replacePointer(Instruction &I, Value *V) {
322347
"Invalid usage");
323348
#endif
324349
WorkMap[&I] = V;
325-
findLoadAndReplace(I);
350+
351+
for (Instruction *Workitem : Worklist)
352+
replace(Workitem);
326353
}
327354

328355
Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
@@ -376,32 +403,36 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
376403
// read.
377404
SmallVector<Instruction *, 4> ToDelete;
378405
if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
406+
Value *TheSrc = Copy->getSource();
379407
Align AllocaAlign = AI.getAlign();
380408
Align SourceAlign = getOrEnforceKnownAlignment(
381-
Copy->getSource(), AllocaAlign, DL, &AI, &AC, &DT);
409+
TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
382410
if (AllocaAlign <= SourceAlign &&
383-
isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
411+
isDereferenceableForAllocaSize(TheSrc, &AI, DL)) {
384412
LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
385413
LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
386-
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
387-
eraseInstFromFunction(*ToDelete[i]);
388-
Value *TheSrc = Copy->getSource();
389-
auto *SrcTy = TheSrc->getType();
390-
auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
391-
SrcTy->getPointerAddressSpace());
392-
Value *Cast =
393-
Builder.CreatePointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
394-
if (AI.getType()->getPointerAddressSpace() ==
395-
SrcTy->getPointerAddressSpace()) {
414+
unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
415+
auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace);
416+
if (AI.getType()->getAddressSpace() == SrcAddrSpace) {
417+
for (Instruction *Delete : ToDelete)
418+
eraseInstFromFunction(*Delete);
419+
420+
Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
396421
Instruction *NewI = replaceInstUsesWith(AI, Cast);
397422
eraseInstFromFunction(*Copy);
398423
++NumGlobalCopies;
399424
return NewI;
400425
}
401426

402427
PointerReplacer PtrReplacer(*this);
403-
PtrReplacer.replacePointer(AI, Cast);
404-
++NumGlobalCopies;
428+
if (PtrReplacer.collectUsers(AI)) {
429+
for (Instruction *Delete : ToDelete)
430+
eraseInstFromFunction(*Delete);
431+
432+
Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
433+
PtrReplacer.replacePointer(AI, Cast);
434+
++NumGlobalCopies;
435+
}
405436
}
406437
}
407438

llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll

Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,23 @@ define i8 @memcpy_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias rea
2323
ret i8 %load
2424
}
2525

26+
; Simple memmove to alloca from constant address space argument.
27+
define i8 @memmove_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
28+
; CHECK-LABEL: @memmove_constant_arg_ptr_to_alloca(
29+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
30+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
31+
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
32+
; CHECK-NEXT: ret i8 [[LOAD]]
33+
;
34+
%alloca = alloca [32 x i8], align 4, addrspace(5)
35+
%alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
36+
%arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)*
37+
call void @llvm.memmove.p5i8.p4i8.i32(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i32 32, i1 false)
38+
%gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
39+
%load = load i8, i8 addrspace(5)* %gep
40+
ret i8 %load
41+
}
42+
2643
; Simple memcpy to alloca from byref constant address space argument.
2744
define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 byref([32 x i8]) %arg, i8 addrspace(1)* %out, i32 %idx) {
2845
; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca(
@@ -87,9 +104,13 @@ define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(
87104
; Alloca is written through a flat pointer
88105
define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat([31 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
89106
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat(
90-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
91-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
92-
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
107+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
108+
; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
109+
; CHECK-NEXT: [[ALLOCA_CAST_ASC:%.*]] = addrspacecast i8 addrspace(5)* [[ALLOCA_CAST]] to i8*
110+
; CHECK-NEXT: [[ARG_CAST:%.*]] = getelementptr inbounds [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 0
111+
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i64(i8* nonnull align 1 dereferenceable(31) [[ALLOCA_CAST_ASC]], i8 addrspace(4)* align 4 dereferenceable(31) [[ARG_CAST]], i64 31, i1 false)
112+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
113+
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1
93114
; CHECK-NEXT: ret i8 [[LOAD]]
94115
;
95116
%alloca = alloca [32 x i8], align 4, addrspace(5)
@@ -125,9 +146,88 @@ define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat2([32 x i8] ad
125146
ret i8 %load
126147
}
127148

128-
declare void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* nocapture, i8 addrspace(4)* nocapture, i64, i1) #0
149+
%struct.ty = type { [4 x i32] }
150+
151+
define amdgpu_kernel void @byref_infloop(i8* %scratch, %struct.ty addrspace(4)* byref(%struct.ty) align 4 %arg) local_unnamed_addr #1 {
152+
; CHECK-LABEL: @byref_infloop(
153+
; CHECK-NEXT: bb:
154+
; CHECK-NEXT: [[CAST_ALLOCA:%.*]] = bitcast [[STRUCT_TY:%.*]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
155+
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false)
156+
; CHECK-NEXT: ret void
157+
;
158+
bb:
159+
%alloca = alloca [4 x i32], align 4, addrspace(5)
160+
%cast.arg = bitcast %struct.ty addrspace(4)* %arg to i8 addrspace(4)*
161+
%cast.alloca = bitcast [4 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
162+
call void @llvm.memcpy.p5i8.p4i8.i32(i8 addrspace(5)* align 4 %cast.alloca, i8 addrspace(4)* align 4 %cast.arg, i32 16, i1 false)
163+
call void @llvm.memcpy.p0i8.p5i8.i32(i8* align 4 %scratch, i8 addrspace(5)* align 4 %cast.alloca, i32 16, i1 false)
164+
ret void
165+
}
166+
167+
define amdgpu_kernel void @byref_infloop_metadata(i8* %scratch, %struct.ty addrspace(4)* byref(%struct.ty) align 4 %arg) local_unnamed_addr #1 {
168+
; CHECK-LABEL: @byref_infloop_metadata(
169+
; CHECK-NEXT: bb:
170+
; CHECK-NEXT: [[CAST_ALLOCA:%.*]] = bitcast [[STRUCT_TY:%.*]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
171+
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false), !noalias !0
172+
; CHECK-NEXT: ret void
173+
;
174+
bb:
175+
%alloca = alloca [4 x i32], align 4, addrspace(5)
176+
%cast.arg = bitcast %struct.ty addrspace(4)* %arg to i8 addrspace(4)*
177+
%cast.alloca = bitcast [4 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
178+
call void @llvm.memcpy.p5i8.p4i8.i32(i8 addrspace(5)* align 4 %cast.alloca, i8 addrspace(4)* align 4 %cast.arg, i32 16, i1 false), !noalias !0
179+
call void @llvm.memcpy.p0i8.p5i8.i32(i8* align 4 %scratch, i8 addrspace(5)* align 4 %cast.alloca, i32 16, i1 false), !noalias !1
180+
ret void
181+
}
182+
183+
define amdgpu_kernel void @byref_infloop_addrspacecast(i8* %scratch, %struct.ty addrspace(4)* byref(%struct.ty) align 4 %arg) local_unnamed_addr #1 {
184+
; CHECK-LABEL: @byref_infloop_addrspacecast(
185+
; CHECK-NEXT: bb:
186+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4 x i32], align 4, addrspace(5)
187+
; CHECK-NEXT: [[CAST_ARG:%.*]] = bitcast [[STRUCT_TY:%.*]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
188+
; CHECK-NEXT: [[CAST_ALLOCA:%.*]] = bitcast [4 x i32] addrspace(5)* [[ALLOCA]] to i8 addrspace(5)*
189+
; CHECK-NEXT: [[ADDRSPACECAST_ALLOCA:%.*]] = addrspacecast i8 addrspace(5)* [[CAST_ALLOCA]] to i8*
190+
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i64(i8* nonnull align 4 dereferenceable(16) [[ADDRSPACECAST_ALLOCA]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ARG]], i64 16, i1 false)
191+
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8* nonnull align 4 dereferenceable(16) [[ADDRSPACECAST_ALLOCA]], i64 16, i1 false)
192+
; CHECK-NEXT: ret void
193+
;
194+
bb:
195+
%alloca = alloca [4 x i32], align 4, addrspace(5)
196+
%cast.arg = bitcast %struct.ty addrspace(4)* %arg to i8 addrspace(4)*
197+
%cast.alloca = bitcast [4 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
198+
%addrspacecast.alloca = addrspacecast i8 addrspace(5)* %cast.alloca to i8*
199+
call void @llvm.memcpy.p0i8.p4i8.i64(i8* nonnull align 4 dereferenceable(16) %addrspacecast.alloca, i8 addrspace(4)* align 4 dereferenceable(16) %cast.arg, i64 16, i1 false)
200+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %scratch, i8* nonnull align 4 dereferenceable(16) %addrspacecast.alloca, i64 16, i1 false)
201+
ret void
202+
}
203+
204+
define amdgpu_kernel void @byref_infloop_memmove(i8* %scratch, %struct.ty addrspace(4)* byref(%struct.ty) align 4 %arg) local_unnamed_addr #1 {
205+
; CHECK-LABEL: @byref_infloop_memmove(
206+
; CHECK-NEXT: bb:
207+
; CHECK-NEXT: [[CAST_ALLOCA:%.*]] = bitcast [[STRUCT_TY:%.*]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
208+
; CHECK-NEXT: call void @llvm.memmove.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false)
209+
; CHECK-NEXT: ret void
210+
;
211+
bb:
212+
%alloca = alloca [4 x i32], align 4, addrspace(5)
213+
%cast.arg = bitcast %struct.ty addrspace(4)* %arg to i8 addrspace(4)*
214+
%cast.alloca = bitcast [4 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
215+
call void @llvm.memmove.p5i8.p4i8.i32(i8 addrspace(5)* align 4 %cast.alloca, i8 addrspace(4)* align 4 %cast.arg, i32 16, i1 false)
216+
call void @llvm.memmove.p0i8.p5i8.i32(i8* align 4 %scratch, i8 addrspace(5)* align 4 %cast.alloca, i32 16, i1 false)
217+
ret void
218+
}
219+
220+
declare void @llvm.memcpy.p0i8.p5i8.i32(i8* noalias nocapture writeonly, i8 addrspace(5)* noalias nocapture readonly, i32, i1 immarg) #0
221+
declare void @llvm.memcpy.p5i8.p4i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(4)* nocapture, i32, i1) #0
129222
declare void @llvm.memcpy.p0i8.p4i8.i64(i8* nocapture, i8 addrspace(4)* nocapture, i64, i1) #0
223+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
224+
declare void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* nocapture, i8 addrspace(4)* nocapture, i64, i1) #0
225+
declare void @llvm.memmove.p5i8.p4i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(4)* nocapture, i32, i1) #0
226+
declare void @llvm.memmove.p0i8.p5i8.i32(i8* nocapture, i8 addrspace(5)* nocapture, i32, i1) #0
130227
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #1
131228

132229
attributes #0 = { argmemonly nounwind willreturn }
133230
attributes #1 = { nounwind readnone speculatable }
231+
232+
!0 = !{!0}
233+
!1 = !{!1}

0 commit comments

Comments
 (0)