Skip to content

Commit 0577e45

Browse files
author
git apple-llvm automerger
committed
Merge commit '6a9484f4bf6c' from llvm.org/master into apple/main
2 parents 1113b15 + 6a9484f commit 0577e45

File tree

2 files changed

+57
-3
lines changed

2 files changed

+57
-3
lines changed

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -287,9 +287,12 @@ void PointerReplacer::replace(Instruction *I) {
287287
if (auto *LT = dyn_cast<LoadInst>(I)) {
288288
auto *V = getReplacement(LT->getPointerOperand());
289289
assert(V && "Operand not replaced");
290-
auto *NewI = new LoadInst(I->getType(), V, "", false,
291-
IC.getDataLayout().getABITypeAlign(I->getType()));
290+
auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
291+
LT->getAlign(), LT->getOrdering(),
292+
LT->getSyncScopeID());
292293
NewI->takeName(LT);
294+
copyMetadataForLoad(*NewI, *LT);
295+
293296
IC.InsertNewInstWith(NewI, *LT);
294297
IC.replaceInstUsesWith(*LT, NewI);
295298
WorkMap[LT] = NewI;

llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll

Lines changed: 52 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,57 @@ define i8 @memcpy_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias rea
2323
ret i8 %load
2424
}
2525

26+
define i8 @memcpy_constant_arg_ptr_to_alloca_load_metadata([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
27+
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_metadata(
28+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
29+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
30+
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1, !noalias !0
31+
; CHECK-NEXT: ret i8 [[LOAD]]
32+
;
33+
%alloca = alloca [32 x i8], align 4, addrspace(5)
34+
%alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
35+
%arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)*
36+
call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 32, i1 false)
37+
%gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
38+
%load = load i8, i8 addrspace(5)* %gep, !noalias !0
39+
ret i8 %load
40+
}
41+
42+
define i64 @memcpy_constant_arg_ptr_to_alloca_load_alignment([32 x i64] addrspace(4)* noalias readonly align 4 dereferenceable(256) %arg, i32 %idx) {
43+
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_alignment(
44+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
45+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i64], [32 x i64] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
46+
; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64 addrspace(4)* [[GEP]], align 16
47+
; CHECK-NEXT: ret i64 [[LOAD]]
48+
;
49+
%alloca = alloca [32 x i64], align 4, addrspace(5)
50+
%alloca.cast = bitcast [32 x i64] addrspace(5)* %alloca to i8 addrspace(5)*
51+
%arg.cast = bitcast [32 x i64] addrspace(4)* %arg to i8 addrspace(4)*
52+
call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 256, i1 false)
53+
%gep = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* %alloca, i32 0, i32 %idx
54+
%load = load i64, i64 addrspace(5)* %gep, align 16
55+
ret i64 %load
56+
}
57+
58+
define i64 @memcpy_constant_arg_ptr_to_alloca_load_atomic([32 x i64] addrspace(4)* noalias readonly align 8 dereferenceable(256) %arg, i32 %idx) {
59+
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_atomic(
60+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i64], align 8, addrspace(5)
61+
; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = bitcast [32 x i64] addrspace(5)* [[ALLOCA]] to i8 addrspace(5)*
62+
; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast [32 x i64] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
63+
; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 dereferenceable(256) [[ALLOCA_CAST]], i8 addrspace(4)* align 8 dereferenceable(256) [[ARG_CAST]], i64 256, i1 false)
64+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
65+
; CHECK-NEXT: [[LOAD:%.*]] = load atomic i64, i64 addrspace(5)* [[GEP]] syncscope("somescope") acquire, align 8
66+
; CHECK-NEXT: ret i64 [[LOAD]]
67+
;
68+
%alloca = alloca [32 x i64], align 8, addrspace(5)
69+
%alloca.cast = bitcast [32 x i64] addrspace(5)* %alloca to i8 addrspace(5)*
70+
%arg.cast = bitcast [32 x i64] addrspace(4)* %arg to i8 addrspace(4)*
71+
call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 256, i1 false)
72+
%gep = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* %alloca, i32 0, i32 %idx
73+
%load = load atomic i64, i64 addrspace(5)* %gep syncscope("somescope") acquire, align 8
74+
ret i64 %load
75+
}
76+
2677
; Simple memmove to alloca from constant address space argument.
2778
define i8 @memmove_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
2879
; CHECK-LABEL: @memmove_constant_arg_ptr_to_alloca(
@@ -168,7 +219,7 @@ define amdgpu_kernel void @byref_infloop_metadata(i8* %scratch, %struct.ty addrs
168219
; CHECK-LABEL: @byref_infloop_metadata(
169220
; CHECK-NEXT: bb:
170221
; CHECK-NEXT: [[CAST_ALLOCA:%.*]] = bitcast [[STRUCT_TY:%.*]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
171-
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false), !noalias !0
222+
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false), !noalias !1
172223
; CHECK-NEXT: ret void
173224
;
174225
bb:

0 commit comments

Comments
 (0)