@@ -23,6 +23,57 @@ define i8 @memcpy_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias rea
23
23
ret i8 %load
24
24
}
25
25
26
; memcpy from a constant address space (4) argument into an addrspace(5)
; alloca, followed by an indexed i8 load from the alloca that carries
; !noalias !0. The CHECK lines expect the alloca and the memcpy to be gone,
; the load to read straight from %arg through a GEP in addrspace(4), and the
; !noalias !0 attachment to still be present on that load.
+ define i8 @memcpy_constant_arg_ptr_to_alloca_load_metadata ([32 x i8 ] addrspace (4 )* noalias readonly align 4 dereferenceable (32 ) %arg , i32 %idx ) {
27
+ ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_metadata(
28
+ ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
29
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
30
+ ; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1, !noalias !0
31
+ ; CHECK-NEXT: ret i8 [[LOAD]]
32
+ ;
33
+ %alloca = alloca [32 x i8 ], align 4 , addrspace (5 )
34
+ %alloca.cast = bitcast [32 x i8 ] addrspace (5 )* %alloca to i8 addrspace (5 )*
35
+ %arg.cast = bitcast [32 x i8 ] addrspace (4 )* %arg to i8 addrspace (4 )*
36
+ call void @llvm.memcpy.p5i8.p4i8.i64 (i8 addrspace (5 )* %alloca.cast , i8 addrspace (4 )* %arg.cast , i64 32 , i1 false )
37
+ %gep = getelementptr inbounds [32 x i8 ], [32 x i8 ] addrspace (5 )* %alloca , i32 0 , i32 %idx
38
+ %load = load i8 , i8 addrspace (5 )* %gep , !noalias !0
39
+ ret i8 %load
40
+ }
41
+
42
; memcpy of 256 bytes from a constant address space (4) argument into an
; addrspace(5) alloca, followed by an indexed i64 load that states align 16.
; Both the alloca and %arg are only declared align 4. The CHECK lines expect
; the alloca and the memcpy to be gone and the load to read from %arg with
; its align 16 unchanged.
; NOTE(review): the align 16 vs. align 4 mismatch looks deliberate (it makes
; the load's own alignment distinguishable from the pointer's) -- confirm
; before "fixing" either value.
+ define i64 @memcpy_constant_arg_ptr_to_alloca_load_alignment ([32 x i64 ] addrspace (4 )* noalias readonly align 4 dereferenceable (256 ) %arg , i32 %idx ) {
43
+ ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_alignment(
44
+ ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
45
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i64], [32 x i64] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
46
+ ; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64 addrspace(4)* [[GEP]], align 16
47
+ ; CHECK-NEXT: ret i64 [[LOAD]]
48
+ ;
49
+ %alloca = alloca [32 x i64 ], align 4 , addrspace (5 )
50
+ %alloca.cast = bitcast [32 x i64 ] addrspace (5 )* %alloca to i8 addrspace (5 )*
51
+ %arg.cast = bitcast [32 x i64 ] addrspace (4 )* %arg to i8 addrspace (4 )*
52
+ call void @llvm.memcpy.p5i8.p4i8.i64 (i8 addrspace (5 )* %alloca.cast , i8 addrspace (4 )* %arg.cast , i64 256 , i1 false )
53
+ %gep = getelementptr inbounds [32 x i64 ], [32 x i64 ] addrspace (5 )* %alloca , i32 0 , i32 %idx
54
+ %load = load i64 , i64 addrspace (5 )* %gep , align 16
55
+ ret i64 %load
56
+ }
57
+
58
; memcpy of 256 bytes from a constant address space (4) argument into an
; addrspace(5) alloca, followed by an indexed *atomic* i64 load
; (syncscope("somescope") acquire) from the alloca. Here the CHECK lines
; expect the alloca, both bitcasts, the memcpy, the GEP and the atomic load
; to all remain in place; the only visible difference in the expected output
; is that the memcpy operands gain align 8 dereferenceable(256).
+ define i64 @memcpy_constant_arg_ptr_to_alloca_load_atomic ([32 x i64 ] addrspace (4 )* noalias readonly align 8 dereferenceable (256 ) %arg , i32 %idx ) {
59
+ ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_atomic(
60
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i64], align 8, addrspace(5)
61
+ ; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = bitcast [32 x i64] addrspace(5)* [[ALLOCA]] to i8 addrspace(5)*
62
+ ; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast [32 x i64] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
63
+ ; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 dereferenceable(256) [[ALLOCA_CAST]], i8 addrspace(4)* align 8 dereferenceable(256) [[ARG_CAST]], i64 256, i1 false)
64
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
65
+ ; CHECK-NEXT: [[LOAD:%.*]] = load atomic i64, i64 addrspace(5)* [[GEP]] syncscope("somescope") acquire, align 8
66
+ ; CHECK-NEXT: ret i64 [[LOAD]]
67
+ ;
68
+ %alloca = alloca [32 x i64 ], align 8 , addrspace (5 )
69
+ %alloca.cast = bitcast [32 x i64 ] addrspace (5 )* %alloca to i8 addrspace (5 )*
70
+ %arg.cast = bitcast [32 x i64 ] addrspace (4 )* %arg to i8 addrspace (4 )*
71
+ call void @llvm.memcpy.p5i8.p4i8.i64 (i8 addrspace (5 )* %alloca.cast , i8 addrspace (4 )* %arg.cast , i64 256 , i1 false )
72
+ %gep = getelementptr inbounds [32 x i64 ], [32 x i64 ] addrspace (5 )* %alloca , i32 0 , i32 %idx
73
+ %load = load atomic i64 , i64 addrspace (5 )* %gep syncscope("somescope" ) acquire , align 8
74
+ ret i64 %load
75
+ }
76
+
26
77
; Simple memmove to alloca from constant address space argument.
27
78
define i8 @memmove_constant_arg_ptr_to_alloca ([32 x i8 ] addrspace (4 )* noalias readonly align 4 dereferenceable (32 ) %arg , i32 %idx ) {
28
79
; CHECK-LABEL: @memmove_constant_arg_ptr_to_alloca(
@@ -168,7 +219,7 @@ define amdgpu_kernel void @byref_infloop_metadata(i8* %scratch, %struct.ty addrs
168
219
; CHECK-LABEL: @byref_infloop_metadata(
169
220
; CHECK-NEXT: bb:
170
221
; CHECK-NEXT: [[CAST_ALLOCA:%.*]] = bitcast [[STRUCT_TY:%.*]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
171
- ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false), !noalias !0
222
+ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false), !noalias !1
172
223
; CHECK-NEXT: ret void
173
224
;
174
225
bb:
0 commit comments