@@ -198,6 +198,48 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3(ptr %out) {
198
198
ret void
199
199
}
200
200
201
; Kernel @i64_2d_load_store_subvec_3_i64_offset: two <3 x i64> rows are stored
; into a [2 x [3 x i64]] addrspace(5) alloca, one row is reloaded through a GEP
; whose row index is a runtime i64 (%sel3, zero-extended from an i32 select
; chain), and element 2 of that row is written to %out.
;
; The expected output listed below contains no alloca: the row load is rebuilt
; from three extractelement reads of the constant <6 x i64> <0..5> at flat
; indices %sel3 * 3, + 1 and + 2, gathered into a <3 x i64> with insertelement.
;
; NOTE(review): %sel1 is 1 or 2, so %sel3 can be 2 while the outer array
; dimension is 2 (valid rows 0 and 1). Presumably only the shape of the
; transformed IR matters for this test -- confirm the out-of-range value is
; intended.
+ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset (ptr %out ) {
202
+ ; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(
203
+ ; CHECK-SAME: ptr [[OUT:%.*]]) {
204
+ ; CHECK-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
205
+ ; CHECK-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
206
+ ; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3
207
+ ; CHECK-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3
208
+ ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
209
+ ; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
210
+ ; CHECK-NEXT: [[SEL3:%.*]] = zext i32 [[SEL2]] to i64
211
+ ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
212
+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP1]]
213
+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i64> poison, i64 [[TMP2]], i64 0
214
+ ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], 1
215
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP4]]
216
+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <3 x i64> [[TMP3]], i64 [[TMP5]], i64 1
217
+ ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], 2
218
+ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP7]]
219
+ ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x i64> [[TMP6]], i64 [[TMP8]], i64 2
220
+ ; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP9]], i32 2
221
+ ; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
222
+ ; CHECK-NEXT: ret void
223
+ ;
224
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x ()
225
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y ()
226
; Unsigned comparison of each work-item id against 3.
+ %c1 = icmp uge i32 %x , 3
227
+ %c2 = icmp uge i32 %y , 3
228
; %sel2 is 0 when %c2 holds; otherwise it is %sel1, i.e. 1 (%c1 true) or 2 (%c1 false).
+ %sel1 = select i1 %c1 , i32 1 , i32 2
229
+ %sel2 = select i1 %c2 , i32 0 , i32 %sel1
230
; Widen the selected row index to i64 so the final GEP is indexed with i64 operands.
+ %sel3 = zext i32 %sel2 to i64
231
+ %alloca = alloca [2 x [3 x i64 ]], align 16 , addrspace (5 )
232
; Single index 0: %gep.00 is the same address as %alloca (start of row 0).
+ %gep.00 = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i32 0
233
; Indices (0, 1, 0): %gep.01 addresses element [1][0] (start of row 1).
+ %gep.01 = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
234
; Fill the two rows with the constants 0..2 and 3..5.
+ store <3 x i64 > <i64 0 , i64 1 , i64 2 >, ptr addrspace (5 ) %gep.00
235
+ store <3 x i64 > <i64 3 , i64 4 , i64 5 >, ptr addrspace (5 ) %gep.01
236
; Runtime row selection: indices are (0, %sel3), both of type i64.
+ %gep = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i64 0 , i64 %sel3
237
; Reload the selected row as a whole <3 x i64> vector.
+ %load = load <3 x i64 >, ptr addrspace (5 ) %gep
238
; Only the last element (index 2) of the loaded row is written to %out.
+ %elem = extractelement <3 x i64 > %load , i32 2
239
+ store i64 %elem , ptr %out
240
+ ret void
241
+ }
242
+
201
243
define amdgpu_kernel void @i64_2d_load_store_subvec_4 (ptr %out ) {
202
244
; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_4(
203
245
; CHECK-SAME: ptr [[OUT:%.*]]) {
0 commit comments