@@ -288,5 +288,104 @@ define amdgpu_kernel void @i32_3d_load_store(ptr %out) {
288
288
ret void
289
289
}
290
290
291
; Test case: 2-D array of i16 with constant stores and one dynamically
; indexed load.
; A [2 x [3 x i16]] alloca in addrspace(5) is filled element by element with
; the constants 0..5 and is then read at row 1, column %sel; the loaded value
; is stored to %out.
; The expected output below contains no alloca: the load is expected to become
; an extractelement from a constant <6 x i16> vector at index 3 + %sel
; (row 1 * 3 elements per row + column).
; NOTE(review): the pass under test is not visible in this hunk - confirm
; against the RUN line at the top of the file.
+ define amdgpu_kernel void @i16_2d_load_store (ptr %out , i32 %sel ) {
292
+ ; CHECK-LABEL: define amdgpu_kernel void @i16_2d_load_store(
293
+ ; CHECK-SAME: ptr [[OUT:%.*]], i32 [[SEL:%.*]]) {
294
+ ; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
295
+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5>, i32 [[TMP1]]
296
+ ; CHECK-NEXT: store i16 [[TMP2]], ptr [[OUT]], align 2
297
+ ; CHECK-NEXT: ret void
298
+ ;
299
+ %alloca = alloca [2 x [3 x i16 ]], align 16 , addrspace (5 )
300
+ %gep.00 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 0
301
+ %gep.01 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 1
302
+ %gep.02 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 2
303
+ %gep.10 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
304
+ %gep.11 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 1
305
+ %gep.12 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 2
306
; Initialize every element; the stored value equals the element's row-major
; position (row * 3 + column), matching the constant vector expected above.
+ store i16 0 , ptr addrspace (5 ) %gep.00
307
+ store i16 1 , ptr addrspace (5 ) %gep.01
308
+ store i16 2 , ptr addrspace (5 ) %gep.02
309
+ store i16 3 , ptr addrspace (5 ) %gep.10
310
+ store i16 4 , ptr addrspace (5 ) %gep.11
311
+ store i16 5 , ptr addrspace (5 ) %gep.12
312
; Dynamic access: the row index is the constant 1, the column index is the
; kernel argument %sel.
+ %gep = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 %sel
313
+ %load = load i16 , ptr addrspace (5 ) %gep
314
+ store i16 %load , ptr %out
315
+ ret void
316
+ }
317
+
318
; Test case: 2-D array of float with constant stores and one dynamically
; indexed load.
; A [2 x [3 x float]] alloca in addrspace(5) is filled element by element with
; the constants 0.0..5.0 and is then read at row 1, column %sel; the loaded
; value is stored to %out.
; The expected output below contains no alloca: the load is expected to become
; an extractelement from a constant <6 x float> vector at index 3 + %sel
; (row 1 * 3 elements per row + column).
; NOTE(review): the pass under test is not visible in this hunk - confirm
; against the RUN line at the top of the file.
+ define amdgpu_kernel void @float_2d_load_store (ptr %out , i32 %sel ) {
319
+ ; CHECK-LABEL: define amdgpu_kernel void @float_2d_load_store(
320
+ ; CHECK-SAME: ptr [[OUT:%.*]], i32 [[SEL:%.*]]) {
321
+ ; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
322
+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00>, i32 [[TMP1]]
323
+ ; CHECK-NEXT: store float [[TMP2]], ptr [[OUT]], align 4
324
+ ; CHECK-NEXT: ret void
325
+ ;
326
+ %alloca = alloca [2 x [3 x float ]], align 16 , addrspace (5 )
327
+ %gep.00 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 0
328
+ %gep.01 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 1
329
+ %gep.02 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 2
330
+ %gep.10 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
331
+ %gep.11 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 1
332
+ %gep.12 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 2
333
; Initialize every element; the stored value equals the element's row-major
; position (row * 3 + column), matching the constant vector expected above.
+ store float 0 .0 , ptr addrspace (5 ) %gep.00
334
+ store float 1 .0 , ptr addrspace (5 ) %gep.01
335
+ store float 2 .0 , ptr addrspace (5 ) %gep.02
336
+ store float 3 .0 , ptr addrspace (5 ) %gep.10
337
+ store float 4 .0 , ptr addrspace (5 ) %gep.11
338
+ store float 5 .0 , ptr addrspace (5 ) %gep.12
339
; Dynamic access: the row index is the constant 1, the column index is the
; kernel argument %sel.
+ %gep = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 %sel
340
+ %load = load float , ptr addrspace (5 ) %gep
341
+ store float %load , ptr %out
342
+ ret void
343
+ }
344
+
345
; Test case: 2-D array of pointers with non-constant stores and one
; dynamically indexed load.
; A [2 x [3 x ptr]] alloca in addrspace(5) is filled with six pointers derived
; from %out (%out + 0 .. %out + 5, in units of ptr) and is then read at row 1,
; column %sel; the loaded pointer is stored to %out.
; Because the stored values are not constants, the expected output below keeps
; the six pointer computations and builds a <6 x ptr> vector with a chain of
; insertelement instructions (starting from undef) before extracting the
; element at index 3 + %sel (row 1 * 3 elements per row + column). No alloca
; is expected to remain.
; NOTE(review): the pass under test is not visible in this hunk - confirm
; against the RUN line at the top of the file.
+ define amdgpu_kernel void @ptr_2d_load_store (ptr %out , i32 %sel ) {
346
+ ; CHECK-LABEL: define amdgpu_kernel void @ptr_2d_load_store(
347
+ ; CHECK-SAME: ptr [[OUT:%.*]], i32 [[SEL:%.*]]) {
348
+ ; CHECK-NEXT: [[PTR_0:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 0
349
+ ; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 1
350
+ ; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 2
351
+ ; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 3
352
+ ; CHECK-NEXT: [[PTR_4:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 4
353
+ ; CHECK-NEXT: [[PTR_5:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 5
354
+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <6 x ptr> undef, ptr [[PTR_0]], i32 0
355
+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <6 x ptr> [[TMP1]], ptr [[PTR_1]], i32 1
356
+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <6 x ptr> [[TMP2]], ptr [[PTR_2]], i32 2
357
+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x ptr> [[TMP3]], ptr [[PTR_3]], i32 3
358
+ ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x ptr> [[TMP4]], ptr [[PTR_4]], i32 4
359
+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x ptr> [[TMP5]], ptr [[PTR_5]], i32 5
360
+ ; CHECK-NEXT: [[TMP7:%.*]] = add i32 3, [[SEL]]
361
+ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x ptr> [[TMP6]], i32 [[TMP7]]
362
+ ; CHECK-NEXT: store ptr [[TMP8]], ptr [[OUT]], align 8
363
+ ; CHECK-NEXT: ret void
364
+ ;
365
+ %alloca = alloca [2 x [3 x ptr ]], align 16 , addrspace (5 )
366
+ %gep.00 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 0
367
+ %gep.01 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 1
368
+ %gep.02 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 2
369
+ %gep.10 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
370
+ %gep.11 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 1
371
+ %gep.12 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 2
372
; Six distinct pointer values to store: %out offset by 0..5 pointer-sized
; elements. They are only stored and reloaded, never dereferenced here.
+ %ptr.0 = getelementptr inbounds ptr , ptr %out , i32 0
373
+ %ptr.1 = getelementptr inbounds ptr , ptr %out , i32 1
374
+ %ptr.2 = getelementptr inbounds ptr , ptr %out , i32 2
375
+ %ptr.3 = getelementptr inbounds ptr , ptr %out , i32 3
376
+ %ptr.4 = getelementptr inbounds ptr , ptr %out , i32 4
377
+ %ptr.5 = getelementptr inbounds ptr , ptr %out , i32 5
378
; Initialize every element; %ptr.N goes to row-major position N
; (row * 3 + column), matching the insertelement lane indices expected above.
+ store ptr %ptr.0 , ptr addrspace (5 ) %gep.00
379
+ store ptr %ptr.1 , ptr addrspace (5 ) %gep.01
380
+ store ptr %ptr.2 , ptr addrspace (5 ) %gep.02
381
+ store ptr %ptr.3 , ptr addrspace (5 ) %gep.10
382
+ store ptr %ptr.4 , ptr addrspace (5 ) %gep.11
383
+ store ptr %ptr.5 , ptr addrspace (5 ) %gep.12
384
; Dynamic access: the row index is the constant 1, the column index is the
; kernel argument %sel.
+ %gep = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 %sel
385
+ %load = load ptr , ptr addrspace (5 ) %gep
386
+ store ptr %load , ptr %out
387
+ ret void
388
+ }
389
+
291
390
declare i32 @llvm.amdgcn.workitem.id.x ()
292
391
declare i32 @llvm.amdgcn.workitem.id.y ()
0 commit comments