@@ -291,3 +291,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
291
291
%gep = getelementptr inbounds i8 , ptr %base , i64 %mul
292
292
ret ptr %gep
293
293
}
294
+
295
+ ; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr. The store address is %p + zext(workitem.id.x << 2) + 16. The LEGACY checks store with an SGPR-pair base (s[0:1]) plus a VGPR offset (v0); the PTRADD checks add base and offset with v_lshl_add_u64 and store through v[0:1] with "off". Both keep the constant as offset:16.
296
+ define amdgpu_kernel void @uniform_base_varying_offset_imm (ptr addrspace (1 ) %p ) {
297
+ ; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
298
+ ; GFX942_PTRADD: ; %bb.0: ; %entry
299
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
300
+ ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
301
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
302
+ ; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
303
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1
304
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
305
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
306
+ ; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16
307
+ ; GFX942_PTRADD-NEXT: s_endpgm
308
+ ;
309
+ ; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
310
+ ; GFX942_LEGACY: ; %bb.0: ; %entry
311
+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
312
+ ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
313
+ ; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
314
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1
315
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
316
+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16
317
+ ; GFX942_LEGACY-NEXT: s_endpgm
318
+ entry:
319
+ %tid = call i32 @llvm.amdgcn.workitem.id.x ()
320
+ %shift = shl i32 %tid , 2 ; per-workitem byte offset: workitem id times 4
321
+ %voffset = zext i32 %shift to i64 ; widen the 32-bit offset to the 64-bit GEP index type
322
+ %gep1 = getelementptr inbounds i8 , ptr addrspace (1 ) %p , i64 %voffset
323
+ %gep2 = getelementptr inbounds i8 , ptr addrspace (1 ) %gep1 , i64 16 ; constant part of the address; shows up as offset:16 in both check sets
324
+ store i32 1 , ptr addrspace (1 ) %gep2
325
+ ret void
326
+ }
327
+
328
+ ; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
329
+ ; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset. The load address is %sbase + zext(%soffset), both kernel arguments. The LEGACY checks pass the offset register (s6) directly as the s_load_dword offset operand; the PTRADD checks first add it to the base with s_add_u32/s_addc_u32 and load with offset 0x0.
330
+ define amdgpu_kernel void @global_load_saddr_i32_uniform_offset (ptr addrspace (1 ) %sbase , i32 %soffset , ptr addrspace (1 ) %r ) {
331
+ ; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
332
+ ; GFX942_PTRADD: ; %bb.0:
333
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
334
+ ; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8
335
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
336
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
337
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
338
+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6
339
+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0
340
+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
341
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
342
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
343
+ ; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
344
+ ; GFX942_PTRADD-NEXT: s_endpgm
345
+ ;
346
+ ; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
347
+ ; GFX942_LEGACY: ; %bb.0:
348
+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
349
+ ; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8
350
+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
351
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
352
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
353
+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
354
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
355
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
356
+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
357
+ ; GFX942_LEGACY-NEXT: s_endpgm
358
+ %zext.offset = zext i32 %soffset to i64 ; widen the 32-bit offset argument to the 64-bit GEP index type
359
+ %gep0 = getelementptr inbounds i8 , ptr addrspace (1 ) %sbase , i64 %zext.offset
360
+ %load = load i32 , ptr addrspace (1 ) %gep0
361
+ %to.vgpr = bitcast i32 %load to float ; the loaded value is copied s0 -> v1 before the store in both check sets
362
+ store float %to.vgpr , ptr addrspace (1 ) %r
363
+ ret void
364
+ }
365
+
366
+ ; Adjusted from llvm.amdgcn.global.load.lds.ll, tests the offset lowering for
367
+ ; Intrinsic::amdgcn_global_load_lds. The global address is the inreg pointer %gptr (s[0:1] in the checks) plus zext(%voffset). The LEGACY checks pass s[0:1] as the base with the offset in v1; the PTRADD checks first add the two with v_lshl_add_u64 and address through v[2:3] with "off".
368
+ define void @global_load_lds_dword_saddr_and_vaddr (ptr addrspace (1 ) nocapture inreg %gptr , ptr addrspace (3 ) nocapture %lptr , i32 %voffset ) {
369
+ ; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
370
+ ; GFX942_PTRADD: ; %bb.0: ; %main_body
371
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1
373
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0
374
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
375
+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
376
+ ; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0
377
+ ; GFX942_PTRADD-NEXT: s_nop 0
378
+ ; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1
379
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
380
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
381
+ ;
382
+ ; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
383
+ ; GFX942_LEGACY: ; %bb.0: ; %main_body
384
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385
+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0
386
+ ; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2
387
+ ; GFX942_LEGACY-NEXT: s_nop 0
388
+ ; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
389
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
390
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
391
+ main_body:
392
+ %voffset.64 = zext i32 %voffset to i64 ; widen the 32-bit offset to the 64-bit GEP index type
393
+ %gep = getelementptr i8 , ptr addrspace (1 ) %gptr , i64 %voffset.64
394
+ call void @llvm.amdgcn.global.load.lds (ptr addrspace (1 ) %gep , ptr addrspace (3 ) %lptr , i32 4 , i32 48 , i32 16 ) ; NOTE(review): immediates 4, 48, 16 are presumably size, offset and aux; the checks show a dword access with offset:48 and sc1 - confirm against the intrinsic definition
395
+ ret void
396
+ }
397
+
398
+ ; Taken from shl_add_ptr_global.ll, tests PTRADD handling in
399
+ ; SITargetLowering::performSHLPtrCombine. The atomic address is (%ptr + 128) << 2, and the unshifted %ptr + 128 is also stored to %extra.use. The LEGACY checks shift %ptr (v[4:5]) directly and use offset:512 on the atomic; the PTRADD checks add 0x80 first, shift the sum, and use no immediate offset.
400
+ define void @shl_base_global_ptr_global_atomic_fadd (ptr addrspace (1 ) %out , ptr addrspace (1 ) %extra.use , ptr addrspace (1 ) %ptr ) {
401
+ ; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
402
+ ; GFX942_PTRADD: ; %bb.0:
403
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404
+ ; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80
405
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
406
+ ; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
407
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000
408
+ ; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off
409
+ ; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
410
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
411
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
412
+ ;
413
+ ; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
414
+ ; GFX942_LEGACY: ; %bb.0:
415
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416
+ ; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
417
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000
418
+ ; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
419
+ ; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80
420
+ ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
421
+ ; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
422
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
423
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
424
+ %arrayidx0 = getelementptr inbounds [512 x i32 ], ptr addrspace (1 ) %ptr , i64 0 , i64 32 ; element 32 of i32 = byte offset 128 (0x80 in the checks)
425
+ %cast = ptrtoint ptr addrspace (1 ) %arrayidx0 to i64
426
+ %shl = shl i64 %cast , 2 ; shifting the offset pointer: 128 << 2 = 512, the offset:512 in the LEGACY checks
427
+ %castback = inttoptr i64 %shl to ptr addrspace (1 )
428
+ %unused = atomicrmw fadd ptr addrspace (1 ) %castback , float 100 .0 syncscope("agent" ) monotonic , align 4 , !amdgpu.no.fine.grained.memory !0 , !amdgpu.ignore.denormal.mode !0
429
+ store volatile i64 %cast , ptr addrspace (1 ) %extra.use , align 4 ; second use of %cast besides the shl, so the unshifted sum is also materialized
430
+ ret void
431
+ }
432
+
433
+ ; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
434
+ ; TargetLowering::ShrinkDemandedOp. The sum %src + %offset is cast from addrspace(4) to addrspace(6) before the load, and both check sets read only v0 of the sum (v_readfirstlane, with s1 set to 0). The LEGACY checks use a 32-bit v_add_u32 for the addition; the PTRADD checks keep a 64-bit v_lshl_add_u64.
435
+ define i32 @gep_in_const_as_cast_to_const32_as (ptr addrspace (4 ) %src , i64 %offset ) {
436
+ ; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
437
+ ; GFX942_PTRADD: ; %bb.0: ; %entry
438
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
440
+ ; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0
441
+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
442
+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
443
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
444
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0
445
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
446
+ ;
447
+ ; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
448
+ ; GFX942_LEGACY: ; %bb.0: ; %entry
449
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450
+ ; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2
451
+ ; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0
452
+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0
453
+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0
454
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
455
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0
456
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
457
+ entry:
458
+ %gep = getelementptr i8 , ptr addrspace (4 ) %src , i64 %offset ; 64-bit pointer arithmetic in addrspace(4)
459
+ %gep.cast = addrspacecast ptr addrspace (4 ) %gep to ptr addrspace (6 ) ; only v0 of the result is read in the checks after this cast
460
+ %l = load i32 , ptr addrspace (6 ) %gep.cast
461
+ ret i32 %l
462
+ }
463
+
464
+ @CG = addrspace (4 ) constant [16 x i32 ] zeroinitializer , align 4
465
+
466
+ ; Test PTRADD handling in isMemSrcFromConstant. The memcpy source is 4 bytes into the zero-initialized constant @CG and 8 bytes are copied to %dst. The LEGACY checks store zeros directly (v2 = 0, v3 = v2) without reading @CG; the PTRADD checks load the 8 bytes from @CG at offset 0x4 (address obtained via CG@gotpcrel32) and store those.
467
+ define void @replace_const0_memcpy_by_memset (ptr align 4 %dst ) {
468
+ ; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
469
+ ; GFX942_PTRADD: ; %bb.0: ; %entry
470
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471
+ ; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
472
+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, CG@gotpcrel32@lo+4
473
+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
474
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
475
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
476
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4
477
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
478
+ ; GFX942_PTRADD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
479
+ ; GFX942_PTRADD-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
480
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
481
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
482
+ ;
483
+ ; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
484
+ ; GFX942_LEGACY: ; %bb.0: ; %entry
485
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 0
487
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v3, v2
488
+ ; GFX942_LEGACY-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
489
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
490
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
491
+ entry:
492
+ %gep = getelementptr i8 , ptr addrspace (4 ) @CG , i64 4 ; source pointer is a constant offset into the all-zero global
493
+ tail call void @llvm.memcpy.p0.p4.i64 (ptr noundef nonnull align 4 %dst , ptr addrspace (4 ) noundef nonnull align 4 %gep , i64 8 , i1 false )
494
+ ret void
495
+ }
496
+
497
+ declare void @llvm.memcpy.p0.p4.i64 (ptr noalias nocapture writeonly , ptr addrspace (4 ) noalias nocapture readonly , i64 , i1 immarg) ; memcpy variant used by replace_const0_memcpy_by_memset: address space 0 destination, addrspace(4) source, i64 length
498
+
499
+ !0 = !{} ; empty metadata node; operand of !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode on the atomicrmw in shl_base_global_ptr_global_atomic_fadd
0 commit comments