@@ -434,5 +434,201 @@ entry:
ret void
}

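+ ; Check that a relaxed (monotonic) atomicrmw on LDS does not block the !amdgpu.noclobber annotation: the load from %in stays a scalar load.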
+ ; GCN-LABEL: {{^}}no_alias_atomic_rmw_relaxed:
+ ; GCN: ds_add_u32
+ ; GCN: s_load_dword s
+ ; GCN-NOT: global_load_dword
+ ; GCN: global_store_dword
+ define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+ ; CHECK-LABEL: @no_alias_atomic_rmw_relaxed(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic, align 4
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
+ ; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+ ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
+ %ld = load i32, i32 addrspace(1)* %gep, align 4
+ store i32 %ld, i32 addrspace(1)* %out, align 4
+ ret void
+ }
+
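+ ; Check that an LDS cmpxchg combined with workgroup fences and s_barrier still leaves !amdgpu.noclobber on the load from %in.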
+ ; GCN-LABEL: {{^}}no_alias_atomic_cmpxchg:
+ ; GCN: ds_cmpst_b32
+ ; GCN: s_load_dword s
+ ; GCN-NOT: global_load_dword
+ ; GCN: global_store_dword
+ define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 %swap) {
+ ; CHECK-LABEL: @no_alias_atomic_cmpxchg(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg i32 addrspace(3)* @LDS, i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4
+ ; CHECK-NEXT: fence syncscope("workgroup") release
+ ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
+ ; CHECK-NEXT: fence syncscope("workgroup") acquire
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
+ ; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+ ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ %unused = cmpxchg i32 addrspace(3)* @LDS, i32 7, i32 %swap seq_cst monotonic
+ fence syncscope("workgroup") release
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
+ %ld = load i32, i32 addrspace(1)* %gep, align 4
+ store i32 %ld, i32 addrspace(1)* %out, align 4
+ ret void
+ }
+
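+ ; Same as the previous test, but with a seq_cst atomicrmw on LDS instead of a cmpxchg.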
+ ; GCN-LABEL: {{^}}no_alias_atomic_rmw:
+ ; GCN: ds_add_u32
+ ; GCN: s_load_dword s
+ ; GCN-NOT: global_load_dword
+ ; GCN: global_store_dword
+ define protected amdgpu_kernel void @no_alias_atomic_rmw(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+ ; CHECK-LABEL: @no_alias_atomic_rmw(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4
+ ; CHECK-NEXT: fence syncscope("workgroup") release
+ ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
+ ; CHECK-NEXT: fence syncscope("workgroup") acquire
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
+ ; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+ ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst
+ fence syncscope("workgroup") release
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
+ %ld = load i32, i32 addrspace(1)* %gep, align 4
+ store i32 %ld, i32 addrspace(1)* %out, align 4
+ ret void
+ }
+
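+ ; Check that a cmpxchg on a global pointer that may alias %in prevents the !amdgpu.noclobber annotation: the load is a vector global_load.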
+ ; GCN-LABEL: {{^}}may_alias_atomic_cmpxchg:
+ ; GCN: global_atomic_cmpswap
+ ; GCN: global_load_dword
+ ; GCN: global_store_dword
+ define protected amdgpu_kernel void @may_alias_atomic_cmpxchg(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 %swap) {
+ ; CHECK-LABEL: @may_alias_atomic_cmpxchg(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg i32 addrspace(1)* [[OUT:%.*]], i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4
+ ; CHECK-NEXT: fence syncscope("workgroup") release
+ ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
+ ; CHECK-NEXT: fence syncscope("workgroup") acquire
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+ ; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+ ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ %unused = cmpxchg i32 addrspace(1)* %out, i32 7, i32 %swap seq_cst monotonic
+ fence syncscope("workgroup") release
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
+ %ld = load i32, i32 addrspace(1)* %gep, align 4
+ store i32 %ld, i32 addrspace(1)* %out, align 4
+ ret void
+ }
+
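+ ; Same as the previous test, but with an atomicrmw on the potentially aliasing global pointer.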
+ ; GCN-LABEL: {{^}}may_alias_atomic_rmw:
+ ; GCN: global_atomic_add
+ ; GCN: global_load_dword
+ ; GCN: global_store_dword
+ define protected amdgpu_kernel void @may_alias_atomic_rmw(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+ ; CHECK-LABEL: @may_alias_atomic_rmw(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(1)* [[OUT:%.*]], i32 5 seq_cst, align 4
+ ; CHECK-NEXT: fence syncscope("workgroup") release
+ ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
+ ; CHECK-NEXT: fence syncscope("workgroup") acquire
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+ ; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+ ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ %unused = atomicrmw add i32 addrspace(1)* %out, i32 5 seq_cst
+ fence syncscope("workgroup") release
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
+ %ld = load i32, i32 addrspace(1)* %gep, align 4
+ store i32 %ld, i32 addrspace(1)* %out, align 4
+ ret void
+ }
+
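+ ; Check that a plain store to %out ahead of the barrier clobbers the later load from %in: no !amdgpu.noclobber, and a vector global_load is generated.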
+ ; GCN-LABEL: {{^}}no_alias_atomic_rmw_then_clobber:
+ ; GCN: global_store_dword
+ ; GCN: global_store_dword
+ ; GCN: ds_add_u32
+ ; GCN: global_load_dword
+ ; GCN: global_store_dword
+ define protected amdgpu_kernel void @no_alias_atomic_rmw_then_clobber(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 addrspace(1)* noalias %noalias) {
+ ; CHECK-LABEL: @no_alias_atomic_rmw_then_clobber(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[OUT:%.*]], align 4
+ ; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[NOALIAS:%.*]], align 4
+ ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4
+ ; CHECK-NEXT: fence syncscope("workgroup") release
+ ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
+ ; CHECK-NEXT: fence syncscope("workgroup") acquire
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+ ; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+ ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ store i32 1, i32 addrspace(1)* %out, align 4
+ store i32 2, i32 addrspace(1)* %noalias, align 4
+ %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst
+ fence syncscope("workgroup") release
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
+ %ld = load i32, i32 addrspace(1)* %gep, align 4
+ store i32 %ld, i32 addrspace(1)* %out, align 4
+ ret void
+ }
+
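+ ; Check that a store only through the noalias pointer does not clobber the load from %in: !amdgpu.noclobber is kept and a scalar load is generated.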
+ ; GCN-LABEL: {{^}}no_alias_atomic_rmw_then_no_alias_store:
+ ; GCN: global_store_dword
+ ; GCN: ds_add_u32
+ ; GCN: s_load_dword s
+ ; GCN-NOT: global_load_dword
+ ; GCN: global_store_dword
+ define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 addrspace(1)* noalias %noalias) {
+ ; CHECK-LABEL: @no_alias_atomic_rmw_then_no_alias_store(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[NOALIAS:%.*]], align 4
+ ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4
+ ; CHECK-NEXT: fence syncscope("workgroup") release
+ ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
+ ; CHECK-NEXT: fence syncscope("workgroup") acquire
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
+ ; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+ ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ store i32 2, i32 addrspace(1)* %noalias, align 4
+ %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst
+ fence syncscope("workgroup") release
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
+ %ld = load i32, i32 addrspace(1)* %gep, align 4
+ store i32 %ld, i32 addrspace(1)* %out, align 4
+ ret void
+ }
+
declare void @llvm.amdgcn.s.barrier()
declare void @llvm.amdgcn.wave.barrier()