Skip to content

Commit a45eb62

Browse files
committed
AtomicExpand: Fix dropping a syncscope when bitcasting atomicrmw
1 parent 8350d9c commit a45eb62

File tree

4 files changed

+13
-12
lines changed

4 files changed

+13
-12
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,8 +434,9 @@ AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
434434
? Builder.CreatePtrToInt(Val, NewTy)
435435
: Builder.CreateBitCast(Val, NewTy);
436436

437-
auto *NewRMWI = Builder.CreateAtomicRMW(
438-
AtomicRMWInst::Xchg, Addr, NewVal, RMWI->getAlign(), RMWI->getOrdering());
437+
auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
438+
RMWI->getAlign(), RMWI->getOrdering(),
439+
RMWI->getSyncScopeID());
439440
NewRMWI->setVolatile(RMWI->isVolatile());
440441
LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
441442

llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3641,7 +3641,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr %out, double %in) {
36413641
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
36423642
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3] offset:32
36433643
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
3644-
; GFX12-NEXT: global_inv scope:SCOPE_SYS
3644+
; GFX12-NEXT: global_inv scope:SCOPE_DEV
36453645
; GFX12-NEXT: s_endpgm
36463646
entry:
36473647
%gep = getelementptr double, ptr %out, i64 4
@@ -3688,7 +3688,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr %out, ptr %in) {
36883688
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
36893689
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3] offset:32
36903690
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
3691-
; GFX12-NEXT: global_inv scope:SCOPE_SYS
3691+
; GFX12-NEXT: global_inv scope:SCOPE_DEV
36923692
; GFX12-NEXT: s_endpgm
36933693
entry:
36943694
%gep = getelementptr ptr, ptr %out, i32 4

llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4570,7 +4570,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr addrspace(1) %out, double
45704570
; GFX12-NEXT: v_mov_b32_e32 v0, s2
45714571
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1] offset:32
45724572
; GFX12-NEXT: s_wait_storecnt 0x0
4573-
; GFX12-NEXT: global_inv scope:SCOPE_SYS
4573+
; GFX12-NEXT: global_inv scope:SCOPE_DEV
45744574
; GFX12-NEXT: s_endpgm
45754575
entry:
45764576
%gep = getelementptr double, ptr addrspace(1) %out, i64 4
@@ -4625,7 +4625,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr addrspace(1) %out, ptr
46254625
; GFX12-NEXT: v_mov_b32_e32 v0, s2
46264626
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1] offset:32
46274627
; GFX12-NEXT: s_wait_storecnt 0x0
4628-
; GFX12-NEXT: global_inv scope:SCOPE_SYS
4628+
; GFX12-NEXT: global_inv scope:SCOPE_DEV
46294629
; GFX12-NEXT: s_endpgm
46304630
entry:
46314631
%gep = getelementptr ptr, ptr addrspace(1) %out, i64 4

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ define half @test_atomicrmw_xchg_f16_global_agent(ptr addrspace(1) %ptr, half %v
918918
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
919919
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
920920
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
921-
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
921+
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
922922
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
923923
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
924924
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -942,7 +942,7 @@ define half @test_atomicrmw_xchg_f16_global_agent_align4(ptr addrspace(1) %ptr,
942942
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
943943
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
944944
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
945-
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
945+
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
946946
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
947947
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
948948
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -973,7 +973,7 @@ define half @test_atomicrmw_xchg_f16_flat_agent(ptr %ptr, half %value) {
973973
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
974974
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
975975
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
976-
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
976+
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
977977
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
978978
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
979979
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -997,7 +997,7 @@ define half @test_atomicrmw_xchg_f16_flat_agent_align4(ptr %ptr, half %value) {
997997
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
998998
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
999999
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
1000-
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
1000+
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
10011001
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
10021002
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
10031003
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1028,7 +1028,7 @@ define bfloat @test_atomicrmw_xchg_bf16_global_agent(ptr addrspace(1) %ptr, bflo
10281028
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
10291029
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
10301030
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
1031-
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
1031+
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
10321032
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
10331033
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
10341034
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1052,7 +1052,7 @@ define bfloat @test_atomicrmw_xchg_bf16_global_agent_align4(ptr addrspace(1) %pt
10521052
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
10531053
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
10541054
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
1055-
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
1055+
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
10561056
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
10571057
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
10581058
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]

0 commit comments

Comments
 (0)