Skip to content

Commit 8c01d93

Browse files
committed
AMDGPU: Start considering new atomicrmw metadata on integer operations
Start considering the !amdgpu.no.remote.memory and !amdgpu.no.fine.grained.memory metadata when deciding whether to expand integer atomicrmw operations. This does not yet attempt to accurately handle fadd/fmin/fmax, which are trickier and require migrating the old "amdgpu-unsafe-fp-atomics" attribute.
1 parent 493d7e3 commit 8c01d93

28 files changed

+11173
-9985
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16784,19 +16784,60 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1678416784
case AtomicRMWInst::UDecWrap: {
1678516785
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
1678616786
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16787-
// Always expand system scope atomics.
16788-
if (HasSystemScope) {
16789-
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16790-
Op == AtomicRMWInst::Xor) {
16791-
// Atomic sub/or/xor do not work over PCI express, but atomic add
16792-
// does. InstCombine transforms these with 0 to or, so undo that.
16793-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16794-
ConstVal && ConstVal->isNullValue())
16795-
return AtomicExpansionKind::Expand;
16796-
}
16797-
16798-
return AtomicExpansionKind::CmpXChg;
16787+
// On most subtargets, for atomicrmw operations other than add/xchg,
16788+
// whether or not the instructions will behave correctly depends on where
16789+
// the address physically resides and what interconnect is used in the
16790+
// system configuration. On some targets the instruction will nop,
16791+
// and in others synchronization will only occur at degraded device scope.
16792+
//
16793+
// If the allocation is known local to the device, the instructions should
16794+
// work correctly.
16795+
if (RMW->hasMetadata("amdgpu.no.remote.memory"))
16796+
return atomicSupportedIfLegalIntType(RMW);
16797+
16798+
// If fine-grained remote memory works at device scope, we don't need to
16799+
// do anything.
16800+
if (!HasSystemScope &&
16801+
Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16802+
return atomicSupportedIfLegalIntType(RMW);
16803+
16804+
// If we are targeting a remote allocated address, it depends what kind of
16805+
// allocation the address belongs to.
16806+
//
16807+
// If the allocation is fine-grained (in host memory, or in PCIe peer
16808+
// device memory), the operation will fail depending on the target.
16809+
//
16810+
// Note fine-grained host memory access does work on APUs or if XGMI is
16811+
// used, but we do not know if we are targeting an APU or the system
16812+
// configuration from the ISA version/target-cpu.
16813+
if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16814+
return atomicSupportedIfLegalIntType(RMW);
16815+
16816+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16817+
Op == AtomicRMWInst::Xor) {
16818+
// Atomic sub/or/xor do not work over PCI express, but atomic add
16819+
// does. InstCombine transforms these with 0 to or, so undo that.
16820+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16821+
ConstVal && ConstVal->isNullValue())
16822+
return AtomicExpansionKind::Expand;
1679916823
}
16824+
16825+
// If the allocation could be in remote, fine-grained memory, the rmw
16826+
// instructions may fail. cmpxchg should work, so emit that. On some
16827+
// system configurations, PCIe atomics aren't supported so cmpxchg won't
16828+
// even work, so you're out of luck anyway.
16829+
16830+
// In summary:
16831+
//
16832+
// Cases that may fail:
16833+
// - fine-grained pinned host memory
16834+
// - fine-grained migratable host memory
16835+
// - fine-grained PCIe peer device
16836+
//
16837+
// Cases that should work, but may be treated overly conservatively.
16838+
// - fine-grained host memory on an APU
16839+
// - fine-grained XGMI peer device
16840+
return AtomicExpansionKind::CmpXChg;
1680016841
}
1680116842

1680216843
return atomicSupportedIfLegalIntType(RMW);

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr add
8585
; GFX11-NEXT: v_mov_b32_e32 v1, 0
8686
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
8787
; GFX11-NEXT: s_endpgm
88-
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
88+
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
8989
store i32 %result, ptr addrspace(1) %out, align 4
9090
ret void
9191
}
@@ -350,7 +350,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr
350350
; GFX11-NEXT: buffer_gl0_inv
351351
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
352352
; GFX11-NEXT: s_endpgm
353-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
353+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
354354
store i32 %result, ptr addrspace(1) %out, align 4
355355
ret void
356356
}
@@ -427,7 +427,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %ou
427427
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
428428
; GFX11-NEXT: s_endpgm
429429
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
430-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
430+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
431431
store i32 %result, ptr addrspace(1) %out, align 4
432432
ret void
433433
}
@@ -656,7 +656,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) #1
656656
; GFX11-NEXT: buffer_gl1_inv
657657
; GFX11-NEXT: buffer_gl0_inv
658658
; GFX11-NEXT: s_endpgm
659-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
659+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
660660
ret void
661661
}
662662

@@ -723,7 +723,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %
723723
; GFX11-NEXT: buffer_gl0_inv
724724
; GFX11-NEXT: s_endpgm
725725
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
726-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
726+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
727727
ret void
728728
}
729729

@@ -962,7 +962,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace
962962
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
963963
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
964964
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
965-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
965+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
966966
store i32 %result, ptr addrspace(1) %out.gep, align 4
967967
ret void
968968
}
@@ -1040,7 +1040,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspa
10401040
%id = call i32 @llvm.amdgcn.workitem.id.x()
10411041
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
10421042
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
1043-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
1043+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
10441044
ret void
10451045
}
10461046

@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
11191119
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
11201120
; GFX11-NEXT: flat_store_b32 v[0:1], v2
11211121
; GFX11-NEXT: s_endpgm
1122-
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
1122+
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
11231123
store i32 %result, ptr %out, align 4
11241124
ret void
11251125
}
@@ -1206,7 +1206,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
12061206
; GFX11-NEXT: flat_store_b32 v[0:1], v2
12071207
; GFX11-NEXT: s_endpgm
12081208
%gep = getelementptr i32, ptr %ptr, i32 4
1209-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1209+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
12101210
store i32 %result, ptr %out, align 4
12111211
ret void
12121212
}
@@ -1442,7 +1442,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
14421442
; GFX11-NEXT: buffer_gl1_inv
14431443
; GFX11-NEXT: buffer_gl0_inv
14441444
; GFX11-NEXT: s_endpgm
1445-
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
1445+
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
14461446
ret void
14471447
}
14481448

@@ -1516,7 +1516,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
15161516
; GFX11-NEXT: buffer_gl0_inv
15171517
; GFX11-NEXT: s_endpgm
15181518
%gep = getelementptr i32, ptr %ptr, i32 4
1519-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1519+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
15201520
ret void
15211521
}
15221522

@@ -1780,7 +1780,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
17801780
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
17811781
%out.gep = getelementptr i32, ptr %out, i32 %id
17821782
%gep = getelementptr i32, ptr %gep.tid, i32 5
1783-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1783+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
17841784
store i32 %result, ptr %out.gep, align 4
17851785
ret void
17861786
}
@@ -1875,7 +1875,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
18751875
%id = call i32 @llvm.amdgcn.workitem.id.x()
18761876
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
18771877
%gep = getelementptr i32, ptr %gep.tid, i32 5
1878-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1878+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
18791879
ret void
18801880
}
18811881

@@ -1969,7 +1969,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #1 {
19691969
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
19701970
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
19711971
; GFX11-NEXT: s_endpgm
1972-
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
1972+
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
19731973
store i64 %result, ptr %out, align 4
19741974
ret void
19751975
}
@@ -2071,7 +2071,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
20712071
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
20722072
; GFX11-NEXT: s_endpgm
20732073
%gep = getelementptr i64, ptr %ptr, i32 4
2074-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2074+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
20752075
store i64 %result, ptr %out, align 4
20762076
ret void
20772077
}
@@ -2144,7 +2144,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) #1 {
21442144
; GFX11-NEXT: buffer_gl1_inv
21452145
; GFX11-NEXT: buffer_gl0_inv
21462146
; GFX11-NEXT: s_endpgm
2147-
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2147+
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
21482148
ret void
21492149
}
21502150

@@ -2223,7 +2223,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
22232223
; GFX11-NEXT: buffer_gl0_inv
22242224
; GFX11-NEXT: s_endpgm
22252225
%gep = getelementptr i64, ptr %ptr, i32 4
2226-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2226+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
22272227
ret void
22282228
}
22292229

@@ -2536,7 +2536,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
25362536
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
25372537
%out.gep = getelementptr i64, ptr %out, i32 %id
25382538
%gep = getelementptr i64, ptr %gep.tid, i32 5
2539-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2539+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
25402540
store i64 %result, ptr %out.gep, align 4
25412541
ret void
25422542
}
@@ -2635,7 +2635,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
26352635
%id = call i32 @llvm.amdgcn.workitem.id.x()
26362636
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
26372637
%gep = getelementptr i64, ptr %gep.tid, i32 5
2638-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2638+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
26392639
ret void
26402640
}
26412641

@@ -2724,7 +2724,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
27242724
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
27252725
%idx.0 = add nsw i32 %tid.x, 2
27262726
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
2727-
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4
2727+
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
27282728
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
27292729
store i32 %result, ptr addrspace(1) %out, align 4
27302730
ret void
@@ -2807,7 +2807,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr add
28072807
; GFX11-NEXT: v_mov_b32_e32 v2, 0
28082808
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
28092809
; GFX11-NEXT: s_endpgm
2810-
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
2810+
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
28112811
store i64 %result, ptr addrspace(1) %out, align 4
28122812
ret void
28132813
}
@@ -2953,7 +2953,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) #1 {
29532953
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
29542954
; GFX11-NEXT: buffer_gl0_inv
29552955
; GFX11-NEXT: s_endpgm
2956-
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
2956+
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
29572957
ret void
29582958
}
29592959

@@ -3016,7 +3016,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr
30163016
; GFX11-NEXT: buffer_gl0_inv
30173017
; GFX11-NEXT: s_endpgm
30183018
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
3019-
%result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
3019+
%result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
30203020
ret void
30213021
}
30223022

@@ -3092,7 +3092,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr
30923092
; GFX11-NEXT: buffer_gl0_inv
30933093
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
30943094
; GFX11-NEXT: s_endpgm
3095-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
3095+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
30963096
store i64 %result, ptr addrspace(1) %out, align 4
30973097
ret void
30983098
}
@@ -3174,7 +3174,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %ou
31743174
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
31753175
; GFX11-NEXT: s_endpgm
31763176
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
3177-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3177+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
31783178
store i64 %result, ptr addrspace(1) %out, align 4
31793179
ret void
31803180
}
@@ -3440,7 +3440,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) #1
34403440
; GFX11-NEXT: buffer_gl1_inv
34413441
; GFX11-NEXT: buffer_gl0_inv
34423442
; GFX11-NEXT: s_endpgm
3443-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
3443+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
34443444
ret void
34453445
}
34463446

@@ -3512,7 +3512,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %
35123512
; GFX11-NEXT: buffer_gl0_inv
35133513
; GFX11-NEXT: s_endpgm
35143514
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
3515-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3515+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
35163516
ret void
35173517
}
35183518

@@ -3788,7 +3788,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace
37883788
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
37893789
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
37903790
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
3791-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3791+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
37923792
store i64 %result, ptr addrspace(1) %out.gep, align 4
37933793
ret void
37943794
}
@@ -3871,7 +3871,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspa
38713871
%id = call i32 @llvm.amdgcn.workitem.id.x()
38723872
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
38733873
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
3874-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3874+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
38753875
ret void
38763876
}
38773877

@@ -3966,7 +3966,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out,
39663966
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
39673967
%idx.0 = add nsw i32 %tid.x, 2
39683968
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
3969-
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
3969+
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
39703970
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
39713971
store i64 %result, ptr addrspace(1) %out, align 4
39723972
ret void
@@ -3977,6 +3977,7 @@ attributes #1 = { nounwind }
39773977
attributes #2 = { nounwind memory(none) }
39783978

39793979
!0 = !{i32 5, i32 6}
3980+
!1 = !{}
39803981

39813982
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
39823983
; GCN: {{.*}}

0 commit comments

Comments
 (0)