Skip to content

Commit c1ef658

Browse files
authored
[HIP][libclc] Fix race in cmpxchg and change xor to use CAS instead of atomic builtin (#11876)
1. Fix a race condition in cmpxchg 2. Change `atomic_xor` to use a CAS loop instead of atomic builtin. This is needed to merge this in UR oneapi-src/unified-runtime#936 so that perf regression can be fixed. The long term fix is to use a compiler flag to choose between builtin and safe CAS implementation, but talks upstream may take some time to figure out the details. See llvm/llvm-project#69229
1 parent 0b6b3b8 commit c1ef658

File tree

3 files changed

+41
-11
lines changed

3 files changed

+41
-11
lines changed

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,17 @@
2323
memory_order_success) \
2424
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \
2525
memory_order_failure) \
26-
TYPE original_val = *p; \
27-
bool success = __hip_atomic_compare_exchange_strong( \
28-
p, &expected, desired, memory_order_success, memory_order_failure, \
29-
atomic_scope); \
30-
\
31-
return success ? original_val : *p; \
26+
__hip_atomic_compare_exchange_strong(p, &expected, desired, \
27+
memory_order_success, \
28+
memory_order_failure, atomic_scope); \
29+
/* If cmpxchg \
30+
* succeeds: \
31+
- `expected` is unchanged, holding the old val that was at `p` \
32+
- `p` is changed to hold `desired` \
33+
* fails: \
34+
- `expected` is changed to hold the current val at `p` \
35+
- `p` is unchanged*/ \
36+
return expected; \
3237
}
3338

3439
#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \
@@ -37,7 +42,7 @@
3742
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4)
3843

3944
AMDGPU_ATOMIC_CMPXCHG(int, i)
40-
AMDGPU_ATOMIC_CMPXCHG(unsigned int, j)
45+
AMDGPU_ATOMIC_CMPXCHG(unsigned, j)
4146
AMDGPU_ATOMIC_CMPXCHG(long, l)
4247
AMDGPU_ATOMIC_CMPXCHG(unsigned long, m)
4348
AMDGPU_ATOMIC_CMPXCHG(float, f)

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,25 @@
7171
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \
7272
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \
7373
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN)
74+
75+
#define AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
76+
SUB1, OP) \
77+
_CLC_DEF TYPE \
78+
FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
79+
volatile AS TYPE *p, enum Scope scope, \
80+
enum MemorySemanticsMask semantics, TYPE val) { \
81+
int atomic_scope = 0, memory_order = 0; \
82+
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
83+
TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \
84+
TYPE newval = 0; \
85+
do { \
86+
newval = oldval OP val; \
87+
} while (!__hip_atomic_compare_exchange_strong( \
88+
p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \
89+
return oldval; \
90+
}
91+
92+
#define AMDGPU_CAS_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, OP) \
93+
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, OP) \
94+
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, OP) \
95+
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, OP)

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010
#include <spirv/spirv.h>
1111
#include <spirv/spirv_types.h>
1212

13-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, int, i, __hip_atomic_fetch_xor)
14-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __hip_atomic_fetch_xor)
15-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, long, l, __hip_atomic_fetch_xor)
16-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __hip_atomic_fetch_xor)
13+
#define __CLC_XOR ^
1714

15+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, int, i, __CLC_XOR)
16+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __CLC_XOR)
17+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, long, l, __CLC_XOR)
18+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __CLC_XOR)
19+
20+
#undef __CLC_XOR
1821
#undef AMDGPU_ATOMIC
1922
#undef AMDGPU_ATOMIC_IMPL
2023
#undef AMDGPU_ARCH_GEQ

0 commit comments

Comments
 (0)