Skip to content

Commit c3c5e92

Browse files
[SYCL][HIP] Add basic HIP atomics (#8003)
Adds support for basic atomic operations for the HIP AMD backend. Co-authored-by: Steffen Larsen <[email protected]>
1 parent 24e36e8 commit c3c5e92

22 files changed

+568
-309
lines changed

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -588,17 +588,19 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
588588
if (LangOpts.HIP) {
589589
Builder.defineMacro("__HIP__");
590590
Builder.defineMacro("__HIPCC__");
591-
Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
592-
Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
593-
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
594-
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
595-
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
596591
if (LangOpts.CUDAIsDevice)
597592
Builder.defineMacro("__HIP_DEVICE_COMPILE__");
598593
if (LangOpts.GPUDefaultStream ==
599594
LangOptions::GPUDefaultStreamKind::PerThread)
600595
Builder.defineMacro("HIP_API_PER_THREAD_DEFAULT_STREAM");
601596
}
597+
if (LangOpts.HIP || (LangOpts.OpenCL && TI.getTriple().isAMDGPU())) {
598+
Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
599+
Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
600+
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
601+
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
602+
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
603+
}
602604
}
603605

604606
/// Initialize the predefined C++ language feature test macros defined in
@@ -1299,7 +1301,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
12991301

13001302
const llvm::Triple &DeviceTriple = TI.getTriple();
13011303
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
1302-
if (DeviceTriple.isNVPTX() ||
1304+
if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
13031305
(DeviceTriple.isSPIR() &&
13041306
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
13051307
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// This test checks for the presence of target specific macros for OpenCL
2+
//
3+
// RUN: %clang_cc1 %s -E -dM -triple amdgcn-amdhsa-amdhsa \
4+
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
5+
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_AGENT
6+
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SINGLETHREAD
7+
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SYSTEM
8+
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WAVEFRONT
9+
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WORKGROUP

clang/test/Preprocessor/sycl-macro-target-specific.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// This test checks for the presence of target specific macros for SYCL
2+
//
13
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
24
// RUN: | FileCheck --check-prefix=CHECK-NVPTX %s
35
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
@@ -8,9 +10,26 @@
810
// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
911
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
1012
// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
13+
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
14+
// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
1115
// CHECK-NVPTX: #define __NVPTX__
1216
// CHECK-NVPTX-NEG-NOT: #define __NVPTX__
1317

18+
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
19+
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
20+
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
21+
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
22+
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
23+
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
24+
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
25+
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
26+
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_x86_64-unknown-unknown -E -dM \
27+
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
28+
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
29+
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
30+
// CHECK-AMDGPU: #define __AMDGPU__
31+
// CHECK-AMDGPU-NEG-NOT: #define __AMDGPU__
32+
1433
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
1534
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
1635
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
@@ -21,6 +40,8 @@
2140
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
2241
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
2342
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
43+
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
44+
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
2445
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
2546
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS
2647

libclc/CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,11 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
336336
# Disables NVVM reflection to defer to after linking
337337
set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx72"
338338
"SHELL:-march=sm_86" "SHELL:-mllvm --nvvm-reflect-enable=false")
339-
else()
340-
set ( flags )
339+
elseif( ${ARCH} STREQUAL amdgcn )
340+
# AMDGCN needs generic address space for atomics
341+
set( flags "SHELL:-Xclang -cl-std=CL2.0")
342+
else()
343+
set ( flags )
341344
endif()
342345
set( arch_suffix "${t}" )
343346
else()

libclc/amdgcn-amdhsa/libspirv/SOURCES

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,17 @@ workitem/get_global_offset.ll
33
group/group_ballot.cl
44
group/collectives.cl
55
group/collectives_helpers.ll
6-
atomic/loadstore_helpers.ll
7-
cl_khr_int64_extended_atomics/minmax_helpers.ll
6+
atomic/atomic_and.cl
7+
atomic/atomic_add.cl
8+
atomic/atomic_cmpxchg.cl
9+
atomic/atomic_xchg.cl
10+
atomic/atomic_load.cl
11+
atomic/atomic_or.cl
12+
atomic/atomic_xor.cl
13+
atomic/atomic_min.cl
14+
atomic/atomic_max.cl
15+
atomic/atomic_sub.cl
16+
atomic/atomic_store.cl
817
synchronization/barrier.cl
918
math/acos.cl
1019
math/acosh.cl
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "atomic_helpers.h"
10+
#include <spirv/spirv.h>
11+
#include <spirv/spirv_types.h>
12+
13+
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add)
14+
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add)
15+
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
16+
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
17+
AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)
18+
19+
// Defines the double-precision __spirv_AtomicFAddEXT overload for one address
// space, emulated with a compare-and-swap loop over the value's 64-bit integer
// bit pattern.
//   AS / AS_MANGLED - address space qualifier and the matching fragment of the
//                     mangled symbol name (either may be empty).
//   SUB1 / SUB2     - pasted into the mangled names as `NS<SUB1>_` and
//                     `S<SUB2>_`.
// The 64-bit integer __spirv_AtomicCompareExchange and __spirv_AtomicLoad
// overloads are only declared here; their definitions must be provided
// elsewhere. The function returns the value that was loaded in the loop
// iteration whose compare-exchange reported that same value back.
#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2)                \
  _CLC_DEF long                                                                \
      _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
          volatile AS long *, enum Scope, enum MemorySemanticsMask,            \
          enum MemorySemanticsMask, long desired, long expected);              \
  _CLC_DEF long                                                                \
      _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
          const volatile AS long *, enum Scope, enum MemorySemanticsMask);     \
  _CLC_DEF double                                                              \
      _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \
          volatile AS double *p, enum Scope scope,                             \
          enum MemorySemanticsMask semantics, double val) {                    \
    /* Operate on the bits of the double through a same-sized integer view. */ \
    volatile AS long *int_pointer = (volatile AS long *)p;                     \
    long old_int_val = 0, new_int_val = 0;                                     \
    do {                                                                       \
      old_int_val =                                                            \
          _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
              int_pointer, scope, semantics);                                  \
      double new_double_val = *(double *)&old_int_val + val;                   \
      new_int_val = *(long *)&new_double_val;                                  \
      /* Retry until the exchange reports the value we based the sum on. */    \
    } while (                                                                  \
        _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
            int_pointer, scope, semantics, semantics, new_int_val,             \
            old_int_val) != old_int_val);                                      \
                                                                               \
    return *(double *)&old_int_val;                                            \
  }
47+
48+
#ifdef cl_khr_int64_base_atomics
49+
AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5)
50+
AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5)
51+
AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4)
52+
#endif
53+
54+
#undef AMDGPU_ATOMIC
55+
#undef AMDGPU_ATOMIC_IMPL
56+
#undef AMDGPU_ATOMIC_FP64_ADD_IMPL
57+
#undef GET_ATOMIC_SCOPE_AND_ORDER
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "atomic_helpers.h"
10+
#include <spirv/spirv.h>
11+
#include <spirv/spirv_types.h>
12+
13+
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, int, i, __hip_atomic_fetch_and)
14+
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned int, j, __hip_atomic_fetch_and)
15+
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, long, l, __hip_atomic_fetch_and)
16+
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned long, m, __hip_atomic_fetch_and)
17+
18+
#undef AMDGPU_ATOMIC
19+
#undef AMDGPU_ATOMIC_IMPL
20+
#undef GET_ATOMIC_SCOPE_AND_ORDER
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "atomic_helpers.h"
10+
#include <spirv/spirv.h>
11+
#include <spirv/spirv_types.h>
12+
13+
// Defines the __spirv_AtomicCompareExchange overload for one value type and
// one address space.
//   TYPE / TYPE_MANGLED - value type and its fragment of the mangled name.
//   AS / AS_MANGLED     - address space qualifier and its fragment of the
//                         mangled name (either may be empty).
//   SUB1 / SUB2         - pasted into the mangled name as `NS<SUB1>_` and
//                         `S<SUB2>_`.
// The generated function returns the value the compare-exchange observed at
// `p`. That value is taken from `expected` after the builtin call, never from
// a separate plain read of `*p`, because a plain read is not atomic with
// respect to the exchange and may see another thread's write.
#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1,   \
                                   SUB2)                                       \
  _CLC_DEF TYPE                                                                \
      _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_##TYPE_MANGLED##TYPE_MANGLED( \
          volatile AS TYPE *p, enum Scope scope,                               \
          enum MemorySemanticsMask success_semantics,                          \
          enum MemorySemanticsMask failure_semantics, TYPE desired,            \
          TYPE expected) {                                                     \
    int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0;  \
    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics,         \
                               memory_order_success)                           \
    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics,         \
                               memory_order_failure)                           \
    __hip_atomic_compare_exchange_strong(                                      \
        p, &expected, desired, memory_order_success, memory_order_failure,     \
        atomic_scope);                                                         \
    /* On success `expected` is untouched and equals the old contents of      \
     * `*p`; on failure the builtin stores the current contents of `*p` into  \
     * `expected`. Either way it holds the observed value. */                  \
    return expected;                                                           \
  }
33+
34+
// Instantiates AMDGPU_ATOMIC_CMPXCHG_IMPL three times for TYPE: once with the
// `global` qualifier, once with `local`, and once with no address space
// qualifier. The trailing numeric arguments are the SUB1/SUB2 values pasted
// into each mangled symbol name; they differ for the unqualified variant
// (presumably because the mangled name has one fewer substitution candidate
// without an address space component - TODO confirm).
#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \
35+
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, 5) \
36+
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, 5) \
37+
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4)
38+
39+
AMDGPU_ATOMIC_CMPXCHG(int, i)
40+
AMDGPU_ATOMIC_CMPXCHG(unsigned int, j)
41+
AMDGPU_ATOMIC_CMPXCHG(long, l)
42+
AMDGPU_ATOMIC_CMPXCHG(unsigned long, m)
43+
AMDGPU_ATOMIC_CMPXCHG(float, f)
44+
45+
// TODO implement for fp64
46+
47+
#undef AMDGPU_ATOMIC
48+
#undef AMDGPU_ATOMIC_IMPL
49+
// Drop this file's compare-exchange helper macros so they do not leak past
// the end of the translation unit's macro definitions.
#undef AMDGPU_ATOMIC_CMPXCHG
#undef AMDGPU_ATOMIC_CMPXCHG_IMPL
51+
#undef GET_ATOMIC_SCOPE_AND_ORDER

0 commit comments

Comments
 (0)