Skip to content

[SYCL][HIP] Add basic HIP atomics #8003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,17 +588,19 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
if (LangOpts.HIP) {
Builder.defineMacro("__HIP__");
Builder.defineMacro("__HIPCC__");
Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
if (LangOpts.CUDAIsDevice)
Builder.defineMacro("__HIP_DEVICE_COMPILE__");
if (LangOpts.GPUDefaultStream ==
LangOptions::GPUDefaultStreamKind::PerThread)
Builder.defineMacro("HIP_API_PER_THREAD_DEFAULT_STREAM");
}
if (LangOpts.HIP || (LangOpts.OpenCL && TI.getTriple().isAMDGPU())) {
Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
}
}

/// Initialize the predefined C++ language feature test macros defined in
Expand Down Expand Up @@ -1299,7 +1301,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,

const llvm::Triple &DeviceTriple = TI.getTriple();
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
if (DeviceTriple.isNVPTX() ||
if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
(DeviceTriple.isSPIR() &&
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
Expand Down
9 changes: 9 additions & 0 deletions clang/test/Preprocessor/opencl-macro-target-specific.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// This test checks for the presence of target-specific macros for OpenCL.
// The __HIP_MEMORY_SCOPE_* macros are expected on AMDGPU OpenCL so libclc
// can use the HIP atomic builtins' scope arguments.
//
// RUN: %clang_cc1 %s -E -dM -triple amdgcn-amdhsa-amdhsa \
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_AGENT
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SINGLETHREAD
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_SYSTEM
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WAVEFRONT
// CHECK-AMDGPU: #define __HIP_MEMORY_SCOPE_WORKGROUP
21 changes: 21 additions & 0 deletions clang/test/Preprocessor/sycl-macro-target-specific.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// This test checks for the presence of target specific macros for SYCL
//
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-NVPTX %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
Expand All @@ -8,9 +10,26 @@
// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-NVPTX-NEG %s
// CHECK-NVPTX: #define __NVPTX__
// CHECK-NVPTX-NEG-NOT: #define __NVPTX__

// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_x86_64-unknown-unknown -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-AMDGPU-NEG %s
// CHECK-AMDGPU: #define __AMDGPU__
// CHECK-AMDGPU-NEG-NOT: #define __AMDGPU__

// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown -E -dM \
Expand All @@ -21,6 +40,8 @@
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS

Expand Down
7 changes: 5 additions & 2 deletions libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,11 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Disables NVVM reflection to defer to after linking
set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx72"
"SHELL:-march=sm_86" "SHELL:-mllvm --nvvm-reflect-enable=false")
else()
set ( flags )
elseif( ${ARCH} STREQUAL amdgcn )
# AMDGCN needs generic address space for atomics
set( flags "SHELL:-Xclang -cl-std=CL2.0")
else()
set ( flags )
endif()
set( arch_suffix "${t}" )
else()
Expand Down
13 changes: 11 additions & 2 deletions libclc/amdgcn-amdhsa/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,17 @@ workitem/get_global_offset.ll
group/group_ballot.cl
group/collectives.cl
group/collectives_helpers.ll
atomic/loadstore_helpers.ll
cl_khr_int64_extended_atomics/minmax_helpers.ll
atomic/atomic_and.cl
atomic/atomic_add.cl
atomic/atomic_cmpxchg.cl
atomic/atomic_xchg.cl
atomic/atomic_load.cl
atomic/atomic_or.cl
atomic/atomic_xor.cl
atomic/atomic_min.cl
atomic/atomic_max.cl
atomic/atomic_sub.cl
atomic/atomic_store.cl
synchronization/barrier.cl
math/acos.cl
math/acosh.cl
Expand Down
57 changes: 57 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "atomic_helpers.h"
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// Integer (32/64-bit, signed/unsigned) and fp32 atomic add. The AMDGPU_ATOMIC
// macro is presumably supplied by atomic_helpers.h (not visible here) — it
// emits the mangled __spirv_* entry points that forward to the given HIP
// builtin. The single-character argument is the Itanium mangling code for the
// operand type (i = int, j = unsigned int, l = long, m = unsigned long,
// f = float).
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)

// Emulates 64-bit floating-point atomic add for address space AS with an
// AtomicLoad + AtomicCompareExchange retry loop over the value's 64-bit
// integer bit pattern (no native fp64 atomic-add instruction is used here).
// The two leading _CLC_DEF lines are declarations of the mangled SPIR-V
// load/compare-exchange helpers this loop calls; AS_MANGLED is the mangled
// address-space qualifier and SUB1/SUB2 splice the Itanium substitution
// indices that differ per address space into those mangled names.
// NOTE(review): the double<->long conversions use pointer type punning
// (*(double *)&x); presumably fine under the OpenCL C build rules used for
// libclc — confirm, since as_double()/as_long() would be the OpenCL idiom.
#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2) \
_CLC_DEF long \
_Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
volatile AS long *, enum Scope, enum MemorySemanticsMask, \
enum MemorySemanticsMask, long desired, long expected); \
_CLC_DEF long \
_Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
const volatile AS long *, enum Scope, enum MemorySemanticsMask); \
_CLC_DEF double \
_Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \
volatile AS double *p, enum Scope scope, \
enum MemorySemanticsMask semantics, double val) { \
int atomic_scope = 0, memory_order = 0; \
volatile AS long *int_pointer = (volatile AS long *)p; \
long old_int_val = 0, new_int_val = 0; \
do { \
old_int_val = \
_Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
int_pointer, scope, semantics); \
double new_double_val = *(double *)&old_int_val + val; \
new_int_val = *(long *)&new_double_val; \
} while ( \
_Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
int_pointer, scope, semantics, semantics, new_int_val, \
old_int_val) != old_int_val); \
\
return *(double *)&old_int_val; \
}

// The fp64 CAS emulation needs 64-bit integer atomics, so gate it on the
// extension. Instantiated for global (U3AS1), local (U3AS3), and the generic
// address space (empty qualifier, different substitution indices).
#ifdef cl_khr_int64_base_atomics
AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5)
AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5)
AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4)
#endif

// Undefine helper macros (AMDGPU_ATOMIC* presumably come from
// atomic_helpers.h) so sibling atomic_*.cl files can redefine them.
#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ATOMIC_FP64_ADD_IMPL
#undef GET_ATOMIC_SCOPE_AND_ORDER
20 changes: 20 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "atomic_helpers.h"
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// Integer atomic AND for 32/64-bit signed and unsigned operands. AMDGPU_ATOMIC
// is presumably supplied by atomic_helpers.h (not visible here); the
// single-character argument is the Itanium mangling code for the operand type
// (i = int, j = unsigned int, l = long, m = unsigned long).
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, int, i, __hip_atomic_fetch_and)
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned int, j, __hip_atomic_fetch_and)
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, long, l, __hip_atomic_fetch_and)
AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned long, m, __hip_atomic_fetch_and)

// Undefine helper macros so sibling atomic_*.cl files can redefine them.
#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef GET_ATOMIC_SCOPE_AND_ORDER
51 changes: 51 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "atomic_helpers.h"
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// Implements __spirv_AtomicCompareExchange for TYPE in address space AS.
// AS_MANGLED/SUB1/SUB2 splice the address-space qualifier and Itanium
// substitution indices into the mangled entry-point name.
//
// SPIR-V requires returning the value the atomic operation actually observed
// at *p. __hip_atomic_compare_exchange_strong writes that observed value back
// into `expected` on failure, and on success `expected` already equals it —
// so `expected` always holds the observed value after the call. The previous
// implementation instead returned separate plain reads of *p (before the
// atomic on success, after it on failure), both of which could race with
// concurrent updates and return a value the atomic never saw.
#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1, \
SUB2) \
_CLC_DEF TYPE \
_Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_##TYPE_MANGLED##TYPE_MANGLED( \
volatile AS TYPE *p, enum Scope scope, \
enum MemorySemanticsMask success_semantics, \
enum MemorySemanticsMask failure_semantics, TYPE desired, \
TYPE expected) { \
int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \
memory_order_success) \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \
memory_order_failure) \
__hip_atomic_compare_exchange_strong( \
p, &expected, desired, memory_order_success, memory_order_failure, \
atomic_scope); \
\
return expected; \
}

// Instantiate the compare-exchange implementation for the global (U3AS1),
// local (U3AS3), and generic (empty qualifier) address spaces; the trailing
// numbers are the mangled-name substitution indices for each variant.
#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, 5) \
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, 5) \
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4)

// Instantiations for 32/64-bit integers and fp32; the single-character
// argument is the Itanium mangling code for the operand type.
AMDGPU_ATOMIC_CMPXCHG(int, i)
AMDGPU_ATOMIC_CMPXCHG(unsigned int, j)
AMDGPU_ATOMIC_CMPXCHG(long, l)
AMDGPU_ATOMIC_CMPXCHG(unsigned long, m)
AMDGPU_ATOMIC_CMPXCHG(float, f)

// TODO implement for fp64

// Undefine helper macros so sibling atomic_*.cl files can redefine them.
// Note: the #undef names must match the macros defined above — the earlier
// "AMDGPU_ATOMIC_CPMXCHG"/"AMDGPU_ATOMIC_CPMXCHG_IMPL" spellings were typos
// that undefined nothing and leaked the CMPXCHG macros.
#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ATOMIC_CMPXCHG
#undef AMDGPU_ATOMIC_CMPXCHG_IMPL
#undef GET_ATOMIC_SCOPE_AND_ORDER
Loading