Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Speed up atomic_ref tests #879

Merged
merged 3 commits into from
Mar 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 1 addition & 144 deletions SYCL/AtomicRef/add.cpp
Original file line number Diff line number Diff line change
@@ -1,150 +1,7 @@
// Each combination of 64/32 bit atomic, relaxed/acquire/release/acq_rel
// semantic order and sub_group/work_group/device/system scope is tested
// separately. This is controlled by macros, defined by RUN commands. Defaults
// (no macro for a group) are: 32 bit, relaxed and device.

// See https://github.com/intel/llvm-test-suite/issues/867 for detailed status
// UNSUPPORTED: hip

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM -DATOMIC64
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
Expand Down
65 changes: 20 additions & 45 deletions SYCL/AtomicRef/add.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,18 @@ void add_test(queue q, size_t N) {
(space == access::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
constexpr bool do_ext_tests = space != access::address_space::generic_space;
if constexpr (do_local_tests) {
#ifdef RUN_DEPRECATED
if constexpr (do_ext_tests) {
add_fetch_local_test<::sycl::ext::oneapi::atomic_ref, space, T,
Difference, order, scope>(q, N);
}
#else
add_fetch_local_test<::sycl::atomic_ref, space, T, Difference, order,
scope>(q, N);
#endif
}
if constexpr (do_global_tests) {
#ifdef RUN_DEPRECATED
if constexpr (do_ext_tests) {
add_fetch_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
order, scope>(q, N);
Expand All @@ -263,6 +267,7 @@ void add_test(queue q, size_t N) {
order, scope>(q, N);
}
}
#else
add_fetch_test<::sycl::atomic_ref, space, T, Difference, order, scope>(q,
N);
add_plus_equal_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
Expand All @@ -273,6 +278,7 @@ void add_test(queue q, size_t N) {
add_post_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
q, N);
}
#endif
}
}

Expand All @@ -281,74 +287,46 @@ template <access::address_space space, typename T, typename Difference = T,
void add_test_scopes(queue q, size_t N) {
std::vector<memory_scope> scopes =
q.get_device().get_info<info::device::atomic_memory_scope_capabilities>();
#if defined(SYSTEM)
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) !=
scopes.end()) {
std::cout << "Skipping test\n";
return;
add_test<space, T, Difference, order, memory_scope::system>(q, N);
}
add_test<space, T, Difference, order, memory_scope::system>(q, N);
#elif defined(WORK_GROUP)
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
if (std::find(scopes.begin(), scopes.end(), memory_scope::work_group) !=
scopes.end()) {
std::cout << "Skipping test\n";
return;
add_test<space, T, Difference, order, memory_scope::work_group>(q, N);
}
add_test<space, T, Difference, order, memory_scope::work_group>(q, N);
#elif defined(SUB_GROUP)
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
if (std::find(scopes.begin(), scopes.end(), memory_scope::sub_group) !=
scopes.end()) {
std::cout << "Skipping test\n";
return;
add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
}
add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
#else
add_test<space, T, Difference, order, memory_scope::device>(q, N);
#endif
}

template <access::address_space space, typename T, typename Difference = T>
void add_test_orders_scopes(queue q, size_t N) {
std::vector<memory_order> orders =
q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
#if defined(ACQ_REL)
if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) ==
if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) !=
orders.end()) {
std::cout << "Skipping test\n";
return;
add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
}
add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
#elif defined(ACQUIRE)
if (std::find(orders.begin(), orders.end(), memory_order::acquire) ==
if (std::find(orders.begin(), orders.end(), memory_order::acquire) !=
orders.end()) {
std::cout << "Skipping test\n";
return;
add_test_scopes<space, T, Difference, memory_order::acquire>(q, N);
}
add_test_scopes<space, T, Difference, memory_order::acquire>(q, N);
#elif defined(RELEASE)
if (std::find(orders.begin(), orders.end(), memory_order::release) ==
if (std::find(orders.begin(), orders.end(), memory_order::release) !=
orders.end()) {
std::cout << "Skipping test\n";
return;
add_test_scopes<space, T, Difference, memory_order::release>(q, N);
}
add_test_scopes<space, T, Difference, memory_order::release>(q, N);
#else
add_test_scopes<space, T, Difference, memory_order::relaxed>(q, N);
#endif
}

template <access::address_space space> void add_test_all() {
queue q;

constexpr int N = 32;
#ifdef ATOMIC64
if (!q.get_device().has(aspect::atomic64)) {
std::cout << "Skipping test\n";
return;
}

#ifdef FULL_ATOMIC64_COVERAGE
add_test_orders_scopes<space, double>(q, N);
#ifndef FP_TESTS_ONLY
if constexpr (sizeof(long) == 8) {
add_test_orders_scopes<space, long>(q, N);
add_test_orders_scopes<space, unsigned long>(q, N);
Expand All @@ -361,9 +339,8 @@ template <access::address_space space> void add_test_all() {
add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
}
#endif
#else
add_test_orders_scopes<space, float>(q, N);
#ifndef FP_TESTS_ONLY
#ifdef FULL_ATOMIC32_COVERAGE
add_test_orders_scopes<space, int>(q, N);
add_test_orders_scopes<space, unsigned int>(q, N);
if constexpr (sizeof(long) == 4) {
Expand All @@ -374,7 +351,5 @@ template <access::address_space space> void add_test_all() {
add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
}
#endif
#endif

std::cout << "Test passed." << std::endl;
}
Loading