Commit ab6c0f5

[E2E][CUDA][HIP][level_zero] Make P2P tests portable. (#14095)
This enables the P2P tests on HIP and Level Zero. Now that CUDA also supports the multi-device context, P2P programming should be fully portable across these backends.

Signed-off-by: JackAKirk <[email protected]>
1 parent 7928588 commit ab6c0f5
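For context, the portable pattern the three tests now share looks roughly like the sketch below. This is a minimal illustration, not code from the commit: it assumes at least two GPUs from a single backend, and it reuses the sycl_ext_oneapi_peer_access entry point (ext_oneapi_enable_peer_access) that appears in the diffs that follow.

#include <cassert>
#include <sycl/detail/core.hpp>
#include <sycl/usm.hpp>

using namespace sycl;

int main() {
  // One GPU platform, all of its devices: the same discovery code now
  // works on the CUDA, HIP, and Level Zero backends.
  auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);
  if (Devs.size() < 2)
    return 0; // P2P needs at least two devices; nothing to test.

  queue Q0{Devs[0]}, Q1{Devs[1]};

  // Allow kernels running on Devs[1] to dereference Devs[0]'s allocations.
  Devs[1].ext_oneapi_enable_peer_access(Devs[0]);

  int *Src = malloc_device<int>(1, Q0); // lives on Devs[0]
  Q0.single_task([=] { *Src = 42; }).wait();

  // A kernel on Devs[1] reads Devs[0]'s memory directly via peer access.
  int *Dst = malloc_shared<int>(1, Q1);
  Q1.single_task([=] { *Dst = *Src; }).wait();

  assert(*Dst == 42);
  free(Src, Q0);
  free(Dst, Q1);
  return 0;
}

Without the ext_oneapi_enable_peer_access call, dereferencing Src from Q1's kernel may fault on backends where cross-device mappings are not implicit.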

3 files changed: +18 −45 lines changed

sycl/test-e2e/USM/P2P/p2p_access.cpp

Lines changed: 4 additions & 13 deletions
@@ -1,6 +1,6 @@
-// REQUIRES: cuda
-// RUN: %{build} -o %t.out
-// RUN: %if cuda %{ %{run} %t.out %}
+// REQUIRES: cuda || hip || level_zero
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
 
 #include <cassert>
 #include <sycl/detail/core.hpp>
@@ -10,17 +10,8 @@ using namespace sycl;
 
 int main() {
 
-  // Note that this code will largely be removed: it is temporary due to the
-  // temporary lack of multiple devices per sycl context in the Nvidia backend.
-  // A portable implementation, using a single gpu platform, should be possible
-  // once the Nvidia context issues are resolved.
-  ////////////////////////////////////////////////////////////////////////
-  std::vector<sycl::device> Devs;
-  for (const auto &plt : sycl::platform::get_platforms()) {
+  auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);
 
-    if (plt.get_backend() == sycl::backend::ext_oneapi_cuda)
-      Devs.push_back(plt.get_devices()[0]);
-  }
   if (Devs.size() < 2) {
     std::cout << "Cannot test P2P capabilities, at least two devices are "
                  "required, exiting."

sycl/test-e2e/USM/P2P/p2p_atomics.cpp

Lines changed: 10 additions & 19 deletions
@@ -1,6 +1,6 @@
-// REQUIRES: cuda
-// RUN: %if any-device-is-cuda %{ %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_61 -o %t.out %}
-// RUN: %if cuda %{ %{run} %t.out %}
+// REQUIRES: cuda || hip || level_zero
+// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_61 %} -o %t.out
+// RUN: %{run} %t.out
 
 #include <cassert>
 #include <numeric>
@@ -18,17 +18,8 @@ constexpr size_t N = 512;
 
 int main() {
 
-  // Note that this code will largely be removed: it is temporary due to the
-  // temporary lack of multiple devices per sycl context in the Nvidia backend.
-  // A portable implementation, using a single gpu platform, should be possible
-  // once the Nvidia context issues are resolved.
-  ////////////////////////////////////////////////////////////////////////
-  std::vector<sycl::device> Devs;
-  for (const auto &plt : sycl::platform::get_platforms()) {
+  auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);
 
-    if (plt.get_backend() == sycl::backend::ext_oneapi_cuda)
-      Devs.push_back(plt.get_devices()[0]);
-  }
   if (Devs.size() < 2) {
     std::cout << "Cannot test P2P capabilities, at least two devices are "
                  "required, exiting."
@@ -51,26 +42,26 @@ int main() {
   // Enables Devs[1] to access Devs[0] memory.
   Devs[1].ext_oneapi_enable_peer_access(Devs[0]);
 
-  std::vector<double> input(N);
+  std::vector<int> input(N);
   std::iota(input.begin(), input.end(), 0);
 
-  double h_sum = 0.;
+  int h_sum = 0.;
   for (const auto &value : input) {
     h_sum += value;
   }
 
-  double *d_sum = malloc_shared<double>(1, Queues[0]);
-  double *d_in = malloc_device<double>(N, Queues[0]);
+  int *d_sum = malloc_shared<int>(1, Queues[0]);
+  int *d_in = malloc_device<int>(N, Queues[0]);
 
-  Queues[0].memcpy(d_in, &input[0], N * sizeof(double));
+  Queues[0].memcpy(d_in, &input[0], N * sizeof(int));
   Queues[0].wait();
 
   range global_range{N};
 
   *d_sum = 0.;
   Queues[1].submit([&](handler &h) {
     h.parallel_for<class peer_atomic>(global_range, [=](id<1> i) {
-      sycl::atomic_ref<double, sycl::memory_order::relaxed,
+      sycl::atomic_ref<int, sycl::memory_order::relaxed,
                        sycl::memory_scope::system,
                        access::address_space::global_space>(*d_sum) += d_in[i];
     });
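This test exercises the strongest P2P guarantee: system-scope atomics performed by one device on another device's memory. A condensed sketch of that pattern follows; the function name is hypothetical, and it assumes peer access has already been enabled as in the diff above.

// Sum N ints that live on Q0's device from a kernel on Q1's device.
// Assumes Q1's device has called ext_oneapi_enable_peer_access for
// Q0's device beforehand.
int peer_atomic_sum(queue &Q0, queue &Q1, const int *d_in, size_t N) {
  int *d_sum = malloc_shared<int>(1, Q0);
  *d_sum = 0;
  Q1.parallel_for(range<1>{N}, [=](id<1> i) {
     // memory_scope::system makes each addition visible across devices.
     atomic_ref<int, memory_order::relaxed, memory_scope::system,
                access::address_space::global_space>(*d_sum) += d_in[i];
   }).wait();
  int result = *d_sum;
  free(d_sum, Q0);
  return result;
}

The switch from double to int in the diff keeps the atomic portable: integer system-scope atomics are more widely supported across the newly enabled backends than floating-point ones.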

sycl/test-e2e/USM/P2P/p2p_copy.cpp

Lines changed: 4 additions & 13 deletions
@@ -1,6 +1,6 @@
-// REQUIRES: cuda
-// RUN: %{build} -o %t.out
-// RUN: %if cuda %{ %{run} %t.out %}
+// REQUIRES: cuda || hip || level_zero
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
 
 #include <cassert>
 #include <numeric>
@@ -15,17 +15,8 @@ constexpr int N = 100;
 
 int main() {
 
-  // Note that this code will largely be removed: it is temporary due to the
-  // temporary lack of multiple devices per sycl context in the Nvidia backend.
-  // A portable implementation, using a single gpu platform, should be possible
-  // once the Nvidia context issues are resolved.
-  ////////////////////////////////////////////////////////////////////////
-  std::vector<sycl::device> Devs;
-  for (const auto &plt : sycl::platform::get_platforms()) {
+  auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);
 
-    if (plt.get_backend() == sycl::backend::ext_oneapi_cuda)
-      Devs.push_back(plt.get_devices()[0]);
-  }
   if (Devs.size() < 2) {
     std::cout << "Cannot test P2P capabilities, at least two devices are "
                  "required, exiting."
