Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCl][L0] Buffer for multi-device context is using device allocation now #976

Merged
merged 5 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions SYCL/Basic/buffer/buffer_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,17 @@ int main() {
buffer<::cl_int, 1> Buffer(Size);
Queue.submit([&](handler &cgh) {
accessor Accessor{Buffer, cgh, read_write};
if (NumOfDevices > 1)
// Currently the Level Zero plugin uses host allocations for multi-device
// contexts because such allocations are accessible by all devices.
std::cerr << "Multi GPU should use zeMemAllocHost\n";
else if (D.get_info<info::device::host_unified_memory>())
if (D.get_info<info::device::host_unified_memory>())
std::cerr << "Integrated GPU should use zeMemAllocHost\n";
else
std::cerr << "Discrete GPU should use zeMemAllocDevice\n";
cgh.parallel_for<class CreateBuffer>(range<1>(Size), [=](id<1> ID) {});
cgh.parallel_for<class CreateBuffer>(range<1>(Size),
[=](id<1> ID) { Accessor[ID] = 0; });
});
Queue.wait();

return 0;
}

// CHECK: {{Integrated|Multi|Discrete}} GPU should use [[API:zeMemAllocHost|zeMemAllocHost|zeMemAllocDevice]]
// CHECK: {{Integrated|Discrete}} GPU should use [[API:zeMemAllocHost|zeMemAllocDevice]]
// CHECK: ZE ---> [[API]](
47 changes: 47 additions & 0 deletions SYCL/Basic/buffer/buffer_migrate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// REQUIRES: gpu
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=2 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=3 %GPU_RUN_PLACEHOLDER %t.out
//
// Test for buffer use in a context with multiple devices (all found
// root-devices)
//

#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {

int Data = 0;
int Result = 0;
buffer<int, 1> Buffer(&Data, range<1>(1));

const auto &Devices =
platform(gpu_selector{}).get_devices(info::device_type::gpu);
std::cout << Devices.size() << " devices found" << std::endl;
context C(Devices);

int Index = 0;
for (auto D : Devices) {
std::cout << "Using on device " << Index << ": "
<< D.get_info<info::device::name>() << std::endl;
Result |= (1 << Index);

queue Q(C, D);
Q.submit([&](handler &cgh) {
accessor Accessor{Buffer, cgh, read_write};
cgh.parallel_for<class MigrateBuffer>(
range<1>(1), [=](id<1> ID) { Accessor[ID] |= (1 << Index); });
});
Q.wait();
++Index;
}

auto HostAcc = Buffer.get_host_access();
auto Passed = (HostAcc[0] == Result);
std::cout << "Checking result on host: " << (Passed ? "passed" : "FAILED")
<< std::endl;
std::cout << HostAcc[0] << " ?= " << Result << std::endl;
return !Passed;
}
3 changes: 3 additions & 0 deletions SYCL/Plugin/interop-level-zero-buffer-ownership.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
// 2. User-provided memory allocation is freed by DPCPP RT if
// "transfer" ownership is specified.

// NOTE: SYCL RT will see unbalanced count of alloc/free,
// so this test will fail with ZE_DEBUG=4.

// Keep ownership
// CHECK: zeMemFree

Expand Down
5 changes: 0 additions & 5 deletions SYCL/Plugin/interop-level-zero-buffer.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,6 @@ int main() {
Queue.wait();

{
char *Ptr = (char *)HostBuffer1;
for (int i = 0; i < 10; i++) {
assert(Ptr[i] == 'a');
}
Comment on lines -93 to -96
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@againull : from the spec you crafted:

A SYCL buffer that is constructed with this interop API uses the Level Zero memory allocation for its full lifetime, and the contents of the Level Zero memory allocation are unspecified for the lifetime of the SYCL buffer. If the application modifies the contents of that Level Zero memory allocation during the lifetime of the SYCL buffer, the behavior is undefined.


auto HostAcc1 = HostBufferInterop1.get_host_access();
for (int i = 0; i < 10; i++) {
assert(HostAcc1[i] == 'a');
Expand Down