Skip to content

[SYCL][HIP] Initial HIP mem_advise implementation. #10697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
aa4b752
[SYCL][HIP] Initial HIP mem_advise implementation.
GeorgeWeb Jul 20, 2023
c486984
Add back the HIP version guard for hipMemAdvise.
GeorgeWeb Aug 7, 2023
f714d74
Address review simplifying impl and add comments.
GeorgeWeb Aug 8, 2023
7408583
Add HIP device attribute check for managed memory support before call…
GeorgeWeb Aug 9, 2023
5699baf
Feedback - Add _pi_mem_advice aliases for HIP
GeorgeWeb Sep 4, 2023
4e3a44b
Feedback - Rename the memadvise test filename as it applies to more b…
GeorgeWeb Sep 5, 2023
dc5695e
Feedback - Update the memadvise flags test to include both the CUDA a…
GeorgeWeb Sep 5, 2023
499fc4d
Make mem_advise warn instead of throw for unsupported advice arguments.
GeorgeWeb Sep 18, 2023
f289357
Merge commit
GeorgeWeb Sep 18, 2023
b41ed80
Apply missed clang-format.
GeorgeWeb Sep 18, 2023
cec7b9d
Merge remote-tracking branch 'upstream/sycl' into georgi/hip_memadvis…
GeorgeWeb Sep 19, 2023
0d75163
Update the PI minor to 39 from 38
GeorgeWeb Sep 19, 2023
21bbdf1
Merge commit.
GeorgeWeb Sep 21, 2023
a651d66
Bump minor again after merge commit changes to PI header
GeorgeWeb Sep 21, 2023
cb6f7b2
Update test head-comment description.
GeorgeWeb Sep 21, 2023
d6e55c9
Merge remote-tracking branch 'upstream/sycl' into georgi/hip_memadvis…
GeorgeWeb Nov 1, 2023
fa7e8cf
Temporary update CMakeLists to test the UR-HIP adapter changes
GeorgeWeb Nov 1, 2023
9f08cec
Merge remote-tracking branch 'upstream/sycl' into georgi/hip_memadvis…
GeorgeWeb Nov 2, 2023
fda8f52
Merge remote-tracking branch 'upstream/sycl' into georgi/hip_memadvis…
GeorgeWeb Nov 2, 2023
5845503
Merge remote-tracking branch 'upstream/sycl' into georgi/hip_memadvis…
GeorgeWeb Nov 3, 2023
e89d822
Merge remote-tracking branch 'upstream/sycl' into georgi/hip_memadvis…
GeorgeWeb Dec 11, 2023
870a7af
Use updated version of the corresponding UR repo changes
GeorgeWeb Dec 12, 2023
87365cd
Merge remote-tracking branch 'upstream/sycl' into georgi/hip_memadvis…
GeorgeWeb Dec 13, 2023
b350724
Merge remote-tracking branch 'origin/sycl' into georgi/hip_memadvise_…
kbenzie Jan 9, 2024
575d583
[UR] Bump tag to 12a67f56
kbenzie Jan 9, 2024
7c37bf6
Merge remote-tracking branch 'origin/sycl' into georgi/hip_memadvise_…
kbenzie Jan 9, 2024
6d2027b
Fix hip memadvise discard events regression
GeorgeWeb Jan 9, 2024
56abbdb
Unset the UR tag override cmake variable
GeorgeWeb Jan 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,10 @@
// 14.37 Added piextUSMImportExternalPointer and piextUSMReleaseImportedPointer.
// 14.38 Change PI_MEM_ADVICE_* values to flags for use in bitwise operations.
// 14.39 Added PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT device info query.
// 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones.

#define _PI_H_VERSION_MAJOR 14
#define _PI_H_VERSION_MINOR 39
#define _PI_H_VERSION_MINOR 40

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -580,6 +581,29 @@ typedef enum {
PI_MEM_ADVICE_UNKNOWN = 0x7FFFFFFF,
} _pi_mem_advice;

// HIP _pi_mem_advice aliases
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_READ_MOSTLY =
PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_READ_MOSTLY =
PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION =
PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION =
PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_ACCESSED_BY =
PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY =
PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION_HOST =
PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST;
static constexpr _pi_mem_advice
PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST =
PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST =
PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST;
static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST =
PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST;

typedef enum {
PI_IMAGE_CHANNEL_ORDER_A = 0x10B1,
PI_IMAGE_CHANNEL_ORDER_R = 0x10B0,
Expand Down
12 changes: 6 additions & 6 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 2d42d77d5b931b4558842bad32cf9dad80e17b3c
# Merge: 9f14fedb 2a960baf
# commit 12a67f56c3c5d08cfac0852d552b4e5fe0452d04
# Merge: 2b7b827c c10968f5
# Author: Kenneth Benzie (Benie) <[email protected]>
# Date: Mon Jan 8 13:40:40 2024 +0000
# Merge pull request #1229 from oneapi-src/revert-1228-revert-984-ext_oneapi_queue_priority-hip
# Revert "Revert "[HIP] Implement ext_oneapi_queue_priority""
set(UNIFIED_RUNTIME_TAG 2d42d77d5b931b4558842bad32cf9dad80e17b3c)
# Date: Tue Jan 9 10:53:32 2024 +0000
# Merge pull request #1027 from GeorgeWeb/georgi/hip_memadvise
# [SYCL][HIP] Implement mem_advise for HIP
set(UNIFIED_RUNTIME_TAG 12a67f56c3c5d08cfac0852d552b4e5fe0452d04)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
12 changes: 12 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3358,6 +3358,18 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
if (Advice & PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION) {
UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION;
}
if (Advice & PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY) {
UrAdvice |= UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE;
}
if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY) {
UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE;
}
if (Advice & PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST) {
UrAdvice |= UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST;
}
if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) {
UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST;
}
if (Advice & PI_MEM_ADVICE_RESET) {
UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT;
}
Expand Down
9 changes: 7 additions & 2 deletions sycl/test-e2e/DiscardEvents/discard_events_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
// The test checks that the last parameter is `nullptr` for all PI calls that
// should discard events.
// {{0|0000000000000000}} is required for various output on Linux and Windows.
// NOTE: piextUSMEnqueuePrefetch and piextUSMEnqueueMemAdvise in the CUDA and
// HIP backends may return a warning result on Windows with error-code
// -996 (PI_ERROR_PLUGIN_SPECIFIC_ERROR) if USM managed memory is not
// supported or if unsupported advice flags are used for the latter API.
// Since it is a warning it is safe to ignore for this test.
//
// Everything that follows TestQueueOperations()
// CHECK: ---> piextUSMEnqueueMemset(
Expand All @@ -30,7 +35,7 @@
// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ]
//
// CHECK: ---> piextUSMEnqueueMemAdvise(
// CHECK: ) ---> pi_result : PI_SUCCESS
// CHECK: ) ---> pi_result : {{PI_SUCCESS|-996}}
// CHECK-NEXT: [out]pi_event * : {{0|0000000000000000}}[ nullptr ]
//
// CHECK: ---> piEnqueueKernelLaunch(
Expand Down Expand Up @@ -75,7 +80,7 @@
// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ]
//
// CHECK: ---> piextUSMEnqueueMemAdvise(
// CHECK: ) ---> pi_result : PI_SUCCESS
// CHECK: ) ---> pi_result : {{PI_SUCCESS|-996}}
// CHECK-NEXT: [out]pi_event * : {{0|0000000000000000}}[ nullptr ]
//
// CHECK: ---> piEnqueueKernelLaunch(
Expand Down
13 changes: 7 additions & 6 deletions sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
// The test checks that the last parameter is not `nullptr` for all PI calls
// that should discard events.
// {{0|0000000000000000}} is required for various output on Linux and Windows.
// NOTE: piextUSMEnqueuePrefetch in the CUDA backend may return a warning
// result on Windows with error-code -996
// (PI_ERROR_PLUGIN_SPECIFIC_ERROR). Since it is a warning it is safe to
// ignore for this test.
// NOTE: piextUSMEnqueuePrefetch and piextUSMEnqueueMemAdvise in the CUDA and
// HIP backends may return a warning result on Windows with error-code
// -996 (PI_ERROR_PLUGIN_SPECIFIC_ERROR) if USM managed memory is not
// supported or if unsupported advice flags are used for the latter API.
// Since it is a warning it is safe to ignore for this test.
//
// Everything that follows TestQueueOperations()
// CHECK: ---> piextUSMEnqueueMemset(
Expand Down Expand Up @@ -40,7 +41,7 @@
//
// CHECK: ---> piextUSMEnqueueMemAdvise(
// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ]
// CHECK: ---> pi_result : PI_SUCCESS
// CHECK: ---> pi_result : {{PI_SUCCESS|-996}}
//
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ]
Expand Down Expand Up @@ -97,7 +98,7 @@
//
// CHECK: ---> piextUSMEnqueueMemAdvise(
// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ]
// CHECK: ---> pi_result : PI_SUCCESS
// CHECK: ---> pi_result : {{PI_SUCCESS|-996}}
//
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ]
Expand Down
54 changes: 0 additions & 54 deletions sycl/test-e2e/USM/memadvise_cuda.cpp

This file was deleted.

74 changes: 74 additions & 0 deletions sycl/test-e2e/USM/memadvise_flags.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// RUN: %{build} -o %t1.out
// REQUIRES: cuda || hip_amd
// RUN: %{run} %t1.out

//==---------------- memadvise_flags.cpp -----------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <iostream>
#include <sycl/sycl.hpp>
#include <vector>

using namespace sycl;

int main() {
const size_t size = 100;
queue q;
auto dev = q.get_device();
auto ctx = q.get_context();
if (!dev.get_info<info::device::usm_shared_allocations>()) {
std::cout << "Shared USM is not supported. Skipping test." << std::endl;
return 0;
}

void *ptr = malloc_shared(size, dev, ctx);
if (ptr == nullptr) {
std::cout << "Allocation failed!" << std::endl;
return -1;
}

bool isCuda = dev.get_backend() == sycl::backend::ext_oneapi_cuda;
bool isHip = dev.get_backend() == sycl::backend::ext_oneapi_hip;

std::vector<int> valid_advices;
if (isCuda) {
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST);
valid_advices.emplace_back(
PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST);
} else if (isHip) {
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_READ_MOSTLY);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_READ_MOSTLY);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST);
} else {
// Skip
return 0;
}

for (int advice : valid_advices) {
q.mem_advise(ptr, size, advice);
}

q.wait_and_throw();
std::cout << "Test passed." << std::endl;
return 0;
}