Skip to content

Commit 8d1486a

Browse files
authored
Merge pull request #939 from steffenlarsen/steffen/virtual_mem_adapters
[UR][CUDA][L0][HIP] Add virtual memory adapter implementations
2 parents 67e4d1b + 1678894 commit 8d1486a

39 files changed

+859
-279
lines changed

source/adapters/cuda/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ add_ur_adapter(${TARGET_NAME}
2727
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
2828
${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp
2929
${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
30+
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp
31+
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp
3032
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
3133
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
3234
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
@@ -38,6 +40,7 @@ add_ur_adapter(${TARGET_NAME}
3840
${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp
3941
${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
4042
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp
43+
${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp
4144
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
4245
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp
4346
)

source/adapters/cuda/device.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1017,6 +1017,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
10171017
// TODO: Investigate if this information is available on CUDA.
10181018
case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED:
10191019
return ReturnValue(false);
1020+
case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
1021+
return ReturnValue(true);
10201022
case UR_DEVICE_INFO_ESIMD_SUPPORT:
10211023
return ReturnValue(false);
10221024
case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS:
@@ -1026,7 +1028,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
10261028
case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
10271029
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
10281030
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
1029-
case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
10301031
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
10311032

10321033
default:

source/adapters/cuda/device.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
#include <ur/ur.hpp>
1313

14+
#include "common.hpp"
15+
1416
struct ur_device_handle_t_ {
1517
private:
1618
using native_type = CUdevice;

source/adapters/cuda/physical_mem.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//===--------- physical_mem.cpp - CUDA Adapter ----------------------------===//
2+
//
3+
// Copyright (C) 2023 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "physical_mem.hpp"
12+
#include "common.hpp"
13+
#include "context.hpp"
14+
#include "event.hpp"
15+
16+
#include <cassert>
17+
#include <cuda.h>
18+
19+
/// Creates a physical memory allocation of `size` bytes with cuMemCreate and
/// returns it through phPhysicalMem wrapped in a new
/// ur_physical_mem_handle_t_.
///
/// The allocation properties use location type CU_MEM_LOCATION_TYPE_DEVICE,
/// allocation type CU_MEM_ALLOCATION_TYPE_PINNED, and the device ordinal that
/// GetDeviceOrdinal resolves for hDevice. pProperties is not read.
///
/// \return UR_RESULT_ERROR_INVALID_SIZE when cuMemCreate reports
///         CUDA_ERROR_INVALID_VALUE, UR_RESULT_SUCCESS when the handle was
///         created. Any other cuMemCreate result is handed to UR_CHECK_ERROR
///         (defined elsewhere; presumably raises on failure - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate(
20+
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
21+
[[maybe_unused]] const ur_physical_mem_properties_t *pProperties,
22+
ur_physical_mem_handle_t *phPhysicalMem) {
23+
CUmemAllocationProp AllocProps = {};
24+
AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
25+
AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED;
26+
// Fills AllocProps.location.id with the ordinal matching hDevice.
UR_CHECK_ERROR(GetDeviceOrdinal(hDevice, AllocProps.location.id));
27+
28+
CUmemGenericAllocationHandle ResHandle;
29+
// CUDA_ERROR_INVALID_VALUE is translated to an invalid-size error; every
// other result (including success) goes through UR_CHECK_ERROR.
switch (auto Result = cuMemCreate(&ResHandle, size, &AllocProps, 0)) {
30+
case CUDA_ERROR_INVALID_VALUE:
31+
return UR_RESULT_ERROR_INVALID_SIZE;
32+
default:
33+
UR_CHECK_ERROR(Result);
34+
}
35+
// The wrapper starts with a reference count of 1 and retains hContext in its
// constructor.
// NOTE(review): nothing on this path calls cuMemRelease(ResHandle) if `new`
// throws - confirm whether that is acceptable.
*phPhysicalMem = new ur_physical_mem_handle_t_(ResHandle, hContext);
36+
37+
return UR_RESULT_SUCCESS;
38+
}
39+
40+
/// Increments the reference count of hPhysicalMem.
///
/// \return Always UR_RESULT_SUCCESS; hPhysicalMem is dereferenced without a
///         null check.
UR_APIEXPORT ur_result_t UR_APICALL
41+
urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) {
42+
hPhysicalMem->incrementReferenceCount();
43+
return UR_RESULT_SUCCESS;
44+
}
45+
46+
/// Decrements the reference count of hPhysicalMem and, when it reaches zero,
/// releases the native allocation with cuMemRelease and deletes the wrapper.
///
/// \return UR_RESULT_SUCCESS when references remain or the release succeeded;
///         the caught ur_result_t when one is thrown inside the try block;
///         UR_RESULT_ERROR_OUT_OF_RESOURCES for any other exception.
UR_APIEXPORT ur_result_t UR_APICALL
47+
urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
48+
// Other references are still outstanding: nothing to destroy yet.
if (hPhysicalMem->decrementReferenceCount() > 0)
49+
return UR_RESULT_SUCCESS;
50+
51+
try {
52+
// The guard owns the wrapper from here on, so it is deleted when the try
// block is left, whether or not cuMemRelease succeeds.
std::unique_ptr<ur_physical_mem_handle_t_> PhysicalMemGuard(hPhysicalMem);
53+
54+
// ScopedContext is defined elsewhere; presumably it activates the wrapper's
// context for the duration of this scope - TODO confirm.
ScopedContext Active(hPhysicalMem->getContext());
55+
UR_CHECK_ERROR(cuMemRelease(hPhysicalMem->get()));
56+
return UR_RESULT_SUCCESS;
57+
} catch (ur_result_t err) {
58+
return err;
59+
} catch (...) {
60+
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
61+
}
62+
}

source/adapters/cuda/physical_mem.hpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//===---------- physical_mem.hpp - CUDA Adapter ---------------------------===//
2+
//
3+
// Copyright (C) 2023 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
#pragma once
11+
12+
#include <ur/ur.hpp>
13+
14+
#include <cuda.h>
15+
16+
#include "adapter.hpp"
17+
#include "device.hpp"
18+
#include "platform.hpp"
19+
20+
/// UR physical memory handle wrapping a CUDA generic allocation handle used in
21+
/// virtual memory management.
22+
///
23+
struct ur_physical_mem_handle_t_ {
24+
// Native CUDA handle type stored by this wrapper.
using native_type = CUmemGenericAllocationHandle;
25+
26+
// Reference count; initialised to 1 by the constructor.
std::atomic_uint32_t RefCount;
27+
// The wrapped native allocation handle.
native_type PhysicalMem;
28+
// Context passed at construction; retained in the constructor and released
// in the destructor.
ur_context_handle_t_ *Context;
29+
30+
// Stores the native handle and context and takes a reference on the context.
ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx)
31+
: RefCount(1), PhysicalMem(PhysMem), Context(Ctx) {
32+
urContextRetain(Context);
33+
}
34+
35+
// Only drops the context reference; the native allocation itself is not
// released here.
~ur_physical_mem_handle_t_() { urContextRelease(Context); }
36+
37+
// Returns the wrapped native allocation handle.
native_type get() const noexcept { return PhysicalMem; }
38+
39+
// Returns the context stored at construction.
ur_context_handle_t_ *getContext() const noexcept { return Context; }
40+
41+
// Pre-increments the reference count and returns the new value.
uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
42+
43+
// Pre-decrements the reference count and returns the new value.
uint32_t decrementReferenceCount() noexcept { return --RefCount; }
44+
45+
// Returns the current reference count.
uint32_t getReferenceCount() const noexcept { return RefCount; }
46+
};
47+
48+
// Finds the ordinal of a device by searching the adapter's platform list.
//
// Queries urPlatformGet for the platforms of the global `adapter`, then sets
// Ordinal to the index of the first platform whose Devices[0] has the same
// native CUdevice as Device.
//
// Returns UR_RESULT_SUCCESS when a match is found, otherwise
// UR_RESULT_ERROR_INVALID_DEVICE (Ordinal then equals the platform count).
// Failures of urPlatformGet go through UR_CHECK_ERROR (defined elsewhere).
49+
inline ur_result_t GetDeviceOrdinal(ur_device_handle_t Device, int &Ordinal) {
50+
ur_adapter_handle_t AdapterHandle = &adapter;
51+
// Get list of platforms
52+
uint32_t NumPlatforms;
53+
// First call only asks for the number of platforms.
UR_CHECK_ERROR(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms));
54+
// UR_ASSERT is defined elsewhere; presumably it returns
// UR_RESULT_ERROR_UNKNOWN when NumPlatforms is zero - TODO confirm.
UR_ASSERT(NumPlatforms, UR_RESULT_ERROR_UNKNOWN);
55+
56+
// Second call fills a vector sized to NumPlatforms with the handles.
std::vector<ur_platform_handle_t> Platforms{NumPlatforms};
57+
UR_CHECK_ERROR(urPlatformGet(&AdapterHandle, 1, NumPlatforms,
58+
Platforms.data(), nullptr));
59+
60+
// Ordinal corresponds to the platform ID as each device has its own platform.
61+
CUdevice NativeDevice = Device->get();
62+
// Ordinal doubles as the loop index, so it holds the matching platform
// index at the moment of the successful return.
for (Ordinal = 0; size_t(Ordinal) < Platforms.size(); ++Ordinal)
63+
if (Platforms[Ordinal]->Devices[0]->get() == NativeDevice)
64+
return UR_RESULT_SUCCESS;
65+
return UR_RESULT_ERROR_INVALID_DEVICE;
66+
}

source/adapters/cuda/ur_interface_loader.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -360,13 +360,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable(
360360
return retVal;
361361
}
362362

363-
pDdiTable->pfnFree = nullptr;
364-
pDdiTable->pfnGetInfo = nullptr;
365-
pDdiTable->pfnGranularityGetInfo = nullptr;
366-
pDdiTable->pfnMap = nullptr;
367-
pDdiTable->pfnReserve = nullptr;
368-
pDdiTable->pfnSetAccess = nullptr;
369-
pDdiTable->pfnUnmap = nullptr;
363+
pDdiTable->pfnFree = urVirtualMemFree;
364+
pDdiTable->pfnGetInfo = urVirtualMemGetInfo;
365+
pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo;
366+
pDdiTable->pfnMap = urVirtualMemMap;
367+
pDdiTable->pfnReserve = urVirtualMemReserve;
368+
pDdiTable->pfnSetAccess = urVirtualMemSetAccess;
369+
pDdiTable->pfnUnmap = urVirtualMemUnmap;
370370

371371
return retVal;
372372
}
@@ -381,9 +381,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable(
381381
return retVal;
382382
}
383383

384-
pDdiTable->pfnCreate = nullptr;
385-
pDdiTable->pfnRelease = nullptr;
386-
pDdiTable->pfnRetain = nullptr;
384+
pDdiTable->pfnCreate = urPhysicalMemCreate;
385+
pDdiTable->pfnRelease = urPhysicalMemRelease;
386+
pDdiTable->pfnRetain = urPhysicalMemRetain;
387387

388388
return retVal;
389389
}

source/adapters/cuda/virtual_mem.cpp

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
//===--------- virtual_mem.cpp - CUDA Adapter -----------------------------===//
2+
//
3+
// Copyright (C) 2023 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "common.hpp"
12+
#include "context.hpp"
13+
#include "event.hpp"
14+
#include "physical_mem.hpp"
15+
16+
#include <cassert>
17+
#include <cuda.h>
18+
19+
/// Reports the minimum or recommended allocation granularity for hDevice.
///
/// For UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM and
/// UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED the value comes from
/// cuMemGetAllocationGranularity and is returned as a size_t through
/// UrReturnHelper (propSize / pPropValue / pPropSizeRet).
///
/// \return The result of ReturnValue for the two supported properties;
///         UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION for any other propName.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
20+
ur_context_handle_t hContext, ur_device_handle_t hDevice,
21+
ur_virtual_mem_granularity_info_t propName, size_t propSize,
22+
void *pPropValue, size_t *pPropSizeRet) {
23+
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
24+
25+
ScopedContext Active(hContext);
26+
switch (propName) {
27+
case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM:
28+
case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: {
29+
// Both properties share this path; only the granularity flag differs.
CUmemAllocationGranularity_flags Flags =
30+
propName == UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM
31+
? CU_MEM_ALLOC_GRANULARITY_MINIMUM
32+
: CU_MEM_ALLOC_GRANULARITY_RECOMMENDED;
33+
// Same allocation properties that urPhysicalMemCreate passes to
// cuMemCreate.
CUmemAllocationProp AllocProps = {};
34+
AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
35+
AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED;
36+
UR_CHECK_ERROR(GetDeviceOrdinal(hDevice, AllocProps.location.id));
37+
38+
size_t Granularity;
39+
UR_CHECK_ERROR(
40+
cuMemGetAllocationGranularity(&Granularity, &AllocProps, Flags));
41+
return ReturnValue(Granularity);
42+
}
43+
default:
44+
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
45+
}
46+
47+
// Not reached: every switch branch above returns.
return UR_RESULT_SUCCESS;
48+
}
49+
50+
/// Reserves a virtual address range of `size` bytes with cuMemAddressReserve.
///
/// pStart is forwarded as the requested start address and the reserved
/// address is written to *ppStart. The alignment and flags arguments of
/// cuMemAddressReserve are both passed as 0.
///
/// \return UR_RESULT_SUCCESS when control reaches the end; failures of
///         cuMemAddressReserve go through UR_CHECK_ERROR (defined elsewhere).
UR_APIEXPORT ur_result_t UR_APICALL
51+
urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart,
52+
size_t size, void **ppStart) {
53+
ScopedContext Active(hContext);
54+
// ppStart is reinterpreted as a CUdeviceptr* so the driver writes the
// reserved address straight into the caller's output pointer.
UR_CHECK_ERROR(cuMemAddressReserve((CUdeviceptr *)ppStart, size, 0,
55+
(CUdeviceptr)pStart, 0));
56+
return UR_RESULT_SUCCESS;
57+
}
58+
59+
/// Frees the virtual address range [pStart, pStart + size) with
/// cuMemAddressFree.
///
/// \return UR_RESULT_SUCCESS when control reaches the end; failures of
///         cuMemAddressFree go through UR_CHECK_ERROR (defined elsewhere).
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(
60+
ur_context_handle_t hContext, const void *pStart, size_t size) {
61+
ScopedContext Active(hContext);
62+
UR_CHECK_ERROR(cuMemAddressFree((CUdeviceptr)pStart, size));
63+
return UR_RESULT_SUCCESS;
64+
}
65+
66+
/// Sets the access protection of the virtual range [pStart, pStart + size)
/// for the context's device using cuMemSetAccess.
///
/// Flag mapping (first match wins):
///   UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE -> CU_MEM_ACCESS_FLAGS_PROT_READWRITE
///   UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY  -> CU_MEM_ACCESS_FLAGS_PROT_READ
///   neither bit set                       -> CU_MEM_ACCESS_FLAGS_PROT_NONE
///
/// \return UR_RESULT_SUCCESS when control reaches the end; failures go through
///         UR_CHECK_ERROR (defined elsewhere).
UR_APIEXPORT ur_result_t UR_APICALL
67+
urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart,
68+
size_t size, ur_virtual_mem_access_flags_t flags) {
69+
CUmemAccessDesc AccessDesc = {};
70+
// READ_WRITE is tested first, so it takes precedence when both bits are set.
if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE)
71+
AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
72+
else if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY)
73+
AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READ;
74+
else
75+
AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_NONE;
76+
AccessDesc.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
77+
// TODO: When contexts support multiple devices, we should create a descriptor
78+
// for each. We may also introduce a variant of this function with a
79+
// specific device.
80+
UR_CHECK_ERROR(
81+
GetDeviceOrdinal(hContext->getDevice(), AccessDesc.location.id));
82+
83+
ScopedContext Active(hContext);
84+
// A single descriptor is passed (count == 1), covering only the context's
// device.
UR_CHECK_ERROR(cuMemSetAccess((CUdeviceptr)pStart, size, &AccessDesc, 1));
85+
return UR_RESULT_SUCCESS;
86+
}
87+
88+
/// Maps `size` bytes of hPhysicalMem, starting at `offset`, onto the virtual
/// range beginning at pStart using cuMemMap, then applies `flags` through
/// urVirtualMemSetAccess when flags is non-zero.
///
/// \return UR_RESULT_SUCCESS when control reaches the end; failures of
///         cuMemMap or urVirtualMemSetAccess go through UR_CHECK_ERROR
///         (defined elsewhere).
UR_APIEXPORT ur_result_t UR_APICALL
89+
urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size,
90+
ur_physical_mem_handle_t hPhysicalMem, size_t offset,
91+
ur_virtual_mem_access_flags_t flags) {
92+
ScopedContext Active(hContext);
93+
UR_CHECK_ERROR(
94+
cuMemMap((CUdeviceptr)pStart, size, offset, hPhysicalMem->get(), 0));
95+
// With flags == 0 no access call is made after mapping.
if (flags)
96+
UR_CHECK_ERROR(urVirtualMemSetAccess(hContext, pStart, size, flags));
97+
return UR_RESULT_SUCCESS;
98+
}
99+
100+
/// Unmaps the virtual range [pStart, pStart + size) with cuMemUnmap.
///
/// \return UR_RESULT_SUCCESS when control reaches the end; failures of
///         cuMemUnmap go through UR_CHECK_ERROR (defined elsewhere).
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(
101+
ur_context_handle_t hContext, const void *pStart, size_t size) {
102+
ScopedContext Active(hContext);
103+
UR_CHECK_ERROR(cuMemUnmap((CUdeviceptr)pStart, size));
104+
return UR_RESULT_SUCCESS;
105+
}
106+
107+
/// Queries information about the virtual memory at pStart.
///
/// Only UR_VIRTUAL_MEM_INFO_ACCESS_MODE is handled: the access flags reported
/// by cuMemGetAccess for the context's device are translated to UR flags and
/// returned through UrReturnHelper. The `size` parameter is not used.
///
/// \return The result of ReturnValue for UR_VIRTUAL_MEM_INFO_ACCESS_MODE;
///         UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION for any other propName.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(
108+
ur_context_handle_t hContext, const void *pStart,
109+
[[maybe_unused]] size_t size, ur_virtual_mem_info_t propName,
110+
size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
111+
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
112+
113+
ScopedContext Active(hContext);
114+
switch (propName) {
115+
case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: {
116+
// The query is made for the single device associated with hContext.
CUmemLocation MemLocation = {};
117+
MemLocation.type = CU_MEM_LOCATION_TYPE_DEVICE;
118+
UR_CHECK_ERROR(GetDeviceOrdinal(hContext->getDevice(), MemLocation.id));
119+
120+
unsigned long long CuAccessFlags;
121+
UR_CHECK_ERROR(
122+
cuMemGetAccess(&CuAccessFlags, &MemLocation, (CUdeviceptr)pStart));
123+
124+
// Any CUDA value other than READWRITE or READ leaves the UR flags at 0.
ur_virtual_mem_access_flags_t UrAccessFlags = 0;
125+
if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READWRITE)
126+
UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE;
127+
else if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READ)
128+
UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY;
129+
return ReturnValue(UrAccessFlags);
130+
}
131+
default:
132+
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
133+
}
134+
// Not reached: every switch branch above returns.
return UR_RESULT_SUCCESS;
135+
}

source/adapters/hip/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ add_ur_adapter(${TARGET_NAME}
6161
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
6262
${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp
6363
${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
64+
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp
65+
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp
6466
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
6567
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
6668
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
@@ -71,6 +73,7 @@ add_ur_adapter(${TARGET_NAME}
7173
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
7274
${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
7375
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp
76+
${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp
7477
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
7578
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp
7679
)

source/adapters/hip/device.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
819819
}
820820
case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED:
821821
return ReturnValue(false);
822+
case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
823+
return ReturnValue(false);
822824
case UR_DEVICE_INFO_ESIMD_SUPPORT:
823825
return ReturnValue(false);
824826

@@ -833,7 +835,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
833835
case UR_DEVICE_INFO_BFLOAT16:
834836
case UR_DEVICE_INFO_IL_VERSION:
835837
case UR_DEVICE_INFO_ASYNC_BARRIER:
836-
case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
837838
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
838839

839840
default:

0 commit comments

Comments
 (0)