Commit 77b3554

fabiomestre authored and kbenzie committed
[CUDA] Add support for multiple active mappings
1 parent cda0cd9 · commit 77b3554

File tree

source/adapters/cuda/enqueue.cpp
source/adapters/cuda/memory.hpp
test/conformance/enqueue/enqueue_adapter_cuda.match
test/conformance/enqueue/urEnqueueMemBufferMap.cpp

4 files changed: +90 -81 lines

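Before this commit the CUDA adapter supported only one active mapping per buffer: a second urEnqueueMemBufferMap call on an already-mapped buffer returned UR_RESULT_ERROR_UNSUPPORTED_FEATURE, as the removed lines in enqueue.cpp below show. The sketch that follows illustrates the pattern the change enables at the API level. It is not part of the commit; the helper function, the 1024-byte buffer size, and the assumption that a valid queue and buffer handle already exist (plus the usual ur_api.h header) are illustrative only.

// Sketch only: two simultaneous host mappings of one UR buffer.
#include <cstring>
#include <ur_api.h>

ur_result_t mapTwoRegions(ur_queue_handle_t Queue, ur_mem_handle_t Buffer) {
  constexpr size_t Half = 512; // Buffer is assumed to be 1024 bytes.
  void *MapA = nullptr;
  void *MapB = nullptr;

  // First mapping covers bytes [0, Half).
  ur_result_t Res =
      urEnqueueMemBufferMap(Queue, Buffer, /*blockingMap=*/true,
                            UR_MAP_FLAG_WRITE, /*offset=*/0, Half, 0, nullptr,
                            nullptr, &MapA);
  if (Res != UR_RESULT_SUCCESS)
    return Res;

  // Second mapping covers bytes [Half, 2 * Half) while the first is still
  // active; this is the case the commit enables.
  Res = urEnqueueMemBufferMap(Queue, Buffer, /*blockingMap=*/true,
                              UR_MAP_FLAG_WRITE, /*offset=*/Half, Half, 0,
                              nullptr, nullptr, &MapB);
  if (Res != UR_RESULT_SUCCESS)
    return Res;

  // Write through both host pointers, then release each mapping on its own.
  // A WRITE-flagged unmap enqueues the copy back to the device.
  std::memset(MapA, 42, Half);
  std::memset(MapB, 24, Half);
  Res = urEnqueueMemUnmap(Queue, Buffer, MapA, 0, nullptr, nullptr);
  if (Res != UR_RESULT_SUCCESS)
    return Res;
  return urEnqueueMemUnmap(Queue, Buffer, MapB, 0, nullptr, nullptr);
}

The SuccessMultiMaps conformance test, re-enabled for CUDA by this commit's .match change, exercises the same pattern with the values 42 and 24.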

source/adapters/cuda/enqueue.cpp

Lines changed: 18 additions & 25 deletions
@@ -1160,27 +1160,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
             UR_RESULT_ERROR_INVALID_SIZE);
 
   auto &BufferImpl = std::get<BufferMem>(hBuffer->Mem);
-  ur_result_t Result = UR_RESULT_ERROR_INVALID_MEM_OBJECT;
-  const bool IsPinned =
-      BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;
+  auto MapPtr = BufferImpl.mapToPtr(size, offset, mapFlags);
 
-  // Currently no support for overlapping regions
-  if (BufferImpl.getMapPtr() != nullptr) {
-    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  if (!MapPtr) {
+    return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
   }
 
-  // Allocate a pointer in the host to store the mapped information
-  auto HostPtr = BufferImpl.mapToPtr(size, offset, mapFlags);
-  *ppRetMap = BufferImpl.getMapPtr();
-  if (HostPtr) {
-    Result = UR_RESULT_SUCCESS;
-  }
+  const bool IsPinned =
+      BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;
 
+  ur_result_t Result = UR_RESULT_SUCCESS;
   if (!IsPinned &&
      ((mapFlags & UR_MAP_FLAG_READ) || (mapFlags & UR_MAP_FLAG_WRITE))) {
     // Pinned host memory is already on host so it doesn't need to be read.
     Result = urEnqueueMemBufferRead(hQueue, hBuffer, blockingMap, offset, size,
-                                    HostPtr, numEventsInWaitList,
+                                    MapPtr, numEventsInWaitList,
                                     phEventWaitList, phEvent);
   } else {
     ScopedContext Active(hQueue->getContext());
@@ -1201,6 +1195,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
       }
     }
   }
+  *ppRetMap = MapPtr;
 
   return Result;
 }
@@ -1213,23 +1208,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
     ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
     uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
     ur_event_handle_t *phEvent) {
-  ur_result_t Result = UR_RESULT_SUCCESS;
   UR_ASSERT(hMem->MemType == ur_mem_handle_t_::Type::Buffer,
             UR_RESULT_ERROR_INVALID_MEM_OBJECT);
-  UR_ASSERT(std::get<BufferMem>(hMem->Mem).getMapPtr() != nullptr,
-            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
-  UR_ASSERT(std::get<BufferMem>(hMem->Mem).getMapPtr() == pMappedPtr,
-            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  auto &BufferImpl = std::get<BufferMem>(hMem->Mem);
 
-  const bool IsPinned = std::get<BufferMem>(hMem->Mem).MemAllocMode ==
-                        BufferMem::AllocMode::AllocHostPtr;
+  auto *Map = BufferImpl.getMapDetails(pMappedPtr);
+  UR_ASSERT(Map != nullptr, UR_RESULT_ERROR_INVALID_MEM_OBJECT);
 
-  if (!IsPinned &&
-      (std::get<BufferMem>(hMem->Mem).getMapFlags() & UR_MAP_FLAG_WRITE)) {
+  const bool IsPinned =
+      BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  if (!IsPinned && (Map->getMapFlags() & UR_MAP_FLAG_WRITE)) {
     // Pinned host memory is only on host so it doesn't need to be written to.
     Result = urEnqueueMemBufferWrite(
-        hQueue, hMem, true, std::get<BufferMem>(hMem->Mem).getMapOffset(),
-        std::get<BufferMem>(hMem->Mem).getMapSize(), pMappedPtr,
+        hQueue, hMem, true, Map->getMapOffset(), Map->getMapSize(), pMappedPtr,
         numEventsInWaitList, phEventWaitList, phEvent);
   } else {
     ScopedContext Active(hQueue->getContext());
@@ -1250,8 +1243,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
       }
    }
   }
+  BufferImpl.unmap(pMappedPtr);
 
-  std::get<BufferMem>(hMem->Mem).unmap(pMappedPtr);
   return Result;
 }
 

source/adapters/cuda/memory.hpp

Lines changed: 70 additions & 53 deletions
@@ -18,89 +18,106 @@
 
 // Handler for plain, pointer-based CUDA allocations
 struct BufferMem {
-  using native_type = CUdeviceptr;
 
-  // If this allocation is a sub-buffer (i.e., a view on an existing
-  // allocation), this is the pointer to the parent handler structure
-  ur_mem_handle_t Parent;
-  // CUDA handler for the pointer
-  native_type Ptr;
+  struct BufferMap {
+    /// Size of the active mapped region.
+    size_t MapSize;
+    /// Offset of the active mapped region.
+    size_t MapOffset;
+    /// Original flags for the mapped region
+    ur_map_flags_t MapFlags;
+    /// Allocated host memory used exclusively for this map.
+    std::unique_ptr<unsigned char[]> MapMem;
 
-  /// Pointer associated with this device on the host
-  void *HostPtr;
-  /// Size of the allocation in bytes
-  size_t Size;
-  /// Size of the active mapped region.
-  size_t MapSize;
-  /// Offset of the active mapped region.
-  size_t MapOffset;
-  /// Pointer to the active mapped region, if any
-  void *MapPtr;
-  /// Original flags for the mapped region
-  ur_map_flags_t MapFlags;
+    BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags)
+        : MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags),
+          MapMem(nullptr) {}
+
+    BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags,
+              std::unique_ptr<unsigned char[]> &MapMem)
+        : MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags),
+          MapMem(std::move(MapMem)) {}
+
+    size_t getMapSize() const noexcept { return MapSize; }
+
+    size_t getMapOffset() const noexcept { return MapOffset; }
+
+    ur_map_flags_t getMapFlags() const noexcept { return MapFlags; }
+  };
 
   /** AllocMode
    * classic: Just a normal buffer allocated on the device via cuda malloc
    * use_host_ptr: Use an address on the host for the device
-   * copy_in: The data for the device comes from the host but the host
-  pointer is not available later for re-use
-   * alloc_host_ptr: Uses pinned-memory allocation
-   */
+   * copy_in: The data for the device comes from the host but the host pointer
+   * is not available later for re-use alloc_host_ptr: Uses pinned-memory
+   * allocation
+   */
  enum class AllocMode {
    Classic,
    UseHostPtr,
    CopyIn,
    AllocHostPtr,
-  } MemAllocMode;
+  };
+
+  using native_type = CUdeviceptr;
+
+  /// If this allocation is a sub-buffer (i.e., a view on an existing
+  /// allocation), this is the pointer to the parent handler structure
+  ur_mem_handle_t Parent;
+  /// CUDA handler for the pointer
+  native_type Ptr;
+  /// Pointer associated with this device on the host
+  void *HostPtr;
+  /// Size of the allocation in bytes
+  size_t Size;
+  /// A map that contains all the active mappings for this buffer.
+  std::unordered_map<void*, BufferMap> PtrToBufferMap;
+
+  AllocMode MemAllocMode;
 
   BufferMem(ur_mem_handle_t Parent, BufferMem::AllocMode Mode, CUdeviceptr Ptr,
             void *HostPtr, size_t Size)
-      : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size}, MapSize{0},
-        MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE},
-        MemAllocMode{Mode} {};
+      : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size},
+        PtrToBufferMap{}, MemAllocMode{Mode} {};
 
   native_type get() const noexcept { return Ptr; }
 
   size_t getSize() const noexcept { return Size; }
 
-  void *getMapPtr() const noexcept { return MapPtr; }
-
-  size_t getMapSize() const noexcept { return MapSize; }
-
-  size_t getMapOffset() const noexcept { return MapOffset; }
+  BufferMap * getMapDetails(void* Map) {
+    auto details = PtrToBufferMap.find(Map);
+    if (details != PtrToBufferMap.end()) {
+      return &details->second;
+    }
+    return nullptr;
+  }
 
   /// Returns a pointer to data visible on the host that contains
   /// the data on the device associated with this allocation.
   /// The offset is used to index into the CUDA allocation.
-  void *mapToPtr(size_t Size, size_t Offset, ur_map_flags_t Flags) noexcept {
-    assert(MapPtr == nullptr);
-    MapSize = Size;
-    MapOffset = Offset;
-    MapFlags = Flags;
-    if (HostPtr) {
-      MapPtr = static_cast<char *>(HostPtr) + Offset;
+  void *mapToPtr(size_t MapSize, size_t MapOffset,
+                 ur_map_flags_t MapFlags) noexcept {
+
+    void *MapPtr = nullptr;
+    if (HostPtr == nullptr) {
+      /// If HostPtr is invalid, we need to create a Mapping that owns its own
+      /// memory on the host.
+      auto MapMem = std::make_unique<unsigned char[]>(MapSize);
+      MapPtr = MapMem.get();
+      PtrToBufferMap.insert({MapPtr, BufferMap(MapSize, MapOffset, MapFlags, MapMem)});
     } else {
-      // TODO: Allocate only what is needed based on the offset
-      MapPtr = static_cast<void *>(malloc(this->getSize()));
+      /// However, if HostPtr already has valid memory (e.g. pinned allocation),
+      /// we can just use that memory for the mapping.
+      MapPtr = static_cast<char *>(HostPtr) + MapOffset;
+      PtrToBufferMap.insert({MapPtr, BufferMap(MapSize, MapOffset, MapFlags)});
    }
     return MapPtr;
  }
 
   /// Detach the allocation from the host memory.
-  void unmap(void *) noexcept {
-    assert(MapPtr != nullptr);
-
-    if (MapPtr != HostPtr) {
-      free(MapPtr);
-    }
-    MapPtr = nullptr;
-    MapSize = 0;
-    MapOffset = 0;
-  }
-
-  ur_map_flags_t getMapFlags() const noexcept {
+  void unmap(void * MapPtr) noexcept {
     assert(MapPtr != nullptr);
-    return MapFlags;
+    PtrToBufferMap.erase(MapPtr);
  }
 };
 
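The substance of the header change is that per-mapping state (size, offset, flags, and any host memory owned by the mapping) now lives in a BufferMap entry keyed by the returned host pointer inside PtrToBufferMap, rather than in a single set of Map* fields on the buffer. Below is a stripped-down sketch of that bookkeeping pattern, written independently of the UR and CUDA types; all names in it are illustrative, not the adapter's.

#include <cstddef>
#include <memory>
#include <unordered_map>

// Illustrative only: the keyed-by-pointer mapping table idea in isolation.
struct MappingTable {
  struct Mapping {
    size_t Size;
    size_t Offset;
    std::unique_ptr<unsigned char[]> OwnedMem; // null when memory is borrowed
  };

  std::unordered_map<void *, Mapping> Active;

  // Create a mapping backed by freshly allocated host memory and key the
  // entry by the pointer handed back to the caller.
  void *map(size_t Size, size_t Offset) {
    auto Mem = std::make_unique<unsigned char[]>(Size);
    void *Ptr = Mem.get();
    Active.emplace(Ptr, Mapping{Size, Offset, std::move(Mem)});
    return Ptr;
  }

  // Look up a mapping by the pointer previously returned from map().
  Mapping *find(void *Ptr) {
    auto It = Active.find(Ptr);
    return It != Active.end() ? &It->second : nullptr;
  }

  // Drop the entry; the unique_ptr frees the owned host memory automatically.
  void unmap(void *Ptr) { Active.erase(Ptr); }
};

Because each entry owns its staging memory through std::unique_ptr, erasing the entry is all unmap has to do; the previous single-mapping code had to call free() and reset MapPtr, MapSize and MapOffset by hand.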

test/conformance/enqueue/enqueue_adapter_cuda.match

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 {{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_
 {{OPT}}urEnqueueMemBufferFillTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___size__256__patternSize__256
 {{OPT}}urEnqueueMemBufferFillTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___size__1024__patternSize__256
-{{OPT}}urEnqueueMemBufferMapTest.SuccessMultiMaps/NVIDIA_CUDA_BACKEND___{{.*}}_
 {{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_
 {{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_
 {{OPT}}urEnqueueMemImageCopyTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___1D

test/conformance/enqueue/urEnqueueMemBufferMap.cpp

Lines changed: 2 additions & 2 deletions
@@ -194,8 +194,8 @@ TEST_P(urEnqueueMemBufferMapTestWithParam, SuccessMultiMaps) {
   for (size_t i = 0; i < map_count; ++i) {
     map_a[i] = 42;
   }
-  for (size_t i = map_count; i < count; ++i) {
-    map_a[i] = 24;
+  for (size_t i = 0; i < map_count; ++i) {
+    map_b[i] = 24;
   }
   ASSERT_SUCCESS(
       urEnqueueMemUnmap(queue, buffer, map_a, 0, nullptr, nullptr));
