 
 // Handler for plain, pointer-based CUDA allocations
 struct BufferMem {
-  using native_type = CUdeviceptr;
 
-  // If this allocation is a sub-buffer (i.e., a view on an existing
-  // allocation), this is the pointer to the parent handler structure
-  ur_mem_handle_t Parent;
-  // CUDA handler for the pointer
-  native_type Ptr;
+  struct BufferMap {
+    /// Size of the active mapped region.
+    size_t MapSize;
+    /// Offset of the active mapped region.
+    size_t MapOffset;
+    /// Original flags for the mapped region
+    ur_map_flags_t MapFlags;
+    /// Allocated host memory used exclusively for this map.
+    std::unique_ptr<unsigned char[]> MapMem;
 
-  /// Pointer associated with this device on the host
-  void *HostPtr;
-  /// Size of the allocation in bytes
-  size_t Size;
-  /// Size of the active mapped region.
-  size_t MapSize;
-  /// Offset of the active mapped region.
-  size_t MapOffset;
-  /// Pointer to the active mapped region, if any
-  void *MapPtr;
-  /// Original flags for the mapped region
-  ur_map_flags_t MapFlags;
+    BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags)
+        : MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags),
+          MapMem(nullptr) {}
+
+    BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags,
+              std::unique_ptr<unsigned char[]> &MapMem)
+        : MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags),
+          MapMem(std::move(MapMem)) {}
+
+    size_t getMapSize() const noexcept { return MapSize; }
+
+    size_t getMapOffset() const noexcept { return MapOffset; }
+
+    ur_map_flags_t getMapFlags() const noexcept { return MapFlags; }
+  };
 
   /** AllocMode
    * classic: Just a normal buffer allocated on the device via cuda malloc
    * use_host_ptr: Use an address on the host for the device
-   * copy_in: The data for the device comes from the host but the host
-             pointer is not available later for re-use
-   * alloc_host_ptr: Uses pinned-memory allocation
-   */
+   * copy_in: The data for the device comes from the host but the host pointer
+   * is not available later for re-use alloc_host_ptr: Uses pinned-memory
+   * allocation
+   */
   enum class AllocMode {
     Classic,
     UseHostPtr,
     CopyIn,
     AllocHostPtr,
-  } MemAllocMode;
+  };
+
+  using native_type = CUdeviceptr;
+
+  /// If this allocation is a sub-buffer (i.e., a view on an existing
+  /// allocation), this is the pointer to the parent handler structure
+  ur_mem_handle_t Parent;
+  /// CUDA handler for the pointer
+  native_type Ptr;
+  /// Pointer associated with this device on the host
+  void *HostPtr;
+  /// Size of the allocation in bytes
+  size_t Size;
+  /// A map that contains all the active mappings for this buffer.
+  std::unordered_map<void *, BufferMap> PtrToBufferMap;
+
+  AllocMode MemAllocMode;
 
   BufferMem(ur_mem_handle_t Parent, BufferMem::AllocMode Mode, CUdeviceptr Ptr,
             void *HostPtr, size_t Size)
-      : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size}, MapSize{0},
-        MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE},
-        MemAllocMode{Mode} {};
+      : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size},
+        PtrToBufferMap{}, MemAllocMode{Mode} {};
 
   native_type get() const noexcept { return Ptr; }
 
   size_t getSize() const noexcept { return Size; }
 
-  void *getMapPtr() const noexcept { return MapPtr; }
-
-  size_t getMapSize() const noexcept { return MapSize; }
-
-  size_t getMapOffset() const noexcept { return MapOffset; }
+  BufferMap *getMapDetails(void *Map) {
+    auto details = PtrToBufferMap.find(Map);
+    if (details != PtrToBufferMap.end()) {
+      return &details->second;
+    }
+    return nullptr;
+  }
 
   /// Returns a pointer to data visible on the host that contains
   /// the data on the device associated with this allocation.
   /// The offset is used to index into the CUDA allocation.
-  void *mapToPtr(size_t Size, size_t Offset, ur_map_flags_t Flags) noexcept {
-    assert(MapPtr == nullptr);
-    MapSize = Size;
-    MapOffset = Offset;
-    MapFlags = Flags;
-    if (HostPtr) {
-      MapPtr = static_cast<char *>(HostPtr) + Offset;
+  void *mapToPtr(size_t MapSize, size_t MapOffset,
+                 ur_map_flags_t MapFlags) noexcept {
+
+    void *MapPtr = nullptr;
+    if (HostPtr == nullptr) {
+      /// If HostPtr is invalid, we need to create a Mapping that owns its own
+      /// memory on the host.
+      auto MapMem = std::make_unique<unsigned char[]>(MapSize);
+      MapPtr = MapMem.get();
+      PtrToBufferMap.insert(
+          {MapPtr, BufferMap(MapSize, MapOffset, MapFlags, MapMem)});
     } else {
-      // TODO: Allocate only what is needed based on the offset
-      MapPtr = static_cast<void *>(malloc(this->getSize()));
+      /// However, if HostPtr already has valid memory (e.g. pinned allocation),
+      /// we can just use that memory for the mapping.
+      MapPtr = static_cast<char *>(HostPtr) + MapOffset;
+      PtrToBufferMap.insert({MapPtr, BufferMap(MapSize, MapOffset, MapFlags)});
     }
     return MapPtr;
   }
 
   /// Detach the allocation from the host memory.
-  void unmap(void *) noexcept {
-    assert(MapPtr != nullptr);
-
-    if (MapPtr != HostPtr) {
-      free(MapPtr);
-    }
-    MapPtr = nullptr;
-    MapSize = 0;
-    MapOffset = 0;
-  }
-
-  ur_map_flags_t getMapFlags() const noexcept {
+  void unmap(void *MapPtr) noexcept {
     assert(MapPtr != nullptr);
-    return MapFlags;
+    PtrToBufferMap.erase(MapPtr);
   }
 };
 
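The substance of this change: BufferMem previously tracked a single active mapping through MapPtr, MapSize, MapOffset, and MapFlags, so mapToPtr asserted that no mapping existed and allocated the full buffer size whenever no HostPtr was available. Each mapping now gets its own BufferMap entry in PtrToBufferMap, keyed by the host pointer returned to the caller, owns exactly MapSize bytes when it has to allocate, and unmap(MapPtr) releases only that one entry. Below is a minimal, self-contained sketch of the same bookkeeping pattern using only standard-library types; MockBuffer, Mapping, and their members are illustrative stand-ins, not part of the Unified Runtime API or this patch.

// Minimal mock of per-mapping bookkeeping (illustrative only).
#include <cassert>
#include <cstddef>
#include <memory>
#include <unordered_map>

struct MockBuffer {
  // Per-mapping metadata, analogous to BufferMem::BufferMap in the diff above.
  struct Mapping {
    size_t Size;
    size_t Offset;
    std::unique_ptr<unsigned char[]> OwnedMem; // set only when we allocate
  };

  void *HostPtr = nullptr; // pre-existing host memory, if any
  std::unordered_map<void *, Mapping> ActiveMaps;

  // Map a sub-range: reuse HostPtr when present, otherwise allocate just
  // enough host memory for this mapping.
  void *map(size_t Size, size_t Offset) {
    if (HostPtr) {
      void *Ptr = static_cast<char *>(HostPtr) + Offset;
      ActiveMaps.emplace(Ptr, Mapping{Size, Offset, nullptr});
      return Ptr;
    }
    auto Mem = std::make_unique<unsigned char[]>(Size);
    void *Ptr = Mem.get();
    ActiveMaps.emplace(Ptr, Mapping{Size, Offset, std::move(Mem)});
    return Ptr;
  }

  // Drop one mapping; any memory owned by the erased entry is freed by its
  // unique_ptr, so there is no explicit free().
  void unmap(void *Ptr) {
    assert(ActiveMaps.count(Ptr) != 0);
    ActiveMaps.erase(Ptr);
  }
};

int main() {
  MockBuffer Buf;
  // Two regions of the same buffer can be mapped concurrently, which the old
  // single-MapPtr design rejected with an assert.
  void *A = Buf.map(/*Size=*/64, /*Offset=*/0);
  void *B = Buf.map(/*Size=*/32, /*Offset=*/128);
  assert(A != B && Buf.ActiveMaps.size() == 2);
  Buf.unmap(A);
  Buf.unmap(B);
  return 0;
}

Because the owning unique_ptr lives inside the map entry, erasing the entry is enough to release any host staging memory, which is why the patched unmap no longer needs the old free(MapPtr) path.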