|
| 1 | +//===----------- MemoryManager.cpp - Target independent memory manager ----===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// Functionality for managing target memory. |
| 10 | +// It is very expensive to call alloc/free functions of target devices. The |
| 11 | +// MemoryManagerTy in this file reduces the number of invocations of those |
| 12 | +// functions by buffering allocated device memory: memory that is no longer |
| 13 | +// used is not freed on the device directly. The buffer is organized in a |
| 14 | +// number of buckets for efficient lookup. A piece of memory goes to the |
| 15 | +// corresponding bucket based on its size. When a new memory request comes in, |
| 16 | +// the manager first checks whether there is free memory of the same size. If |
| 17 | +// so, it returns that memory directly. Otherwise, it allocates on the device. |
| 18 | +// |
| 19 | +// It also provides a way to opt out of the memory manager. Memory |
| 20 | +// allocation/deallocation is only managed if the requested size is not larger |
| 21 | +// than SizeThreshold, which can be configured via the environment variable |
| 22 | +// LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD. |
| 23 | +// |
| 24 | +//===----------------------------------------------------------------------===// |
| 25 | + |
| 26 | +#include "MemoryManager.h" |
| 27 | +#include "device.h" |
| 28 | +#include "private.h" |
| 29 | +#include "rtl.h" |
| 30 | + |
| 31 | +namespace { |
/// Size classes used to partition the free lists. The table holds 0 followed
/// by the powers of two from 2^2 up to 2^13, in ascending order.
constexpr size_t BucketSize[] = {
    0,       1U << 2, 1U << 3,  1U << 4,  1U << 5,  1U << 6, 1U << 7,
    1U << 8, 1U << 9, 1U << 10, 1U << 11, 1U << 12, 1U << 13};

/// Number of entries in \p BucketSize.
constexpr int NumBuckets = sizeof(BucketSize) / sizeof(*BucketSize);

/// Largest request size handled by the memory manager. A request larger than
/// \p SizeThreshold bypasses the manager and is allocated directly. The value
/// can be configured via the env \p LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD and
/// defaults to 8KB.
size_t SizeThreshold = 1U << 13;
| 43 | + |
/// Round \p Num down to the nearest power of two.
///
/// A value that already is a power of two is returned unchanged, and 0 is
/// returned for 0.
size_t floorToPowerOfTwo(size_t Num) {
  // Smear the highest set bit into every lower bit position. The loop bound is
  // derived from the width of the type (assuming 8-bit bytes) so that the
  // shift count never reaches that width; a fixed `>> 32` would be undefined
  // behavior where size_t is only 32 bits wide.
  for (unsigned Shift = 1; Shift < sizeof(Num) * 8; Shift <<= 1)
    Num |= Num >> Shift;

  // Num is now 2^(k+1) - 1 (or 0). Removing the lower half leaves 2^k and,
  // unlike `(Num + 1) >> 1`, cannot wrap around when the top bit is set.
  return Num - (Num >> 1);
}
| 56 | + |
| 57 | +/// Find a suitable bucket |
| 58 | +int findBucket(size_t Size) { |
| 59 | + const size_t F = floorToPowerOfTwo(Size); |
| 60 | + |
| 61 | + DP("findBucket: Size %zu is floored to %zu.\n", Size, F); |
| 62 | + |
| 63 | + int L = 0, H = NumBuckets - 1; |
| 64 | + while (H - L > 1) { |
| 65 | + int M = (L + H) >> 1; |
| 66 | + if (BucketSize[M] == F) |
| 67 | + return M; |
| 68 | + if (BucketSize[M] > F) |
| 69 | + H = M - 1; |
| 70 | + else |
| 71 | + L = M; |
| 72 | + } |
| 73 | + |
| 74 | + assert(L >= 0 && L < NumBuckets && "L is out of range"); |
| 75 | + |
| 76 | + DP("findBucket: Size %zu goes to bucket %d\n", Size, L); |
| 77 | + |
| 78 | + return L; |
| 79 | +} |
| 80 | +} // namespace |
| 81 | + |
| 82 | +MemoryManagerTy::MemoryManagerTy(DeviceTy &Dev, size_t Threshold) |
| 83 | + : FreeLists(NumBuckets), FreeListLocks(NumBuckets), Device(Dev) { |
| 84 | + if (Threshold) |
| 85 | + SizeThreshold = Threshold; |
| 86 | +} |
| 87 | + |
| 88 | +MemoryManagerTy::~MemoryManagerTy() { |
| 89 | + // TODO: There is a little issue that target plugin is destroyed before this |
| 90 | + // object, therefore the memory free will not succeed. |
| 91 | + // Deallocate all memory in map |
| 92 | + for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end(); ++Itr) { |
| 93 | + assert(Itr->second.Ptr && "nullptr in map table"); |
| 94 | + deleteOnDevice(Itr->second.Ptr); |
| 95 | + } |
| 96 | +} |
| 97 | + |
| 98 | +void *MemoryManagerTy::allocateOnDevice(size_t Size, void *HstPtr) const { |
| 99 | + return Device.RTL->data_alloc(Device.RTLDeviceID, Size, HstPtr); |
| 100 | +} |
| 101 | + |
| 102 | +int MemoryManagerTy::deleteOnDevice(void *Ptr) const { |
| 103 | + return Device.RTL->data_delete(Device.RTLDeviceID, Ptr); |
| 104 | +} |
| 105 | + |
| 106 | +void *MemoryManagerTy::freeAndAllocate(size_t Size, void *HstPtr) { |
| 107 | + std::vector<void *> RemoveList; |
| 108 | + |
| 109 | + // Deallocate all memory in FreeList |
| 110 | + for (int I = 0; I < NumBuckets; ++I) { |
| 111 | + FreeListTy &List = FreeLists[I]; |
| 112 | + std::lock_guard<std::mutex> Lock(FreeListLocks[I]); |
| 113 | + if (List.empty()) |
| 114 | + continue; |
| 115 | + for (const NodeTy &N : List) { |
| 116 | + deleteOnDevice(N.Ptr); |
| 117 | + RemoveList.push_back(N.Ptr); |
| 118 | + } |
| 119 | + FreeLists[I].clear(); |
| 120 | + } |
| 121 | + |
| 122 | + // Remove all nodes in the map table which have been released |
| 123 | + if (!RemoveList.empty()) { |
| 124 | + std::lock_guard<std::mutex> LG(MapTableLock); |
| 125 | + for (void *P : RemoveList) |
| 126 | + PtrToNodeTable.erase(P); |
| 127 | + } |
| 128 | + |
| 129 | + // Try allocate memory again |
| 130 | + return allocateOnDevice(Size, HstPtr); |
| 131 | +} |
| 132 | + |
| 133 | +void *MemoryManagerTy::allocateOrFreeAndAllocateOnDevice(size_t Size, |
| 134 | + void *HstPtr) { |
| 135 | + void *TgtPtr = allocateOnDevice(Size, HstPtr); |
| 136 | + // We cannot get memory from the device. It might be due to OOM. Let's |
| 137 | + // free all memory in FreeLists and try again. |
| 138 | + if (TgtPtr == nullptr) { |
| 139 | + DP("Failed to get memory on device. Free all memory in FreeLists and " |
| 140 | + "try again.\n"); |
| 141 | + TgtPtr = freeAndAllocate(Size, HstPtr); |
| 142 | + } |
| 143 | + |
| 144 | +#ifdef OMPTARGET_DEBUG |
| 145 | + if (TgtPtr == nullptr) |
| 146 | + DP("Still cannot get memory on device probably because the device is " |
| 147 | + "OOM.\n"); |
| 148 | +#endif |
| 149 | + |
| 150 | + return TgtPtr; |
| 151 | +} |
| 152 | + |
| 153 | +void *MemoryManagerTy::allocate(size_t Size, void *HstPtr) { |
| 154 | + // If the size is zero, we will not bother the target device. Just return |
| 155 | + // nullptr directly. |
| 156 | + if (Size == 0) |
| 157 | + return nullptr; |
| 158 | + |
| 159 | + DP("MemoryManagerTy::allocate: size %zu with host pointer " DPxMOD ".\n", |
| 160 | + Size, DPxPTR(HstPtr)); |
| 161 | + |
| 162 | + // If the size is greater than the threshold, allocate it directly from |
| 163 | + // device. |
| 164 | + if (Size > SizeThreshold) { |
| 165 | + DP("%zu is greater than the threshold %zu. Allocate it directly from " |
| 166 | + "device\n", |
| 167 | + Size, SizeThreshold); |
| 168 | + void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr); |
| 169 | + |
| 170 | + DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr)); |
| 171 | + |
| 172 | + return TgtPtr; |
| 173 | + } |
| 174 | + |
| 175 | + NodeTy *NodePtr = nullptr; |
| 176 | + |
| 177 | + // Try to get a node from FreeList |
| 178 | + { |
| 179 | + const int B = findBucket(Size); |
| 180 | + FreeListTy &List = FreeLists[B]; |
| 181 | + |
| 182 | + NodeTy TempNode(Size, nullptr); |
| 183 | + std::lock_guard<std::mutex> LG(FreeListLocks[B]); |
| 184 | + FreeListTy::const_iterator Itr = List.find(TempNode); |
| 185 | + |
| 186 | + if (Itr != List.end()) { |
| 187 | + NodePtr = &Itr->get(); |
| 188 | + List.erase(Itr); |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | +#ifdef OMPTARGET_DEBUG |
| 193 | + if (NodePtr != nullptr) |
| 194 | + DP("Find one node " DPxMOD " in the bucket.\n", DPxPTR(NodePtr)); |
| 195 | +#endif |
| 196 | + |
| 197 | + // We cannot find a valid node in FreeLists. Let's allocate on device and |
| 198 | + // create a node for it. |
| 199 | + if (NodePtr == nullptr) { |
| 200 | + DP("Cannot find a node in the FreeLists. Allocate on device.\n"); |
| 201 | + // Allocate one on device |
| 202 | + void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr); |
| 203 | + |
| 204 | + if (TgtPtr == nullptr) |
| 205 | + return nullptr; |
| 206 | + |
| 207 | + // Create a new node and add it into the map table |
| 208 | + { |
| 209 | + std::lock_guard<std::mutex> Guard(MapTableLock); |
| 210 | + auto Itr = PtrToNodeTable.emplace(TgtPtr, NodeTy(Size, TgtPtr)); |
| 211 | + NodePtr = &Itr.first->second; |
| 212 | + } |
| 213 | + |
| 214 | + DP("Node address " DPxMOD ", target pointer " DPxMOD ", size %zu\n", |
| 215 | + DPxPTR(NodePtr), DPxPTR(TgtPtr), Size); |
| 216 | + } |
| 217 | + |
| 218 | + assert(NodePtr && "NodePtr should not be nullptr at this point"); |
| 219 | + |
| 220 | + return NodePtr->Ptr; |
| 221 | +} |
| 222 | + |
| 223 | +int MemoryManagerTy::free(void *TgtPtr) { |
| 224 | + DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr)); |
| 225 | + |
| 226 | + NodeTy *P = nullptr; |
| 227 | + |
| 228 | + // Look it up into the table |
| 229 | + { |
| 230 | + std::lock_guard<std::mutex> G(MapTableLock); |
| 231 | + auto Itr = PtrToNodeTable.find(TgtPtr); |
| 232 | + |
| 233 | + // We don't remove the node from the map table because the map does not |
| 234 | + // change. |
| 235 | + if (Itr != PtrToNodeTable.end()) |
| 236 | + P = &Itr->second; |
| 237 | + } |
| 238 | + |
| 239 | + // The memory is not managed by the manager |
| 240 | + if (P == nullptr) { |
| 241 | + DP("Cannot find its node. Delete it on device directly.\n"); |
| 242 | + return deleteOnDevice(TgtPtr); |
| 243 | + } |
| 244 | + |
| 245 | + // Insert the node to the free list |
| 246 | + const int B = findBucket(P->Size); |
| 247 | + |
| 248 | + DP("Found its node " DPxMOD ". Insert it to bucket %d.\n", DPxPTR(P), B); |
| 249 | + |
| 250 | + { |
| 251 | + std::lock_guard<std::mutex> G(FreeListLocks[B]); |
| 252 | + FreeLists[B].insert(*P); |
| 253 | + } |
| 254 | + |
| 255 | + return OFFLOAD_SUCCESS; |
| 256 | +} |
0 commit comments