@@ -111,7 +111,7 @@ class DeviceAllocatorT {
     Header header, *h;
     {
       SpinMutexLock l(&mutex_);
-      uptr idx, end;
+      uptr idx;
       uptr p_ = reinterpret_cast<uptr>(p);
       EnsureSortedChunks();  // Avoid doing the sort while iterating.
       for (idx = 0; idx < n_chunks_; idx++) {
@@ -121,7 +121,7 @@ class DeviceAllocatorT {
       CHECK_EQ(chunks_[idx], p_);
       CHECK_LT(idx, n_chunks_);
       h = GetHeader(chunks_[idx], &header);
-      CHECK_NE(h, nullptr);
+      CHECK(!dev_runtime_unloaded_);
       chunks_[idx] = chunks_[--n_chunks_];
       chunks_sorted_ = false;
       stats.n_frees++;
@@ -136,10 +136,10 @@ class DeviceAllocatorT {
   uptr TotalMemoryUsed() {
     Header header;
     SpinMutexLock l(&mutex_);
-    uptr res = 0, beg, end;
+    uptr res = 0;
     for (uptr i = 0; i < n_chunks_; i++) {
       Header *h = GetHeader(chunks_[i], &header);
-      CHECK_NE(h, nullptr);
+      CHECK(!dev_runtime_unloaded_);
       res += RoundUpMapSize(h->map_size);
     }
     return res;
@@ -152,14 +152,14 @@ class DeviceAllocatorT {
   uptr GetActuallyAllocatedSize(void *p) {
     Header header;
     uptr p_ = reinterpret_cast<uptr>(p);
-    Header *h = GetHeader(p_, &header);
+    Header *h = GetHeaderAnyPointer(p_, &header);
     return h ? h->map_size : 0;
   }

   void *GetMetaData(const void *p) {
     Header header;
     uptr p_ = reinterpret_cast<uptr>(p);
-    Header *h = GetHeader(p_, &header);
+    Header *h = GetHeaderAnyPointer(p_, &header);
     return h ? reinterpret_cast<void *>(h->map_beg + h->map_size -
                                         kMetadataSize_)
              : nullptr;
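
Note: GetActuallyAllocatedSize() and GetMetaData() take arbitrary user pointers, so they move to GetHeaderAnyPointer(), which keeps the old may-return-null contract; GetHeader() is reserved for known chunk starts (see its new definition further down). GetMetaData() places the per-chunk metadata in the last kMetadataSize_ bytes of the mapping. A minimal standalone sketch of that address computation, with made-up sizes and kMetadataSize_ renamed to a free constant:

// Sketch of GetMetaData()'s layout: metadata sits at the end of the mapping.
// All concrete values are invented for illustration.
#include <cassert>
#include <cstdint>

using uptr = uintptr_t;

struct Header {
  uptr map_beg;   // start of the mapping backing the chunk
  uptr map_size;  // total mapping size, metadata included
};

constexpr uptr kMetadataSize = 64;  // hypothetical metadata size

// [ map_beg ...... user data ...... | metadata ) map_beg + map_size
uptr MetadataAddr(const Header &h) {
  return h.map_beg + h.map_size - kMetadataSize;
}

int main() {
  Header h{0x700000000000, 0x10000};          // 64 KiB mapping, made-up base
  assert(MetadataAddr(h) == 0x70000000FFC0);  // 0x10000 - 64 bytes from base
}
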
@@ -183,12 +183,13 @@ class DeviceAllocatorT {
       return nullptr;
     if (p != nearest_chunk) {
       Header *h = GetHeader(nearest_chunk, &header);
-      CHECK_NE(h, nullptr);
       CHECK_GE(nearest_chunk, h->map_beg);
       CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
       CHECK_LE(nearest_chunk, p);
-      if (h->map_beg + h->map_size <= p)
+      if (h->map_beg + h->map_size <= p) {
+        CHECK(!dev_runtime_unloaded_);
         return nullptr;
+      }
     }
     return GetUser(nearest_chunk);
   }
@@ -211,11 +212,17 @@ class DeviceAllocatorT {
     EnsureSortedChunks();
     Header header, *h;
     h = GetHeader(chunks_[n - 1], &header);
-    CHECK_NE(h, nullptr);
     uptr min_mmap_ = chunks_[0];
     uptr max_mmap_ = chunks_[n - 1] + h->map_size;
-    if (p < min_mmap_ || p >= max_mmap_)
+    if (p < min_mmap_)
+      return nullptr;
+    if (p >= max_mmap_) {
+      // TODO(bingma): If dev_runtime_unloaded_ = true, map_size is limited
+      // to one page and we might miss a valid 'ptr'. If we hit cases where
+      // this kind of miss is unacceptable, we will need to implement a full
+      // solution with higher cost
       return nullptr;
+    }
     uptr beg = 0, end = n - 1;
     // This loop is a log(n) lower_bound. It does not check for the exact match
     // to avoid expensive cache-thrashing loads.
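
The miss described in the TODO is easiest to see with concrete numbers. A small self-contained illustration, with all addresses and sizes made up:

// After the runtime unloads, GetHeader() reports map_size = one page, so
// max_mmap_ is computed from a 4 KiB view of what is really a 64 KiB chunk.
#include <cstdint>
#include <cstdio>

using uptr = uintptr_t;

int main() {
  const uptr last_chunk = 0x7f0000000000;  // start of the last (sorted) chunk
  const uptr real_map_size = 0x10000;      // 64 KiB: what the runtime reports
  const uptr fallback_size = 0x1000;       // 4 KiB: one page, after unload

  const uptr p = last_chunk + 0x2000;      // valid interior pointer

  uptr max_mmap = last_chunk + fallback_size;  // from the fallback header
  if (p >= max_mmap)
    printf("missed: 0x%llx treated as outside the allocator\n",
           (unsigned long long)p);

  max_mmap = last_chunk + real_map_size;       // with the real header
  if (p < max_mmap)
    printf("with the real map_size, 0x%llx would be found\n",
           (unsigned long long)p);
}
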
@@ -237,8 +244,12 @@ class DeviceAllocatorT {
     if (p != chunks_[beg]) {
       h = GetHeader(chunks_[beg], &header);
       CHECK_NE(h, nullptr);
-      if (h->map_beg + h->map_size <= p || p < h->map_beg)
+      if (p < h->map_beg)
         return nullptr;
+      if (h->map_beg + h->map_size <= p) {
+        // TODO(bingma): See above TODO in this function
+        return nullptr;
+      }
     }
     return GetUser(chunks_[beg]);
   }
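
The search loop elided between these two hunks is, per its comment, a log(n) lower_bound that never tests for equality inside the loop; the exact-match handling happens afterwards, in the `if (p != chunks_[beg])` block above. A minimal sketch of that pattern, independent of the allocator:

// Branch-light lower_bound over a sorted array: the loop only narrows
// [beg, end]; the caller compares chunks[beg] against p afterwards.
#include <cstdint>

using uptr = uintptr_t;

// Returns beg such that chunks[beg] is the last element <= p, assuming
// chunks is sorted, n >= 1, and chunks[0] <= p (checked by the caller).
uptr LowerBoundIndex(const uptr *chunks, uptr n, uptr p) {
  uptr beg = 0, end = n - 1;
  while (beg < end) {
    uptr mid = (beg + end + 1) / 2;  // round up so the loop always progresses
    if (chunks[mid] <= p)
      beg = mid;       // candidate moves right
    else
      end = mid - 1;   // everything from mid up is too large
  }
  return beg;
}
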
@@ -288,11 +299,23 @@ class DeviceAllocatorT {

   typedef DevivePointerInfo Header;

-  Header *GetHeader(uptr p, Header* h) const {
+  Header *GetHeaderAnyPointer(uptr p, Header* h) const {
     CHECK(IsAligned(p, page_size_));
     return DeviceMemFuncs::GetPointerInfo(p, h) ? h : nullptr;
   }

+  Header *GetHeader(uptr chunk, Header* h) const {
+    if (dev_runtime_unloaded_ || !DeviceMemFuncs::GetPointerInfo(chunk, h)) {
+      // The device allocator depends on the device runtime. If the runtime
+      // has been unloaded, GetPointerInfo() will fail. In that case we can
+      // still return a valid map_beg; map_size is limited to one page.
+      h->map_beg = chunk;
+      h->map_size = page_size_;
+      dev_runtime_unloaded_ = true;
+    }
+    return h;
+  }
+
   void *GetUser(const uptr ptr) const {
     return reinterpret_cast<void *>(ptr);
   }
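
This split is the heart of the patch: GetHeaderAnyPointer() keeps the old may-return-null contract for arbitrary pointers, while GetHeader() assumes its argument is a known chunk start and never returns null, degrading to a synthesized one-page header once the runtime is gone. A standalone sketch of that contract, with GetPointerInfo stubbed in place of the real DeviceMemFuncs interface:

// Sketch of the fallback contract; GetPointerInfo is a stand-in that fails
// once the device runtime is unloaded, the exact failure the patch tolerates.
#include <cstdint>

using uptr = uintptr_t;
constexpr uptr kPageSize = 0x1000;

struct Header { uptr map_beg, map_size; };

bool GetPointerInfo(uptr p, Header *h);  // stand-in, defined elsewhere

struct AllocatorSketch {
  mutable bool dev_runtime_unloaded_ = false;

  // Never returns null: a known chunk start can always be described by a
  // header covering at least one page, even without the runtime.
  Header *GetHeader(uptr chunk, Header *h) const {
    if (dev_runtime_unloaded_ || !GetPointerInfo(chunk, h)) {
      h->map_beg = chunk;            // the chunk start itself is trusted
      h->map_size = kPageSize;       // only one page can be assumed
      dev_runtime_unloaded_ = true;  // remember; skip future runtime calls
    }
    return h;
  }
};

Callers that need the real map_size (Deallocate(), TotalMemoryUsed()) now assert CHECK(!dev_runtime_unloaded_) after the call instead of CHECK_NE(h, nullptr), since a null return is no longer possible.
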
@@ -303,6 +326,7 @@ class DeviceAllocatorT {

   bool enabled_;
   bool mem_funcs_inited_;
+  mutable bool dev_runtime_unloaded_;
   // Maximum of mem_funcs_init_count_ is 2:
   // 1. The initial init called from Init(...), it could fail if
   //    libhsa-runtime64.so is dynamically loaded with dlopen()
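
The new member is `mutable` because GetHeader() is a const method: the flag caches an observation about the external runtime rather than logical allocator state, which is the conventional use of the keyword. A minimal sketch of the idiom:

// A const member function may only modify members declared mutable; here the
// flag records an external condition, not logical state of the object.
struct Example {
  mutable bool observed_failure_ = false;

  int Query() const {
    // ... on failure of some external call:
    observed_failure_ = true;  // legal only because the member is mutable
    return 0;
  }
};
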