
Commit 9292d91

bing-ma authored and David Salinas committed
SWDEV-478362 - LeakSanitizer is not working properly in rocm-14468
DeviceAllocator has a dependency on the HSA runtime, and in this test case DeviceAllocator is called by LSAN after the HSA runtime has already been unloaded. Fortunately, in this case LSAN always calls DeviceAllocator with a ptr equal to the beginning of one of the device memory chunks, so DeviceAllocator can complete the call without calling into the HSA runtime. That is the fix.

Change-Id: I14e6ddce1f047d4152fe0da26fbbfa754d0eb176
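For context, here is a minimal, self-contained sketch of the fallback pattern the fix uses. The names (Header, GetPointerInfo, dev_runtime_unloaded_, page_size_) mirror the diff below, but the runtime query is a stub standing in for DeviceMemFuncs::GetPointerInfo(); this is an illustration of the idea, not the real allocator.

// Sketch only: stubbed device-runtime query, hypothetical addresses.
#include <cstdint>
#include <cstdio>

using uptr = uintptr_t;

struct Header {
  uptr map_beg;
  uptr map_size;
};

// Stub for the device-runtime query: fails once the runtime is "unloaded".
static bool g_runtime_loaded = true;
static bool GetPointerInfo(uptr p, Header *h) {
  if (!g_runtime_loaded) return false;
  h->map_beg = p;
  h->map_size = 4 * 4096;  // pretend the chunk spans four pages
  return true;
}

struct DeviceAllocatorSketch {
  uptr page_size_ = 4096;
  mutable bool dev_runtime_unloaded_ = false;

  // Mirrors the new GetHeader(): 'chunk' is known to be the start of a
  // device memory chunk, so when the runtime query fails we can still
  // synthesize a header; map_size is pessimistically clamped to one page.
  Header *GetHeader(uptr chunk, Header *h) const {
    if (dev_runtime_unloaded_ || !GetPointerInfo(chunk, h)) {
      h->map_beg = chunk;
      h->map_size = page_size_;
      dev_runtime_unloaded_ = true;
    }
    return h;
  }
};

int main() {
  DeviceAllocatorSketch a;
  Header h;
  a.GetHeader(0x10000, &h);
  std::printf("runtime loaded:   map_size = %zu\n", (size_t)h.map_size);

  g_runtime_loaded = false;  // simulate the HSA runtime being unloaded
  a.GetHeader(0x10000, &h);
  std::printf("runtime unloaded: map_size = %zu\n", (size_t)h.map_size);
  return 0;
}

As the diff shows, callers guaranteed to pass a chunk-start pointer (Deallocate, TotalMemoryUsed) take this fallback path, while lookups that may receive an arbitrary pointer go through GetHeaderAnyPointer(), which still returns nullptr on failure.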
1 parent de7d401 commit 9292d91

File tree: 1 file changed, +36 −12 lines


compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h

Lines changed: 36 additions & 12 deletions
@@ -111,7 +111,7 @@ class DeviceAllocatorT {
     Header header, *h;
     {
       SpinMutexLock l(&mutex_);
-      uptr idx, end;
+      uptr idx;
       uptr p_ = reinterpret_cast<uptr>(p);
       EnsureSortedChunks();  // Avoid doing the sort while iterating.
       for (idx = 0; idx < n_chunks_; idx++) {
@@ -121,7 +121,7 @@ class DeviceAllocatorT {
       CHECK_EQ(chunks_[idx], p_);
       CHECK_LT(idx, n_chunks_);
       h = GetHeader(chunks_[idx], &header);
-      CHECK_NE(h, nullptr);
+      CHECK(!dev_runtime_unloaded_);
       chunks_[idx] = chunks_[--n_chunks_];
       chunks_sorted_ = false;
       stats.n_frees++;
@@ -136,10 +136,10 @@ class DeviceAllocatorT {
   uptr TotalMemoryUsed() {
     Header header;
     SpinMutexLock l(&mutex_);
-    uptr res = 0, beg, end;
+    uptr res = 0;
     for (uptr i = 0; i < n_chunks_; i++) {
       Header *h = GetHeader(chunks_[i], &header);
-      CHECK_NE(h, nullptr);
+      CHECK(!dev_runtime_unloaded_);
       res += RoundUpMapSize(h->map_size);
     }
     return res;
@@ -152,14 +152,14 @@ class DeviceAllocatorT {
   uptr GetActuallyAllocatedSize(void *p) {
     Header header;
     uptr p_ = reinterpret_cast<uptr>(p);
-    Header *h = GetHeader(p_, &header);
+    Header *h = GetHeaderAnyPointer(p_, &header);
     return h ? h->map_size : 0;
   }

   void *GetMetaData(const void *p) {
     Header header;
     uptr p_ = reinterpret_cast<uptr>(p);
-    Header *h = GetHeader(p_, &header);
+    Header *h = GetHeaderAnyPointer(p_, &header);
     return h ? reinterpret_cast<void *>(h->map_beg + h->map_size -
                                         kMetadataSize_)
              : nullptr;
@@ -183,12 +183,13 @@ class DeviceAllocatorT {
       return nullptr;
     if (p != nearest_chunk) {
       Header *h = GetHeader(nearest_chunk, &header);
-      CHECK_NE(h, nullptr);
       CHECK_GE(nearest_chunk, h->map_beg);
       CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
       CHECK_LE(nearest_chunk, p);
-      if (h->map_beg + h->map_size <= p)
+      if (h->map_beg + h->map_size <= p) {
+        CHECK(!dev_runtime_unloaded_);
         return nullptr;
+      }
     }
     return GetUser(nearest_chunk);
   }
@@ -211,11 +212,17 @@ class DeviceAllocatorT {
     EnsureSortedChunks();
     Header header, *h;
     h = GetHeader(chunks_[n - 1], &header);
-    CHECK_NE(h, nullptr);
     uptr min_mmap_ = chunks_[0];
     uptr max_mmap_ = chunks_[n - 1] + h->map_size;
-    if (p < min_mmap_ || p >= max_mmap_)
+    if (p < min_mmap_)
+      return nullptr;
+    if (p >= max_mmap_) {
+      // TODO (bingma): If dev_runtime_unloaded_ = true, map_size is limited
+      // to one page and we might miss a valid 'ptr'. If we hit cases where
+      // this kind of miss is unacceptable, we will need to implement a full
+      // solution with higher cost
       return nullptr;
+    }
     uptr beg = 0, end = n - 1;
     // This loop is a log(n) lower_bound. It does not check for the exact match
     // to avoid expensive cache-thrashing loads.
@@ -237,8 +244,12 @@ class DeviceAllocatorT {
     if (p != chunks_[beg]) {
       h = GetHeader(chunks_[beg], &header);
       CHECK_NE(h, nullptr);
-      if (h->map_beg + h->map_size <= p || p < h->map_beg)
+      if (p < h->map_beg)
         return nullptr;
+      if (h->map_beg + h->map_size <= p) {
+        // TODO (bingma): See above TODO in this function
+        return nullptr;
+      }
     }
     return GetUser(chunks_[beg]);
   }
@@ -288,11 +299,23 @@ class DeviceAllocatorT {

   typedef DevivePointerInfo Header;

-  Header *GetHeader(uptr p, Header* h) const {
+  Header *GetHeaderAnyPointer(uptr p, Header* h) const {
     CHECK(IsAligned(p, page_size_));
     return DeviceMemFuncs::GetPointerInfo(p, h) ? h : nullptr;
   }

+  Header* GetHeader(uptr chunk, Header* h) const {
+    if (dev_runtime_unloaded_ || !DeviceMemFuncs::GetPointerInfo(chunk, h)) {
+      // Device allocator has dependency on device runtime. If device runtime
+      // is unloaded, GetPointerInfo() will fail. For such case, we can still
+      // return a valid value for map_beg, map_size will be limited to one page
+      h->map_beg = chunk;
+      h->map_size = page_size_;
+      dev_runtime_unloaded_ = true;
+    }
+    return h;
+  }
+
   void *GetUser(const uptr ptr) const {
     return reinterpret_cast<void *>(ptr);
   }
@@ -303,6 +326,7 @@ class DeviceAllocatorT {

   bool enabled_;
   bool mem_funcs_inited_;
+  mutable bool dev_runtime_unloaded_;
   // Maximum of mem_funcs_init_count_ is 2:
   // 1. The initial init called from Init(...), it could fail if
   //    libhsa-runtime64.so is dynamically loaded with dlopen()
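The cost of this fallback is called out in the TODOs above: once dev_runtime_unloaded_ is set, map_size is clamped to one page, so pointers into later pages of a chunk can be missed by the block-begin lookup. A hypothetical walk-through with made-up addresses (not taken from the source):

// Illustrates the miss described in the TODOs: a chunk really spans four
// 4 KiB pages, but after the runtime is unloaded the synthesized header
// claims one page, shrinking the effective max_mmap_ bound.
#include <cstdint>
#include <cstdio>

using uptr = uintptr_t;

int main() {
  const uptr kPage = 4096;
  const uptr chunk_beg = 0x10000;     // start of the last sorted chunk
  const uptr real_size = 4 * kPage;   // true extent of the mapping
  const uptr degraded_size = kPage;   // what GetHeader reports post-unload

  uptr p = chunk_beg + 2 * kPage;     // valid pointer into the third page

  std::printf("runtime loaded:   %s\n",
              p < chunk_beg + real_size ? "found" : "missed");      // found
  std::printf("runtime unloaded: %s\n",
              p < chunk_beg + degraded_size ? "found" : "missed");  // missed
  return 0;
}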
