Skip to content

Commit 18ce3d3

Browse files
committed
[OpenMP][Offloading] Fix data race in data mapping by using two locks
This patch tries to partially fix one of the two data race issues reported in [1] by introducing a per-entry mutex. Additional discussion can also be found in D104418, which will also be refined to fix another data race problem. Here is how it works. Like before, `DataMapMtx` is still being used for mapping table lookup and update. In any case, we will get a table entry. If we need to make a data transfer (update the data on the device), we need to lock the entry right before releasing `DataMapMtx`, and the issue of data transfer should be after releasing `DataMapMtx`, and the entry is unlocked afterwards. This can guarantee that: 1) issue of data movement is not in critical region, which will not affect performance too much, and also will not affect other threads that don't touch the same entry; 2) if another thread accesses the same entry, the state of data movement is consistent (which requires that a thread must first get the update lock before getting data movement information). For a target that doesn't support async data transfer, issue of data movement is data transfer. This two-lock design can potentially improve concurrency compared with the design that guards data movement with `DataMapMtx` as well. For a target that supports async data movement, we could simply attach the event between the issue of data movement and unlock the entry. For a thread that wants to get the event, it must first get the lock. This can also get rid of the busy wait until the event pointer is valid. Reference: [1] https://bugs.llvm.org/show_bug.cgi?id=49940 Reviewed By: grokos Differential Revision: https://reviews.llvm.org/D104555
1 parent b22bf7e commit 18ce3d3

File tree

3 files changed

+95
-53
lines changed

3 files changed

+95
-53
lines changed

openmp/libomptarget/src/device.cpp

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -165,26 +165,18 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
165165
return lr;
166166
}
167167

168-
// Used by targetDataBegin
169-
// Return a struct containing target pointer begin (where the data will be
170-
// moved).
171-
// Allocate memory if this is the first occurrence of this mapping.
172-
// Increment the reference counter.
173-
// If the target pointer is NULL, then either data allocation failed or the user
174-
// tried to do an illegal mapping.
175-
// The returned struct also returns an iterator to the map table entry
176-
// corresponding to the host pointer (if exists), and two flags indicating
177-
// whether the entry is just created, and if the target pointer included is
178-
// actually a host pointer (when unified memory enabled).
179168
TargetPointerResultTy
180-
DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
181-
map_var_info_t HstPtrName, bool IsImplicit,
182-
bool UpdateRefCount, bool HasCloseModifier,
183-
bool HasPresentModifier) {
184-
void *TargetPointer = NULL;
185-
bool IsNew = false;
169+
DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
170+
map_var_info_t HstPtrName, MoveDataStateTy MoveData,
171+
bool IsImplicit, bool UpdateRefCount,
172+
bool HasCloseModifier, bool HasPresentModifier,
173+
AsyncInfoTy &AsyncInfo) {
174+
void *TargetPointer = nullptr;
186175
bool IsHostPtr = false;
176+
bool IsNew = false;
177+
187178
DataMapMtx.lock();
179+
188180
LookupResult LR = lookupMapping(HstPtrBegin, Size);
189181
auto Entry = LR.Entry;
190182

@@ -263,7 +255,38 @@ DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
263255
TargetPointer = (void *)Ptr;
264256
}
265257

266-
DataMapMtx.unlock();
258+
if (IsNew && MoveData == MoveDataStateTy::UNKNOWN)
259+
MoveData = MoveDataStateTy::REQUIRED;
260+
261+
// If the target pointer is valid, and we need to transfer data, issue the
262+
// data transfer.
263+
if (TargetPointer && (MoveData == MoveDataStateTy::REQUIRED)) {
264+
// Lock the entry before releasing the mapping table lock such that another
265+
// thread that could issue data movement will get the right result.
266+
Entry->lock();
267+
// Release the mapping table lock right after the entry is locked.
268+
DataMapMtx.unlock();
269+
270+
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
271+
DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));
272+
273+
int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
274+
275+
// Unlock the entry immediately after the data movement is issued.
276+
Entry->unlock();
277+
278+
if (Ret != OFFLOAD_SUCCESS) {
279+
REPORT("Copying data to device failed.\n");
280+
// We will also return nullptr if the data movement fails because that
281+
// pointer points to a corrupted memory region so it doesn't make any
282+
// sense to continue to use it.
283+
TargetPointer = nullptr;
284+
}
285+
} else {
286+
// Release the mapping table lock directly.
287+
DataMapMtx.unlock();
288+
}
289+
267290
return {{IsNew, IsHostPtr}, Entry, TargetPointer};
268291
}
269292

openmp/libomptarget/src/device.h

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,20 @@ struct HostDataToTargetTy {
5353
/// use mutable to allow modification via std::set iterator which is const.
5454
mutable uint64_t RefCount;
5555
static const uint64_t INFRefCount = ~(uint64_t)0;
56+
/// This mutex will be locked when data movement is issued. For targets that
57+
/// doesn't support async data movement, this mutex can guarantee that after
58+
/// it is released, memory region on the target is update to date. For targets
59+
/// that support async data movement, this can guarantee that data movement
60+
/// has been issued. This mutex *must* be locked right before releasing the
61+
/// mapping table lock.
62+
std::shared_ptr<std::mutex> UpdateMtx;
5663

5764
public:
5865
HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB,
5966
map_var_info_t Name = nullptr, bool IsINF = false)
6067
: HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
61-
TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1) {}
68+
TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1),
69+
UpdateMtx(std::make_shared<std::mutex>()) {}
6270

6371
uint64_t getRefCount() const { return RefCount; }
6472

@@ -100,6 +108,10 @@ struct HostDataToTargetTy {
100108
return !isRefCountInf();
101109
return getRefCount() == 1;
102110
}
111+
112+
void lock() const { UpdateMtx->lock(); }
113+
114+
void unlock() const { UpdateMtx->unlock(); }
103115
};
104116

105117
typedef uintptr_t HstPtrBeginTy;
@@ -161,6 +173,8 @@ struct PendingCtorDtorListsTy {
161173
typedef std::map<__tgt_bin_desc *, PendingCtorDtorListsTy>
162174
PendingCtorsDtorsPerLibrary;
163175

176+
enum class MoveDataStateTy : uint32_t { REQUIRED, NONE, UNKNOWN };
177+
164178
struct DeviceTy {
165179
int32_t DeviceID;
166180
RTLInfoTy *RTL;
@@ -195,12 +209,21 @@ struct DeviceTy {
195209
bool isDataExchangable(const DeviceTy &DstDevice);
196210

197211
LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
198-
TargetPointerResultTy getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
199-
int64_t Size,
200-
map_var_info_t HstPtrName,
201-
bool IsImplicit, bool UpdateRefCount,
202-
bool HasCloseModifier,
203-
bool HasPresentModifier);
212+
/// Get the target pointer based on host pointer begin and base. If the
213+
/// mapping already exists, the target pointer will be returned directly. In
214+
/// addition, if \p MoveData is true, the memory region pointed by \p
215+
/// HstPtrBegin of size \p Size will also be transferred to the device. If the
216+
/// mapping doesn't exist, and if unified memory is not enabled, a new mapping
217+
/// will be created and the data will also be transferred accordingly. nullptr
218+
/// will be returned because of any of following reasons:
219+
/// - Data allocation failed;
220+
/// - The user tried to do an illegal mapping;
221+
/// - Data transfer issue fails.
222+
TargetPointerResultTy
223+
getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
224+
map_var_info_t HstPtrName, MoveDataStateTy MoveData,
225+
bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
226+
bool HasPresentModifier, AsyncInfoTy &AsyncInfo);
204227
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
205228
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
206229
bool UpdateRefCount, bool &IsHostPtr,

openmp/libomptarget/src/omptarget.cpp

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -487,9 +487,10 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
487487
// entry for a global that might not already be allocated by the time the
488488
// PTR_AND_OBJ entry is handled below, and so the allocation might fail
489489
// when HasPresentModifier.
490-
Pointer_TPR = Device.getOrAllocTgtPtr(
491-
HstPtrBase, HstPtrBase, sizeof(void *), nullptr, IsImplicit,
492-
UpdateRef, HasCloseModifier, HasPresentModifier);
490+
Pointer_TPR = Device.getTargetPointer(
491+
HstPtrBase, HstPtrBase, sizeof(void *), nullptr,
492+
MoveDataStateTy::NONE, IsImplicit, UpdateRef, HasCloseModifier,
493+
HasPresentModifier, AsyncInfo);
493494
PointerTgtPtrBegin = Pointer_TPR.TargetPointer;
494495
IsHostPtr = Pointer_TPR.Flags.IsHostPointer;
495496
if (!PointerTgtPtrBegin) {
@@ -511,9 +512,17 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
511512
(!FromMapper || i != 0); // subsequently update ref count of pointee
512513
}
513514

514-
auto TPR = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, data_size,
515-
HstPtrName, IsImplicit, UpdateRef,
516-
HasCloseModifier, HasPresentModifier);
515+
MoveDataStateTy MoveData = MoveDataStateTy::NONE;
516+
const bool UseUSM = PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
517+
const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO;
518+
const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
519+
if (HasFlagTo && (!UseUSM || HasCloseModifier))
520+
MoveData = HasFlagAlways ? MoveDataStateTy::REQUIRED
521+
: MoveData = MoveDataStateTy::UNKNOWN;
522+
523+
auto TPR = Device.getTargetPointer(
524+
HstPtrBegin, HstPtrBase, data_size, HstPtrName, MoveData, IsImplicit,
525+
UpdateRef, HasCloseModifier, HasPresentModifier, AsyncInfo);
517526
void *TgtPtrBegin = TPR.TargetPointer;
518527
IsHostPtr = TPR.Flags.IsHostPointer;
519528
// If data_size==0, then the argument could be a zero-length pointer to
@@ -535,26 +544,6 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
535544
args_base[i] = TgtPtrBase;
536545
}
537546

538-
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
539-
bool copy = false;
540-
if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
541-
HasCloseModifier) {
542-
if (TPR.Flags.IsNewEntry || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS))
543-
copy = true;
544-
}
545-
546-
if (copy && !IsHostPtr) {
547-
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
548-
data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
549-
int rt =
550-
Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo);
551-
if (rt != OFFLOAD_SUCCESS) {
552-
REPORT("Copying data to device failed.\n");
553-
return OFFLOAD_FAIL;
554-
}
555-
}
556-
}
557-
558547
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
559548
// Check whether we need to update the pointer on the device
560549
bool UpdateDevPtr = false;
@@ -582,20 +571,27 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
582571
HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
583572
UpdateDevPtr = true;
584573
}
585-
Device.ShadowMtx.unlock();
586574

587575
if (UpdateDevPtr) {
576+
Pointer_TPR.MapTableEntry->lock();
577+
Device.ShadowMtx.unlock();
578+
588579
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
589580
DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
581+
590582
void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation();
591583
TgtPtrBase = ExpectedTgtPtrBase;
584+
592585
int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
593586
sizeof(void *), AsyncInfo);
587+
Pointer_TPR.MapTableEntry->unlock();
588+
594589
if (rt != OFFLOAD_SUCCESS) {
595590
REPORT("Copying data to device failed.\n");
596591
return OFFLOAD_FAIL;
597592
}
598-
}
593+
} else
594+
Device.ShadowMtx.unlock();
599595
}
600596
}
601597

0 commit comments

Comments
 (0)