Skip to content

Commit 6483c48

Browse files
carlobertolli authored and ronlieb committed
On APUs, XNACK-Enabled, prefault D2H host memory.
Based on the same results as: http://gerrit-git.amd.com/c/lightning/ec/llvm-project/+/1028589

Change-Id: I825915dd5435892ab21827bcb1ce029e6f104b23
1 parent 4f61320 commit 6483c48

File tree

1 file changed

+9
-0
lines changed
  • openmp/libomptarget/plugins-nextgen/amdgpu/src

1 file changed

+9
-0
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3119,6 +3119,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
31193119
AMDGPUStreamTy *Stream = nullptr;
31203120
void *PinnedPtr = nullptr;
31213121

3122+
// Prefault GPU page table in XNACK-Enabled case, on APUs,
3123+
// under the assumption that explicitly allocated memory
3124+
// will be fully accessed and that on-the-fly individual page faults
3125+
// perform worse than whole memory faulting.
3126+
if (OMPX_APUPrefaultMemcopy && Size >= OMPX_APUPrefaultMemcopySize &&
3127+
IsAPU && IsXnackEnabled)
3128+
if (auto Err = prepopulatePageTableImpl(const_cast<void *>(HstPtr), Size))
3129+
return Err;
3130+
31223131
// Use one-step asynchronous operation when host memory is already pinned.
31233132
if (void *PinnedPtr =
31243133
PinnedAllocs.getDeviceAccessiblePtrFromPinnedBuffer(HstPtr)) {

0 commit comments

Comments
 (0)