Skip to content

Commit f7218e1

Browse files
[amdgpu] Implement D2D memcpy as HSA call
1 parent 2973feb commit f7218e1

File tree

1 file changed

+32
-7
lines changed
  • openmp/libomptarget/plugins-nextgen/amdgpu/src

1 file changed

+32
-7
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2250,14 +2250,37 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
22502250
PinnedMemoryManager);
22512251
}
22522252

2253-
/// Exchange data between two devices within the plugin. This function is not
2254-
/// supported in this plugin.
2253+
/// Exchange data between two devices within the plugin.
22552254
Error dataExchangeImpl(const void *SrcPtr, GenericDeviceTy &DstGenericDevice,
22562255
void *DstPtr, int64_t Size,
22572256
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
2258-
// This function should never be called because the function
2259-
// AMDGPUPluginTy::isDataExchangable() returns false.
2260-
return Plugin::error("dataExchangeImpl not supported");
2257+
AMDGPUDeviceTy &DstDevice = static_cast<AMDGPUDeviceTy &>(DstGenericDevice);
2258+
2259+
hsa_agent_t SrcAgent = getAgent();
2260+
hsa_agent_t DstAgent = DstDevice.getAgent();
2261+
2262+
AMDGPUSignalTy Signal;
2263+
if (auto Err = Signal.init())
2264+
return Err;
2265+
2266+
// The agents need to have access to the corresponding memory.
2267+
// This is presently only true if the pointers were originally
2268+
// allocated by this runtime or if the caller made the appropriate
2269+
// access calls.
2270+
hsa_status_t Status = hsa_amd_memory_async_copy(
2271+
DstPtr, DstAgent, SrcPtr, SrcAgent, (Size > 0) ? (size_t)Size : 0, 0,
2272+
nullptr, Signal.get());
2273+
if (auto Err =
2274+
Plugin::check(Status, "Error in D2D hsa_amd_memory_async_copy: %s"))
2275+
return Err;
2276+
2277+
if (auto Err = Signal.wait(getStreamBusyWaitMicroseconds()))
2278+
return Err;
2279+
2280+
if (auto Err = Signal.deinit())
2281+
return Err;
2282+
2283+
return Plugin::success();
22612284
}
22622285

22632286
/// Initialize the async info for interoperability purposes.
@@ -2899,7 +2922,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
28992922

29002923
/// This plugin supports exchanging data between any two of its devices.
29012924
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
2902-
return false;
2925+
return true;
29032926
}
29042927

29052928
/// Get the host device instance.
@@ -3174,9 +3197,11 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
31743197
return nullptr;
31753198
}
31763199

3177-
if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED)) {
3200+
if (Alloc) {
31783201
auto &KernelAgents = Plugin::get<AMDGPUPluginTy>().getKernelAgents();
31793202

3203+
// Inherently necessary for host or shared allocations.
3204+
// Also enabled for device memory to allow device-to-device memcpy.
31803205
// Enable all kernel agents to access the buffer.
31813206
if (auto Err = MemoryPool->enableAccess(Alloc, Size, KernelAgents)) {
31823207
REPORT("%s\n", toString(std::move(Err)).data());

0 commit comments

Comments
 (0)