@@ -2250,14 +2250,37 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2250
2250
PinnedMemoryManager);
2251
2251
}
2252
2252
2253
- // / Exchange data between two devices within the plugin. This function is not
2254
- // / supported in this plugin.
2253
+ // / Exchange data between two devices within the plugin.
// / Submits one hsa_amd_memory_async_copy from SrcPtr to DstPtr, using this
// / device's agent as the source agent and DstGenericDevice's agent as the
// / destination agent, then waits on the copy's completion signal before
// / returning. AsyncInfoWrapper is not referenced anywhere in the body.
// / Returns the first error reported by signal init, the copy submission, the
// / wait, or signal deinit; Plugin::success() otherwise.
2255
2254
Error dataExchangeImpl (const void *SrcPtr, GenericDeviceTy &DstGenericDevice,
2256
2255
void *DstPtr, int64_t Size,
2257
2256
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
2258
- // This function should never be called because the function
2259
- // AMDGPUPluginTy::isDataExchangable() returns false.
2260
- return Plugin::error (" dataExchangeImpl not supported" );
// Unchecked downcast of the destination device.
// NOTE(review): assumes every caller passes an AMDGPUDeviceTy — confirm.
2257
+ AMDGPUDeviceTy &DstDevice = static_cast <AMDGPUDeviceTy &>(DstGenericDevice);
2258
+
2259
+ hsa_agent_t SrcAgent = getAgent ();
2260
+ hsa_agent_t DstAgent = DstDevice.getAgent ();
2261
+
// Completion signal used only for this one copy; initialised here and
// deinitialised at the end of the success path.
2262
+ AMDGPUSignalTy Signal;
2263
+ if (auto Err = Signal.init ())
2264
+ return Err;
2265
+
2266
+ // The agents need to have access to the corresponding memory.
2267
+ // This is presently only true if the pointers were originally
2268
+ // allocated by this runtime or the caller made the appropriate
2269
+ // access calls.
// A non-positive Size is passed to the copy as 0 bytes. The copy is submitted
// with no dependency signals (0, nullptr) and Signal as its completion signal.
2270
+ hsa_status_t Status = hsa_amd_memory_async_copy (
2271
+ DstPtr, DstAgent, SrcPtr, SrcAgent, (Size > 0 ) ? (size_t )Size : 0 , 0 ,
2272
+ nullptr , Signal.get ());
// NOTE(review): this error return and the one after Signal.wait() below both
// skip Signal.deinit(). Confirm whether AMDGPUSignalTy releases the signal
// some other way; if not, the signal leaks on these paths.
2273
+ if (auto Err =
2274
+ Plugin::check (Status, " Error in D2D hsa_amd_memory_async_copy: %s" ))
2275
+ return Err;
2276
+
// Wait on the completion signal before returning; the value of
// getStreamBusyWaitMicroseconds() is forwarded to Signal.wait().
2277
+ if (auto Err = Signal.wait (getStreamBusyWaitMicroseconds ()))
2278
+ return Err;
2279
+
2280
+ if (auto Err = Signal.deinit ())
2281
+ return Err;
2282
+
2283
+ return Plugin::success ();
2261
2284
}
2262
2285
2263
2286
// / Initialize the async info for interoperability purposes.
@@ -2899,7 +2922,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
2899
2922
2900
2923
// / Report whether data can be exchanged between two devices of this plugin.
// / Returns true for every (SrcDeviceId, DstDeviceId) pair; neither argument
// / is inspected.
// / NOTE(review): nothing here checks that the two devices can actually access
// / each other's memory — confirm that is guaranteed elsewhere.
2901
2924
bool isDataExchangable (int32_t SrcDeviceId, int32_t DstDeviceId) override {
2902
- return false ;
2925
+ return true ;
2903
2926
}
2904
2927
2905
2928
// / Get the host device instance.
@@ -3174,9 +3197,11 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
3174
3197
return nullptr ;
3175
3198
}
3176
3199
3177
- if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED) ) {
3200
+ if (Alloc) {
3178
3201
auto &KernelAgents = Plugin::get<AMDGPUPluginTy>().getKernelAgents ();
3179
3202
3203
+ // Inherently necessary for host or shared allocations.
3204
+ // Also enabled for device memory to allow device-to-device memcpy.
3180
3205
// Enable all kernel agents to access the buffer.
3181
3206
if (auto Err = MemoryPool->enableAccess (Alloc, Size, KernelAgents)) {
3182
3207
REPORT (" %s\n " , toString (std::move (Err)).data ());
0 commit comments