Skip to content

Commit 9c1d62c

Browse files
mhalkronlieb
authored and committed
[OpenMP][OMPT] Add OMPT callback for device data exchange 'Device-to-Device' (llvm#81991)
Since there is no `ompt_target_data_transfer_tofrom_device` value (within the `ompt_target_data_op_t` enum), nor any other value that conveys the meaning of an inter-device data exchange, we decided to indicate a Device-to-Device transfer by using: optype == ompt_target_data_transfer_from_device (=3). Hence, a device-to-device transfer may be identified, e.g., by checking for: (optype == 3) && (src_device_num < omp_get_num_devices()) && (dest_device_num < omp_get_num_devices()). Fixes: llvm#66478 Change-Id: I4c382ee61a05102c7ffc6de9b765e072f6386f11
1 parent a1a2fa5 commit 9c1d62c

File tree

5 files changed

+137
-39
lines changed

5 files changed

+137
-39
lines changed

openmp/libomptarget/include/OpenMP/OMPT/Interface.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,14 @@ class Interface {
6363
void **TgtPtrBegin, size_t Size, void *Code);
6464

6565
/// Top-level function for invoking callback before data submit
66-
void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
67-
void *TgtPtrBegin, size_t Size, void *Code);
66+
void beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
67+
int64_t DstDeviceId, void *DstPtrBegin,
68+
size_t Size, void *Code);
6869

6970
/// Top-level function for invoking callback after data submit
70-
void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
71-
void *TgtPtrBegin, size_t Size, void *Code);
71+
void endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
72+
int64_t DstDeviceId, void *DstPtrBegin, size_t Size,
73+
void *Code);
7274

7375
/// Top-level function for invoking callback before device data deallocation
7476
void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
@@ -77,12 +79,14 @@ class Interface {
7779
void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
7880

7981
/// Top-level function for invoking callback before data retrieve
80-
void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
81-
void *TgtPtrBegin, size_t Size, void *Code);
82+
void beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
83+
int64_t DstDeviceId, void *DstPtrBegin,
84+
size_t Size, void *Code);
8285

8386
/// Top-level function for invoking callback after data retrieve
84-
void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
85-
void *TgtPtrBegin, size_t Size, void *Code);
87+
void endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
88+
int64_t DstDeviceId, void *DstPtrBegin,
89+
size_t Size, void *Code);
8690

8791
/// Top-level function for invoking callback before kernel dispatch
8892
void beginTargetSubmit(unsigned int NumTeams = 1);

openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -132,41 +132,38 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
132132
endTargetDataOperation();
133133
}
134134

135-
void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
136-
void *HstPtrBegin, size_t Size,
137-
void *Code) {
135+
void Interface::beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
136+
int64_t DstDeviceId, void *DstPtrBegin,
137+
size_t Size, void *Code) {
138138
beginTargetDataOperation();
139139
if (ompt_callback_target_data_op_emi_fn) {
140140
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
141141
// callback
142142
ompt_callback_target_data_op_emi_fn(
143143
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
144-
ompt_target_data_transfer_to_device, HstPtrBegin,
145-
/*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
146-
Code);
144+
ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
145+
DstPtrBegin, DstDeviceId, Size, Code);
147146
} else if (ompt_callback_target_data_op_fn) {
148147
// HostOpId is set by the runtime
149148
HostOpId = createOpId();
150149
// Invoke the tool supplied data op callback
151150
ompt_callback_target_data_op_fn(
152151
TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
153-
HstPtrBegin, /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin,
154-
DeviceId, Size, Code);
152+
SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
155153
}
156154
}
157155

158-
void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
159-
void *HstPtrBegin, size_t Size,
160-
void *Code) {
156+
void Interface::endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
157+
int64_t DstDeviceId, void *DstPtrBegin,
158+
size_t Size, void *Code) {
161159
// Only EMI callback handles end scope
162160
if (ompt_callback_target_data_op_emi_fn) {
163161
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
164162
// callback
165163
ompt_callback_target_data_op_emi_fn(
166164
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
167-
ompt_target_data_transfer_to_device, HstPtrBegin,
168-
/*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
169-
Code);
165+
ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
166+
DstPtrBegin, DstDeviceId, Size, Code);
170167
}
171168
endTargetDataOperation();
172169
}
@@ -206,41 +203,38 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
206203
endTargetDataOperation();
207204
}
208205

209-
void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
210-
void *TgtPtrBegin, size_t Size,
211-
void *Code) {
206+
void Interface::beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
207+
int64_t DstDeviceId, void *DstPtrBegin,
208+
size_t Size, void *Code) {
212209
beginTargetDataOperation();
213210
if (ompt_callback_target_data_op_emi_fn) {
214211
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
215212
// callback
216213
ompt_callback_target_data_op_emi_fn(
217214
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
218-
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
219-
HstPtrBegin,
220-
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
215+
ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
216+
DstPtrBegin, DstDeviceId, Size, Code);
221217
} else if (ompt_callback_target_data_op_fn) {
222218
// HostOpId is set by the runtime
223219
HostOpId = createOpId();
224220
// Invoke the tool supplied data op callback
225221
ompt_callback_target_data_op_fn(
226222
TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
227-
TgtPtrBegin, DeviceId, HstPtrBegin,
228-
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
223+
SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
229224
}
230225
}
231226

232-
void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
233-
void *TgtPtrBegin, size_t Size,
234-
void *Code) {
227+
void Interface::endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
228+
int64_t DstDeviceId, void *DstPtrBegin,
229+
size_t Size, void *Code) {
235230
// Only EMI callback handles end scope
236231
if (ompt_callback_target_data_op_emi_fn) {
237232
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
238233
// callback
239234
ompt_callback_target_data_op_emi_fn(
240235
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
241-
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
242-
HstPtrBegin,
243-
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
236+
ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
237+
DstPtrBegin, DstDeviceId, Size, Code);
244238
}
245239
endTargetDataOperation();
246240
}

openmp/libomptarget/src/device.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
165165
OMPT_IF_BUILT(
166166
InterfaceRAII TargetDataSubmitRAII(
167167
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
168-
DeviceID, TgtPtrBegin, HstPtrBegin, Size,
168+
omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
169169
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);
170170
// ToDo: mhalk Do we need a check for TracingActive here?
171171
InterfaceRAII TargetDataSubmitTraceRAII(
@@ -197,7 +197,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
197197
OMPT_IF_BUILT(
198198
InterfaceRAII TargetDataRetrieveRAII(
199199
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
200-
DeviceID, HstPtrBegin, TgtPtrBegin, Size,
200+
DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
201201
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);
202202
// ToDo: mhalk Do we need a check for TracingActive here?
203203
InterfaceRAII TargetDataSubmitTraceRAII(
@@ -219,6 +219,18 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
219219
// Copy data from current device to destination device directly
220220
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
221221
int64_t Size, AsyncInfoTy &AsyncInfo) {
222+
/// RAII to establish tool anchors before and after data exchange
223+
/// Note: Despite the fact that this is a data exchange, we use 'from_device'
224+
/// operation enum (w.r.t. ompt_target_data_op_t) as there is currently
225+
/// no better alternative. It is still possible to distinguish this
226+
/// scenario from a real data retrieve by checking if both involved
227+
/// device numbers are less than omp_get_num_devices().
228+
OMPT_IF_BUILT(
229+
InterfaceRAII TargetDataExchangeRAII(
230+
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
231+
RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
232+
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
233+
222234
if (ForceSynchronousTargetRegions || !AsyncInfo ||
223235
#ifdef OMPT_SUPPORT
224236
ompt::CallbacksInitialized ||

openmp/libomptarget/test/ompt/callbacks.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,14 @@ static void on_ompt_callback_target_data_op_emi(
8181
assert(codeptr_ra != 0 && "Unexpected null codeptr");
8282
if (endpoint == ompt_scope_begin)
8383
*host_op_id = next_op_id++;
84+
// target_task_data may be null, avoid dereferencing it
85+
uint64_t target_task_data_value =
86+
(target_task_data) ? target_task_data->value : 0;
8487
printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p "
8588
"(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p "
8689
"src_device_num=%d "
8790
"dest=%p dest_device_num=%d bytes=%lu code=%p\n",
88-
endpoint, optype, target_task_data, target_task_data->value,
91+
endpoint, optype, target_task_data, target_task_data_value,
8992
target_data, target_data->value, host_op_id, *host_op_id, src_addr,
9093
src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra);
9194
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// RUN: %libomptarget-compile-run-and-check-generic
2+
// REQUIRES: ompt
3+
// UNSUPPORTED: aarch64-unknown-linux-gnu
4+
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
5+
// UNSUPPORTED: x86_64-pc-linux-gnu
6+
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
7+
8+
/*
9+
* Verify all three data transfer directions: H2D, D2D and D2H
10+
*/
11+
12+
#include <omp.h>
13+
#include <stdio.h>
14+
#include <stdlib.h>
15+
16+
#include "callbacks.h"
17+
#include "register_emi.h"
18+
19+
int main(void) {
20+
int NumDevices = omp_get_num_devices();
21+
assert(NumDevices > 0 && "No device(s) present.");
22+
int Device = omp_get_default_device();
23+
int Host = omp_get_initial_device();
24+
// Note: Zero value depicts an OFFLOAD_SUCCESS
25+
int Status;
26+
27+
printf("Allocating Memory on Device\n");
28+
int *DevPtr = (int *)omp_target_alloc(sizeof(int), Device);
29+
assert(DevPtr && "Could not allocate memory on device.");
30+
int *HstPtr = (int *)malloc(sizeof(int));
31+
*HstPtr = 42;
32+
33+
printf("Testing: Host to Device\n");
34+
Status = omp_target_memcpy(DevPtr, HstPtr, sizeof(int), 0, 0, Device, Host);
35+
assert(Status == 0 && "H2D memory copy operation failed.\n");
36+
37+
printf("Testing: Device to Device\n");
38+
Status = omp_target_memcpy(DevPtr, DevPtr, sizeof(int), 0, 0, Device, Device);
39+
assert(Status == 0 && "D2D memory copy operation failed.\n");
40+
41+
printf("Testing: Device to Host\n");
42+
Status = omp_target_memcpy(HstPtr, DevPtr, sizeof(int), 0, 0, Host, Device);
43+
assert(Status == 0 && "D2H memory copy operation failed.\n");
44+
45+
printf("Checking Correctness\n");
46+
assert(*HstPtr == 42);
47+
48+
printf("Freeing Memory on Device\n");
49+
free(HstPtr);
50+
omp_target_free(DevPtr, Device);
51+
52+
return 0;
53+
}
54+
55+
// clang-format off
56+
57+
/// CHECK: Callback Init:
58+
59+
/// CHECK: Allocating Memory on Device
60+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
61+
/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
62+
/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
63+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
64+
65+
/// CHECK: Testing: Host to Device
66+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
67+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
68+
69+
/// CHECK: Testing: Device to Device
70+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
71+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
72+
73+
/// CHECK: Testing: Device to Host
74+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
75+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
76+
77+
/// CHECK: Checking Correctness
78+
79+
/// CHECK: Freeing Memory on Device
80+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 {{.+}} src_device_num=[[DEVICE]]
81+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 {{.+}} src_device_num=[[DEVICE]]
82+
83+
/// CHECK: Callback Fini:
84+
85+
// clang-format on

0 commit comments

Comments
 (0)