Skip to content

Commit e521752

Browse files
authored
[OpenMP][OMPT] Add OMPT callback for device data exchange 'Device-to-Device' (llvm#81991)
Since there is no `ompt_target_data_transfer_tofrom_device` value (within the `ompt_target_data_op_t` enum), nor any other value that conveys the meaning of an inter-device data exchange, we decided to indicate a Device-to-Device transfer by using: optype == ompt_target_data_transfer_from_device (=3). Hence, a Device-to-Device transfer may be identified, e.g., by checking for: (optype == 3) && (src_device_num < omp_get_num_devices()) && (dest_device_num < omp_get_num_devices()). Fixes: llvm#66478
1 parent 8cfb716 commit e521752

File tree

6 files changed

+153
-43
lines changed

6 files changed

+153
-43
lines changed

openmp/libomptarget/include/OpenMP/OMPT/Interface.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,14 @@ class Interface {
5454
void **TgtPtrBegin, size_t Size, void *Code);
5555

5656
/// Top-level function for invoking callback before data submit
57-
void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
58-
void *TgtPtrBegin, size_t Size, void *Code);
57+
void beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
58+
int64_t DstDeviceId, void *DstPtrBegin,
59+
size_t Size, void *Code);
5960

6061
/// Top-level function for invoking callback after data submit
61-
void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
62-
void *TgtPtrBegin, size_t Size, void *Code);
62+
void endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
63+
int64_t DstDeviceId, void *DstPtrBegin, size_t Size,
64+
void *Code);
6365

6466
/// Top-level function for invoking callback before device data deallocation
6567
void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
@@ -68,12 +70,14 @@ class Interface {
6870
void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
6971

7072
/// Top-level function for invoking callback before data retrieve
71-
void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
72-
void *TgtPtrBegin, size_t Size, void *Code);
73+
void beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
74+
int64_t DstDeviceId, void *DstPtrBegin,
75+
size_t Size, void *Code);
7376

7477
/// Top-level function for invoking callback after data retrieve
75-
void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
76-
void *TgtPtrBegin, size_t Size, void *Code);
78+
void endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
79+
int64_t DstDeviceId, void *DstPtrBegin,
80+
size_t Size, void *Code);
7781

7882
/// Top-level function for invoking callback before kernel dispatch
7983
void beginTargetSubmit(unsigned int NumTeams = 1);

openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -119,41 +119,38 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
119119
endTargetDataOperation();
120120
}
121121

122-
void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
123-
void *HstPtrBegin, size_t Size,
124-
void *Code) {
122+
void Interface::beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
123+
int64_t DstDeviceId, void *DstPtrBegin,
124+
size_t Size, void *Code) {
125125
beginTargetDataOperation();
126126
if (ompt_callback_target_data_op_emi_fn) {
127127
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
128128
// callback
129129
ompt_callback_target_data_op_emi_fn(
130130
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
131-
ompt_target_data_transfer_to_device, HstPtrBegin,
132-
/*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
133-
Code);
131+
ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
132+
DstPtrBegin, DstDeviceId, Size, Code);
134133
} else if (ompt_callback_target_data_op_fn) {
135134
// HostOpId is set by the runtime
136135
HostOpId = createOpId();
137136
// Invoke the tool supplied data op callback
138137
ompt_callback_target_data_op_fn(
139138
TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
140-
HstPtrBegin, /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin,
141-
DeviceId, Size, Code);
139+
SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
142140
}
143141
}
144142

145-
void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
146-
void *HstPtrBegin, size_t Size,
147-
void *Code) {
143+
void Interface::endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
144+
int64_t DstDeviceId, void *DstPtrBegin,
145+
size_t Size, void *Code) {
148146
// Only EMI callback handles end scope
149147
if (ompt_callback_target_data_op_emi_fn) {
150148
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
151149
// callback
152150
ompt_callback_target_data_op_emi_fn(
153151
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
154-
ompt_target_data_transfer_to_device, HstPtrBegin,
155-
/*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
156-
Code);
152+
ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
153+
DstPtrBegin, DstDeviceId, Size, Code);
157154
}
158155
endTargetDataOperation();
159156
}
@@ -193,41 +190,38 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
193190
endTargetDataOperation();
194191
}
195192

196-
void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
197-
void *TgtPtrBegin, size_t Size,
198-
void *Code) {
193+
void Interface::beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
194+
int64_t DstDeviceId, void *DstPtrBegin,
195+
size_t Size, void *Code) {
199196
beginTargetDataOperation();
200197
if (ompt_callback_target_data_op_emi_fn) {
201198
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
202199
// callback
203200
ompt_callback_target_data_op_emi_fn(
204201
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
205-
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
206-
HstPtrBegin,
207-
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
202+
ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
203+
DstPtrBegin, DstDeviceId, Size, Code);
208204
} else if (ompt_callback_target_data_op_fn) {
209205
// HostOpId is set by the runtime
210206
HostOpId = createOpId();
211207
// Invoke the tool supplied data op callback
212208
ompt_callback_target_data_op_fn(
213209
TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
214-
TgtPtrBegin, DeviceId, HstPtrBegin,
215-
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
210+
SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
216211
}
217212
}
218213

219-
void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
220-
void *TgtPtrBegin, size_t Size,
221-
void *Code) {
214+
void Interface::endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
215+
int64_t DstDeviceId, void *DstPtrBegin,
216+
size_t Size, void *Code) {
222217
// Only EMI callback handles end scope
223218
if (ompt_callback_target_data_op_emi_fn) {
224219
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
225220
// callback
226221
ompt_callback_target_data_op_emi_fn(
227222
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
228-
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
229-
HstPtrBegin,
230-
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
223+
ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
224+
DstPtrBegin, DstDeviceId, Size, Code);
231225
}
232226
endTargetDataOperation();
233227
}

openmp/libomptarget/src/device.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
151151
OMPT_IF_BUILT(
152152
InterfaceRAII TargetDataSubmitRAII(
153153
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
154-
DeviceID, TgtPtrBegin, HstPtrBegin, Size,
154+
omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
155155
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
156156

157157
if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
@@ -173,7 +173,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
173173
OMPT_IF_BUILT(
174174
InterfaceRAII TargetDataRetrieveRAII(
175175
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
176-
DeviceID, HstPtrBegin, TgtPtrBegin, Size,
176+
DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
177177
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
178178

179179
if (!RTL->data_retrieve_async || !RTL->synchronize)
@@ -185,6 +185,17 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
185185
// Copy data from current device to destination device directly
186186
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
187187
int64_t Size, AsyncInfoTy &AsyncInfo) {
188+
/// RAII to establish tool anchors before and after data exchange
189+
/// Note: Despite the fact that this is a data exchange, we use 'from_device'
190+
/// operation enum (w.r.t. ompt_target_data_op_t) as there is currently
191+
/// no better alternative. It is still possible to distinguish this
192+
/// scenario from a real data retrieve by checking if both involved
193+
/// device numbers are less than omp_get_num_devices().
194+
OMPT_IF_BUILT(
195+
InterfaceRAII TargetDataExchangeRAII(
196+
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
197+
RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
198+
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
188199
if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
189200
assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
190201
return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,

openmp/libomptarget/test/ompt/callbacks.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,14 @@ static void on_ompt_callback_target_data_op_emi(
8181
assert(codeptr_ra != 0 && "Unexpected null codeptr");
8282
if (endpoint == ompt_scope_begin)
8383
*host_op_id = next_op_id++;
84+
// target_task_data may be null, avoid dereferencing it
85+
uint64_t target_task_data_value =
86+
(target_task_data) ? target_task_data->value : 0;
8487
printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p "
8588
"(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p "
8689
"src_device_num=%d "
8790
"dest=%p dest_device_num=%d bytes=%lu code=%p\n",
88-
endpoint, optype, target_task_data, target_task_data->value,
91+
endpoint, optype, target_task_data, target_task_data_value,
8992
target_data, target_data->value, host_op_id, *host_op_id, src_addr,
9093
src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra);
9194
}

openmp/libomptarget/test/ompt/target_memcpy.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ int main() {
3333
if (omp_target_memcpy(dev_ptr, &host_var1, sizeof(int), 0, 0, dev, host))
3434
abort();
3535

36+
// D2D transfer
37+
if (omp_target_memcpy(dev_ptr, dev_ptr, sizeof(int), 0, 0, dev, dev))
38+
abort();
39+
3640
// D2H transfer
3741
if (omp_target_memcpy(&host_var2, dev_ptr, sizeof(int), 0, 0, host, dev))
3842
abort();
@@ -46,16 +50,25 @@ int main() {
4650

4751
// clang-format off
4852
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
53+
/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
54+
/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
4955
/// CHECK-NOT: code=(nil)
50-
/// CHECK: code=[[CODE1:.*]]
56+
/// CHECK: code=[[CODE1:0x[0-f]+]]
5157
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
58+
/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
5259
/// CHECK-NOT: code=(nil)
5360
/// CHECK-NOT: code=[[CODE1]]
54-
/// CHECK: code=[[CODE2:.*]]
61+
/// CHECK: code=[[CODE2:0x[0-f]+]]
5562
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
63+
/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
5664
/// CHECK-NOT: code=(nil)
5765
/// CHECK-NOT: code=[[CODE2]]
58-
/// CHECK: code=[[CODE3:.*]]
59-
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
66+
/// CHECK: code=[[CODE3:0x[0-f]+]]
67+
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
68+
/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
6069
/// CHECK-NOT: code=(nil)
6170
/// CHECK-NOT: code=[[CODE3]]
71+
/// CHECK: code=[[CODE4:0x[0-f]+]]
72+
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
73+
/// CHECK-NOT: code=(nil)
74+
/// CHECK-NOT: code=[[CODE4]]
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// RUN: %libomptarget-compile-run-and-check-generic
2+
// REQUIRES: ompt
3+
// UNSUPPORTED: aarch64-unknown-linux-gnu
4+
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
5+
// UNSUPPORTED: x86_64-pc-linux-gnu
6+
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
7+
8+
/*
9+
* Verify all three data transfer directions: H2D, D2D and D2H
10+
*/
11+
12+
#include <omp.h>
13+
#include <stdio.h>
14+
#include <stdlib.h>
15+
16+
#include "callbacks.h"
17+
#include "register_emi.h"
18+
19+
int main(void) {
20+
int NumDevices = omp_get_num_devices();
21+
assert(NumDevices > 0 && "No device(s) present.");
22+
int Device = omp_get_default_device();
23+
int Host = omp_get_initial_device();
24+
// Note: Zero value depicts an OFFLOAD_SUCCESS
25+
int Status;
26+
27+
printf("Allocating Memory on Device\n");
28+
int *DevPtr = (int *)omp_target_alloc(sizeof(int), Device);
29+
assert(DevPtr && "Could not allocate memory on device.");
30+
int *HstPtr = (int *)malloc(sizeof(int));
31+
*HstPtr = 42;
32+
33+
printf("Testing: Host to Device\n");
34+
Status = omp_target_memcpy(DevPtr, HstPtr, sizeof(int), 0, 0, Device, Host);
35+
assert(Status == 0 && "H2D memory copy operation failed.\n");
36+
37+
printf("Testing: Device to Device\n");
38+
Status = omp_target_memcpy(DevPtr, DevPtr, sizeof(int), 0, 0, Device, Device);
39+
assert(Status == 0 && "D2D memory copy operation failed.\n");
40+
41+
printf("Testing: Device to Host\n");
42+
Status = omp_target_memcpy(HstPtr, DevPtr, sizeof(int), 0, 0, Host, Device);
43+
assert(Status == 0 && "D2H memory copy operation failed.\n");
44+
45+
printf("Checking Correctness\n");
46+
assert(*HstPtr == 42);
47+
48+
printf("Freeing Memory on Device\n");
49+
free(HstPtr);
50+
omp_target_free(DevPtr, Device);
51+
52+
return 0;
53+
}
54+
55+
// clang-format off
56+
57+
/// CHECK: Callback Init:
58+
59+
/// CHECK: Allocating Memory on Device
60+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
61+
/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
62+
/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
63+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
64+
65+
/// CHECK: Testing: Host to Device
66+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
67+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
68+
69+
/// CHECK: Testing: Device to Device
70+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
71+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
72+
73+
/// CHECK: Testing: Device to Host
74+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
75+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
76+
77+
/// CHECK: Checking Correctness
78+
79+
/// CHECK: Freeing Memory on Device
80+
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 {{.+}} src_device_num=[[DEVICE]]
81+
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 {{.+}} src_device_num=[[DEVICE]]
82+
83+
/// CHECK: Callback Fini:
84+
85+
// clang-format on

0 commit comments

Comments
 (0)