Skip to content

[OpenMP][OMPT] Add OMPT callback for device data exchange 'Device-to-Device' #81991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions openmp/libomptarget/include/OpenMP/OMPT/Interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ class Interface {
void **TgtPtrBegin, size_t Size, void *Code);

/// Top-level function for invoking callback before data submit
void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size, void *Code);
void beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin,
size_t Size, void *Code);

/// Top-level function for invoking callback after data submit
void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size, void *Code);
void endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin, size_t Size,
void *Code);

/// Top-level function for invoking callback before device data deallocation
void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
Expand All @@ -68,12 +70,14 @@ class Interface {
void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);

/// Top-level function for invoking callback before data retrieve
void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size, void *Code);
void beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin,
size_t Size, void *Code);

/// Top-level function for invoking callback after data retrieve
void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size, void *Code);
void endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin,
size_t Size, void *Code);

/// Top-level function for invoking callback before kernel dispatch
void beginTargetSubmit(unsigned int NumTeams = 1);
Expand Down
50 changes: 22 additions & 28 deletions openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,41 +119,38 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
endTargetDataOperation();
}

void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
void *HstPtrBegin, size_t Size,
void *Code) {
void Interface::beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin,
size_t Size, void *Code) {
beginTargetDataOperation();
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
ompt_target_data_transfer_to_device, HstPtrBegin,
/*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
Code);
ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
DstPtrBegin, DstDeviceId, Size, Code);
} else if (ompt_callback_target_data_op_fn) {
// HostOpId is set by the runtime
HostOpId = createOpId();
// Invoke the tool supplied data op callback
ompt_callback_target_data_op_fn(
TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
HstPtrBegin, /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin,
DeviceId, Size, Code);
SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
}
}

void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
void *HstPtrBegin, size_t Size,
void *Code) {
void Interface::endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin,
size_t Size, void *Code) {
// Only EMI callback handles end scope
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
ompt_target_data_transfer_to_device, HstPtrBegin,
/*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
Code);
ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
DstPtrBegin, DstDeviceId, Size, Code);
}
endTargetDataOperation();
}
Expand Down Expand Up @@ -193,41 +190,38 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
endTargetDataOperation();
}

void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size,
void *Code) {
void Interface::beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin,
size_t Size, void *Code) {
beginTargetDataOperation();
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
HstPtrBegin,
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
DstPtrBegin, DstDeviceId, Size, Code);
} else if (ompt_callback_target_data_op_fn) {
// HostOpId is set by the runtime
HostOpId = createOpId();
// Invoke the tool supplied data op callback
ompt_callback_target_data_op_fn(
TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
TgtPtrBegin, DeviceId, HstPtrBegin,
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
}
}

void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size,
void *Code) {
void Interface::endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
int64_t DstDeviceId, void *DstPtrBegin,
size_t Size, void *Code) {
// Only EMI callback handles end scope
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
HstPtrBegin,
/*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
DstPtrBegin, DstDeviceId, Size, Code);
}
endTargetDataOperation();
}
Expand Down
15 changes: 13 additions & 2 deletions openmp/libomptarget/src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
OMPT_IF_BUILT(
InterfaceRAII TargetDataSubmitRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
DeviceID, TgtPtrBegin, HstPtrBegin, Size,
omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
Expand All @@ -173,7 +173,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
OMPT_IF_BUILT(
InterfaceRAII TargetDataRetrieveRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
DeviceID, HstPtrBegin, TgtPtrBegin, Size,
DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

if (!RTL->data_retrieve_async || !RTL->synchronize)
Expand All @@ -185,6 +185,17 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
// Copy data from current device to destination device directly
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
int64_t Size, AsyncInfoTy &AsyncInfo) {
/// RAII to establish tool anchors before and after the data exchange.
/// Note: Although this is a device-to-device data exchange, it is reported
/// with the 'from_device' operation enum (see ompt_target_data_op_t), as
/// there is currently no better alternative. A tool can still distinguish
/// this scenario from a real data retrieve by checking whether both involved
/// device numbers are less than omp_get_num_devices().
OMPT_IF_BUILT(
InterfaceRAII TargetDataExchangeRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
Expand Down
5 changes: 4 additions & 1 deletion openmp/libomptarget/test/ompt/callbacks.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,14 @@ static void on_ompt_callback_target_data_op_emi(
assert(codeptr_ra != 0 && "Unexpected null codeptr");
if (endpoint == ompt_scope_begin)
*host_op_id = next_op_id++;
// target_task_data may be null, avoid dereferencing it
uint64_t target_task_data_value =
(target_task_data) ? target_task_data->value : 0;
printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p "
"(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p "
"src_device_num=%d "
"dest=%p dest_device_num=%d bytes=%lu code=%p\n",
endpoint, optype, target_task_data, target_task_data->value,
endpoint, optype, target_task_data, target_task_data_value,
target_data, target_data->value, host_op_id, *host_op_id, src_addr,
src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra);
}
Expand Down
21 changes: 17 additions & 4 deletions openmp/libomptarget/test/ompt/target_memcpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ int main() {
if (omp_target_memcpy(dev_ptr, &host_var1, sizeof(int), 0, 0, dev, host))
abort();

// D2D transfer
if (omp_target_memcpy(dev_ptr, dev_ptr, sizeof(int), 0, 0, dev, dev))
abort();

// D2H transfer
if (omp_target_memcpy(&host_var2, dev_ptr, sizeof(int), 0, 0, host, dev))
abort();
Expand All @@ -46,16 +50,25 @@ int main() {

// clang-format off
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
/// CHECK-NOT: code=(nil)
/// CHECK: code=[[CODE1:.*]]
/// CHECK: code=[[CODE1:0x[0-f]+]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
/// CHECK-NOT: code=(nil)
/// CHECK-NOT: code=[[CODE1]]
/// CHECK: code=[[CODE2:.*]]
/// CHECK: code=[[CODE2:0x[0-f]+]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
/// CHECK-NOT: code=(nil)
/// CHECK-NOT: code=[[CODE2]]
/// CHECK: code=[[CODE3:.*]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
/// CHECK: code=[[CODE3:0x[0-f]+]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
/// CHECK-NOT: code=(nil)
/// CHECK-NOT: code=[[CODE3]]
/// CHECK: code=[[CODE4:0x[0-f]+]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
/// CHECK-NOT: code=(nil)
/// CHECK-NOT: code=[[CODE4]]
85 changes: 85 additions & 0 deletions openmp/libomptarget/test/ompt/target_memcpy_emi.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: ompt
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO

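/*
* Verify that the OMPT EMI data-op callbacks are reported for all three data
* transfer directions: host-to-device (H2D), device-to-device (D2D) and
* device-to-host (D2H).
*/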

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#include "callbacks.h"
#include "register_emi.h"

int main(void) {
int NumDevices = omp_get_num_devices();
assert(NumDevices > 0 && "No device(s) present.");
int Device = omp_get_default_device();
int Host = omp_get_initial_device();
// Note: A zero value denotes OFFLOAD_SUCCESS
int Status;

printf("Allocating Memory on Device\n");
int *DevPtr = (int *)omp_target_alloc(sizeof(int), Device);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check that DevPtr is not null.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Will add an assert().

assert(DevPtr && "Could not allocate memory on device.");
int *HstPtr = (int *)malloc(sizeof(int));
*HstPtr = 42;

printf("Testing: Host to Device\n");
Status = omp_target_memcpy(DevPtr, HstPtr, sizeof(int), 0, 0, Device, Host);
assert(Status == 0 && "H2D memory copy operation failed.\n");

printf("Testing: Device to Device\n");
Status = omp_target_memcpy(DevPtr, DevPtr, sizeof(int), 0, 0, Device, Device);
assert(Status == 0 && "D2D memory copy operation failed.\n");

printf("Testing: Device to Host\n");
Status = omp_target_memcpy(HstPtr, DevPtr, sizeof(int), 0, 0, Host, Device);
assert(Status == 0 && "D2H memory copy operation failed.\n");

printf("Checking Correctness\n");
assert(*HstPtr == 42);

printf("Freeing Memory on Device\n");
free(HstPtr);
omp_target_free(DevPtr, Device);

return 0;
}

// clang-format off

/// CHECK: Callback Init:

/// CHECK: Allocating Memory on Device
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]

/// CHECK: Testing: Host to Device
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]

/// CHECK: Testing: Device to Device
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]

/// CHECK: Testing: Device to Host
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]

/// CHECK: Checking Correctness

/// CHECK: Freeing Memory on Device
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 {{.+}} src_device_num=[[DEVICE]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 {{.+}} src_device_num=[[DEVICE]]

/// CHECK: Callback Fini:

// clang-format on