Skip to content

[OpenMP] Replace copy and paste code with instantiation #73991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 7 additions & 80 deletions openmp/libomptarget/include/PluginManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define OMPTARGET_PLUGIN_MANAGER_H

#include "Shared/APITypes.h"
#include "Shared/PluginAPI.h"

#include "device.h"

Expand All @@ -25,49 +26,6 @@
#include <mutex>

struct PluginAdaptorTy {
typedef int32_t(init_plugin_ty)();
typedef int32_t(is_valid_binary_ty)(void *);
typedef int32_t(is_valid_binary_info_ty)(void *, void *);
typedef int32_t(is_data_exchangable_ty)(int32_t, int32_t);
typedef int32_t(number_of_devices_ty)();
typedef int32_t(init_device_ty)(int32_t);
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
typedef void *(data_alloc_ty)(int32_t, int64_t, void *, int32_t);
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
__tgt_async_info *);
typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
typedef int32_t(data_retrieve_async_ty)(int32_t, void *, void *, int64_t,
__tgt_async_info *);
typedef int32_t(data_exchange_ty)(int32_t, void *, int32_t, void *, int64_t);
typedef int32_t(data_exchange_async_ty)(int32_t, void *, int32_t, void *,
int64_t, __tgt_async_info *);
typedef int32_t(data_delete_ty)(int32_t, void *, int32_t);
typedef int32_t(launch_kernel_ty)(int32_t, void *, void **, ptrdiff_t *,
const KernelArgsTy *, __tgt_async_info *);
typedef int64_t(init_requires_ty)(int64_t);
typedef int32_t(synchronize_ty)(int32_t, __tgt_async_info *);
typedef int32_t(query_async_ty)(int32_t, __tgt_async_info *);
typedef int32_t(supports_empty_images_ty)();
typedef void(print_device_info_ty)(int32_t);
typedef void(set_info_flag_ty)(uint32_t);
typedef int32_t(create_event_ty)(int32_t, void **);
typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *);
typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *);
typedef int32_t(sync_event_ty)(int32_t, void *);
typedef int32_t(destroy_event_ty)(int32_t, void *);
typedef int32_t(release_async_info_ty)(int32_t, __tgt_async_info *);
typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
const char **);
typedef int32_t(data_lock_ty)(int32_t, void *, int64_t, void **);
typedef int32_t(data_unlock_ty)(int32_t, void *);
typedef int32_t(data_notify_mapped_ty)(int32_t, void *, int64_t);
typedef int32_t(data_notify_unmapped_ty)(int32_t, void *);
typedef int32_t(set_device_offset_ty)(int32_t);
typedef int32_t(activate_record_replay_ty)(int32_t, uint64_t, void *, bool,
bool, uint64_t &);

int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
// i.e. the OpenMP index of the first device
Expand All @@ -80,43 +38,12 @@ struct PluginAdaptorTy {
std::string RTLName;
#endif

// Functions implemented in the RTL.
init_plugin_ty *init_plugin = nullptr;
is_valid_binary_ty *is_valid_binary = nullptr;
is_valid_binary_info_ty *is_valid_binary_info = nullptr;
is_data_exchangable_ty *is_data_exchangable = nullptr;
number_of_devices_ty *number_of_devices = nullptr;
init_device_ty *init_device = nullptr;
load_binary_ty *load_binary = nullptr;
data_alloc_ty *data_alloc = nullptr;
data_submit_ty *data_submit = nullptr;
data_submit_async_ty *data_submit_async = nullptr;
data_retrieve_ty *data_retrieve = nullptr;
data_retrieve_async_ty *data_retrieve_async = nullptr;
data_exchange_ty *data_exchange = nullptr;
data_exchange_async_ty *data_exchange_async = nullptr;
data_delete_ty *data_delete = nullptr;
launch_kernel_ty *launch_kernel = nullptr;
init_requires_ty *init_requires = nullptr;
synchronize_ty *synchronize = nullptr;
query_async_ty *query_async = nullptr;
supports_empty_images_ty *supports_empty_images = nullptr;
set_info_flag_ty *set_info_flag = nullptr;
print_device_info_ty *print_device_info = nullptr;
create_event_ty *create_event = nullptr;
record_event_ty *record_event = nullptr;
wait_event_ty *wait_event = nullptr;
sync_event_ty *sync_event = nullptr;
destroy_event_ty *destroy_event = nullptr;
init_async_info_ty *init_async_info = nullptr;
init_device_into_ty *init_device_info = nullptr;
release_async_info_ty *release_async_info = nullptr;
data_lock_ty *data_lock = nullptr;
data_unlock_ty *data_unlock = nullptr;
data_notify_mapped_ty *data_notify_mapped = nullptr;
data_notify_unmapped_ty *data_notify_unmapped = nullptr;
set_device_offset_ty *set_device_offset = nullptr;
activate_record_replay_ty *activate_record_replay = nullptr;
// For every entry listed in Shared/PluginAPI.inc, declare two things inside
// this struct:
//   * a type alias `<NAME>_ty`, taken via decltype from the declaration of
//     `__tgt_rtl_<NAME>` (see Shared/PluginAPI.h), and
//   * a function-pointer member `<NAME>` of that type, initialized to nullptr.
// The MANDATORY argument is not used by this particular expansion.
#define PLUGIN_API_HANDLE(NAME, MANDATORY) \
using NAME##_ty = decltype(__tgt_rtl_##NAME); \
NAME##_ty *NAME = nullptr;

// Expand the macro once per listed entry point, then remove the macro so it
// does not stay defined past this point.
#include "Shared/PluginAPI.inc"
#undef PLUGIN_API_HANDLE

// Are there images associated with this RTL.
bool IsUsed = false;
Expand Down
26 changes: 26 additions & 0 deletions openmp/libomptarget/include/Shared/APITypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,32 @@ struct __tgt_async_info {
/// happening.
KernelLaunchEnvironmentTy KernelLaunchEnvironment;
};

/// This struct contains all of the arguments to a target kernel region launch.
/// A pointer to it is what the plugin's launch_kernel entry point receives.
/// The total size is pinned by the static_asserts below, so members must not
/// be added, removed or resized without updating those checks.
struct KernelArgsTy {
uint32_t Version; // Version of this struct for ABI compatibility.
uint32_t NumArgs; // Number of arguments in each input pointer.
void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
void **ArgPtrs; // Pointer to the argument data.
int64_t *ArgSizes; // Size of the argument data in bytes.
int64_t *ArgTypes; // Type of the data (e.g. to / from).
void **ArgNames; // Name of the data for debugging, possibly null.
void **ArgMappers; // User-defined mappers, possibly null.
uint64_t Tripcount; // Tripcount for the teams / distribute loop, 0 otherwise.
// Bit-field flags: 1 + 63 bits, checked below to occupy exactly one uint64_t.
struct {
uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
uint64_t Unused : 63;
} Flags;
uint32_t NumTeams[3]; // The number of teams (for x,y,z dimension).
uint32_t ThreadLimit[3]; // The number of threads (for x,y,z dimension).
uint32_t DynCGroupMem; // Amount of dynamic cgroup memory requested.
};
// Compile-time layout checks: the Flags bit-field must be exactly the size of
// a uint64_t, and the whole struct must match the expected fixed size. Any
// accidental change to the members fails the build with "Invalid struct size".
static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
"Invalid struct size");
static_assert(sizeof(KernelArgsTy) ==
(8 * sizeof(int32_t) + 3 * sizeof(int64_t) +
4 * sizeof(void **) + 2 * sizeof(int64_t *)),
"Invalid struct size");
}

#endif // OMPTARGET_SHARED_API_TYPES_H
10 changes: 10 additions & 0 deletions openmp/libomptarget/include/Shared/PluginAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,16 @@ int32_t __tgt_rtl_data_notify_unmapped(int32_t ID, void *HstPtr);
// Set the global device identifier offset, such that the plugin may determine a
// unique device number.
int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset);

// Launch the kernel identified by TgtEntryPtr on device DeviceId.
// DeviceTy::launchKernel forwards its arguments to this entry point,
// passing the address of its KernelArgsTy.
// NOTE(review): the return-value convention and whether AsyncInfoPtr may be
// null are not visible from this declaration -- confirm against the plugins.
int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
KernelArgsTy *KernelArgs,
__tgt_async_info *AsyncInfoPtr);

// Record/replay setup entry point. It is invoked from DeviceTy::init and from
// target_activate_rr. ReqPtrArgOffset is taken by non-const reference, so the
// plugin is able to write a value back to the caller through it.
// NOTE(review): the exact meaning of MemorySize, VAddr and the returned value
// is not visible here -- confirm against the plugin implementation.
int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
void *VAddr, bool isRecord,
bool SaveOutput,
uint64_t &ReqPtrArgOffset);
}

#endif // OMPTARGET_SHARED_PLUGIN_API_H
50 changes: 50 additions & 0 deletions openmp/libomptarget/include/Shared/PluginAPI.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===-- Shared/PluginAPI.inc - Target independent plugin API ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the names of the interface functions between target
// independent offload runtime library and target dependent plugins.
//
//===----------------------------------------------------------------------===//

// No include guards!
//
// This file is a plain list of macro invocations. The including file must
// define PLUGIN_API_HANDLE(NAME, MANDATORY) before the #include (and is
// expected to #undef it afterwards); each line below then expands that macro
// for one plugin entry point. NAME is the entry point name without the
// `__tgt_rtl_` prefix.
// NOTE(review): MANDATORY presumably marks entry points that every plugin is
// required to provide -- confirm against the code that consumes this flag.

PLUGIN_API_HANDLE(init_plugin, true);
PLUGIN_API_HANDLE(is_valid_binary, true);
PLUGIN_API_HANDLE(is_valid_binary_info, false);
PLUGIN_API_HANDLE(is_data_exchangable, false);
PLUGIN_API_HANDLE(number_of_devices, true);
PLUGIN_API_HANDLE(init_device, true);
PLUGIN_API_HANDLE(load_binary, true);
PLUGIN_API_HANDLE(data_alloc, true);
PLUGIN_API_HANDLE(data_submit, true);
PLUGIN_API_HANDLE(data_submit_async, false);
PLUGIN_API_HANDLE(data_retrieve, true);
PLUGIN_API_HANDLE(data_retrieve_async, false);
PLUGIN_API_HANDLE(data_exchange, false);
PLUGIN_API_HANDLE(data_exchange_async, false);
PLUGIN_API_HANDLE(data_delete, true);
PLUGIN_API_HANDLE(launch_kernel, true);
PLUGIN_API_HANDLE(init_requires, false);
PLUGIN_API_HANDLE(synchronize, false);
PLUGIN_API_HANDLE(query_async, false);
PLUGIN_API_HANDLE(supports_empty_images, false);
PLUGIN_API_HANDLE(set_info_flag, false);
PLUGIN_API_HANDLE(print_device_info, false);
PLUGIN_API_HANDLE(create_event, false);
PLUGIN_API_HANDLE(record_event, false);
PLUGIN_API_HANDLE(wait_event, false);
PLUGIN_API_HANDLE(sync_event, false);
PLUGIN_API_HANDLE(destroy_event, false);
PLUGIN_API_HANDLE(init_async_info, false);
PLUGIN_API_HANDLE(init_device_info, false);
PLUGIN_API_HANDLE(data_lock, false);
PLUGIN_API_HANDLE(data_unlock, false);
PLUGIN_API_HANDLE(data_notify_mapped, false);
PLUGIN_API_HANDLE(data_notify_unmapped, false);
PLUGIN_API_HANDLE(set_device_offset, false);
PLUGIN_API_HANDLE(initialize_record_replay, false);
4 changes: 2 additions & 2 deletions openmp/libomptarget/include/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ struct DeviceTy {

// calls to RTL
int32_t initOnce();
__tgt_target_table *loadBinary(void *Img);
__tgt_target_table *loadBinary(__tgt_device_image *Img);

// device memory allocation/deallocation routines
/// Allocates \p Size bytes on the device, host or shared memory space
Expand Down Expand Up @@ -192,7 +192,7 @@ struct DeviceTy {

// Launch the kernel identified by \p TgtEntryPtr with the given arguments.
int32_t launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
ptrdiff_t *TgtOffsets, const KernelArgsTy &KernelArgs,
ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs,
AsyncInfoTy &AsyncInfo);

/// Synchronize device/queue/event based on \p AsyncInfo and return
Expand Down
23 changes: 0 additions & 23 deletions openmp/libomptarget/include/omptarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,29 +121,6 @@ enum TargetAllocTy : int32_t {
TARGET_ALLOC_DEFAULT
};

/// This struct contains all of the arguments to a target kernel region launch.
struct KernelArgsTy {
uint32_t Version; // Version of this struct for ABI compatibility.
uint32_t NumArgs; // Number of arguments in each input pointer.
void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
void **ArgPtrs; // Pointer to the argument data.
int64_t *ArgSizes; // Size of the argument data in bytes.
int64_t *ArgTypes; // Type of the data (e.g. to / from).
void **ArgNames; // Name of the data for debugging, possibly null.
void **ArgMappers; // User-defined mappers, possibly null.
uint64_t Tripcount; // Tripcount for the teams / distribute loop, 0 otherwise.
struct {
uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
uint64_t Unused : 63;
} Flags;
uint32_t NumTeams[3]; // The number of teams (for x,y,z dimension).
uint32_t ThreadLimit[3]; // The number of threads (for x,y,z dimension).
uint32_t DynCGroupMem; // Amount of dynamic cgroup memory requested.
};
static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
"Invalid struct size");
static_assert(sizeof(KernelArgsTy) == (8 * sizeof(int32_t) + 3 * sizeof(int64_t) + 4 * sizeof(void**) + 2 * sizeof(int64_t*)),
"Invalid struct size");
inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
0, {0,0}, {1, 0, 0}, {1, 0, 0}, 0};
Expand Down
9 changes: 4 additions & 5 deletions openmp/libomptarget/src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,8 @@ void DeviceTy::init() {
"LIBOMPTARGET_RR_SAVE_OUTPUT", false);

uint64_t ReqPtrArgOffset;
RTL->activate_record_replay(RTLDeviceID, 0, nullptr, true,
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true,
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
}

IsInit = true;
Expand All @@ -565,7 +565,7 @@ int32_t DeviceTy::initOnce() {
}

// Load binary to device.
__tgt_target_table *DeviceTy::loadBinary(void *Img) {
__tgt_target_table *DeviceTy::loadBinary(__tgt_device_image *Img) {
std::lock_guard<decltype(RTL->Mtx)> LG(RTL->Mtx);
return RTL->load_binary(RTLDeviceID, Img);
}
Expand Down Expand Up @@ -702,8 +702,7 @@ int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {

// Run region on device
int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
ptrdiff_t *TgtOffsets,
const KernelArgsTy &KernelArgs,
ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs,
AsyncInfoTy &AsyncInfo) {
return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
&KernelArgs, AsyncInfo);
Expand Down
6 changes: 3 additions & 3 deletions openmp/libomptarget/src/omptarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1733,9 +1733,9 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
int target_activate_rr(DeviceTy &Device, uint64_t MemorySize, void *VAddr,
bool IsRecord, bool SaveOutput,
uint64_t &ReqPtrArgOffset) {
return Device.RTL->activate_record_replay(Device.DeviceID, MemorySize, VAddr,
IsRecord, SaveOutput,
ReqPtrArgOffset);
return Device.RTL->initialize_record_replay(Device.DeviceID, MemorySize,
VAddr, IsRecord, SaveOutput,
ReqPtrArgOffset);
}

/// Executes a kernel using pre-recorded information for loading to
Expand Down
Loading