Skip to content

[Libomptarget] Move target table handling out of the plugins #77150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions openmp/libomptarget/include/Shared/APITypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ struct __tgt_target_table {
*EntriesEnd; // End of the table with all the entries (non inclusive)
};

/// This struct contains a handle to a loaded binary in the plugin device.
struct __tgt_device_binary {
// Handle value referring to the loaded binary.
// NOTE(review): presumably opaque to callers and only meaningful to the
// plugin that produced it -- confirm against the plugin implementation.
uintptr_t handle;
};

// clang-format on

/// This struct contains information exchanged between different asynchronous
Expand Down
14 changes: 12 additions & 2 deletions openmp/libomptarget/include/Shared/PluginAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,18 @@ int32_t __tgt_rtl_init_device(int32_t ID);
// return NULL. Otherwise, return a pointer to the built address table.
// Individual entries in the table may also be NULL, when the corresponding
// offload region is not supported on the target device.
__tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
__tgt_device_image *Image);
int32_t __tgt_rtl_load_binary(int32_t ID, __tgt_device_image *Image,
__tgt_device_binary *Binary);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is ABI breaking.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we determined at some point that the plugin ABI was free to be broken because it's not exported anywhere, and we should only ever load the corresponding plugin from the associated install.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right.


// Look up the device address of the named symbol in the given binary. Returns
// non-zero on failure.
int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size,
const char *Name, void **DevicePtr);

// Look up the device address of the named kernel in the given binary. Returns
// non-zero on failure.
int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name,
void **DevicePtr);

// Allocate data on the particular target device, of the specified size.
// HostPtr is an address of the host data the allocated target data
Expand Down
2 changes: 2 additions & 0 deletions openmp/libomptarget/include/Shared/PluginAPI.inc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ PLUGIN_API_HANDLE(is_data_exchangable, false);
PLUGIN_API_HANDLE(number_of_devices, true);
PLUGIN_API_HANDLE(init_device, true);
PLUGIN_API_HANDLE(load_binary, true);
PLUGIN_API_HANDLE(get_global, true);
PLUGIN_API_HANDLE(get_function, true);
PLUGIN_API_HANDLE(data_alloc, true);
PLUGIN_API_HANDLE(data_submit, true);
PLUGIN_API_HANDLE(data_submit_async, false);
Expand Down
2 changes: 1 addition & 1 deletion openmp/libomptarget/include/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct DeviceTy {
/// Provide access to the mapping handler.
MappingInfoTy &getMappingInfo() { return MappingInfo; }

__tgt_target_table *loadBinary(__tgt_device_image *Img);
llvm::Expected<__tgt_device_binary> loadBinary(__tgt_device_image *Img);

// device memory allocation/deallocation routines
/// Allocates \p Size bytes on the device, host or shared memory space
Expand Down
5 changes: 5 additions & 0 deletions openmp/libomptarget/include/rtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@
/// are trying to (re)register an existing lib or really have a new one.
struct TranslationTable {
__tgt_target_table HostTable;
llvm::SmallVector<__tgt_target_table> DeviceTables;

// Image assigned to a given device.
llvm::SmallVector<__tgt_device_image *>
TargetsImages; // One image per device ID.

// Arrays of entries active on the device.
llvm::SmallVector<llvm::SmallVector<__tgt_offload_entry>>
TargetsEntries; // One table per device ID.

// Table of entry points or NULL if it was not already computed.
llvm::SmallVector<__tgt_target_table *>
TargetsTable; // One table per device ID.
Expand Down
12 changes: 6 additions & 6 deletions openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,8 +439,9 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
/// Class implementing the AMDGPU device images' properties.
struct AMDGPUDeviceImageTy : public DeviceImageTy {
/// Create the AMDGPU image with the id and the target image pointer.
AMDGPUDeviceImageTy(int32_t ImageId, const __tgt_device_image *TgtImage)
: DeviceImageTy(ImageId, TgtImage) {}
AMDGPUDeviceImageTy(int32_t ImageId, GenericDeviceTy &Device,
const __tgt_device_image *TgtImage)
: DeviceImageTy(ImageId, Device, TgtImage) {}

/// Prepare and load the executable corresponding to the image.
Error loadExecutable(const AMDGPUDeviceTy &Device);
Expand Down Expand Up @@ -2105,14 +2106,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
uint64_t getClockFrequency() const override { return ClockFrequency; }

/// Allocate and construct an AMDGPU kernel.
Expected<GenericKernelTy &>
constructKernel(const __tgt_offload_entry &KernelEntry) override {
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the AMDGPU kernel.
AMDGPUKernelTy *AMDGPUKernel = Plugin::get().allocate<AMDGPUKernelTy>();
if (!AMDGPUKernel)
return Plugin::error("Failed to allocate memory for AMDGPU kernel");

new (AMDGPUKernel) AMDGPUKernelTy(KernelEntry.name);
new (AMDGPUKernel) AMDGPUKernelTy(Name);

return *AMDGPUKernel;
}
Expand Down Expand Up @@ -2160,7 +2160,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// Allocate and initialize the image object.
AMDGPUDeviceImageTy *AMDImage =
Plugin::get().allocate<AMDGPUDeviceImageTy>();
new (AMDImage) AMDGPUDeviceImageTy(ImageId, TgtImage);
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage);

// Load the HSA executable.
if (Error Err = AMDImage->loadExecutable(*this))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@ class GlobalTy {
GlobalTy(const std::string &Name, uint32_t Size, void *Ptr = nullptr)
: Name(Name), Size(Size), Ptr(Ptr) {}

GlobalTy(const __tgt_offload_entry &Entry)
: Name(Entry.name), Size(Entry.size), Ptr(Entry.addr) {}

const std::string &getName() const { return Name; }
uint32_t getSize() const { return Size; }
void *getPtr() const { return Ptr; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,34 +182,6 @@ class InfoQueueTy {
/// specific device. This class is responsible for storing and managing
/// the offload entries for an image on a device.
class DeviceImageTy {

/// Class representing the offload entry table. The class stores the
/// __tgt_target_table and a map to search in the table faster.
struct OffloadEntryTableTy {
/// Add new entry to the table.
void addEntry(const __tgt_offload_entry &Entry) {
Entries.push_back(Entry);
TTTablePtr.EntriesBegin = &Entries[0];
TTTablePtr.EntriesEnd = TTTablePtr.EntriesBegin + Entries.size();
}

/// Get the raw pointer to the __tgt_target_table.
operator __tgt_target_table *() {
if (Entries.empty())
return nullptr;
return &TTTablePtr;
}

private:
__tgt_target_table TTTablePtr;
llvm::SmallVector<__tgt_offload_entry> Entries;

public:
using const_iterator = decltype(Entries)::const_iterator;
const_iterator begin() const { return Entries.begin(); }
const_iterator end() const { return Entries.end(); }
};

/// Image identifier within the corresponding device. Notice that this id is
/// not unique between different device; they may overlap.
int32_t ImageId;
Expand All @@ -218,25 +190,29 @@ class DeviceImageTy {
const __tgt_device_image *TgtImage;
const __tgt_device_image *TgtImageBitcode;

/// Reference to the device this image is loaded on.
GenericDeviceTy &Device;

/// If this image has any global destructors that must be called.
/// FIXME: This is only required because we currently have no invariants
/// towards the lifetime of the underlying image. We should either copy
/// the image into memory locally or erase the pointers after init.
bool PendingGlobalDtors;

/// Table of offload entries.
OffloadEntryTableTy OffloadEntryTable;

public:
DeviceImageTy(int32_t Id, const __tgt_device_image *Image)
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr),
DeviceImageTy(int32_t Id, GenericDeviceTy &Device,
const __tgt_device_image *Image)
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr), Device(Device),
PendingGlobalDtors(false) {
assert(TgtImage && "Invalid target image");
}

/// Get the image identifier within the device.
int32_t getId() const { return ImageId; }

/// Get the device that this image is loaded onto.
GenericDeviceTy &getDevice() const { return Device; }

/// Get the pointer to the raw __tgt_device_image.
const __tgt_device_image *getTgtImage() const { return TgtImage; }

Expand All @@ -261,13 +237,9 @@ class DeviceImageTy {
return MemoryBufferRef(StringRef((const char *)getStart(), getSize()),
"Image");
}

/// Accessors for the pending global destructors flag.
bool setPendingGlobalDtors() { return PendingGlobalDtors = true; }
bool hasPendingGlobalDtors() const { return PendingGlobalDtors; }

/// Get a reference to the offload entry table for the image.
OffloadEntryTableTy &getOffloadEntryTable() { return OffloadEntryTable; }
};

/// Class implementing common functionalities of offload kernels. Each plugin
Expand Down Expand Up @@ -661,8 +633,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
virtual Error deinitImpl() = 0;

/// Load the binary image into the device and return the loaded device image.
Expected<__tgt_target_table *> loadBinary(GenericPluginTy &Plugin,
const __tgt_device_image *TgtImage);
Expected<DeviceImageTy *> loadBinary(GenericPluginTy &Plugin,
const __tgt_device_image *TgtImage);
virtual Expected<DeviceImageTy *>
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;

Expand All @@ -680,9 +652,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
// up to the target to override this using the shouldSetupRPCServer function.
Error setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image);

/// Register the offload entries for a specific image on the device.
Error registerOffloadEntries(DeviceImageTy &Image);

/// Synchronize the current thread with the pending operations on the
/// __tgt_async_info structure.
Error synchronize(__tgt_async_info *AsyncInfo);
Expand Down Expand Up @@ -888,21 +857,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
bool useAutoZeroCopy();
virtual bool useAutoZeroCopyImpl() { return false; }

private:
/// Register offload entry for global variable.
Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
const __tgt_offload_entry &GlobalEntry,
__tgt_offload_entry &DeviceEntry);

/// Register offload entry for kernel function.
Error registerKernelOffloadEntry(DeviceImageTy &DeviceImage,
const __tgt_offload_entry &KernelEntry,
__tgt_offload_entry &DeviceEntry);

/// Allocate and construct a kernel object.
virtual Expected<GenericKernelTy &>
constructKernel(const __tgt_offload_entry &KernelEntry) = 0;
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;

private:
/// Get and set the stack size and heap size for the device. If not used, the
/// plugin can implement the setters as no-op and setting the output
/// value to zero for the getters.
Expand Down
Loading