Skip to content

Commit 621bafd

Browse files
authored
[Libomptarget] Move target table handling out of the plugins (#77150)
Summary: This patch moves the bulk of the handling of the `__tgt_offload_entries` out of the plugins themselves. The reason is that the plugins should not be handling this implementation detail of the OpenMP runtime. Instead, we expose two new plugin API functions that look up the device pointer for a global and for a kernel, respectively. This required introducing a new type to represent a binary image that has been loaded on a device; we can then use it to look up the addresses as needed. The creation of the mapping table is now handled solely in `libomptarget`, where we simply look up each address individually. This should allow us to expose these operations more generically when we provide a separate API.
1 parent ebd4dc4 commit 621bafd

File tree

14 files changed

+201
-210
lines changed

14 files changed

+201
-210
lines changed

openmp/libomptarget/include/Shared/APITypes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ struct __tgt_target_table {
6262
*EntriesEnd; // End of the table with all the entries (non inclusive)
6363
};
6464

65+
/// This struct contains a handle to a loaded binary in the plugin device.
66+
struct __tgt_device_binary {
67+
uintptr_t handle;
68+
};
69+
6570
// clang-format on
6671

6772
/// This struct contains information exchanged between different asynchronous

openmp/libomptarget/include/Shared/PluginAPI.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,18 @@ int32_t __tgt_rtl_init_device(int32_t ID);
5757
// return NULL. Otherwise, return a pointer to the built address table.
5858
// Individual entries in the table may also be NULL, when the corresponding
5959
// offload region is not supported on the target device.
60-
__tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
61-
__tgt_device_image *Image);
60+
int32_t __tgt_rtl_load_binary(int32_t ID, __tgt_device_image *Image,
61+
__tgt_device_binary *Binary);
62+
63+
// Look up the device address of the named symbol in the given binary. Returns
64+
// non-zero on failure.
65+
int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size,
66+
const char *Name, void **DevicePtr);
67+
68+
// Look up the device address of the named kernel in the given binary. Returns
69+
// non-zero on failure.
70+
int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name,
71+
void **DevicePtr);
6272

6373
// Allocate data on the particular target device, of the specified size.
6474
// HostPtr is an address of the host data the allocated target data

openmp/libomptarget/include/Shared/PluginAPI.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ PLUGIN_API_HANDLE(is_data_exchangable, false);
1919
PLUGIN_API_HANDLE(number_of_devices, true);
2020
PLUGIN_API_HANDLE(init_device, true);
2121
PLUGIN_API_HANDLE(load_binary, true);
22+
PLUGIN_API_HANDLE(get_global, true);
23+
PLUGIN_API_HANDLE(get_function, true);
2224
PLUGIN_API_HANDLE(data_alloc, true);
2325
PLUGIN_API_HANDLE(data_submit, true);
2426
PLUGIN_API_HANDLE(data_submit_async, false);

openmp/libomptarget/include/device.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ struct DeviceTy {
7070
/// Provide access to the mapping handler.
7171
MappingInfoTy &getMappingInfo() { return MappingInfo; }
7272

73-
__tgt_target_table *loadBinary(__tgt_device_image *Img);
73+
llvm::Expected<__tgt_device_binary> loadBinary(__tgt_device_image *Img);
7474

7575
// device memory allocation/deallocation routines
7676
/// Allocates \p Size bytes on the device, host or shared memory space

openmp/libomptarget/include/rtl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,16 @@
2626
/// are trying to (re)register an existing lib or really have a new one.
2727
struct TranslationTable {
2828
__tgt_target_table HostTable;
29+
llvm::SmallVector<__tgt_target_table> DeviceTables;
2930

3031
// Image assigned to a given device.
3132
llvm::SmallVector<__tgt_device_image *>
3233
TargetsImages; // One image per device ID.
3334

35+
// Arrays of entries active on the device.
36+
llvm::SmallVector<llvm::SmallVector<__tgt_offload_entry>>
37+
TargetsEntries; // One table per device ID.
38+
3439
// Table of entry points or NULL if it was not already computed.
3540
llvm::SmallVector<__tgt_target_table *>
3641
TargetsTable; // One table per device ID.

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -439,8 +439,9 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
439439
/// Class implementing the AMDGPU device images' properties.
440440
struct AMDGPUDeviceImageTy : public DeviceImageTy {
441441
/// Create the AMDGPU image with the id and the target image pointer.
442-
AMDGPUDeviceImageTy(int32_t ImageId, const __tgt_device_image *TgtImage)
443-
: DeviceImageTy(ImageId, TgtImage) {}
442+
AMDGPUDeviceImageTy(int32_t ImageId, GenericDeviceTy &Device,
443+
const __tgt_device_image *TgtImage)
444+
: DeviceImageTy(ImageId, Device, TgtImage) {}
444445

445446
/// Prepare and load the executable corresponding to the image.
446447
Error loadExecutable(const AMDGPUDeviceTy &Device);
@@ -2105,14 +2106,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
21052106
uint64_t getClockFrequency() const override { return ClockFrequency; }
21062107

21072108
/// Allocate and construct an AMDGPU kernel.
2108-
Expected<GenericKernelTy &>
2109-
constructKernel(const __tgt_offload_entry &KernelEntry) override {
2109+
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
21102110
// Allocate and construct the AMDGPU kernel.
21112111
AMDGPUKernelTy *AMDGPUKernel = Plugin::get().allocate<AMDGPUKernelTy>();
21122112
if (!AMDGPUKernel)
21132113
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
21142114

2115-
new (AMDGPUKernel) AMDGPUKernelTy(KernelEntry.name);
2115+
new (AMDGPUKernel) AMDGPUKernelTy(Name);
21162116

21172117
return *AMDGPUKernel;
21182118
}
@@ -2160,7 +2160,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
21602160
// Allocate and initialize the image object.
21612161
AMDGPUDeviceImageTy *AMDImage =
21622162
Plugin::get().allocate<AMDGPUDeviceImageTy>();
2163-
new (AMDImage) AMDGPUDeviceImageTy(ImageId, TgtImage);
2163+
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage);
21642164

21652165
// Load the HSA executable.
21662166
if (Error Err = AMDImage->loadExecutable(*this))

openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,6 @@ class GlobalTy {
4747
GlobalTy(const std::string &Name, uint32_t Size, void *Ptr = nullptr)
4848
: Name(Name), Size(Size), Ptr(Ptr) {}
4949

50-
GlobalTy(const __tgt_offload_entry &Entry)
51-
: Name(Entry.name), Size(Entry.size), Ptr(Entry.addr) {}
52-
5350
const std::string &getName() const { return Name; }
5451
uint32_t getSize() const { return Size; }
5552
void *getPtr() const { return Ptr; }

openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 13 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -182,34 +182,6 @@ class InfoQueueTy {
182182
/// specific device. This class is responsible for storing and managing
183183
/// the offload entries for an image on a device.
184184
class DeviceImageTy {
185-
186-
/// Class representing the offload entry table. The class stores the
187-
/// __tgt_target_table and a map to search in the table faster.
188-
struct OffloadEntryTableTy {
189-
/// Add new entry to the table.
190-
void addEntry(const __tgt_offload_entry &Entry) {
191-
Entries.push_back(Entry);
192-
TTTablePtr.EntriesBegin = &Entries[0];
193-
TTTablePtr.EntriesEnd = TTTablePtr.EntriesBegin + Entries.size();
194-
}
195-
196-
/// Get the raw pointer to the __tgt_target_table.
197-
operator __tgt_target_table *() {
198-
if (Entries.empty())
199-
return nullptr;
200-
return &TTTablePtr;
201-
}
202-
203-
private:
204-
__tgt_target_table TTTablePtr;
205-
llvm::SmallVector<__tgt_offload_entry> Entries;
206-
207-
public:
208-
using const_iterator = decltype(Entries)::const_iterator;
209-
const_iterator begin() const { return Entries.begin(); }
210-
const_iterator end() const { return Entries.end(); }
211-
};
212-
213185
/// Image identifier within the corresponding device. Notice that this id is
214186
/// not unique between different devices; they may overlap.
215187
int32_t ImageId;
@@ -218,25 +190,29 @@ class DeviceImageTy {
218190
const __tgt_device_image *TgtImage;
219191
const __tgt_device_image *TgtImageBitcode;
220192

193+
/// Reference to the device this image is loaded on.
194+
GenericDeviceTy &Device;
195+
221196
/// If this image has any global destructors that must be called.
222197
/// FIXME: This is only required because we currently have no invariants
223198
/// towards the lifetime of the underlying image. We should either copy
224199
/// the image into memory locally or erase the pointers after init.
225200
bool PendingGlobalDtors;
226201

227-
/// Table of offload entries.
228-
OffloadEntryTableTy OffloadEntryTable;
229-
230202
public:
231-
DeviceImageTy(int32_t Id, const __tgt_device_image *Image)
232-
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr),
203+
DeviceImageTy(int32_t Id, GenericDeviceTy &Device,
204+
const __tgt_device_image *Image)
205+
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr), Device(Device),
233206
PendingGlobalDtors(false) {
234207
assert(TgtImage && "Invalid target image");
235208
}
236209

237210
/// Get the image identifier within the device.
238211
int32_t getId() const { return ImageId; }
239212

213+
/// Get the device that this image is loaded onto.
214+
GenericDeviceTy &getDevice() const { return Device; }
215+
240216
/// Get the pointer to the raw __tgt_device_image.
241217
const __tgt_device_image *getTgtImage() const { return TgtImage; }
242218

@@ -261,13 +237,9 @@ class DeviceImageTy {
261237
return MemoryBufferRef(StringRef((const char *)getStart(), getSize()),
262238
"Image");
263239
}
264-
265240
/// Accessors to the boolean value
266241
bool setPendingGlobalDtors() { return PendingGlobalDtors = true; }
267242
bool hasPendingGlobalDtors() const { return PendingGlobalDtors; }
268-
269-
/// Get a reference to the offload entry table for the image.
270-
OffloadEntryTableTy &getOffloadEntryTable() { return OffloadEntryTable; }
271243
};
272244

273245
/// Class implementing common functionalities of offload kernels. Each plugin
@@ -661,8 +633,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
661633
virtual Error deinitImpl() = 0;
662634

663635
/// Load the binary image into the device and return the loaded device image.
664-
Expected<__tgt_target_table *> loadBinary(GenericPluginTy &Plugin,
665-
const __tgt_device_image *TgtImage);
636+
Expected<DeviceImageTy *> loadBinary(GenericPluginTy &Plugin,
637+
const __tgt_device_image *TgtImage);
666638
virtual Expected<DeviceImageTy *>
667639
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
668640

@@ -680,9 +652,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
680652
// up to the target to override this using the shouldSetupRPCServer function.
681653
Error setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image);
682654

683-
/// Register the offload entries for a specific image on the device.
684-
Error registerOffloadEntries(DeviceImageTy &Image);
685-
686655
/// Synchronize the current thread with the pending operations on the
687656
/// __tgt_async_info structure.
688657
Error synchronize(__tgt_async_info *AsyncInfo);
@@ -888,21 +857,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
888857
bool useAutoZeroCopy();
889858
virtual bool useAutoZeroCopyImpl() { return false; }
890859

891-
private:
892-
/// Register offload entry for global variable.
893-
Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
894-
const __tgt_offload_entry &GlobalEntry,
895-
__tgt_offload_entry &DeviceEntry);
896-
897-
/// Register offload entry for kernel function.
898-
Error registerKernelOffloadEntry(DeviceImageTy &DeviceImage,
899-
const __tgt_offload_entry &KernelEntry,
900-
__tgt_offload_entry &DeviceEntry);
901-
902860
/// Allocate and construct a kernel object.
903-
virtual Expected<GenericKernelTy &>
904-
constructKernel(const __tgt_offload_entry &KernelEntry) = 0;
861+
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
905862

863+
private:
906864
/// Get and set the stack size and heap size for the device. If not used, the
907865
/// plugin can implement the setters as no-op and setting the output
908866
/// value to zero for the getters.

0 commit comments

Comments
 (0)