Skip to content

Commit 027d1b1

Browse files
committed
[Libomptarget] Remove __tgt_image_info and use the ELF directly (llvm#75720)
Summary: This patch reorganizes a lot of the code used to check for compatibility with the current environment. The bulk of this patch involves moving from using a separate `__tgt_image_info` struct (which just contains a string for the architecture) to instead simply checking this information from the ELF directly. Checking information in the ELF is very inexpensive, as creating an ELF file object is simply writing a base pointer. The main motivation for this was to reorganize everything into the ELF image, so that we can do the majority of these checks without first initializing the plugin. A future patch will move the first ELF checks to happen without initializing the plugin, so we no longer need to initialize any plugins that don't have needed images. This patch also adds a lot more sanity checks for whether or not the ELF is actually compatible, such as whether the images have a valid ABI, are 64-bit, are executable, etc. Reverts the following patches for reimplementation: 53859b6 b7e137c Change-Id: I37fc701d76d99464d7bd877168cbf912f622b634
1 parent f298a8c commit 027d1b1

File tree

20 files changed

+469
-348
lines changed

20 files changed

+469
-348
lines changed

openmp/libomptarget/include/DeviceImage.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,13 @@ class DeviceImageTy {
3030

3131
__tgt_bin_desc *BinaryDesc;
3232
__tgt_device_image Image;
33-
__tgt_image_info ImageInfo;
3433

3534
public:
3635
DeviceImageTy(__tgt_bin_desc &BinaryDesc, __tgt_device_image &Image);
3736

3837
__tgt_device_image &getExecutableImage() { return Image; }
39-
__tgt_image_info &getImageInfo() { return ImageInfo; }
4038
__tgt_bin_desc &getBinaryDesc() { return *BinaryDesc; }
4139

42-
llvm::StringRef
43-
getArch(llvm::StringRef DefaultArch = llvm::StringRef()) const {
44-
return ImageInfo.Arch ? ImageInfo.Arch : DefaultArch;
45-
}
46-
4740
auto entries() { return llvm::make_pointee_range(OffloadEntries); }
4841
};
4942

openmp/libomptarget/include/Shared/APITypes.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@ struct __tgt_device_info {
4646
void *Device = nullptr;
4747
};
4848

49-
/// This struct contains information about a given image.
50-
struct __tgt_image_info {
51-
const char *Arch;
52-
};
53-
5449
/// This struct is a record of all the host code that may be offloaded to a
5550
/// target.
5651
struct __tgt_bin_desc {

openmp/libomptarget/include/Shared/PluginAPI.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,6 @@ void __tgt_rtl_set_up_env(void);
5656
// having to load the library, which can be expensive.
5757
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
5858

59-
// This provides the same functionality as __tgt_rtl_is_valid_binary except we
60-
// also use additional information to determine if the image is valid. This
61-
// allows us to determine if an image has a compatible architecture.
62-
int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *Image,
63-
__tgt_image_info *Info);
64-
6559
// Return an integer other than zero if the data can be exchanged from SrcDevId
6660
// to DstDevId. If the data is exchangeable, the device plugin should provide
6761
// function to move data from source device to destination device directly.
@@ -249,8 +243,6 @@ int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
249243

250244
bool __tgt_rtl_requested_prepopulate_gpu_page_table();
251245

252-
bool __tgt_rtl_exists_valid_binary_for_RTL(void *, void *);
253-
254246
bool __tgt_rtl_is_system_supporting_managed_memory();
255247

256248
int32_t __tgt_rtl_launch_kernel_sync(int32_t, void *, void **, ptrdiff_t *,

openmp/libomptarget/include/Shared/PluginAPI.inc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
PLUGIN_API_HANDLE(init_plugin, true);
1717
PLUGIN_API_HANDLE(is_valid_binary, true);
18-
PLUGIN_API_HANDLE(is_valid_binary_info, false);
1918
PLUGIN_API_HANDLE(is_data_exchangable, false);
2019
PLUGIN_API_HANDLE(number_of_devices, true);
2120
PLUGIN_API_HANDLE(init_device, true);
@@ -48,7 +47,6 @@ PLUGIN_API_HANDLE(data_notify_mapped, false);
4847
PLUGIN_API_HANDLE(data_notify_unmapped, false);
4948
PLUGIN_API_HANDLE(set_device_offset, false);
5049
PLUGIN_API_HANDLE(initialize_record_replay, false);
51-
PLUGIN_API_HANDLE(exists_valid_binary_for_RTL, true);
5250
PLUGIN_API_HANDLE(has_apu_device, true);
5351
PLUGIN_API_HANDLE(has_USM_capable_dGPU, true);
5452
PLUGIN_API_HANDLE(are_allocations_for_maps_on_apus_disabled, true);

openmp/libomptarget/include/omptarget.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -322,12 +322,6 @@ void __tgt_register_lib(__tgt_bin_desc *Desc);
322322

323323
/// Initialize all RTLs at once
324324
void __tgt_init_all_rtls();
325-
/// adds an image information struct, called for each image
326-
void __tgt_register_image_info(__tgt_image_info *imageInfo);
327-
328-
/// gets pointer to image information for specified image number
329-
/// Returns nullptr for apps built with old version of compiler
330-
__tgt_image_info *__tgt_get_image_info(uint32_t image_num);
331325

332326
/// removes a target shared library from the target execution image
333327
void __tgt_unregister_lib(__tgt_bin_desc *Desc);

openmp/libomptarget/include/rtl.h

Lines changed: 187 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,198 @@
1313
#ifndef _OMPTARGET_RTL_H
1414
#define _OMPTARGET_RTL_H
1515

16-
#include "llvm/ADT/SmallVector.h"
17-
1816
#include "omptarget.h"
17+
#include <llvm/ADT/DenseSet.h>
18+
#include "llvm/ADT/SmallVector.h"
19+
#include "llvm/Support/DynamicLibrary.h"
1920

2021
#include <cstdint>
22+
#include <list>
23+
#include <map>
24+
#include <mutex>
25+
#include <string>
26+
#include <vector>
2127
#include <map>
2228

29+
// Forward declarations.
30+
struct DeviceTy;
31+
struct __tgt_bin_desc;
32+
33+
struct RTLInfoTy {
34+
typedef int32_t(init_plugin_ty)();
35+
typedef int32_t(deinit_plugin_ty)();
36+
typedef int32_t(is_valid_binary_ty)(void *);
37+
typedef int32_t(is_valid_binary_info_ty)(void *, void *);
38+
typedef int32_t(is_data_exchangable_ty)(int32_t, int32_t);
39+
typedef int32_t(number_of_devices_ty)();
40+
typedef bool(has_apu_device_ty)();
41+
typedef bool(has_USM_capable_dGPU_ty)();
42+
typedef bool(are_allocations_for_maps_on_apus_disabled_ty)();
43+
typedef bool(requested_prepopulate_gpu_page_table_ty)();
44+
typedef bool(is_no_maps_check_ty)();
45+
typedef bool(is_fine_grained_memory_enabled_ty)();
46+
typedef bool(is_system_supporting_managed_memory_ty)();
47+
typedef int32_t(init_device_ty)(int32_t);
48+
typedef int32_t(deinit_device_ty)(int32_t);
49+
typedef int32_t(number_of_team_procs_ty)(int32_t);
50+
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
51+
typedef void *(data_alloc_ty)(int32_t, int64_t, void *, int32_t);
52+
53+
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
54+
typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
55+
__tgt_async_info *);
56+
typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
57+
typedef int32_t(data_retrieve_async_ty)(int32_t, void *, void *, int64_t,
58+
__tgt_async_info *);
59+
typedef int32_t(data_exchange_ty)(int32_t, void *, int32_t, void *, int64_t);
60+
typedef int32_t(data_exchange_async_ty)(int32_t, void *, int32_t, void *,
61+
int64_t, __tgt_async_info *);
62+
typedef int32_t(data_delete_ty)(int32_t, void *, int32_t);
63+
typedef int32_t(launch_kernel_sync_ty)(int32_t, void *, void **, ptrdiff_t *,
64+
const KernelArgsTy *);
65+
typedef int32_t(launch_kernel_ty)(int32_t, void *, void **, ptrdiff_t *,
66+
const KernelArgsTy *, __tgt_async_info *);
67+
typedef int64_t(init_requires_ty)(int64_t);
68+
typedef int32_t(synchronize_ty)(int32_t, __tgt_async_info *);
69+
typedef int32_t(query_async_ty)(int32_t, __tgt_async_info *);
70+
typedef int32_t (*register_lib_ty)(__tgt_bin_desc *);
71+
typedef int32_t(supports_empty_images_ty)();
72+
typedef void(print_device_info_ty)(int32_t);
73+
typedef void(set_info_flag_ty)(uint32_t);
74+
typedef int32_t(create_event_ty)(int32_t, void **);
75+
typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *);
76+
typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *);
77+
typedef int32_t(sync_event_ty)(int32_t, void *);
78+
typedef int32_t(destroy_event_ty)(int32_t, void *);
79+
typedef int(set_coarse_grain_mem_region_ty)(int32_t, void *, int64_t);
80+
typedef int(prepopulate_page_table_ty)(int32_t, void *, int64_t);
81+
typedef int32_t(query_coarse_grain_mem_region_ty)(int32_t, void *, int64_t);
82+
typedef int32_t(enable_access_to_all_agents_ty)(void *, int32_t);
83+
typedef int32_t(release_async_info_ty)(int32_t, __tgt_async_info *);
84+
typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
85+
typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
86+
const char **);
87+
typedef int32_t(data_lock_ty)(int32_t, void *, int64_t, void **);
88+
typedef int32_t(data_unlock_ty)(int32_t, void *);
89+
typedef int32_t(data_notify_mapped_ty)(int32_t, void *, int64_t);
90+
typedef int32_t(data_notify_unmapped_ty)(int32_t, void *);
91+
typedef int32_t(set_device_offset_ty)(int32_t);
92+
typedef int32_t(activate_record_replay_ty)(int32_t, uint64_t, void *, bool,
93+
bool);
94+
typedef void(set_up_env_ty)(void);
95+
96+
int32_t Idx = -1; // RTL index, index is the number of devices
97+
// of other RTLs that were registered before,
98+
// i.e. the OpenMP index of the first device
99+
// to be registered with this RTL.
100+
int32_t NumberOfDevices = -1; // Number of devices this RTL deals with.
101+
102+
std::unique_ptr<llvm::sys::DynamicLibrary> LibraryHandler;
103+
104+
#ifdef OMPTARGET_DEBUG
105+
std::string RTLName;
106+
#endif
107+
108+
// Functions implemented in the RTL.
109+
init_plugin_ty *init_plugin = nullptr;
110+
deinit_plugin_ty *deinit_plugin = nullptr;
111+
is_valid_binary_ty *is_valid_binary = nullptr;
112+
is_valid_binary_info_ty *is_valid_binary_info = nullptr;
113+
is_data_exchangable_ty *is_data_exchangable = nullptr;
114+
number_of_devices_ty *number_of_devices = nullptr;
115+
has_apu_device_ty *has_apu_device = nullptr;
116+
has_USM_capable_dGPU_ty *has_USM_capable_dGPU = nullptr;
117+
are_allocations_for_maps_on_apus_disabled_ty
118+
*are_allocations_for_maps_on_apus_disabled = nullptr;
119+
requested_prepopulate_gpu_page_table_ty
120+
*requested_prepopulate_gpu_page_table = nullptr;
121+
is_no_maps_check_ty *is_no_maps_check = nullptr;
122+
is_fine_grained_memory_enabled_ty *is_fine_grained_memory_enabled = nullptr;
123+
is_system_supporting_managed_memory_ty *is_system_supporting_managed_memory =
124+
nullptr;
125+
init_device_ty *init_device = nullptr;
126+
deinit_device_ty *deinit_device = nullptr;
127+
number_of_team_procs_ty *number_of_team_procs = nullptr;
128+
load_binary_ty *load_binary = nullptr;
129+
data_alloc_ty *data_alloc = nullptr;
130+
data_submit_ty *data_submit = nullptr;
131+
data_submit_async_ty *data_submit_async = nullptr;
132+
data_retrieve_ty *data_retrieve = nullptr;
133+
data_retrieve_async_ty *data_retrieve_async = nullptr;
134+
data_exchange_ty *data_exchange = nullptr;
135+
data_exchange_async_ty *data_exchange_async = nullptr;
136+
data_delete_ty *data_delete = nullptr;
137+
launch_kernel_sync_ty *launch_kernel_sync = nullptr;
138+
launch_kernel_ty *launch_kernel = nullptr;
139+
init_requires_ty *init_requires = nullptr;
140+
synchronize_ty *synchronize = nullptr;
141+
query_async_ty *query_async = nullptr;
142+
register_lib_ty register_lib = nullptr;
143+
register_lib_ty unregister_lib = nullptr;
144+
supports_empty_images_ty *supports_empty_images = nullptr;
145+
set_info_flag_ty *set_info_flag = nullptr;
146+
print_device_info_ty *print_device_info = nullptr;
147+
create_event_ty *create_event = nullptr;
148+
record_event_ty *record_event = nullptr;
149+
wait_event_ty *wait_event = nullptr;
150+
sync_event_ty *sync_event = nullptr;
151+
destroy_event_ty *destroy_event = nullptr;
152+
init_async_info_ty *init_async_info = nullptr;
153+
init_device_into_ty *init_device_info = nullptr;
154+
release_async_info_ty *release_async_info = nullptr;
155+
data_lock_ty *data_lock = nullptr;
156+
data_unlock_ty *data_unlock = nullptr;
157+
set_coarse_grain_mem_region_ty *set_coarse_grain_mem_region = nullptr;
158+
prepopulate_page_table_ty *prepopulate_page_table = nullptr;
159+
query_coarse_grain_mem_region_ty *query_coarse_grain_mem_region = nullptr;
160+
enable_access_to_all_agents_ty *enable_access_to_all_agents = nullptr;
161+
data_notify_mapped_ty *data_notify_mapped = nullptr;
162+
data_notify_unmapped_ty *data_notify_unmapped = nullptr;
163+
set_device_offset_ty *set_device_offset = nullptr;
164+
activate_record_replay_ty *activate_record_replay = nullptr;
165+
set_up_env_ty *set_up_env = nullptr;
166+
167+
// Are there images associated with this RTL.
168+
bool IsUsed = false;
169+
170+
llvm::DenseSet<const __tgt_device_image *> UsedImages;
171+
172+
// Mutex for thread-safety when calling RTL interface functions.
173+
// It is easier to enforce thread-safety at the libomptarget level,
174+
// so that developers of new RTLs do not have to worry about it.
175+
std::mutex Mtx;
176+
};
177+
178+
/// RTLs identified in the system.
179+
struct RTLsTy {
180+
// List of the detected runtime libraries.
181+
std::list<RTLInfoTy> AllRTLs;
182+
183+
// Array of pointers to the detected runtime libraries that have compatible
184+
// binaries.
185+
llvm::SmallVector<RTLInfoTy *> UsedRTLs;
186+
187+
explicit RTLsTy() = default;
188+
189+
// Register the clauses of the requires directive.
190+
void registerRequires(int64_t Flags);
191+
192+
// Initialize RTL if it has not been initialized
193+
void initRTLonce(RTLInfoTy &RTL);
194+
195+
// Initialize all RTLs
196+
void initAllRTLs();
197+
198+
// Register a shared library with all (compatible) RTLs.
199+
void registerLib(__tgt_bin_desc *Desc);
200+
201+
// Unregister a shared library from all RTLs.
202+
void unregisterLib(__tgt_bin_desc *Desc);
203+
204+
// not thread-safe, called from global constructor (i.e. once)
205+
void loadRTLs();
206+
};
207+
23208
/// Map between the host entry begin and the translation table. Each
24209
/// registered library gets one TranslationTable. Use the map from
25210
/// __tgt_offload_entry so that we may quickly determine whether we

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "Shared/Debug.h"
2525
#include "Shared/Environment.h"
2626
#include "Shared/Utils.h"
27+
#include "Utils/ELF.h"
2728

2829
#include "GlobalHandler.h"
2930
#include "OpenMP/OMPT/Callback.h"
@@ -4027,17 +4028,15 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
40274028
}
40284029
}
40294030

4030-
void checkInvalidImage(__tgt_image_info *Info,
4031-
__tgt_device_image *TgtImage) override final {
4032-
4033-
utils::checkImageCompatibilityWithSystemXnackMode(TgtImage,
4034-
IsXnackEnabled());
4035-
}
4036-
40374031
/// Check whether the image is compatible with an AMDGPU device.
4038-
Expected<bool>
4039-
isImageCompatible(__tgt_image_info *Info,
4040-
__tgt_device_image *TgtImage) const override {
4032+
Expected<bool> isELFCompatible(StringRef Image) const override {
4033+
// Get the associated architecture and flags from the ELF.
4034+
auto ElfOrErr =
4035+
ELF64LEObjectFile::create(MemoryBufferRef(Image, /*Identifier=*/""),
4036+
/*InitContent=*/false);
4037+
if (!ElfOrErr)
4038+
return ElfOrErr.takeError();
4039+
std::optional<StringRef> Processor = ElfOrErr->tryGetCPUName();
40414040

40424041
for (hsa_agent_t Agent : KernelAgents) {
40434042
std::string Target;
@@ -4061,10 +4060,17 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
40614060
if (Err)
40624061
return std::move(Err);
40634062

4064-
if (utils::isImageCompatibleWithEnv(Info, Target))
4063+
if (utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
4064+
ElfOrErr->getPlatformFlags(),
4065+
Target))
40654066
return true;
40664067
}
40674068

4069+
// Check if the system's XNACK mode matches the one required by the
4070+
// image. Print a warning if not.
4071+
// utils::checkImageCompatibilityWithSystemXnackMode(TgtImage,
4072+
// IsXnackEnabled());
4073+
40684074
return false;
40694075
}
40704076

0 commit comments

Comments
 (0)