Skip to content

Commit d683a85

Browse files
ThorBlronlieb
authored and committed
This patch implements a runtime check of whether the system's USM setting
matches the one required by the target image. If necessary, zero-copy mode is enabled. Change-Id: Ic9950e15d0ee4664910c1a575e192634f3227898
1 parent 584aa52 commit d683a85

File tree

8 files changed

+123
-69
lines changed

8 files changed

+123
-69
lines changed

openmp/libomptarget/include/rtl.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
#include "llvm/ADT/SmallVector.h"
2020
#include "llvm/Support/DynamicLibrary.h"
2121

22-
#include "omptarget.h"
23-
2422
#include <list>
2523
#include <map>
2624
#include <mutex>

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 62 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include "UtilitiesRTL.h"
3131
#include "omptarget.h"
3232

33+
#include "hsakmt/hsakmt.h"
34+
3335
#include "print_tracing.h"
3436

3537
#include "memtype.h"
@@ -46,8 +48,6 @@
4648
#include "llvm/Support/Program.h"
4749
#include "llvm/Support/raw_ostream.h"
4850

49-
#include "hsakmt/hsakmt.h"
50-
5151
#if defined(__has_include)
5252
#if __has_include("hsa/hsa.h")
5353
#include "hsa/hsa.h"
@@ -80,12 +80,12 @@
8080
#define OMPT_IF_TRACING_ENABLED(stmts)
8181
#endif
8282

83-
#define CHECK_KMT_ERROR(val) kmtCheck((val), #val, __FILE__, __LINE__)
83+
#define KMT_EXPECT_SUCCESS(val) kmtExpectSucc((val), #val, __FILE__, __LINE__)
8484
template <typename T>
85-
int kmtCheck(T err, const char *const func, const char *const file,
86-
const int line) {
85+
int kmtExpectSucc(T err, const char *const func, const char *const file,
86+
const int line) {
8787
if (err != HSAKMT_STATUS_SUCCESS) {
88-
DP("HsaKmt Error at: %s : %u \n", file, line);
88+
FAILURE_MESSAGE("HsaKmt Error at: %s : %u \n", file, line);
8989
return -1;
9090
}
9191
return 0;
@@ -95,8 +95,6 @@ int kmtCheck(T err, const char *const func, const char *const file,
9595
extern void setOmptTimestamp(uint64_t Start, uint64_t End);
9696
extern void setOmptHostToDeviceRate(double Slope, double Offset);
9797

98-
99-
10098
/// HSA system clock frequency
10199
double TicksToTime = 1.0;
102100

@@ -2504,7 +2502,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
25042502

25052503
/// AMDGPU returns the product of the number of compute units and the waves
25062504
/// per compute unit.
2507-
uint64_t requestedRPCPortCount() const override {
2505+
uint64_t requestedRPCPortCount() const override {
25082506
return HardwareParallelism;
25092507
}
25102508

@@ -3581,8 +3579,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
35813579

35823580
NoMapChecks = BoolEnvar("OMPX_DISABLE_MAPS", true);
35833581
DisableUsmMaps = BoolEnvar("OMPX_DISABLE_USM_MAPS", false);
3584-
APUMaps = BoolEnvar("OMPX_APU_MAPS", false);
3585-
HSAXnack = BoolEnvar("HSA_XNACK", false);
3582+
HsaXnack = BoolEnvar("HSA_XNACK", false);
3583+
IsHsaXnackDefined = HsaXnack.isPresent();
35863584
}
35873585

35883586
void setUpEnv() override final {
@@ -3594,36 +3592,13 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
35943592
if (DisableUsmMaps.get() == true) {
35953593
EnableFineGrainedMemory = true;
35963594
}
3597-
3598-
if (hasAPUDevice() || hasGfx90aDevice()) {
3599-
// OMPX_APU_MAPS is a temporary env variable
3600-
// that should always be used with HSA_XNACK=1:
3601-
// error if it is not. Once this is made default behavior
3602-
// for USM=OFF, HSA_XNACK=1, then we can remove the error
3603-
// as the behavior is only triggered by HSA_XNACK value
3604-
if ((APUMaps.get() == true) && (HSAXnack.get() == false)) {
3605-
FATAL_MESSAGE0(1, "OMPX_APU_MAPS behavior requires HSA_XNACK=1");
3606-
}
3607-
3608-
assert(!(Plugin::get().getRequiresFlags() & OMP_REQ_UNDEFINED));
3609-
// Last condition of the following if statement is a workaround due to
3610-
// the presence of OMPX_APU_MAPS to enable unified shared memory mode
3611-
// for default programs run with HSA_XNACK=1. Remove once the
3612-
// OMPX_APU_MAPS mode is made default. Formerly implemented in
3613-
// RTLsTy::disableAPUMapsForUSM
3614-
3615-
if ((APUMaps.get() == true) && (HSAXnack.get() == true) &&
3616-
!(Plugin::get().getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
3617-
DisableAllocationsForMapsOnApus = true;
3618-
}
3619-
} else if (APUMaps.get() == true && HSAXnack == true) {
3620-
FATAL_MESSAGE0(1,
3621-
"OMPX_APU_MAPS and HSA_XNACK enabled on non-APU system");
3622-
}
36233595
}
36243596

36253597
/// Check whether the image is compatible with an AMDGPU device.
3626-
Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
3598+
Expected<bool>
3599+
isImageCompatible(__tgt_image_info *Info,
3600+
__tgt_device_image *TgtImage) const override {
3601+
36273602
for (hsa_agent_t Agent : KernelAgents) {
36283603
std::string Target;
36293604
auto Err = utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
@@ -3650,14 +3625,47 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
36503625
if (utils::isImageCompatibleWithEnv(Info, Target))
36513626
return true;
36523627
}
3628+
3629+
// Check if the system's XNACK mode matches the one required by the
3630+
// image. Print a warning if not.
3631+
utils::checkImageCompatibilityWithSystemXnackMode(TgtImage,
3632+
IsXnackEnabled());
3633+
36533634
return false;
36543635
}
36553636

3656-
void
3657-
checkAndAdjustXnackStatus(__tgt_device_image *TgtImage) const override final {
3658-
if (utils::wasBinaryBuiltWithXnackEnabled(TgtImage)) {
3659-
// TODO: Implement me. This is part of another ticket. It requries methods
3660-
// to manipulate the XNACK status.
3637+
/// Decide, for the given target image, whether map operations should run in
/// zero-copy mode (DisableAllocationsForMapsOnApus) on this system.
///
/// Nothing is changed unless an APU or gfx90a device is present. Otherwise
/// DisableAllocationsForMapsOnApus is reset to false and then set to true
/// only when all of the following hold:
///   * XNACK is enabled on the system,
///   * the program does not request unified shared memory, and
///   * the image was built as xnack+ or xnack-any.
/// If XNACK is disabled although the program requests unified shared memory,
/// a failure message is emitted and the flag stays false.
///
/// \param TgtImage device image whose XNACK build mode is inspected; must not
///        be null.
void checkAndAdjustUsmModeForTargetImage(
    __tgt_device_image *TgtImage) override final {
  assert((TgtImage != nullptr) && "TgtImage is nullptr");
  assert(!(Plugin::get().getRequiresFlags() & OMP_REQ_UNDEFINED) &&
         "Requires flags are not set.");

  // Systems without an APU or gfx90a device keep their current setting.
  if (!hasAPUDevice() && !hasGfx90aDevice())
    return;

  const bool IsXnackRequired =
      (Plugin::get().getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY) != 0;

  // Read the image's mode before querying the system so that diagnostics are
  // emitted in the same order as before.
  const utils::XnackBuildMode BinaryXnackMode =
      utils::extractXnackModeFromBinary(TgtImage);

  DisableAllocationsForMapsOnApus = false;

  if (!IsXnackEnabled()) {
    if (IsXnackRequired) {
      FAILURE_MESSAGE(
          "XNACK is disabled. However, the program requires XNACK "
          "support. Enable XNACK and re-run the program.\n");
    }
    return;
  }

  // XNACK is enabled. Programs that explicitly require unified shared memory
  // are left untouched here.
  if (IsXnackRequired)
    return;

  // Explicit comparison instead of a switch without default: every other
  // build mode (xnack-, unsupported) intentionally leaves the flag false.
  const bool ImageCanUseXnack =
      BinaryXnackMode == utils::XnackBuildMode::XNACK_PLUS ||
      BinaryXnackMode == utils::XnackBuildMode::XNACK_ANY;
  if (ImageCanUseXnack)
    DisableAllocationsForMapsOnApus = true; // Zero-copy
}
36633671

@@ -3727,6 +3735,15 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
37273735
return HSA_STATUS_ERROR;
37283736
}
37293737

3738+
/// Ask the kernel driver (through libhsakmt) whether XNACK is enabled.
///
/// Opens the KFD, queries the XNACK mode and closes the KFD again. Every call
/// goes through KMT_EXPECT_SUCCESS, which reports any non-success status.
///
/// \returns true only if the query itself succeeded and reported a positive
///          mode value; false otherwise.
bool IsXnackEnabled() const {
  // NOTE(review): the open status is reported but deliberately not used to
  // bail out, matching the previous behavior — confirm which statuses
  // hsaKmtOpenKFD may return for an already opened KFD before tightening this.
  KMT_EXPECT_SUCCESS(hsaKmtOpenKFD());

  // -1 instructs the runtime to query the XNACK status without modifying it.
  int32_t Enable = -1;
  const bool QuerySucceeded =
      KMT_EXPECT_SUCCESS(hsaKmtGetXNACKMode(&Enable)) == 0;

  KMT_EXPECT_SUCCESS(hsaKmtCloseKFD());

  // Only trust the reported value when the query did not fail, instead of
  // relying on Enable still holding its initial sentinel.
  return QuerySucceeded && (Enable > 0);
}
3746+
37303747
bool checkForDeviceByGFXName(const llvm::StringRef GfxLookUpName) {
37313748
bool CheckForMI300A =
37323749
(GfxLookUpName.find_insensitive("gfx940") != llvm::StringRef::npos);
@@ -3776,8 +3793,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
37763793

37773794
BoolEnvar NoMapChecks;
37783795
BoolEnvar DisableUsmMaps;
3779-
BoolEnvar APUMaps;
3780-
BoolEnvar HSAXnack;
3796+
BoolEnvar HsaXnack;
3797+
bool IsHsaXnackDefined{false};
37813798

37823799
// Set by OMPX_APU_MAPS environment variable.
37833800
// If set, maps cause no copy operations. USM is used instead. Allocated

openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ enum COV_OFFSETS : uint32_t {
8181
PER_DEVICE_PREALLOC_SIZE = 131072
8282
};
8383

84+
/// XNACK mode a target image was built with, as reported by
/// extractXnackModeFromBinary from the image's ELF flags.
enum XnackBuildMode : short {
85+
XNACK_UNSUPPORTED = -1, // ELF flags report XNACK as unsupported
86+
XNACK_MINUS = 0, // image built with xnack- (XNACK off)
87+
XNACK_PLUS = 1, // image built with xnack+ (XNACK on)
88+
XNACK_ANY = 2 // image built for any XNACK setting
89+
};
90+
8491
/// Parse a TargetID to get processor arch and feature map.
8592
/// Returns processor subarch.
8693
/// Returns TargetID features in \p FeatureMap argument.
@@ -169,25 +176,41 @@ bool isImageCompatibleWithEnv(const __tgt_image_info *Info,
169176
return true;
170177
}
171178

172-
// Check target image for XNACK option (XNACK+, XNACK-ANY, XNACK-)
173-
[[nodiscard]] bool
174-
wasBinaryBuiltWithXnackEnabled(__tgt_device_image *TgtImage) {
179+
// Check target image for XNACK mode (XNACK+, XNACK-ANY, XNACK-)
180+
[[nodiscard]] XnackBuildMode
181+
extractXnackModeFromBinary(__tgt_device_image *TgtImage) {
175182
assert((TgtImage != nullptr) && "TgtImage is nullptr.");
176183
u_int16_t EFlags = elf_get_eflags(TgtImage);
177184

178185
unsigned XnackFlags = EFlags & ELF::EF_AMDGPU_FEATURE_XNACK_V4;
179186

180187
switch (XnackFlags) {
181-
case ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4:
182188
case ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4:
183-
return true;
189+
return XnackBuildMode::XNACK_PLUS;
190+
case ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4:
191+
return XnackBuildMode::XNACK_ANY;
184192
case ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4:
193+
return XnackBuildMode::XNACK_MINUS;
185194
case ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
186-
return false;
195+
return XnackBuildMode::XNACK_UNSUPPORTED;
187196
default:
188197
FAILURE_MESSAGE("Unknown XNACK flag!\n");
189198
}
190-
return false;
199+
return XNACK_MINUS;
200+
}
201+
202+
/// Report a mismatch between the XNACK mode an image was built with and the
/// XNACK state of the system.
///
/// A failure message is emitted when
///   * XNACK is enabled on the system but the image was built as xnack-, or
///   * XNACK is disabled on the system but the image was built as xnack+.
/// Every other combination produces no output.
///
/// \param TgtImage       device image whose build mode is read through
///                       extractXnackModeFromBinary.
/// \param IsXnackEnabled whether XNACK is currently enabled on the system.
void checkImageCompatibilityWithSystemXnackMode(__tgt_device_image *TgtImage,
                                                bool IsXnackEnabled) {
  const XnackBuildMode ImageXnackMode =
      utils::extractXnackModeFromBinary(TgtImage);

  // Compare against the named enumerators rather than relying on their
  // numeric values (implicit conversion to bool / the literal 1).
  if (IsXnackEnabled && ImageXnackMode == XnackBuildMode::XNACK_MINUS) {
    FAILURE_MESSAGE(
        "Image is not compatible with current XNACK mode! XNACK is enabled "
        "on the system but image was compiled with xnack-.\n");
    return;
  }

  if (!IsXnackEnabled && ImageXnackMode == XnackBuildMode::XNACK_PLUS) {
    FAILURE_MESSAGE("Image is not compatible with current XNACK mode! "
                    "XNACK is disabled on the system. However, the image "
                    "requires xnack+.\n");
  }
}
192215

193216
struct KernelMetaDataTy {

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,7 @@ uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
476476
uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
477477
uint32_t NumTeamsClause[3],
478478
uint64_t LoopTripCount,
479-
uint32_t &NumThreads
480-
) const {
479+
uint32_t &NumThreads) const {
481480
assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
482481
"Multi dimensional launch not supported yet.");
483482

@@ -1501,21 +1500,15 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
15011500
if (!Plugin::isActive())
15021501
return false;
15031502

1504-
15051503
if (!__tgt_rtl_is_valid_binary(TgtImage))
15061504
return false;
1507-
1508-
// Need to call this method before 'isImageCompatibleCheck' in order to adjust
1509-
// settings.
1510-
Plugin::get().checkAndAdjustXnackStatus(TgtImage);
1511-
15121505
// A subarchitecture was not specified. Assume it is compatible.
15131506
if (!Info->Arch)
15141507
return true;
15151508

15161509
// Check the compatibility with all the available devices. Notice the
15171510
// devices may not be initialized yet.
1518-
auto CompatibleOrErr = Plugin::get().isImageCompatible(Info);
1511+
auto CompatibleOrErr = Plugin::get().isImageCompatible(Info, TgtImage);
15191512
if (!CompatibleOrErr) {
15201513
// This error should not abort the execution, so we just inform the user
15211514
// through the debug system.
@@ -1625,6 +1618,8 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
16251618
return nullptr;
16261619
}
16271620

1621+
Plugin::get().checkAndAdjustUsmModeForTargetImage(TgtImage);
1622+
16281623
__tgt_target_table *Table = *TableOrErr;
16291624
assert(Table != nullptr && "Invalid table");
16301625

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,7 +1008,7 @@ struct GenericPluginTy {
10081008

10091009
virtual void setUpEnv() {}
10101010
virtual void
1011-
checkAndAdjustXnackStatus(__tgt_device_image *TgtImage) const {}
1011+
checkAndAdjustUsmModeForTargetImage(__tgt_device_image *TgtImage) {}
10121012

10131013
/// Get the ELF code to recognize the binary image of this plugin.
10141014
virtual uint16_t getMagicElfBits() const = 0;
@@ -1059,7 +1059,9 @@ struct GenericPluginTy {
10591059
/// Indicate if an image is compatible with the plugin devices. Notice that
10601060
/// this function may be called before actually initializing the devices. So
10611061
/// we could not move this function into GenericDeviceTy.
1062-
virtual Expected<bool> isImageCompatible(__tgt_image_info *Info) const = 0;
1062+
virtual Expected<bool>
1063+
isImageCompatible(__tgt_image_info *Info,
1064+
__tgt_device_image *TgtImage) const = 0;
10631065

10641066
/// Indicate whether the plugin supports empty images.
10651067
virtual bool supportsEmptyImages() const { return false; }

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1024,7 +1024,9 @@ struct CUDAPluginTy final : public GenericPluginTy {
10241024
}
10251025

10261026
/// Check whether the image is compatible with the available CUDA devices.
1027-
Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
1027+
Expected<bool>
1028+
isImageCompatible(__tgt_image_info *Info,
1029+
__tgt_device_image *TgtImage) const override {
10281030
for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
10291031
CUdevice Device;
10301032
CUresult Res = cuDeviceGet(&Device, DevId);

openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
414414
}
415415

416416
/// All images (ELF-compatible) should be compatible with this plugin.
417-
Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
417+
Expected<bool>
418+
isImageCompatible(__tgt_image_info *Info,
419+
__tgt_device_image *TgtImage) const override {
418420
return true;
419421
}
420422

openmp/libomptarget/src/CMakeLists.txt

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,16 @@
99
# Build offloading library libomptarget.so.
1010
#
1111
##===----------------------------------------------------------------------===##
12+
# libhsakmt.a
13+
# REQUIRED makes configuration stop with an error when libhsakmt.a is missing.
find_library ( HSAKMT_LIB libhsakmt.a REQUIRED HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
14+
get_filename_component ( HSAKMT_LIB_PATH ${HSAKMT_LIB} DIRECTORY )
15+
link_directories (${HSAKMT_LIB_PATH})
1216

13-
libomptarget_say("Building offloading runtime library libomptarget.")
17+
# lib_amdgpu
18+
pkg_check_modules(drm_amdgpu REQUIRED IMPORTED_TARGET libdrm_amdgpu)
19+
20+
# libnuma
21+
find_library(numa libnuma.so REQUIRED HINTS /usr/lib/x86_64-linux-gnu/)
1422

1523
add_llvm_library(omptarget
1624
SHARED
@@ -61,7 +69,14 @@ endif()
6169

6270
find_library(LLVM_OFFLOAD_ARCH LLVMOffloadArch HINTS ${LLVM_LIBRARY_DIR} ${LLVM_BINARY_DIR} PATH_SUFFIXES lib REQUIRED)
6371

64-
target_link_libraries(omptarget PRIVATE
72+
target_include_directories(omptarget PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
73+
74+
target_link_libraries(
75+
omptarget
76+
PRIVATE hsakmt
77+
PRIVATE drm_amdgpu
78+
PRIVATE numa
79+
PRIVATE
6580
${CMAKE_DL_LIBS}
6681
${LLVM_OFFLOAD_ARCH}
6782
"-Wl,--no-allow-shlib-undefined")

0 commit comments

Comments
 (0)