-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR] Add a MLIR_NVVM_EMBED_LIBDEVICE CMake option that embeds libdevice in the binary #120238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…vice in the binary. This removes a runtime dependency on the CUDA Toolkit path: instead of looking up libdevice on the filesystem, we use a version of libdevice embedded in the binary at build time.
@llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-mlir Author: Mehdi Amini (joker-eph) Changes: This removes a runtime dependency on the CUDA Toolkit path: instead of looking up libdevice on the filesystem, we use a version of libdevice embedded in the binary at build time. Full diff: https://github.com/llvm/llvm-project/pull/120238.diff 2 Files Affected:
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 543e917b528b3e..4be147d02d579a 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -119,6 +119,45 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
)
endif()
+
+# Generate a C source file that embeds the raw bytes of a binary file.
+#
+#   file        - path of the binary file to embed (read at configure time).
+#   output_file - path of the C source file to (over)write.
+#   symbol      - name of the emitted `const char` array holding the bytes;
+#                 the byte count is emitted as `const unsigned <symbol>_size`.
+function(embed_binary_to_src file output_file symbol)
+  file(READ "${file}" filedata HEX)
+  # Convert hex data for C compatibility: "ab" -> "0xab,"
+  string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata "${filedata}")
+  # Write data to output file. The size is emitted as `unsigned` so that the
+  # definition has the same type as the `extern "C" const unsigned` declaration
+  # used by the C++ consumer of the symbol.
+  file(WRITE "${output_file}" "const char ${symbol}[] = {${filedata}};\nconst unsigned ${symbol}_size = sizeof(${symbol});\n")
+  # The input is read while configuring, so ask CMake to re-run when it changes;
+  # otherwise a stale copy would stay embedded.
+  set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${file}")
+endfunction()
+
+# Opt-in: embed libdevice in the binary at build time instead of looking it up
+# on the filesystem at runtime. If MLIR_NVVM_LIBDEVICE_PATH is already set, it
+# is used as-is and the CUDA Toolkit search below is skipped.
+set(MLIR_NVVM_EMBED_LIBDEVICE 0 CACHE BOOL "Embed CUDA libdevice.bc in the binary at build time instead of looking it up at runtime")
+if (MLIR_NVVM_EMBED_LIBDEVICE)
+  if (NOT MLIR_NVVM_LIBDEVICE_PATH)
+    if(CUDAToolkit_FOUND)
+      find_file(MLIR_NVVM_LIBDEVICE_PATH libdevice.10.bc
+        PATHS ${CUDAToolkit_LIBRARY_ROOT}
+        PATH_SUFFIXES "nvvm/libdevice" NO_DEFAULT_PATH REQUIRED)
+    else()
+      # Report the actual cause: without a CUDA Toolkit there is nowhere to
+      # search for libdevice, so the user has to provide the path explicitly.
+      message(FATAL_ERROR
+        "MLIR_NVVM_EMBED_LIBDEVICE is enabled but the CUDA Toolkit couldn't be found; set MLIR_NVVM_LIBDEVICE_PATH to the libdevice bitcode file to embed.")
+    endif()
+  endif()
+
+  # Turn libdevice into a C source file and link it into the NVVM target.
+  embed_binary_to_src(${MLIR_NVVM_LIBDEVICE_PATH} ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
+  add_mlir_library(MLIRNVVMLibdevice
+    ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
+    )
+  target_link_libraries(MLIRNVVMTarget PRIVATE MLIRNVVMLibdevice)
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=1
+    )
+else()
+  # Always define the macro so the C++ side can test it with a plain `#if`.
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=0
+    )
+endif()
+
+
if (MLIR_ENABLE_ROCM_CONVERSIONS)
set(AMDGPU_LIBS
AMDGPUAsmParser
@@ -169,3 +208,4 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS)
__DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"
)
endif()
+
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index a9f7806b10f404..86ff848d6c6c2d 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -16,6 +16,10 @@
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/DialectResourceBlobManager.h"
#include "mlir/Target/LLVM/NVVM/Utils.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
@@ -33,6 +37,7 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
#include <cstdlib>
using namespace mlir;
@@ -42,6 +47,9 @@ using namespace mlir::NVVM;
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
#endif
+extern "C" const char _mlir_embedded_libdevice[]; // Embedded libdevice bytes; defined in the CMake-generated libdevice_embedded.c when MLIR_NVVM_EMBED_LIBDEVICE is enabled.
+extern "C" const unsigned _mlir_embedded_libdevice_size; // Number of bytes in `_mlir_embedded_libdevice`.
+
namespace {
// Implementation of the `TargetAttrInterface` model.
class NVVMTargetAttrImpl
@@ -130,6 +138,33 @@ ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
// Append `libdevice` to the libraries to link. When MLIR_NVVM_EMBED_LIBDEVICE
// is set, the copy embedded in the binary is used; otherwise it is looked up
// in a CUDA toolkit installation.
LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+#if MLIR_NVVM_EMBED_LIBDEVICE
+ // libdevice is embedded in the binary: expose it as a 1-D i8 tensor resource
+ // attribute instead of looking it up on the filesystem.
+ MLIRContext *ctx = target.getContext();
+ auto type =
+ RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
+ IntegerType::get(ctx, 8));
+ auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
+
+ // Reuse the blob if one is already registered under this key, otherwise create it.
+ DialectResourceBlobManager::BlobEntry *blob =
+ resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
+ if (blob) {
+ librariesToLink.push_back(DenseResourceElementsAttr::get(
+ type, DenseResourceElementsHandle(
+ blob, ctx->getLoadedDialect<BuiltinDialect>())));
+ return success();
+ }
+
+ // No blob registered yet: wrap the embedded data with UnmanagedAsmResourceBlob
+ // and insert it under the `_mlir_embedded_libdevice` key.
+ auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
+ ArrayRef<char>{_mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
+ librariesToLink.push_back(DenseResourceElementsAttr::get(
+ type, resourceManager.insert("_mlir_embedded_libdevice",
+ std::move(unmanagedBlob))));
+#else // !MLIR_NVVM_EMBED_LIBDEVICE
StringRef pathRef = getToolkitPath();
if (!pathRef.empty()) {
SmallVector<char, 256> path;
@@ -149,6 +184,7 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
}
librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
}
+#endif // MLIR_NVVM_EMBED_LIBDEVICE
return success();
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice change, is there a way we can test with a stub?
It's difficult because it is a build option; the ifdef isn't enabled by default. I plan to change one of the bots to use this option after I land this. |
This removes a runtime dependency on the CUDA Toolkit path: instead of looking up libdevice on the filesystem, we use a version of libdevice embedded in the binary at build time.