Skip to content

[MLIR] Add a MLIR_NVVM_EMBED_LIBDEVICE CMake option that embeds libdevice in the binary #120238

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions mlir/lib/Target/LLVM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,45 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
)
endif()


# Generate a C source file that embeds the raw bytes of a binary file.
#
# The input is read at configure time and written out as a C array
# initializer, together with a companion variable holding the array length.
#
# Arguments:
#   file        - path of the binary file to embed.
#   output_file - path of the C source file to (over)write.
#   symbol      - C identifier of the emitted `const char` array; the length is
#                 emitted as `const unsigned <symbol>_size`.
#
# Example:
#   embed_binary_to_src("${path_to_blob}"
#                       "${CMAKE_CURRENT_BINARY_DIR}/blob_embedded.c"
#                       my_embedded_blob)
function(embed_binary_to_src file output_file symbol)
  # The bytes are baked in while configuring, so ask CMake to re-run the
  # configure step whenever the input file changes; otherwise the generated
  # source would silently go stale.
  set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${file}")

  file(READ "${file}" filedata HEX)
  # An empty input would produce `{}`, which is not a valid C initializer.
  if("${filedata}" STREQUAL "")
    message(FATAL_ERROR
      "embed_binary_to_src: '${file}' is empty, nothing to embed.")
  endif()

  # Convert hex data for C compatibility: every byte "ab" becomes "0xab,".
  # The expansion is quoted so it is always passed as exactly one argument.
  string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata "${filedata}")

  # Write data to output file. The size is emitted as `unsigned` so that it
  # matches the `extern "C" const unsigned ..._size` declaration used by the
  # C++ code that consumes the embedded data.
  file(WRITE "${output_file}"
    "const char ${symbol}[] = {${filedata}};\nconst unsigned ${symbol}_size = sizeof(${symbol});\n")
endfunction()

# MLIR_NVVM_EMBED_LIBDEVICE: when enabled, libdevice.10.bc is converted into a
# C source file at configure time, built as the MLIRNVVMLibdevice library and
# linked into MLIRNVVMTarget. The NVVM target sources are always compiled with
# MLIR_NVVM_EMBED_LIBDEVICE defined to 1 or 0 so they can select the matching
# code path with a plain `#if`.
#
# MLIR_NVVM_LIBDEVICE_PATH may be set by the user to point at the bitcode
# file; when it is unset, the file is searched for under the CUDA toolkit.
set(MLIR_NVVM_EMBED_LIBDEVICE 0 CACHE BOOL "Embed CUDA libdevice.bc in the binary at build time instead of looking it up at runtime")
if (MLIR_NVVM_EMBED_LIBDEVICE)
  if (NOT MLIR_NVVM_LIBDEVICE_PATH)
    if(CUDAToolkit_FOUND)
      find_file(MLIR_NVVM_LIBDEVICE_PATH libdevice.10.bc
        PATHS ${CUDAToolkit_LIBRARY_ROOT}
        PATH_SUFFIXES "nvvm/libdevice" NO_DEFAULT_PATH REQUIRED)
    else()
      # Without a CUDA toolkit there is nowhere to search, so the user has to
      # provide the location of the bitcode file explicitly.
      message(FATAL_ERROR
        "MLIR_NVVM_EMBED_LIBDEVICE requires libdevice.10.bc, but the CUDA "
        "toolkit was not found. Set MLIR_NVVM_LIBDEVICE_PATH to the location "
        "of libdevice.10.bc.")
    endif()
  endif()

  # Turn the bitcode file into a C array and build it as its own library.
  embed_binary_to_src(
    "${MLIR_NVVM_LIBDEVICE_PATH}"
    "${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c"
    _mlir_embedded_libdevice)
  add_mlir_library(MLIRNVVMLibdevice
    ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
    )
  target_link_libraries(MLIRNVVMTarget PRIVATE MLIRNVVMLibdevice)
  target_compile_definitions(obj.MLIRNVVMTarget
    PRIVATE
    MLIR_NVVM_EMBED_LIBDEVICE=1
    )
else()
  target_compile_definitions(obj.MLIRNVVMTarget
    PRIVATE
    MLIR_NVVM_EMBED_LIBDEVICE=0
    )
endif()


if (MLIR_ENABLE_ROCM_CONVERSIONS)
set(AMDGPU_LIBS
AMDGPUAsmParser
Expand Down Expand Up @@ -169,3 +208,4 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS)
__DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"
)
endif()

36 changes: 36 additions & 0 deletions mlir/lib/Target/LLVM/NVVM/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/IR/BuiltinAttributeInterfaces.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectResourceBlobManager.h"
#include "mlir/Target/LLVM/NVVM/Utils.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
Expand All @@ -33,6 +37,7 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <cstdlib>

using namespace mlir;
Expand All @@ -42,6 +47,9 @@ using namespace mlir::NVVM;
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
#endif

// Symbols for the embedded copy of libdevice: the raw bytes and their count.
// They are only referenced when MLIR_NVVM_EMBED_LIBDEVICE is non-zero, in
// which case the build generates a C source file that defines them.
extern "C" {
extern const char _mlir_embedded_libdevice[];
extern const unsigned _mlir_embedded_libdevice_size;
}

namespace {
// Implementation of the `TargetAttrInterface` model.
class NVVMTargetAttrImpl
Expand Down Expand Up @@ -130,6 +138,33 @@ ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {

// Try to append `libdevice` from a CUDA toolkit installation.
LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
#if MLIR_NVVM_EMBED_LIBDEVICE
// If libdevice is embedded in the binary, we don't look it up on the
// filesystem.
MLIRContext *ctx = target.getContext();
auto type =
RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
IntegerType::get(ctx, 8));
auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);

// Look up whether we already loaded the resource; otherwise create it.
DialectResourceBlobManager::BlobEntry *blob =
resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
if (blob) {
librariesToLink.push_back(DenseResourceElementsAttr::get(
type, DenseResourceElementsHandle(
blob, ctx->getLoadedDialect<BuiltinDialect>())));
return success();
}

// Allocate a resource using one of the UnmanagedAsmResourceBlob methods to
// wrap the embedded data.
auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
ArrayRef<char>{_mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
librariesToLink.push_back(DenseResourceElementsAttr::get(
type, resourceManager.insert("_mlir_embedded_libdevice",
std::move(unmanagedBlob))));
#else
StringRef pathRef = getToolkitPath();
if (!pathRef.empty()) {
SmallVector<char, 256> path;
Expand All @@ -149,6 +184,7 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
}
librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
}
#endif
return success();
}

Expand Down
Loading