Skip to content

[MLIR] Add a MLIR_NVVM_EMBED_LIBDEVICE CMake option that embeds libdevice in the binary #120238

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 17, 2024

Conversation

joker-eph
Copy link
Collaborator

This removes a runtime dependency on the CUDA Toolkit path: instead of looking up libdevice on the filesystem, we use a version of libdevice embedded in the binary at build time.

…vice in the binary

This removes a runtime dependency on the CUDA Toolkit path: instead of
looking up libdevice on the filesystem, we use a version of libdevice
embedded in the binary at build time.
@llvmbot
Copy link
Member

llvmbot commented Dec 17, 2024

@llvm/pr-subscribers-mlir-llvm

@llvm/pr-subscribers-mlir

Author: Mehdi Amini (joker-eph)

Changes

This removes a runtime dependency on the CUDA Toolkit path: instead of looking up libdevice on the filesystem, we use a version of libdevice embedded in the binary at build time.


Full diff: https://github.com/llvm/llvm-project/pull/120238.diff

2 Files Affected:

  • (modified) mlir/lib/Target/LLVM/CMakeLists.txt (+40)
  • (modified) mlir/lib/Target/LLVM/NVVM/Target.cpp (+36)
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 543e917b528b3e..4be147d02d579a 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -119,6 +119,45 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   )
 endif()
 
+# embed_binary_to_src(file output_file symbol): write a C source to `output_file` that defines `symbol[]` (the bytes of `file`) and `symbol_size`.
+function(embed_binary_to_src file output_file symbol)
+    file(READ "${file}" filedata HEX)
+    # Rewrite each hex byte pair as a `0xNN,` element of a C array initializer.
+    string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata "${filedata}")
+    # The size is emitted as `unsigned` to match the `extern "C" const unsigned` declaration on the C++ side.
+    file(WRITE "${output_file}" "const char ${symbol}[] = {${filedata}};\nconst unsigned ${symbol}_size = sizeof(${symbol});\n")
+endfunction()
+
+set(MLIR_NVVM_EMBED_LIBDEVICE 0 CACHE BOOL "Embed CUDA libdevice.bc in the binary at build time instead of looking it up at runtime")
+if (MLIR_NVVM_EMBED_LIBDEVICE)
+  if (NOT MLIR_NVVM_LIBDEVICE_PATH)
+    if(CUDAToolkit_FOUND)
+      find_file(MLIR_NVVM_LIBDEVICE_PATH libdevice.10.bc
+                PATHS ${CUDAToolkit_LIBRARY_ROOT}
+                PATH_SUFFIXES "nvvm/libdevice" NO_DEFAULT_PATH REQUIRED)
+    else()
+      message(FATAL_ERROR
+              "MLIR_NVVM_EMBED_LIBDEVICE requires libdevice.10.bc: set MLIR_NVVM_LIBDEVICE_PATH or make the CUDA Toolkit discoverable.")
+    endif()
+  endif()
+  # Generate the C source holding libdevice at configure time and link it into the NVVM target; the path is quoted so spaces do not split it.
+  embed_binary_to_src("${MLIR_NVVM_LIBDEVICE_PATH}" ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
+  add_mlir_library(MLIRNVVMLibdevice
+    ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
+  )
+  target_link_libraries(MLIRNVVMTarget PRIVATE MLIRNVVMLibdevice)
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=1
+  )
+else()
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=0
+  )
+endif()
+
+
 if (MLIR_ENABLE_ROCM_CONVERSIONS)
   set(AMDGPU_LIBS
     AMDGPUAsmParser
@@ -169,3 +208,4 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS)
     __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"
   )
 endif()
+
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index a9f7806b10f404..86ff848d6c6c2d 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -16,6 +16,10 @@
 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/DialectResourceBlobManager.h"
 #include "mlir/Target/LLVM/NVVM/Utils.h"
 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
@@ -33,6 +37,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <cstdint>
 #include <cstdlib>
 
 using namespace mlir;
@@ -42,6 +47,9 @@ using namespace mlir::NVVM;
 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
 #endif
 
+extern "C" const char _mlir_embedded_libdevice[];
+extern "C" const unsigned _mlir_embedded_libdevice_size;
+
 namespace {
 // Implementation of the `TargetAttrInterface` model.
 class NVVMTargetAttrImpl
@@ -130,6 +138,33 @@ ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
 
 // Try to append `libdevice` from a CUDA toolkit installation.
 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+#if MLIR_NVVM_EMBED_LIBDEVICE
+  // If libdevice is embedded in the binary, we don't look it up on the
+  // filesystem.
+  MLIRContext *ctx = target.getContext();
+  auto type =
+      RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
+                            IntegerType::get(ctx, 8));
+  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
+
+  // Lookup if we already loaded the resource, otherwise create it.
+  DialectResourceBlobManager::BlobEntry *blob =
+      resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
+  if (blob) {
+    librariesToLink.push_back(DenseResourceElementsAttr::get(
+        type, DenseResourceElementsHandle(
+                  blob, ctx->getLoadedDialect<BuiltinDialect>())));
+    return success();
+  }
+
+  // Allocate a resource using one of the UnManagedResourceBlob method to wrap
+  // the embedded data.
+  auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
+      ArrayRef<char>{_mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
+  librariesToLink.push_back(DenseResourceElementsAttr::get(
+      type, resourceManager.insert("_mlir_embedded_libdevice",
+                                   std::move(unmanagedBlob))));
+#else
   StringRef pathRef = getToolkitPath();
   if (!pathRef.empty()) {
     SmallVector<char, 256> path;
@@ -149,6 +184,7 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
     }
     librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
   }
+#endif
   return success();
 }
 

Copy link
Contributor

@fabianmcg fabianmcg left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice change, is there a way we can test with a stub?

@joker-eph
Copy link
Collaborator Author

It's difficult because it is a build option: the ifdef isn't enabled by default. I plan to change one of the bots to use this option after I land this.

@joker-eph joker-eph merged commit 6a7d6c5 into llvm:main Dec 17, 2024
11 checks passed
@joker-eph joker-eph deleted the libdevice_embedded branch December 17, 2024 15:53
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants