[mlir][GPU] Remove the SerializeToCubin pass #82486


Merged: 1 commit into llvm:main on Feb 22, 2024

Conversation

fabianmcg
Contributor

@fabianmcg fabianmcg commented Feb 21, 2024

The `SerializeToCubin` pass was deprecated in September 2023 in favor of GPU compilation attributes; see the "GPU compilation" section of the MLIR `gpu` dialect documentation.
This patch removes `SerializeToCubin` from the repo.
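For context, the replacement mechanism attaches a target attribute to the GPU module and serializes it with the `gpu-module-to-binary` pass instead of running `gpu-to-cubin`. A minimal sketch of the new workflow (the chip and feature values below are illustrative, not defaults):

```mlir
// Sketch of the attribute-based compilation flow that replaces gpu-to-cubin.
// Targets are attached either directly in the IR, as here, or by a pass such
// as nvvm-attach-target; serialization then happens via, e.g.:
//   mlir-opt input.mlir --gpu-module-to-binary
gpu.module @kernels [#nvvm.target<chip = "sm_70", features = "+ptx60">] {
  // ... gpu.func kernels to be serialized for the attached target(s) ...
}
```

One design consequence: a single `gpu.module` can carry several target attributes, so one module can be serialized for multiple architectures, which the per-pass `triple`/`chip`/`features` options of `gpu-to-cubin` could not express.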

@fabianmcg fabianmcg requested a review from joker-eph February 21, 2024 12:36
@fabianmcg fabianmcg marked this pull request as ready for review February 21, 2024 12:36
@llvmbot
Member

llvmbot commented Feb 21, 2024

@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Fabian Mora (fabianmcg)

Changes

The `SerializeToCubin` pass was deprecated in September 2023 in favor of GPU compilation attributes.
This patch removes `SerializeToCubin` from the repo.


Full diff: https://github.com/llvm/llvm-project/pull/82486.diff

4 Files Affected:

  • (modified) mlir/CMakeLists.txt (-1)
  • (modified) mlir/include/mlir/Dialect/GPU/Transforms/Passes.h (-14)
  • (modified) mlir/lib/Dialect/GPU/CMakeLists.txt (-52)
  • (removed) mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp (-180)
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 2d9f78e03ba76b..16c898bdeb6e00 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -123,7 +123,6 @@ else()
 endif()
 add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ENABLE_ROCM_CONVERSIONS})
 
-set(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION 0 CACHE BOOL "Enable deprecated GPU serialization passes")
 set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner")
 set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner")
 set(MLIR_ENABLE_SYCL_RUNNER 0 CACHE BOOL "Enable building the mlir Sycl runner")
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 5885facd07541e..8f7466a697d854 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -147,25 +147,11 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
 // Registration
 //===----------------------------------------------------------------------===//
 
-/// Register pass to serialize GPU kernel functions to a CUBIN binary
-/// annotation.
-LLVM_DEPRECATED("use Target attributes instead", "")
-void registerGpuSerializeToCubinPass();
-
 /// Register pass to serialize GPU kernel functions to a HSAco binary
 /// annotation.
 LLVM_DEPRECATED("use Target attributes instead", "")
 void registerGpuSerializeToHsacoPass();
 
-/// Create an instance of the GPU kernel function to CUBIN binary serialization
-/// pass with optLevel (default level 2).
-LLVM_DEPRECATED("use Target attributes instead", "")
-std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
-                                                    StringRef chip,
-                                                    StringRef features,
-                                                    int optLevel = 2,
-                                                    bool dumpPtx = false);
-
 /// Create an instance of the GPU kernel function to HSAco binary serialization
 /// pass.
 LLVM_DEPRECATED("use Target attributes instead", "")
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index e5776e157b612c..51cfa2216e0c1f 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -1,11 +1,3 @@
-if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
-  set(NVPTX_LIBS
-    NVPTXCodeGen
-    NVPTXDesc
-    NVPTXInfo
-  )
-endif()
-
 if (MLIR_ENABLE_ROCM_CONVERSIONS)
   set(AMDGPU_LIBS
     IRReader
@@ -60,7 +52,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
   Transforms/ParallelLoopMapper.cpp
   Transforms/ROCDLAttachTarget.cpp
   Transforms/SerializeToBlob.cpp
-  Transforms/SerializeToCubin.cpp
   Transforms/SerializeToHsaco.cpp
   Transforms/ShuffleRewriter.cpp
   Transforms/SPIRVAttachTarget.cpp
@@ -74,7 +65,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
   Core
   MC
   Target
-  ${NVPTX_LIBS}
   ${AMDGPU_LIBS}
 
   DEPENDS
@@ -110,48 +100,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
 add_subdirectory(TransformOps)
 add_subdirectory(Pipelines)
 
-if(MLIR_ENABLE_CUDA_RUNNER)
-  if(NOT MLIR_ENABLE_CUDA_CONVERSIONS)
-    message(SEND_ERROR
-      "Building mlir with cuda support requires the NVPTX backend")
-  endif()
-
-  # Configure CUDA language support. Using check_language first allows us to
-  # give a custom error message.
-  include(CheckLanguage)
-  check_language(CUDA)
-  if (CMAKE_CUDA_COMPILER)
-    enable_language(CUDA)
-  else()
-    message(SEND_ERROR
-      "Building mlir with cuda support requires a working CUDA install")
-  endif()
-
-  # Enable gpu-to-cubin pass.
-  target_compile_definitions(obj.MLIRGPUTransforms
-    PRIVATE
-    MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
-  )
-
-  # Add CUDA headers includes and the libcuda.so library.
-  target_include_directories(obj.MLIRGPUTransforms
-    PRIVATE
-    ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-  )
-
-  # Add link path for the cuda driver library.
-  find_library(CUDA_DRIVER_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
-  get_filename_component(CUDA_DRIVER_LIBRARY_PATH "${CUDA_DRIVER_LIBRARY}" DIRECTORY)
-  target_link_directories(MLIRGPUTransforms PRIVATE ${CUDA_DRIVER_LIBRARY_PATH})
-
-  target_link_libraries(MLIRGPUTransforms
-    PRIVATE
-    MLIRNVVMToLLVMIRTranslation
-    cuda
-  )
-
-endif()
-
 if(MLIR_ENABLE_ROCM_CONVERSIONS)
   if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
     message(SEND_ERROR
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
deleted file mode 100644
index 34ad4e6868e157..00000000000000
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that serializes a gpu module into CUBIN blob and
-// adds that blob as a string attribute of the module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
-#include "llvm/Support/Debug.h"
-
-#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Threading.h"
-
-#include <cuda.h>
-
-using namespace mlir;
-
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
-                          CUresult result, Location loc) {
-  const char *error = nullptr;
-  cuGetErrorString(result, &error);
-  emitError(loc,
-            expr.concat(error ? " failed with error code " + llvm::Twine{error}
-                              : llvm::Twine(" failed with unknown error "))
-                .concat("[")
-                .concat(buffer)
-                .concat("]"));
-}
-
-#define RETURN_ON_CUDA_ERROR(expr)                                             \
-  do {                                                                         \
-    if (auto status = (expr)) {                                                \
-      emitCudaError(#expr, jitErrorBuffer, status, loc);                       \
-      return {};                                                               \
-    }                                                                          \
-  } while (false)
-
-namespace {
-class SerializeToCubinPass
-    : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
-  static llvm::once_flag initializeBackendOnce;
-
-public:
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)
-
-  SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
-                       StringRef chip = "sm_35", StringRef features = "+ptx60",
-                       int optLevel = 2, bool dumpPtx = false);
-
-  StringRef getArgument() const override { return "gpu-to-cubin"; }
-  StringRef getDescription() const override {
-    return "Lower GPU kernel function to CUBIN binary annotations";
-  }
-
-private:
-  // Serializes PTX to CUBIN.
-  std::unique_ptr<std::vector<char>>
-  serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-// Sets the 'option' to 'value' unless it already has a value.
-static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
-  if (!option.hasValue())
-    option = value.str();
-}
-
-llvm::once_flag SerializeToCubinPass::initializeBackendOnce;
-
-SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
-                                           StringRef features, int optLevel,
-                                           bool dumpPtx) {
-  // No matter how this pass is constructed, ensure that the NVPTX backend
-  // is initialized exactly once.
-  llvm::call_once(initializeBackendOnce, []() {
-    // Initialize LLVM NVPTX backend.
-#if LLVM_HAS_NVPTX_TARGET
-    LLVMInitializeNVPTXTarget();
-    LLVMInitializeNVPTXTargetInfo();
-    LLVMInitializeNVPTXTargetMC();
-    LLVMInitializeNVPTXAsmPrinter();
-#endif
-  });
-
-  maybeSetOption(this->triple, triple);
-  maybeSetOption(this->chip, chip);
-  maybeSetOption(this->features, features);
-  this->dumpPtx = dumpPtx;
-  if (this->optLevel.getNumOccurrences() == 0)
-    this->optLevel.setValue(optLevel);
-}
-
-std::unique_ptr<std::vector<char>>
-SerializeToCubinPass::serializeISA(const std::string &isa) {
-  Location loc = getOperation().getLoc();
-  char jitErrorBuffer[4096] = {0};
-
-  RETURN_ON_CUDA_ERROR(cuInit(0));
-
-  // Linking requires a device context.
-  CUdevice device;
-  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
-  CUcontext context;
-  // Use the primary context.
-  RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRetain(&context, device));
-  // Push the primary context so that the next CUDA operations
-  // actually use it.
-  RETURN_ON_CUDA_ERROR(cuCtxPushCurrent(context));
-  CUlinkState linkState;
-
-  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
-                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
-  void *jitOptionsVals[] = {jitErrorBuffer,
-                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
-  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
-                                    jitOptions,     /* jit options */
-                                    jitOptionsVals, /* jit option values */
-                                    &linkState));
-
-  auto kernelName = getOperation().getName().str();
-  if (dumpPtx) {
-    llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
-    llvm::dbgs() << isa << "\n";
-  }
-  RETURN_ON_CUDA_ERROR(cuLinkAddData(
-      linkState, CUjitInputType::CU_JIT_INPUT_PTX,
-      const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
-      kernelName.c_str(), 0, /* number of jit options */
-      nullptr,               /* jit options */
-      nullptr                /* jit option values */
-      ));
-
-  void *cubinData;
-  size_t cubinSize;
-  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
-
-  char *cubinAsChar = static_cast<char *>(cubinData);
-  auto result =
-      std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
-
-  // This will also destroy the cubin data.
-  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
-  // Pop and release the primary context.
-  CUcontext poppedContext;
-  RETURN_ON_CUDA_ERROR(cuCtxPopCurrent(&poppedContext));
-  RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRelease(device));
-
-  return result;
-}
-
-// Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
-void mlir::registerGpuSerializeToCubinPass() {
-  PassRegistration<SerializeToCubinPass> registerSerializeToCubin(
-      [] { return std::make_unique<SerializeToCubinPass>(); });
-}
-
-std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
-                                                          StringRef arch,
-                                                          StringRef features,
-                                                          int optLevel,
-                                                          bool dumpPtx) {
-  return std::make_unique<SerializeToCubinPass>(triple, arch, features,
-                                                optLevel, dumpPtx);
-}
-
-#else  // MLIR_GPU_TO_CUBIN_PASS_ENABLE
-void mlir::registerGpuSerializeToCubinPass() {}
-#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE

Collaborator

@joker-eph joker-eph left a comment


Thanks!

@joker-eph
Collaborator

Can you add to the description links to the documentation for the new mechanism?

@fabianmcg
Contributor Author

Can you add to the description links to the documentation for the new mechanism?

Done.

@fabianmcg fabianmcg merged commit f204aee into llvm:main Feb 22, 2024
ingomueller-net added a commit to ingomueller-net/llvm-project that referenced this pull request Feb 26, 2024
This macro is obsolete since the landing of llvm#82486 but was forgotten to
be removed from the BUILD files.
ingomueller-net added a commit to ingomueller-net/llvm-project that referenced this pull request Mar 4, 2024
This macro is obsolete since the landing of llvm#82486 but was forgotten to
be removed from the BUILD files.
ingomueller-net added a commit that referenced this pull request Mar 4, 2024
…E. (#83006)

This macro is obsolete since the landing of #82486 but was forgotten to
be removed from the BUILD files.
@fabianmcg fabianmcg deleted the gpu_rm branch June 12, 2024 18:16