-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[mlir][GPU] Remove the SerializeToCubin pass #82486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The `SerializeToCubin` pass was deprecated in September 2023 in favor of GPU compilation attributes. This patch removes `SerializeToCubin` from the repo.
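@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir Author: Fabian Mora (fabianmcg) Changes: The `SerializeToCubin` pass was deprecated in September 2023 in favor of GPU compilation attributes. This patch removes `SerializeToCubin` from the repo. Full diff: https://github.com/llvm/llvm-project/pull/82486.diff 4 Files Affected: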
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 2d9f78e03ba76b..16c898bdeb6e00 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -123,7 +123,6 @@ else()
endif()
add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ENABLE_ROCM_CONVERSIONS})
-set(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION 0 CACHE BOOL "Enable deprecated GPU serialization passes")
set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner")
set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner")
set(MLIR_ENABLE_SYCL_RUNNER 0 CACHE BOOL "Enable building the mlir Sycl runner")
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 5885facd07541e..8f7466a697d854 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -147,25 +147,11 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
// Registration
//===----------------------------------------------------------------------===//
-/// Register pass to serialize GPU kernel functions to a CUBIN binary
-/// annotation.
-LLVM_DEPRECATED("use Target attributes instead", "")
-void registerGpuSerializeToCubinPass();
-
/// Register pass to serialize GPU kernel functions to a HSAco binary
/// annotation.
LLVM_DEPRECATED("use Target attributes instead", "")
void registerGpuSerializeToHsacoPass();
-/// Create an instance of the GPU kernel function to CUBIN binary serialization
-/// pass with optLevel (default level 2).
-LLVM_DEPRECATED("use Target attributes instead", "")
-std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
- StringRef chip,
- StringRef features,
- int optLevel = 2,
- bool dumpPtx = false);
-
/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.
LLVM_DEPRECATED("use Target attributes instead", "")
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index e5776e157b612c..51cfa2216e0c1f 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -1,11 +1,3 @@
-if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
- set(NVPTX_LIBS
- NVPTXCodeGen
- NVPTXDesc
- NVPTXInfo
- )
-endif()
-
if (MLIR_ENABLE_ROCM_CONVERSIONS)
set(AMDGPU_LIBS
IRReader
@@ -60,7 +52,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
Transforms/ParallelLoopMapper.cpp
Transforms/ROCDLAttachTarget.cpp
Transforms/SerializeToBlob.cpp
- Transforms/SerializeToCubin.cpp
Transforms/SerializeToHsaco.cpp
Transforms/ShuffleRewriter.cpp
Transforms/SPIRVAttachTarget.cpp
@@ -74,7 +65,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
Core
MC
Target
- ${NVPTX_LIBS}
${AMDGPU_LIBS}
DEPENDS
@@ -110,48 +100,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
add_subdirectory(TransformOps)
add_subdirectory(Pipelines)
-if(MLIR_ENABLE_CUDA_RUNNER)
- if(NOT MLIR_ENABLE_CUDA_CONVERSIONS)
- message(SEND_ERROR
- "Building mlir with cuda support requires the NVPTX backend")
- endif()
-
- # Configure CUDA language support. Using check_language first allows us to
- # give a custom error message.
- include(CheckLanguage)
- check_language(CUDA)
- if (CMAKE_CUDA_COMPILER)
- enable_language(CUDA)
- else()
- message(SEND_ERROR
- "Building mlir with cuda support requires a working CUDA install")
- endif()
-
- # Enable gpu-to-cubin pass.
- target_compile_definitions(obj.MLIRGPUTransforms
- PRIVATE
- MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
- )
-
- # Add CUDA headers includes and the libcuda.so library.
- target_include_directories(obj.MLIRGPUTransforms
- PRIVATE
- ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
- )
-
- # Add link path for the cuda driver library.
- find_library(CUDA_DRIVER_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
- get_filename_component(CUDA_DRIVER_LIBRARY_PATH "${CUDA_DRIVER_LIBRARY}" DIRECTORY)
- target_link_directories(MLIRGPUTransforms PRIVATE ${CUDA_DRIVER_LIBRARY_PATH})
-
- target_link_libraries(MLIRGPUTransforms
- PRIVATE
- MLIRNVVMToLLVMIRTranslation
- cuda
- )
-
-endif()
-
if(MLIR_ENABLE_ROCM_CONVERSIONS)
if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
message(SEND_ERROR
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
deleted file mode 100644
index 34ad4e6868e157..00000000000000
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that serializes a gpu module into CUBIN blob and
-// adds that blob as a string attribute of the module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
-#include "llvm/Support/Debug.h"
-
-#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Threading.h"
-
-#include <cuda.h>
-
-using namespace mlir;
-
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
- CUresult result, Location loc) {
- const char *error = nullptr;
- cuGetErrorString(result, &error);
- emitError(loc,
- expr.concat(error ? " failed with error code " + llvm::Twine{error}
- : llvm::Twine(" failed with unknown error "))
- .concat("[")
- .concat(buffer)
- .concat("]"));
-}
-
-#define RETURN_ON_CUDA_ERROR(expr) \
- do { \
- if (auto status = (expr)) { \
- emitCudaError(#expr, jitErrorBuffer, status, loc); \
- return {}; \
- } \
- } while (false)
-
-namespace {
-class SerializeToCubinPass
- : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
- static llvm::once_flag initializeBackendOnce;
-
-public:
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)
-
- SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
- StringRef chip = "sm_35", StringRef features = "+ptx60",
- int optLevel = 2, bool dumpPtx = false);
-
- StringRef getArgument() const override { return "gpu-to-cubin"; }
- StringRef getDescription() const override {
- return "Lower GPU kernel function to CUBIN binary annotations";
- }
-
-private:
- // Serializes PTX to CUBIN.
- std::unique_ptr<std::vector<char>>
- serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-// Sets the 'option' to 'value' unless it already has a value.
-static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
- if (!option.hasValue())
- option = value.str();
-}
-
-llvm::once_flag SerializeToCubinPass::initializeBackendOnce;
-
-SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
- StringRef features, int optLevel,
- bool dumpPtx) {
- // No matter how this pass is constructed, ensure that the NVPTX backend
- // is initialized exactly once.
- llvm::call_once(initializeBackendOnce, []() {
- // Initialize LLVM NVPTX backend.
-#if LLVM_HAS_NVPTX_TARGET
- LLVMInitializeNVPTXTarget();
- LLVMInitializeNVPTXTargetInfo();
- LLVMInitializeNVPTXTargetMC();
- LLVMInitializeNVPTXAsmPrinter();
-#endif
- });
-
- maybeSetOption(this->triple, triple);
- maybeSetOption(this->chip, chip);
- maybeSetOption(this->features, features);
- this->dumpPtx = dumpPtx;
- if (this->optLevel.getNumOccurrences() == 0)
- this->optLevel.setValue(optLevel);
-}
-
-std::unique_ptr<std::vector<char>>
-SerializeToCubinPass::serializeISA(const std::string &isa) {
- Location loc = getOperation().getLoc();
- char jitErrorBuffer[4096] = {0};
-
- RETURN_ON_CUDA_ERROR(cuInit(0));
-
- // Linking requires a device context.
- CUdevice device;
- RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
- CUcontext context;
- // Use the primary context.
- RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRetain(&context, device));
- // Push the primary context so that the next CUDA operations
- // actually use it.
- RETURN_ON_CUDA_ERROR(cuCtxPushCurrent(context));
- CUlinkState linkState;
-
- CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
- void *jitOptionsVals[] = {jitErrorBuffer,
- reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
- RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
- jitOptions, /* jit options */
- jitOptionsVals, /* jit option values */
- &linkState));
-
- auto kernelName = getOperation().getName().str();
- if (dumpPtx) {
- llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
- llvm::dbgs() << isa << "\n";
- }
- RETURN_ON_CUDA_ERROR(cuLinkAddData(
- linkState, CUjitInputType::CU_JIT_INPUT_PTX,
- const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
- kernelName.c_str(), 0, /* number of jit options */
- nullptr, /* jit options */
- nullptr /* jit option values */
- ));
-
- void *cubinData;
- size_t cubinSize;
- RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
-
- char *cubinAsChar = static_cast<char *>(cubinData);
- auto result =
- std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
-
- // This will also destroy the cubin data.
- RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
- // Pop and release the primary context.
- CUcontext poppedContext;
- RETURN_ON_CUDA_ERROR(cuCtxPopCurrent(&poppedContext));
- RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRelease(device));
-
- return result;
-}
-
-// Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
-void mlir::registerGpuSerializeToCubinPass() {
- PassRegistration<SerializeToCubinPass> registerSerializeToCubin(
- [] { return std::make_unique<SerializeToCubinPass>(); });
-}
-
-std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
- StringRef arch,
- StringRef features,
- int optLevel,
- bool dumpPtx) {
- return std::make_unique<SerializeToCubinPass>(triple, arch, features,
- optLevel, dumpPtx);
-}
-
-#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
-void mlir::registerGpuSerializeToCubinPass() {}
-#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
Can you add to the description links to the documentation for the new mechanism? |
Done. |
This macro has been obsolete since llvm#82486 landed, but its removal from the BUILD files was overlooked.
This macro has been obsolete since llvm#82486 landed, but its removal from the BUILD files was overlooked.
The `SerializeToCubin` pass was deprecated in September 2023 in favor of GPU compilation attributes; see the GPU compilation section in the `gpu` MLIR docs. This patch removes `SerializeToCubin` from the repo.