llvm
diff --git a/‎mlir/CMakeLists.txt
Lines changed: 4 additions & 1 deletion b/‎mlir/CMakeLists.txt
Lines changed: 4 additions & 1 deletion
diff --git a/‎mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
Lines changed: 1 addition & 0 deletions b/‎mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
Lines changed: 1 addition & 0 deletions
diff --git a/‎mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
Lines changed: 69 additions & 0 deletions b/‎mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
Lines changed: 69 additions & 0 deletions
diff --git a/‎mlir/include/mlir/InitAllExtensions.h
Lines changed: 2 additions & 0 deletions b/‎mlir/include/mlir/InitAllExtensions.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎mlir/include/mlir/Target/LLVM/NVVM/Target.h
Lines changed: 28 additions & 0 deletions b/‎mlir/include/mlir/Target/LLVM/NVVM/Target.h
Lines changed: 28 additions & 0 deletions
diff --git a/‎mlir/include/mlir/Target/LLVM/NVVM/Utils.h
Lines changed: 74 additions & 0 deletions b/‎mlir/include/mlir/Target/LLVM/NVVM/Utils.h
Lines changed: 74 additions & 0 deletions
diff --git a/‎mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
Lines changed: 31 additions & 0 deletions b/‎mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
Lines changed: 31 additions & 0 deletions
diff --git a/‎mlir/lib/Target/LLVM/CMakeLists.txt
Lines changed: 77 additions & 0 deletions b/‎mlir/lib/Target/LLVM/CMakeLists.txt
Lines changed: 77 additions & 0 deletions
@@ -97,7 +97,7 @@ endif()
 
 # Build the CUDA conversions and run according tests if the NVPTX backend
 # is available
-if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD AND MLIR_ENABLE_EXECUTION_ENGINE)
+if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   set(MLIR_ENABLE_CUDA_CONVERSIONS 1)
 else()
   set(MLIR_ENABLE_CUDA_CONVERSIONS 0)
@@ -118,6 +118,9 @@ set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner")
 set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner")
 set(MLIR_ENABLE_SPIRV_CPU_RUNNER 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner")
 set(MLIR_ENABLE_VULKAN_RUNNER 0 CACHE BOOL "Enable building the mlir Vulkan runner")
+set(MLIR_ENABLE_NVPTXCOMPILER 0 CACHE BOOL
+    "Statically link the nvptxcompiler library instead of calling ptxas as a subprocess \
+    for compiling PTX to cubin")
 
 option(MLIR_INCLUDE_TESTS
        "Generate build targets for the MLIR unit tests."
 
@@ -15,6 +15,7 @@
 #define MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_
 
 #include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 
@@ -14,6 +14,7 @@
 #define NVVMIR_OPS
 
 include "mlir/IR/EnumAttr.td"
+include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
 include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
@@ -1472,4 +1473,72 @@ def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned",
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// NVVM target attribute.
+//===----------------------------------------------------------------------===//
+
+// Attribute describing an NVIDIA GPU compilation target, spelled
+// `#nvvm.target` in the textual IR (see the examples in the description).
+// NOTE(review): the def name is spelled `NVVM_TargettAttr` (double "t"); it is
+// kept as-is here because renaming it may affect other users of the def.
+def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> {
+  let description = [{
+    GPU target attribute for controlling compilation of NVIDIA targets. All
+    parameters decay into default values if not present.
+
+    Examples:
+
+    1. Target with default values.
+    ```
+      gpu.module @mymodule [#nvvm.target] attributes {...} {
+        ...
+      }
+    ```
+
+    2. Target with `sm_90` chip and fast math.
+    ```
+      gpu.module @mymodule [#nvvm.target<chip = "sm_90", flags = {fast}>] {
+        ...
+      }
+    ```
+  }];
+  // `O`, `triple`, `chip` and `features` carry explicit default values, while
+  // `flags` and `link` are optional and may be null.
+  let parameters = (ins
+    DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
+    StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
+    StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
+    StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
+    OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
+    OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link
+  );
+  // The whole `<...>` parameter struct is an optional group, so the bare form
+  // `#nvvm.target` is accepted.
+  let assemblyFormat = [{
+    (`<` struct($O, $triple, $chip, $features, $flags, $link)^ `>`)?
+  }];
+  // Single custom builder; its argument defaults repeat the parameter defaults
+  // and it forwards every argument to `Base::get`.
+  let builders = [
+    AttrBuilder<(ins CArg<"int", "2">:$optLevel,
+                     CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
+                     CArg<"StringRef", "\"sm_50\"">:$chip,
+                     CArg<"StringRef", "\"+ptx60\"">:$features,
+                     CArg<"DictionaryAttr", "nullptr">:$targetFlags,
+                     CArg<"ArrayAttr", "nullptr">:$linkFiles), [{
+      return Base::get($_ctxt, optLevel, triple, chip, features, targetFlags, linkFiles);
+    }]>
+  ];
+  // Suppress the default builders so that only the builder listed in
+  // `builders` is generated.
+  let skipDefaultBuilders = 1;
+  // Request a `verify` declaration; its definition has to be supplied in C++.
+  let genVerifyDecl = 1;
+  let extraClassDeclaration = [{
+    /// Returns true if the `flags` dictionary is present and contains an entry
+    /// named `flag`.
+    bool hasFlag(StringRef flag) const;
+    /// Returns true if the `fast` flag is present.
+    bool hasFastMath() const;
+    /// Returns true if the `ftz` flag is present.
+    bool hasFtz() const;
+  }];
+  let extraClassDefinition = [{
+    bool $cppClass::hasFlag(StringRef flag) const {
+      // A missing `flags` dictionary means that no flag is set.
+      if (DictionaryAttr flags = getFlags())
+        return flags.get(flag) != nullptr;
+      return false;
+    }
+    bool $cppClass::hasFastMath() const {
+      return hasFlag("fast");
+    }
+    bool $cppClass::hasFtz() const {
+      return hasFlag("ftz");
+    }
+  }];
+}
+
 #endif // NVVMIR_OPS
@@ -16,6 +16,7 @@
 
 #include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h"
 #include "mlir/Dialect/Func/Extensions/AllExtensions.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
 
 #include <cstdlib>
 
@@ -29,6 +30,7 @@ namespace mlir {
 inline void registerAllExtensions(DialectRegistry &registry) {
   func::registerAllExtensions(registry);
   registerConvertNVVMToLLVMInterface(registry);
+  // Attach the `TargetAttrInterface` implementation for `#nvvm.target`.
+  registerNVVMTarget(registry);
 }
 
 } // namespace mlir
 
@@ -0,0 +1,28 @@
+//===- Target.h - MLIR NVVM target registration -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides registration calls for attaching the NVVM target interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVM_NVVM_TARGET_H
+#define MLIR_TARGET_LLVM_NVVM_TARGET_H
+
+namespace mlir {
+// Forward declarations: the functions below only take these types by
+// reference, so their full definitions are not needed here.
+class DialectRegistry;
+class MLIRContext;
+/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the
+/// given registry.
+void registerNVVMTarget(DialectRegistry &registry);
+
+/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the
+/// registry associated with the given context.
+void registerNVVMTarget(MLIRContext &context);
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVM_NVVM_TARGET_H
@@ -0,0 +1,74 @@
+//===- Utils.h - MLIR NVVM target utils -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares NVVM target related utility classes and functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVM_NVVM_UTILS_H
+#define MLIR_TARGET_LLVM_NVVM_UTILS_H
+
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/Target/LLVM/ModuleToObject.h"
+
+namespace mlir {
+namespace NVVM {
+/// Searches for and returns the CUDA toolkit path. The search order is:
+/// 1. The `CUDA_ROOT` environment variable.
+/// 2. The `CUDA_HOME` environment variable.
+/// 3. The `CUDA_PATH` environment variable.
+/// 4. The CUDA toolkit path detected by CMake.
+/// 5. Otherwise, an empty string is returned.
+StringRef getCUDAToolkitPath();
+
+/// Base class for all NVVM serializations from GPU modules into binary strings.
+/// By default this class serializes into LLVM bitcode.
+class SerializeGPUModuleBase : public LLVM::ModuleToObject {
+public:
+  /// Initializes the `toolkitPath` with the path in `targetOptions` or, if that
+  /// path is empty, with the path returned by `getCUDAToolkitPath`.
+  SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target,
+                         const gpu::TargetOptions &targetOptions = {});
+
+  /// Initializes the LLVM NVPTX target by safely calling `LLVMInitializeNVPTX*`
+  /// methods if available.
+  static void init();
+
+  /// Returns the target attribute.
+  NVVMTargetAttr getTarget() const;
+
+  /// Returns the CUDA toolkit path.
+  StringRef getToolkitPath() const;
+
+  /// Returns the bitcode files to be loaded.
+  ArrayRef<std::string> getFileList() const;
+
+  /// Appends `nvvm/libdevice.bc` into `fileList`. Returns failure if the
+  /// library couldn't be found.
+  LogicalResult appendStandardLibs();
+
+  /// Loads the bitcode files in `fileList`. Overrides the corresponding
+  /// `LLVM::ModuleToObject` method; `override` already implies `virtual`, so
+  /// the keyword is not repeated.
+  std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+  loadBitcodeFiles(llvm::Module &module,
+                   llvm::TargetMachine &targetMachine) override;
+
+protected:
+  /// NVVM target attribute.
+  NVVMTargetAttr target;
+
+  /// CUDA toolkit path.
+  std::string toolkitPath;
+
+  /// List of LLVM bitcode files to link to.
+  SmallVector<std::string> fileList;
+};
+} // namespace NVVM
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVM_NVVM_UTILS_H
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 
 #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinAttributes.h"
@@ -723,6 +724,7 @@ void NVVMDialect::initialize() {
   // registered.
   allowUnknownOperations();
   declarePromisedInterface<ConvertToLLVMPatternInterface>();
+  declarePromisedInterface<gpu::TargetAttrInterface>();
 }
 
 LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
@@ -761,6 +763,35 @@ LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// NVVM target attribute.
+//===----------------------------------------------------------------------===//
+/// Verifies the parameters of the `#nvvm.target` attribute: the optimization
+/// level must lie in [0, 3], the triple and chip must be non-empty, and every
+/// element of the optional `link` array must be a string attribute. `features`
+/// and `flags` are accepted without further checks.
+LogicalResult
+NVVMTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                       int optLevel, StringRef triple, StringRef chip,
+                       StringRef features, DictionaryAttr flags,
+                       ArrayAttr files) {
+  // An in-flight diagnostic converts to `failure()` when returned.
+  if (optLevel < 0 || optLevel > 3)
+    return emitError()
+           << "The optimization level must be a number between 0 and 3.";
+  if (triple.empty())
+    return emitError() << "The target triple cannot be empty.";
+  if (chip.empty())
+    return emitError() << "The target chip cannot be empty.";
+
+  // Reject the `link` array as soon as one entry is null or not a string.
+  auto isNotString = [](::mlir::Attribute attr) {
+    return !attr || !mlir::isa<StringAttr>(attr);
+  };
+  if (files && llvm::any_of(files, isNotString))
+    return emitError()
+           << "All the elements in the `link` array must be strings.";
+  return success();
+}
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"
 
 
@@ -20,3 +20,80 @@ add_mlir_library(MLIRTargetLLVM
   MLIRExecutionEngineUtils
   MLIRTargetLLVMIRExport
 )
+
+# Select the LLVM NVPTX backend components only when the CUDA conversions are
+# enabled; otherwise `NVPTX_LIBS` is not set here.
+if(MLIR_ENABLE_CUDA_CONVERSIONS)
+  set(NVPTX_LIBS NVPTXCodeGen NVPTXDesc NVPTXInfo)
+endif()
+
+# Define the `MLIRNVVMTarget` library from `NVVM/Target.cpp`. The LLVM
+# components come from `NVPTX_LIBS`, which this file only populates when
+# `MLIR_ENABLE_CUDA_CONVERSIONS` is set, so the list may expand to nothing.
+add_mlir_dialect_library(MLIRNVVMTarget
+  NVVM/Target.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR
+
+  LINK_COMPONENTS
+  ${NVPTX_LIBS}
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRExecutionEngineUtils
+  MLIRSupport
+  MLIRGPUDialect
+  MLIRTargetLLVM
+  MLIRNVVMToLLVMIRTranslation
+  )
+
+# Configure the CUDA specific parts of `MLIRNVVMTarget`: the default CUDA
+# toolkit path baked into the library and, when `MLIR_ENABLE_NVPTXCOMPILER` is
+# set, the static `nvptxcompiler` library.
+if(MLIR_ENABLE_CUDA_CONVERSIONS)
+  # Find the CUDA toolkit. It is optional unless `MLIR_ENABLE_NVPTXCOMPILER` is
+  # requested, in which case configuration fails below when it is missing.
+  find_package(CUDAToolkit)
+
+  if(CUDAToolkit_FOUND)
+    # Get the CUDA toolkit path. The path is needed for detecting `libdevice.bc`.
+    # These extra steps are needed because of a bug in CMake.
+    # See: https://gitlab.kitware.com/cmake/cmake/-/issues/24858
+    # TODO: Bump the MLIR CMake version to 3.26.4 and switch to
+    # ${CUDAToolkit_LIBRARY_ROOT}
+    # Test the variable by name: `if(NOT DEFINED ${var})` would query a variable
+    # named after the path and thus always take the fallback branch. The
+    # truthiness test also falls back when the variable is empty.
+    if(NOT CUDAToolkit_LIBRARY_ROOT)
+      get_filename_component(MLIR_CUDAToolkit_ROOT "${CUDAToolkit_BIN_DIR}"
+                             DIRECTORY ABSOLUTE)
+    else()
+      set(MLIR_CUDAToolkit_ROOT "${CUDAToolkit_LIBRARY_ROOT}")
+    endif()
+
+    # Add the `nvptxcompiler` library.
+    if(MLIR_ENABLE_NVPTXCOMPILER)
+      # Find the `nvptxcompiler` library.
+      # TODO: Bump the MLIR CMake version to 3.25 and use `CUDA::nvptxcompiler_static`.
+      find_library(MLIR_NVPTXCOMPILER_LIB nvptxcompiler_static
+                  PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)
+
+      # Fail if `nvptxcompiler_static` couldn't be found; `find_library` stores
+      # a `-NOTFOUND` value, which evaluates to false.
+      if(NOT MLIR_NVPTXCOMPILER_LIB)
+        message(FATAL_ERROR
+                "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
+      endif()
+
+      # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
+      target_link_libraries(MLIRNVVMTarget PRIVATE ${MLIR_NVPTXCOMPILER_LIB})
+      target_include_directories(obj.MLIRNVVMTarget PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
+    endif()
+  else()
+    # Fail if `MLIR_ENABLE_NVPTXCOMPILER` is enabled and the toolkit couldn't be found.
+    if(MLIR_ENABLE_NVPTXCOMPILER)
+      message(FATAL_ERROR
+              "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
+    endif()
+  endif()
+  # `MLIR_CUDAToolkit_ROOT` is empty when the toolkit was not found.
+  message(VERBOSE "MLIR default CUDA toolkit path: ${MLIR_CUDAToolkit_ROOT}")
+
+  # Define the `CUDAToolkit` path and whether `nvptxcompiler` is enabled.
+  # `$<BOOL:...>` normalizes the cache value to 0 or 1, so spellings such as
+  # `ON`/`TRUE` still produce a numeric definition.
+  # NOTE(review): presumably the macro is tested numerically by the
+  # preprocessor in NVVM/Target.cpp - confirm.
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVPTXCOMPILER_ENABLED=$<BOOL:${MLIR_ENABLE_NVPTXCOMPILER}>
+    __DEFAULT_CUDATOOLKIT_PATH__="${MLIR_CUDAToolkit_ROOT}"
+  )
+endif()