-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[flang][cuda] Add global constructor for allocators registration #109854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: This pass creates the constructor function to call the allocator registration and adds it to the global_ctors. Full diff: https://github.com/llvm/llvm-project/pull/109854.diff 5 Files Affected:
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index fcfb8677951a2d..3b2af3a3398108 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -39,6 +39,7 @@ namespace fir {
#define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
#define GEN_PASS_DECL_CHARACTERCONVERSION
#define GEN_PASS_DECL_CFGCONVERSION
+#define GEN_PASS_DECL_CUFADDCONSTRUCTOR
#define GEN_PASS_DECL_CUFIMPLICITDEVICEGLOBAL
#define GEN_PASS_DECL_CUFOPCONVERSION
#define GEN_PASS_DECL_EXTERNALNAMECONVERSION
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index ab98591c911cdf..bf75123e853779 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -436,4 +436,11 @@ def CufImplicitDeviceGlobal :
];
}
+def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> {
+ let summary = "Add constructor to register CUDA Fortran allocators";
+ let dependentDialects = [
+ "mlir::func::FuncDialect"
+ ];
+}
+
#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index b68e3d68b9b83e..5e1a0293e63c97 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
CompilerGeneratedNames.cpp
ConstantArgumentGlobalisation.cpp
ControlFlowConverter.cpp
+ CUFAddConstructor.cpp
CufImplicitDeviceGlobal.cpp
CufOpConversion.cpp
ArrayValueCopy.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
new file mode 100644
index 00000000000000..7ab06845ddec2f
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -0,0 +1,89 @@
+//===- CUFAddConstructor.cpp ------------------------------------*- C++ -*-===//
+//
+// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+// property and proprietary rights in and to this material, related
+// documentation and any modifications thereto. Any use, reproduction,
+// disclosure or distribution of this material and related documentation
+// without an express license agreement from NVIDIA CORPORATION or
+// its affiliates is strictly prohibited.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+
+/// Symbol name of the constructor function generated by the pass.
+static constexpr llvm::StringRef cudaFortranCtorName{
+    "__cudaFortranConstructor"};
+
+/// Module pass that adds a constructor to register CUDA Fortran allocators.
+///
+/// Running the pass appends three operations to the end of the module:
+///  - a private declaration of the CUFRegisterAllocator runtime entry point,
+///  - an internal function named by `cudaFortranCtorName` that calls it,
+///  - an `llvm.mlir.global_ctors` operation listing that function with
+///    priority 0.
+struct CUFAddConstructor
+    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {
+
+  /// Emits the runtime declaration, the constructor function and the
+  /// global_ctors operation. All functions share the `void ()` LLVM function
+  /// type built here.
+  void runOnOperation() override {
+    mlir::ModuleOp mod = getOperation();
+    mlir::OpBuilder builder{mod.getBodyRegion()};
+    builder.setInsertionPointToEnd(mod.getBody());
+    mlir::Location loc = mod.getLoc();
+    auto *ctx = mod.getContext();
+    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
+    auto funcTy =
+        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
+
+    // Private declaration of, and symbol reference to, CUFRegisterAllocator.
+    // The insertion point stays at the end of the module body afterwards, so
+    // the constructor created next is appended after the declaration.
+    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
+        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
+    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+    auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
+        ctx, RTNAME_STRING(CUFRegisterAllocator));
+
+    // Create the constructor function that calls CUFRegisterAllocator.
+    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
+                                                       funcTy);
+    func.setLinkage(mlir::LLVM::Linkage::Internal);
+    builder.setInsertionPointToStart(func.addEntryBlock(builder));
+    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
+    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
+
+    // Create the llvm.global_ctor with the function. The builder currently
+    // points into the constructor body, so move it back to the module end.
+    // TODO: We might want to have a utility that retrieves it if already
+    // created and adds new functions.
+    builder.setInsertionPointToEnd(mod.getBody());
+    llvm::SmallVector<mlir::Attribute> funcs;
+    funcs.push_back(mlir::FlatSymbolRefAttr::get(ctx, func.getSymName()));
+    llvm::SmallVector<int> priorities;
+    priorities.push_back(0);
+    builder.create<mlir::LLVM::GlobalCtorsOp>(
+        loc, builder.getArrayAttr(funcs), builder.getI32ArrayAttr(priorities));
+  }
+};
+
+} // end anonymous namespace
\ No newline at end of file
diff --git a/flang/test/Fir/CUDA/cuda-constructor.f90 b/flang/test/Fir/CUDA/cuda-constructor.f90
new file mode 100644
index 00000000000000..3a27b9d54252ca
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-constructor.f90
@@ -0,0 +1,12 @@
+! RUN: bbc -fcuda -emit-hlfir %s -o - | fir-opt --cuf-add-constructor | FileCheck %s
+
+program main
+ real, device :: ahost(10)
+end
+
+! CHECK: llvm.func @_FortranACUFRegisterAllocator() attributes {sym_visibility = "private"}
+! CHECK-LABEL: llvm.func internal @__cudaFortranConstructor() {
+! CHECK: llvm.call @_FortranACUFRegisterAllocator() : () -> ()
+! CHECK: llvm.return
+! CHECK: }
+! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]}
\ No newline at end of file
|
tschuett
reviewed
Sep 24, 2024
wangzpgi
approved these changes
Sep 25, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This pass creates the constructor function to call the allocator registration and adds it to the global_ctors.