[flang][cuda] Adding support for registration of boxes #114323
Conversation
@llvm/pr-subscribers-flang-fir-hlfir

Author: Renaud Kauffmann (Renaud-K)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/114323.diff

3 files affected:
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index 7cdb2f7ffe27d9..dd204126be5dbc 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -11,11 +11,13 @@
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/CodeGen/Target.h"
+#include "flang/Optimizer/CodeGen/TypeConverter.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/DataLayout.h"
#include "flang/Optimizer/Transforms/CUFCommon.h"
#include "flang/Runtime/CUDA/registration.h"
@@ -84,6 +86,8 @@ struct CUFAddConstructor
auto registeredMod = builder.create<cuf::RegisterModuleOp>(
loc, llvmPtrTy, mlir::SymbolRefAttr::get(ctx, gpuMod.getName()));
+ fir::LLVMTypeConverter typeConverter(mod, /*applyTBAA=*/false,
+ /*forceUnifiedTBAATree=*/false, *dl);
// Register kernels
for (auto func : gpuMod.getOps<mlir::gpu::GPUFuncOp>()) {
if (func.isKernel()) {
@@ -115,17 +119,25 @@ struct CUFAddConstructor
fir::factory::createStringLiteral(builder, loc, gblNameStr));
// Global variable size
- auto sizeAndAlign = fir::getTypeSizeAndAlignmentOrCrash(
- loc, globalOp.getType(), *dl, kindMap);
- auto size =
- builder.createIntegerConstant(loc, idxTy, sizeAndAlign.first);
+ std::optional<uint64_t> size;
+ if (auto boxTy =
+ mlir::dyn_cast<fir::BaseBoxType>(globalOp.getType())) {
+ mlir::Type structTy = typeConverter.convertBoxTypeAsStruct(boxTy);
+ size = dl->getTypeSizeInBits(structTy) / 8;
+ }
+ if (!size) {
+ size = fir::getTypeSizeAndAlignmentOrCrash(loc, globalOp.getType(),
+ *dl, kindMap)
+ .first;
+ }
+ auto sizeVal = builder.createIntegerConstant(loc, idxTy, *size);
// Global variable address
mlir::Value addr = builder.create<fir::AddrOfOp>(
loc, globalOp.resultType(), globalOp.getSymbol());
llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
- builder, loc, fTy, registeredMod, addr, gblName, size)};
+ builder, loc, fTy, registeredMod, addr, gblName, sizeVal)};
builder.create<fir::CallOp>(loc, func, args);
} break;
case cuf::DataAttribute::Managed:
diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90
index 378dabbb7c7e7d..e52b1a7fdf49f0 100644
--- a/flang/test/Fir/CUDA/cuda-constructor-2.f90
+++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90
@@ -3,6 +3,12 @@
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
+ fir.global @_QMmtestsEndev {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xi32>>> {
+ %c0 = arith.constant 0 : index
+ %0 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+ %1 = fircg.ext_embox %0(%c0) {allocator_idx = 2 : i32} : (!fir.heap<!fir.array<?xi32>>, index) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+ fir.has_value %1 : !fir.box<!fir.heap<!fir.array<?xi32>>>
+ }
gpu.module @cuda_device_mod [#nvvm.target] {
}
@@ -18,5 +24,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
// CHECK-DAG: %[[VAR_ADDR2:.*]] = fir.convert %[[VAR_ADDR]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.ref<i8>
// CHECK-DAG: %[[VAR_NAME2:.*]] = fir.convert %[[VAR_NAME]] : (!fir.ref<!fir.char<1,12>>) -> !fir.ref<i8>
// CHECK-DAG: %[[CST:.*]] = arith.constant 20 : index
-// CHECK-DAG %[[CST2:.*]] = fir.convert %[[CST]] : (index) -> i64
-// CHECK fir.call @_FortranACUFRegisterVariable(%[[MODULE2]], %[[VAR_ADDR2]], %[[VAR_NAME2]], %[[CST2]]) : (!fir.ref<!fir.llvm_ptr<i8>>, !fir.ref<i8>, !fir.ref<i8>, i64) -> none
+// CHECK-DAG: %[[CST2:.*]] = fir.convert %[[CST]] : (index) -> i64
+// CHECK-DAG: fir.call @_FortranACUFRegisterVariable(%[[MODULE2]], %[[VAR_ADDR2]], %[[VAR_NAME2]], %[[CST2]]) : (!fir.ref<!fir.llvm_ptr<i8>>, !fir.ref<i8>, !fir.ref<i8>, i64) -> none
+// CHECK-DAG: %[[BOX:.*]] = fir.address_of(@_QMmtestsEndev) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK-DAG: %[[BOXREF:.*]] = fir.convert %[[BOX]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<i8>
+// CHECK-DAG: fir.call @_FortranACUFRegisterVariable(%[[MODULE:.*]], %[[BOXREF]], %{{.*}}, %{{.*}})
+//
\ No newline at end of file
diff --git a/flang/test/Fir/CUDA/cuda-register-func.fir b/flang/test/Fir/CUDA/cuda-register-func.fir
index 6b0cbfd3aca63d..25ab8dd786a4e0 100644
--- a/flang/test/Fir/CUDA/cuda-register-func.fir
+++ b/flang/test/Fir/CUDA/cuda-register-func.fir
@@ -1,6 +1,6 @@
// RUN: fir-opt --cuf-add-constructor %s | FileCheck %s
-module attributes {gpu.container_module} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
gpu.module @cuda_device_mod {
gpu.func @_QPsub_device1() kernel {
gpu.return
LGTM. Just a small nit
// CHECK-DAG: %[[BOX:.*]] = fir.address_of(@_QMmtestsEndev) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
// CHECK-DAG: %[[BOXREF:.*]] = fir.convert %[[BOX]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<i8>
// CHECK-DAG: fir.call @_FortranACUFRegisterVariable(%[[MODULE:.*]], %[[BOXREF]], %{{.*}}, %{{.*}})
//
Missing new line. Can you add one?
Needed to take into account that `fir::getTypeSizeAndAlignmentOrCrash` does not work with box types; computing their size requires the `fir::LLVMTypeConverter` instead.
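For context, here is a minimal sketch of the size computation this patch introduces, assuming the pass has already built the `fir::LLVMTypeConverter` and obtained the `mlir::DataLayout` and `fir::KindMapping` as in CUFAddConstructor.cpp above. The helper name `getGlobalByteSize` is hypothetical; the actual patch inlines this logic in the switch over `cuf::DataAttribute`, as the diff shows.

```cpp
#include "flang/Optimizer/CodeGen/TypeConverter.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/KindMapping.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include <cstdint>

// Hypothetical helper: compute the size in bytes of a module variable.
static std::uint64_t
getGlobalByteSize(mlir::Location loc, mlir::Type globalTy,
                  const mlir::DataLayout &dl, const fir::KindMapping &kindMap,
                  fir::LLVMTypeConverter &typeConverter) {
  // Box (descriptor) types are not handled by
  // fir::getTypeSizeAndAlignmentOrCrash, so lower them to the LLVM struct
  // they become during codegen and ask the data layout for that struct's size.
  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(globalTy)) {
    mlir::Type structTy = typeConverter.convertBoxTypeAsStruct(boxTy);
    return dl.getTypeSizeInBits(structTy) / 8;
  }
  // All other FIR types keep using the existing helper.
  return fir::getTypeSizeAndAlignmentOrCrash(loc, globalTy, dl, kindMap).first;
}
```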