Skip to content

[flang][cuda] Handle simple device pointer allocation #123996

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 22, 2025

Conversation

clementval
Copy link
Contributor

@clementval clementval commented Jan 22, 2025

Allocation of fortran pointer must use the flang pointer entry points and not the one for allocatable. The runtime makes checks that will fail.

Source and double descriptors allocation will follow.

@llvmbot llvmbot added flang:runtime flang Flang issues not falling into any other category flang:fir-hlfir labels Jan 22, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 22, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Allocation of fortran pointer must use the flang pointer entry points and not the one for allocatable. The runtime makes checks that will fail.

Source and double descriptors allocation will follow.


Full diff: https://github.com/llvm/llvm-project/pull/123996.diff

5 Files Affected:

  • (added) flang/include/flang/Runtime/CUDA/pointer.h (+27)
  • (modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+23-4)
  • (modified) flang/runtime/CUDA/CMakeLists.txt (+1)
  • (added) flang/runtime/CUDA/pointer.cpp (+40)
  • (modified) flang/test/Fir/CUDA/cuda-allocate.fir (+11)
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
new file mode 100644
index 00000000000000..db5242696303f5
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -0,0 +1,27 @@
+//===-- include/flang/Runtime/CUDA/pointer.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_POINTER_H_
+#define FORTRAN_RUNTIME_CUDA_POINTER_H_
+
+#include "flang/Runtime/descriptor-consts.h"
+#include "flang/Runtime/entry-names.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+/// Perform allocation of the descriptor.
+int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
+    bool hasStat = false, const Descriptor *errMsg = nullptr,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
+#endif // FORTRAN_RUNTIME_CUDA_POINTER_H_
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 8b8c00fa7ecfcb..23248f6d12622a 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -20,6 +20,7 @@
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
+#include "flang/Runtime/CUDA/pointer.h"
 #include "flang/Runtime/allocatable.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -161,7 +162,18 @@ struct CUFAllocateOpConversion
     fir::FirOpBuilder builder(rewriter, mod);
     mlir::Location loc = op.getLoc();
 
+    bool isPointer = false;
+
+    if (auto declareOp =
+            mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
+      if (declareOp.getFortranAttrs() &&
+          bitEnumContainsAny(*declareOp.getFortranAttrs(),
+                             fir::FortranVariableFlagsEnum::pointer))
+        isPointer = true;
+
     if (hasDoubleDescriptors(op)) {
+      if (isPointer)
+        TODO(loc, "pointer allocation with double descriptors");
       // Allocation for module variable are done with custom runtime entry point
       // so the descriptors can be synchronized.
       mlir::func::FuncOp func;
@@ -176,13 +188,20 @@ struct CUFAllocateOpConversion
     }
 
     mlir::func::FuncOp func;
-    if (op.getSource())
+    if (op.getSource()) {
+      if (isPointer)
+        TODO(loc, "pointer allocation with source");
       func =
           fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocateSource)>(
               loc, builder);
-    else
-      func = fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
-          loc, builder);
+    } else {
+      func =
+          isPointer
+              ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>(
+                    loc, builder)
+              : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
+                    loc, builder);
+    }
 
     return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
   }
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 3a88824826de31..23e01da72eded1 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -20,6 +20,7 @@ add_flang_library(${CUFRT_LIBNAME}
   kernel.cpp
   memmove-function.cpp
   memory.cpp
+  pointer.cpp
   registration.cpp
 )
 
diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp
new file mode 100644
index 00000000000000..0c5d3a5a6297d8
--- /dev/null
+++ b/flang/runtime/CUDA/pointer.cpp
@@ -0,0 +1,40 @@
+//===-- runtime/CUDA/pointer.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/pointer.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "flang/Runtime/pointer.h"
+
+#include "cuda_runtime.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
+int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+  if (desc.HasAddendum()) {
+    Terminator terminator{sourceFile, sourceLine};
+    // TODO: This require a bit more work to set the correct type descriptor
+    // address
+    terminator.Crash(
+        "not yet implemented: CUDA descriptor allocation with addendum");
+  }
+  // Perform the standard allocation.
+  int stat{
+      RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
+  return stat;
+}
+
+RT_EXT_API_GROUP_END
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 35c6e2a77a697d..2ac9498d355414 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -181,4 +181,15 @@ func.func @_QQallocate_stream() {
 // CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
 // CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
+
+func.func @_QPp_alloc() {
+  %0 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>> {bindc_name = "complex_array", data_attr = #cuf.cuda<device>, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+  %4 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+  %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>} -> i32
+  return
+}
+
+// CHECK-LABEL: func.func @_QPp_alloc()
+// CHECK: fir.call @_FortranACUFPointerAllocate
+
 } // end of module

@llvmbot
Copy link
Member

llvmbot commented Jan 22, 2025

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Allocation of fortran pointer must use the flang pointer entry points and not the one for allocatable. The runtime makes checks that will fail.

Source and double descriptors allocation will follow.


Full diff: https://github.com/llvm/llvm-project/pull/123996.diff

5 Files Affected:

  • (added) flang/include/flang/Runtime/CUDA/pointer.h (+27)
  • (modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+23-4)
  • (modified) flang/runtime/CUDA/CMakeLists.txt (+1)
  • (added) flang/runtime/CUDA/pointer.cpp (+40)
  • (modified) flang/test/Fir/CUDA/cuda-allocate.fir (+11)
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
new file mode 100644
index 00000000000000..db5242696303f5
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -0,0 +1,27 @@
+//===-- include/flang/Runtime/CUDA/pointer.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_POINTER_H_
+#define FORTRAN_RUNTIME_CUDA_POINTER_H_
+
+#include "flang/Runtime/descriptor-consts.h"
+#include "flang/Runtime/entry-names.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+/// Perform allocation of the descriptor.
+int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
+    bool hasStat = false, const Descriptor *errMsg = nullptr,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
+#endif // FORTRAN_RUNTIME_CUDA_POINTER_H_
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 8b8c00fa7ecfcb..23248f6d12622a 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -20,6 +20,7 @@
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
+#include "flang/Runtime/CUDA/pointer.h"
 #include "flang/Runtime/allocatable.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -161,7 +162,18 @@ struct CUFAllocateOpConversion
     fir::FirOpBuilder builder(rewriter, mod);
     mlir::Location loc = op.getLoc();
 
+    bool isPointer = false;
+
+    if (auto declareOp =
+            mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
+      if (declareOp.getFortranAttrs() &&
+          bitEnumContainsAny(*declareOp.getFortranAttrs(),
+                             fir::FortranVariableFlagsEnum::pointer))
+        isPointer = true;
+
     if (hasDoubleDescriptors(op)) {
+      if (isPointer)
+        TODO(loc, "pointer allocation with double descriptors");
       // Allocation for module variable are done with custom runtime entry point
       // so the descriptors can be synchronized.
       mlir::func::FuncOp func;
@@ -176,13 +188,20 @@ struct CUFAllocateOpConversion
     }
 
     mlir::func::FuncOp func;
-    if (op.getSource())
+    if (op.getSource()) {
+      if (isPointer)
+        TODO(loc, "pointer allocation with source");
       func =
           fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocateSource)>(
               loc, builder);
-    else
-      func = fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
-          loc, builder);
+    } else {
+      func =
+          isPointer
+              ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>(
+                    loc, builder)
+              : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
+                    loc, builder);
+    }
 
     return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
   }
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 3a88824826de31..23e01da72eded1 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -20,6 +20,7 @@ add_flang_library(${CUFRT_LIBNAME}
   kernel.cpp
   memmove-function.cpp
   memory.cpp
+  pointer.cpp
   registration.cpp
 )
 
diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp
new file mode 100644
index 00000000000000..0c5d3a5a6297d8
--- /dev/null
+++ b/flang/runtime/CUDA/pointer.cpp
@@ -0,0 +1,40 @@
+//===-- runtime/CUDA/pointer.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/pointer.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "flang/Runtime/pointer.h"
+
+#include "cuda_runtime.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
+int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+  if (desc.HasAddendum()) {
+    Terminator terminator{sourceFile, sourceLine};
+    // TODO: This require a bit more work to set the correct type descriptor
+    // address
+    terminator.Crash(
+        "not yet implemented: CUDA descriptor allocation with addendum");
+  }
+  // Perform the standard allocation.
+  int stat{
+      RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
+  return stat;
+}
+
+RT_EXT_API_GROUP_END
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 35c6e2a77a697d..2ac9498d355414 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -181,4 +181,15 @@ func.func @_QQallocate_stream() {
 // CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
 // CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
+
+func.func @_QPp_alloc() {
+  %0 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>> {bindc_name = "complex_array", data_attr = #cuf.cuda<device>, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+  %4 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+  %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>} -> i32
+  return
+}
+
+// CHECK-LABEL: func.func @_QPp_alloc()
+// CHECK: fir.call @_FortranACUFPointerAllocate
+
 } // end of module

@clementval clementval merged commit 6e498bc into llvm:main Jan 22, 2025
12 checks passed
@clementval clementval deleted the cuf_pointer_allocate branch January 22, 2025 23:59
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:fir-hlfir flang:runtime flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants