Revert "Reland '[flang] Allow to pass an async id to allocate the descriptor (#118713)' and #118733" #121029
Conversation
…criptor …" This reverts commit 5b74fb7.
@llvm/pr-subscribers-flang-runtime @llvm/pr-subscribers-flang-fir-hlfir
Author: Valentin Clement (バレンタイン クレメン) (clementval)
Changes: This still causes an issue for the device runtime build.
Patch is 39.23 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/121029.diff
22 Files Affected:
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index b6f0e7f303176c..4fb4c94c5e9b0a 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -20,16 +20,16 @@ extern "C" {
void RTDECL(CUFRegisterAllocator)();
}
-void *CUFAllocPinned(std::size_t, std::int64_t = kCudaNoStream);
+void *CUFAllocPinned(std::size_t);
void CUFFreePinned(void *);
-void *CUFAllocDevice(std::size_t, std::int64_t);
+void *CUFAllocDevice(std::size_t);
void CUFFreeDevice(void *);
-void *CUFAllocManaged(std::size_t, std::int64_t = kCudaNoStream);
+void *CUFAllocManaged(std::size_t);
void CUFFreeManaged(void *);
-void *CUFAllocUnified(std::size_t, std::int64_t = kCudaNoStream);
+void *CUFAllocUnified(std::size_t);
void CUFFreeUnified(void *);
} // namespace Fortran::runtime::cuda
diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h
index 9c95f727ee6734..474f8e6578b891 100644
--- a/flang/include/flang/Runtime/CUDA/common.h
+++ b/flang/include/flang/Runtime/CUDA/common.h
@@ -23,9 +23,6 @@ static constexpr unsigned kHostToDevice = 0;
static constexpr unsigned kDeviceToHost = 1;
static constexpr unsigned kDeviceToDevice = 2;
-/// Value used for asyncId when no specific stream is specified.
-static constexpr std::int64_t kCudaNoStream = -1;
-
#define CUDA_REPORT_IF_ERROR(expr) \
[](cudaError_t err) { \
if (err == cudaSuccess) \
diff --git a/flang/include/flang/Runtime/allocatable.h b/flang/include/flang/Runtime/allocatable.h
index 121c31af963aa0..58061d9862095e 100644
--- a/flang/include/flang/Runtime/allocatable.h
+++ b/flang/include/flang/Runtime/allocatable.h
@@ -94,9 +94,9 @@ int RTDECL(AllocatableCheckLengthParameter)(Descriptor &,
// Successfully allocated memory is initialized if the allocatable has a
// derived type, and is always initialized by AllocatableAllocateSource().
// Performs all necessary coarray synchronization and validation actions.
-int RTDECL(AllocatableAllocate)(Descriptor &, std::int64_t asyncId = -1,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+int RTDECL(AllocatableAllocate)(Descriptor &, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
int RTDECL(AllocatableAllocateSource)(Descriptor &, const Descriptor &source,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
diff --git a/flang/include/flang/Runtime/allocator-registry.h b/flang/include/flang/Runtime/allocator-registry.h
index 4c3295edf13d9a..29302c5d825bc9 100644
--- a/flang/include/flang/Runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -11,7 +11,6 @@
#include "flang/Common/api-attrs.h"
#include "flang/Runtime/allocator-registry-consts.h"
-#include <cstdint>
#include <cstdlib>
#include <vector>
@@ -19,7 +18,7 @@
namespace Fortran::runtime {
-using AllocFct = void *(*)(std::size_t, std::int64_t);
+using AllocFct = void *(*)(std::size_t);
using FreeFct = void (*)(void *);
typedef struct Allocator_t {
@@ -27,11 +26,10 @@ typedef struct Allocator_t {
FreeFct free{nullptr};
} Allocator_t;
-static RT_API_ATTRS void *MallocWrapper(
- std::size_t size, [[maybe_unused]] std::int64_t) {
+#ifdef RT_DEVICE_COMPILATION
+static RT_API_ATTRS void *MallocWrapper(std::size_t size) {
return std::malloc(size);
}
-#ifdef RT_DEVICE_COMPILATION
static RT_API_ATTRS void FreeWrapper(void *p) { return std::free(p); }
#endif
@@ -41,7 +39,7 @@ struct AllocatorRegistry {
: allocators{{&MallocWrapper, &FreeWrapper}} {}
#else
constexpr AllocatorRegistry() {
- allocators[kDefaultAllocator] = {&MallocWrapper, &std::free};
+ allocators[kDefaultAllocator] = {&std::malloc, &std::free};
};
#endif
RT_API_ATTRS void Register(int, Allocator_t);
diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h
index 44e82c6a256873..dd36fba157ca92 100644
--- a/flang/include/flang/Runtime/descriptor.h
+++ b/flang/include/flang/Runtime/descriptor.h
@@ -369,7 +369,7 @@ class Descriptor {
// before calling. It (re)computes the byte strides after
// allocation. Does not allocate automatic components or
// perform default component initialization.
- RT_API_ATTRS int Allocate(std::int64_t asyncId = -1);
+ RT_API_ATTRS int Allocate();
RT_API_ATTRS void SetByteStrides();
// Deallocates storage; does not call FINAL subroutines or
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index f1436564aabaa2..fb8380ac7e8c51 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -184,14 +184,9 @@ static mlir::Value genRuntimeAllocate(fir::FirOpBuilder &builder,
? fir::runtime::getRuntimeFunc<mkRTKey(PointerAllocate)>(loc, builder)
: fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc,
builder);
- llvm::SmallVector<mlir::Value> args{box.getAddr()};
- if (!box.isPointer())
- args.push_back(
- builder.createIntegerConstant(loc, builder.getI64Type(), -1));
- args.push_back(errorManager.hasStat);
- args.push_back(errorManager.errMsgAddr);
- args.push_back(errorManager.sourceFile);
- args.push_back(errorManager.sourceLine);
+ llvm::SmallVector<mlir::Value> args{
+ box.getAddr(), errorManager.hasStat, errorManager.errMsgAddr,
+ errorManager.sourceFile, errorManager.sourceLine};
llvm::SmallVector<mlir::Value> operands;
for (auto [fst, snd] : llvm::zip(args, callee.getFunctionType().getInputs()))
operands.emplace_back(builder.createConvert(loc, snd, fst));
diff --git a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
index 28452d3b486da3..70a88ff18cb1da 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
@@ -76,19 +76,16 @@ void fir::runtime::genAllocatableAllocate(fir::FirOpBuilder &builder,
mlir::func::FuncOp func{
fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc, builder)};
mlir::FunctionType fTy{func.getFunctionType()};
- mlir::Value asyncId =
- builder.createIntegerConstant(loc, builder.getI64Type(), -1);
mlir::Value sourceFile{fir::factory::locationToFilename(builder, loc)};
mlir::Value sourceLine{
- fir::factory::locationToLineNo(builder, loc, fTy.getInput(5))};
+ fir::factory::locationToLineNo(builder, loc, fTy.getInput(4))};
if (!hasStat)
hasStat = builder.createBool(loc, false);
if (!errMsg) {
mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType());
errMsg = builder.create<fir::AbsentOp>(loc, boxNoneTy).getResult();
}
- llvm::SmallVector<mlir::Value> args{
- fir::runtime::createArguments(builder, loc, fTy, desc, asyncId, hasStat,
- errMsg, sourceFile, sourceLine)};
+ llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+ builder, loc, fTy, desc, hasStat, errMsg, sourceFile, sourceLine)};
builder.create<fir::CallOp>(loc, func, args);
}
diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp
index 3f6f8f3d6d5de0..9be54e8906903d 100644
--- a/flang/runtime/CUDA/allocatable.cpp
+++ b/flang/runtime/CUDA/allocatable.cpp
@@ -52,7 +52,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
}
// Perform the standard allocation.
int stat{RTNAME(AllocatableAllocate)(
- desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
+ desc, hasStat, errMsg, sourceFile, sourceLine)};
return stat;
}
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index d848f1811dcf3f..85b3daf65a8ba4 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -33,7 +33,7 @@ void RTDEF(CUFRegisterAllocator)() {
}
}
-void *CUFAllocPinned(std::size_t sizeInBytes, std::int64_t) {
+void *CUFAllocPinned(std::size_t sizeInBytes) {
void *p;
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
return p;
@@ -41,20 +41,15 @@ void *CUFAllocPinned(std::size_t sizeInBytes, std::int64_t) {
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
-void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t stream) {
+void *CUFAllocDevice(std::size_t sizeInBytes) {
void *p;
- if (stream >= 0) {
- CUDA_REPORT_IF_ERROR(
- cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)stream));
- } else {
- CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
- }
+ CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
return p;
}
void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
-void *CUFAllocManaged(std::size_t sizeInBytes, std::int64_t) {
+void *CUFAllocManaged(std::size_t sizeInBytes) {
void *p;
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
@@ -63,7 +58,7 @@ void *CUFAllocManaged(std::size_t sizeInBytes, std::int64_t) {
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
-void *CUFAllocUnified(std::size_t sizeInBytes, std::int64_t) {
+void *CUFAllocUnified(std::size_t sizeInBytes) {
// Call alloc managed for the time being.
return CUFAllocManaged(sizeInBytes);
}
diff --git a/flang/runtime/CUDA/descriptor.cpp b/flang/runtime/CUDA/descriptor.cpp
index 816b1458ee52cb..391c47e84241d4 100644
--- a/flang/runtime/CUDA/descriptor.cpp
+++ b/flang/runtime/CUDA/descriptor.cpp
@@ -20,8 +20,7 @@ RT_EXT_API_GROUP_BEGIN
Descriptor *RTDEF(CUFAllocDescriptor)(
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
- return reinterpret_cast<Descriptor *>(
- CUFAllocManaged(sizeInBytes, kCudaNoStream));
+ return reinterpret_cast<Descriptor *>(CUFAllocManaged(sizeInBytes));
}
void RTDEF(CUFFreeDescriptor)(
diff --git a/flang/runtime/allocatable.cpp b/flang/runtime/allocatable.cpp
index b65cec8d51cf86..5e065f47636a89 100644
--- a/flang/runtime/allocatable.cpp
+++ b/flang/runtime/allocatable.cpp
@@ -133,17 +133,15 @@ void RTDEF(AllocatableApplyMold)(
}
}
-int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+int RTDEF(AllocatableAllocate)(Descriptor &descriptor, bool hasStat,
+ const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
if (!descriptor.IsAllocatable()) {
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
} else if (descriptor.IsAllocated()) {
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
} else {
- int stat{
- ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
+ int stat{ReturnError(terminator, descriptor.Allocate(), errMsg, hasStat)};
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -162,7 +160,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(AllocatableAllocate)(
- alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
+ alloc, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
DoFromSourceAssign(alloc, source, terminator);
diff --git a/flang/runtime/array-constructor.cpp b/flang/runtime/array-constructor.cpp
index 0d677d7cc63aa9..c6953167f5fb2e 100644
--- a/flang/runtime/array-constructor.cpp
+++ b/flang/runtime/array-constructor.cpp
@@ -50,8 +50,8 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
initialAllocationSize(fromElements, to.ElementBytes())};
to.GetDimension(0).SetBounds(1, allocationSize);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
- vector.sourceFile, vector.sourceLine);
+ (to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
+ vector.sourceLine);
to.GetDimension(0).SetBounds(1, fromElements);
vector.actualAllocationSize = allocationSize;
} else {
@@ -59,8 +59,8 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
// first value: there should be no reallocation.
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
- vector.sourceFile, vector.sourceLine);
+ (to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
+ vector.sourceLine);
vector.actualAllocationSize = previousToElements;
}
} else {
diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp
index f43c96bed7d00d..32f43e89dc7a36 100644
--- a/flang/runtime/descriptor.cpp
+++ b/flang/runtime/descriptor.cpp
@@ -163,7 +163,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
#endif
}
-RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
+RT_API_ATTRS int Descriptor::Allocate() {
std::size_t elementBytes{ElementBytes()};
if (static_cast<std::int64_t>(elementBytes) < 0) {
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
@@ -175,7 +175,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
// result is implementation defined, always allocate at least one byte.
- void *p{alloc(byteSize ? byteSize : 1, asyncId)};
+ void *p{alloc(byteSize ? byteSize : 1)};
if (!p) {
return CFI_ERROR_MEM_ALLOCATION;
}
diff --git a/flang/test/HLFIR/elemental-codegen.fir b/flang/test/HLFIR/elemental-codegen.fir
index 3c33bf8fca2d14..0d5f343cb17711 100644
--- a/flang/test/HLFIR/elemental-codegen.fir
+++ b/flang/test/HLFIR/elemental-codegen.fir
@@ -192,7 +192,7 @@ func.func @test_polymorphic(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.bindc_
// CHECK: %[[VAL_35:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_31]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_40:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_41:.*]] = %[[VAL_40]] to %[[EX1]] step %[[VAL_40]] unordered {
@@ -276,7 +276,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_36:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_32]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_41:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_42:.*]] = %[[VAL_41]] to %[[VAL_3]] step %[[VAL_41]] unordered {
@@ -329,7 +329,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_85:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_86:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_81]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_89:.*]] = fir.load %[[VAL_63]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_90:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_91:.*]] = %[[VAL_90]] to %[[VAL_3]] step %[[VAL_90]] unordered {
diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90
index 9fe51a8db55e3b..0066e712fbdcce 100644
--- a/flang/test/Lower/OpenACC/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/acc-declare.f90
@@ -469,6 +469,6 @@ subroutine init()
end module
! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit()
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.if
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90
index fba0c12fb889c3..bbc54754ca1ab1 100644
--- a/flang/test/Lower/allocatable-polymorphic.f90
+++ b/flang/test/Lower/allocatable-polymorphic.f90
@@ -267,7 +267,7 @@ subroutine test_allocatable()
! CHECK: %[[C0:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[P_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> none
! CHECK: %[[P_CAST:.*]] = fir.convert %[[P_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.re...
[truncated]
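In short, the interface change being undone is the extra asyncId/stream parameter that had been threaded through the allocation path. A side-by-side sketch of the key declarations, reconstructed from the hunks above (RTDECL and RT_API_ATTRS are the runtime's existing declaration macros; nothing new is introduced here):

// State being reverted (as introduced by #118713 / #118733):
int RTDECL(AllocatableAllocate)(Descriptor &, std::int64_t asyncId = -1,
    bool hasStat = false, const Descriptor *errMsg = nullptr,
    const char *sourceFile = nullptr, int sourceLine = 0);
RT_API_ATTRS int Allocate(std::int64_t asyncId = -1);   // member of class Descriptor
using AllocFct = void *(*)(std::size_t, std::int64_t);  // allocator registry entry type

// State restored by this revert:
int RTDECL(AllocatableAllocate)(Descriptor &, bool hasStat = false,
    const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
    int sourceLine = 0);
RT_API_ATTRS int Allocate();                             // member of class Descriptor
using AllocFct = void *(*)(std::size_t);                 // allocator registry entry type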
@llvm/pr-subscribers-openacc
Author: Valentin Clement (バレンタイン クレメン) (clementval)
This still causes an issue for the device runtime build.
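On the CUDA side, the practical effect of dropping the async id is that device allocations can no longer be stream-ordered: CUFAllocDevice goes back to plain cudaMalloc. A condensed before/after sketch, taken from the flang/runtime/CUDA/allocator.cpp hunks above:

// With the async id (being reverted): honor a non-negative stream with a
// stream-ordered allocation, otherwise fall back to a synchronous one.
void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t stream) {
  void *p;
  if (stream >= 0) {
    CUDA_REPORT_IF_ERROR(
        cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)stream));
  } else {
    CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
  }
  return p;
}

// After this revert: always allocate synchronously.
void *CUFAllocDevice(std::size_t sizeInBytes) {
  void *p;
  CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
  return p;
}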