-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Use a reference for asyncObject #138186
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation.
@llvm/pr-subscribers-openacc Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesSwitch from New tentative with some fix. The previous was reverted yesterday. Patch is 88.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138186.diff 52 Files Affected:
diff --git a/flang-rt/include/flang-rt/runtime/allocator-registry.h b/flang-rt/include/flang-rt/runtime/allocator-registry.h
index 33e8e2c7d7850..f0ba77a360736 100644
--- a/flang-rt/include/flang-rt/runtime/allocator-registry.h
+++ b/flang-rt/include/flang-rt/runtime/allocator-registry.h
@@ -19,7 +19,7 @@
namespace Fortran::runtime {
-using AllocFct = void *(*)(std::size_t, std::int64_t);
+using AllocFct = void *(*)(std::size_t, std::int64_t *);
using FreeFct = void (*)(void *);
typedef struct Allocator_t {
@@ -28,7 +28,7 @@ typedef struct Allocator_t {
} Allocator_t;
static RT_API_ATTRS void *MallocWrapper(
- std::size_t size, [[maybe_unused]] std::int64_t) {
+ std::size_t size, [[maybe_unused]] std::int64_t *) {
return std::malloc(size);
}
#ifdef RT_DEVICE_COMPILATION
diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h
index 9907e7866e7bf..c98e6b14850cb 100644
--- a/flang-rt/include/flang-rt/runtime/descriptor.h
+++ b/flang-rt/include/flang-rt/runtime/descriptor.h
@@ -29,8 +29,8 @@
#include <cstdio>
#include <cstring>
-/// Value used for asyncId when no specific stream is specified.
-static constexpr std::int64_t kNoAsyncId = -1;
+/// Value used for asyncObject when no specific stream is specified.
+static constexpr std::int64_t *kNoAsyncObject = nullptr;
namespace Fortran::runtime {
@@ -372,7 +372,7 @@ class Descriptor {
// before calling. It (re)computes the byte strides after
// allocation. Does not allocate automatic components or
// perform default component initialization.
- RT_API_ATTRS int Allocate(std::int64_t asyncId);
+ RT_API_ATTRS int Allocate(std::int64_t *asyncObject);
RT_API_ATTRS void SetByteStrides();
// Deallocates storage; does not call FINAL subroutines or
diff --git a/flang-rt/include/flang-rt/runtime/reduction-templates.h b/flang-rt/include/flang-rt/runtime/reduction-templates.h
index 77f77a592a476..18412708b02c5 100644
--- a/flang-rt/include/flang-rt/runtime/reduction-templates.h
+++ b/flang-rt/include/flang-rt/runtime/reduction-templates.h
@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
// as the element size of the source.
result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
CFI_attribute_allocatable);
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
diff --git a/flang-rt/lib/cuda/allocatable.cpp b/flang-rt/lib/cuda/allocatable.cpp
index 432974d18a3e3..c77819e9440d7 100644
--- a/flang-rt/lib/cuda/allocatable.cpp
+++ b/flang-rt/lib/cuda/allocatable.cpp
@@ -23,7 +23,7 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
-int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
@@ -41,7 +41,7 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
return stat;
}
-int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
if (desc.HasAddendum()) {
@@ -63,7 +63,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
}
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
@@ -76,7 +76,7 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
}
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocateSync)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index 51119ab251168..f4289c55bd8de 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -98,7 +98,7 @@ static unsigned findAllocation(void *ptr) {
return allocNotFound;
}
-static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
+static void insertAllocation(void *ptr, std::size_t size, cudaStream_t stream) {
CriticalSection critical{lock};
initAllocations();
if (numDeviceAllocations >= maxDeviceAllocations) {
@@ -106,7 +106,7 @@ static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
}
deviceAllocations[numDeviceAllocations].ptr = ptr;
deviceAllocations[numDeviceAllocations].size = size;
- deviceAllocations[numDeviceAllocations].stream = (cudaStream_t)stream;
+ deviceAllocations[numDeviceAllocations].stream = stream;
++numDeviceAllocations;
qsort(deviceAllocations, numDeviceAllocations, sizeof(DeviceAllocation),
compareDeviceAlloc);
@@ -136,7 +136,7 @@ void RTDEF(CUFRegisterAllocator)() {
}
void *CUFAllocPinned(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
return p;
@@ -144,18 +144,18 @@ void *CUFAllocPinned(
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
-void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
+void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t *asyncObject) {
void *p;
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
} else {
- if (asyncId == kNoAsyncId) {
+ if (asyncObject == kNoAsyncObject) {
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
} else {
CUDA_REPORT_IF_ERROR(
- cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
- insertAllocation(p, sizeInBytes, asyncId);
+ cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)*asyncObject));
+ insertAllocation(p, sizeInBytes, (cudaStream_t)*asyncObject);
}
}
return p;
@@ -174,7 +174,7 @@ void CUFFreeDevice(void *p) {
}
void *CUFAllocManaged(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
@@ -184,9 +184,9 @@ void *CUFAllocManaged(
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
void *CUFAllocUnified(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
// Call alloc managed for the time being.
- return CUFAllocManaged(sizeInBytes, asyncId);
+ return CUFAllocManaged(sizeInBytes, asyncObject);
}
void CUFFreeUnified(void *p) {
diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp
index 175e8c0ef8438..7b768f91af29d 100644
--- a/flang-rt/lib/cuda/descriptor.cpp
+++ b/flang-rt/lib/cuda/descriptor.cpp
@@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN
Descriptor *RTDEF(CUFAllocDescriptor)(
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
return reinterpret_cast<Descriptor *>(
- CUFAllocManaged(sizeInBytes, /*asyncId*/ -1));
+ CUFAllocManaged(sizeInBytes, /*asyncObject=*/nullptr));
}
void RTDEF(CUFFreeDescriptor)(
diff --git a/flang-rt/lib/runtime/allocatable.cpp b/flang-rt/lib/runtime/allocatable.cpp
index 6acce34eb9a9e..ef18da6ea0786 100644
--- a/flang-rt/lib/runtime/allocatable.cpp
+++ b/flang-rt/lib/runtime/allocatable.cpp
@@ -133,17 +133,17 @@ void RTDEF(AllocatableApplyMold)(
}
}
-int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+int RTDEF(AllocatableAllocate)(Descriptor &descriptor,
+ std::int64_t *asyncObject, bool hasStat, const Descriptor *errMsg,
+ const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
if (!descriptor.IsAllocatable()) {
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
} else if (descriptor.IsAllocated()) {
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
} else {
- int stat{
- ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
+ int stat{ReturnError(
+ terminator, descriptor.Allocate(asyncObject), errMsg, hasStat)};
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -162,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(AllocatableAllocate)(
- alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
+ alloc, /*asyncObject=*/nullptr, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
DoFromSourceAssign(alloc, source, terminator);
diff --git a/flang-rt/lib/runtime/array-constructor.cpp b/flang-rt/lib/runtime/array-constructor.cpp
index 67b3b5e1e0f50..858fac7bf2b39 100644
--- a/flang-rt/lib/runtime/array-constructor.cpp
+++ b/flang-rt/lib/runtime/array-constructor.cpp
@@ -50,7 +50,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
initialAllocationSize(fromElements, to.ElementBytes())};
to.GetDimension(0).SetBounds(1, allocationSize);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+ (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
to.GetDimension(0).SetBounds(1, fromElements);
vector.actualAllocationSize = allocationSize;
@@ -59,7 +59,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
// first value: there should be no reallocation.
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+ (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
vector.actualAllocationSize = previousToElements;
}
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index 4a813cd489022..8a4fa36c91479 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -99,7 +99,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS(
toDim.SetByteStride(stride);
stride *= toDim.Extent();
}
- int result{ReturnError(terminator, to.Allocate(kNoAsyncId))};
+ int result{ReturnError(terminator, to.Allocate(kNoAsyncObject))};
if (result == StatOk && derived && !derived->noInitializationNeeded()) {
result = ReturnError(terminator, Initialize(to, *derived, terminator));
}
@@ -277,7 +277,7 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
// entity, otherwise, the Deallocate() below will not
// free the descriptor memory.
newFrom.raw().attribute = CFI_attribute_allocatable;
- auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))};
+ auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncObject))};
if (stat == StatOk) {
if (HasDynamicComponent(from)) {
// If 'from' has allocatable/automatic component, we cannot
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index d1152ee1caefb..f140d202e118e 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -118,7 +118,7 @@ static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("Compare: could not allocate storage for result");
}
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
@@ -173,7 +173,7 @@ static RT_API_ATTRS void AdjustLRHelper(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
}
for (SubscriptValue resultAt{0}; elements-- > 0;
@@ -227,7 +227,7 @@ static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("LEN_TRIM: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -427,7 +427,7 @@ static RT_API_ATTRS void GeneralCharFunc(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -530,7 +530,8 @@ static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
- RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(
+ terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
}
for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
@@ -606,7 +607,7 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
- if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (accumulator.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash(
"CharacterConcatenate: could not allocate storage for result");
}
@@ -629,7 +630,8 @@ void RTDEF(CharacterConcatenateScalar1)(
accumulator.set_base_addr(nullptr);
std::size_t oldLen{accumulator.ElementBytes()};
accumulator.raw().elem_len += chars;
- RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(
+ terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
FreeMemory(old);
}
@@ -831,7 +833,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
std::size_t origBytes{string.ElementBytes()};
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
CFI_attribute_allocatable);
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("REPEAT could not allocate storage for result");
}
const char *from{string.OffsetElement()};
@@ -865,7 +867,7 @@ void RTDEF(Trim)(Descriptor &result, const Descriptor &string,
}
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
CFI_attribute_allocatable);
- RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
}
diff --git a/flang-rt/lib/runtime/copy.cpp b/flang-rt/lib/runtime/copy.cpp
index 3a0f98cf8d376..f990f46e0be66 100644
--- a/flang-rt/lib/runtime/copy.cpp
+++ b/flang-rt/lib/runtime/copy.cpp
@@ -171,8 +171,8 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
*reinterpret_cast<Descriptor *>(toPtr + component->offset())};
if (toDesc.raw().base_addr != nullptr) {
toDesc.set_base_addr(nullptr);
- RUNTIME_CHECK(
- terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
+ RUNTIME_CHECK(terminator,
+ toDesc.Allocate(/*asyncObject=*/nullptr) == CFI_SUCCESS);
const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
fromPtr + component->offset())};
copyStack.emplace(toDesc, fromDesc);
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index c46ea806a430a..35037036f63e7 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -52,7 +52,7 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
allocDesc.raw().attribute = CFI_attribute_allocatable;
if (comp.genre() == typeInfo::Component::Genre::Automatic) {
stat = ReturnError(
- terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat);
+ terminator, allocDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -153,7 +153,7 @@ RT_API_ATTRS int InitializeClone(const Descriptor &clone,
if (origDesc.IsAllocated()) {
cloneDesc.ApplyMold(origDesc, origDesc.rank());
stat = ReturnError(
- terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat);
+ terminator, cloneDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
if (const typeInfo::DerivedType *
@@ -260,7 +260,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
copy.raw().attribute = CFI_attribute_allocatable;
Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
- copy.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ copy.Allocate(kNoAsyncObject) == CFI_SUCCESS);
ShallowCopyDiscontiguousToContiguous(copy, descriptor);
argDescriptor = ©
}
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 3debf53bb5290..67336d01380e0 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -158,7 +158,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
#endif
}
-RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
+RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
std::size_t elementBytes{ElementBytes()};
if (static_cast<std::int64_t>(elementBytes) < 0) {
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
@@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
// result is implementation defined, always allocate at least one byte.
- void *p{alloc(byteSize ? byteSize : 1, asyncId)};
+ void *p{alloc(byteSize ? byteSize : 1, asyncObject)};
if (!p) {
return CFI_ERROR_MEM_ALLOCATION;
}
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/r...
[truncated]
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesSwitch from New tentative with some fix. The previous was reverted yesterday. Patch is 88.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138186.diff 52 Files Affected:
diff --git a/flang-rt/include/flang-rt/runtime/allocator-registry.h b/flang-rt/include/flang-rt/runtime/allocator-registry.h
index 33e8e2c7d7850..f0ba77a360736 100644
--- a/flang-rt/include/flang-rt/runtime/allocator-registry.h
+++ b/flang-rt/include/flang-rt/runtime/allocator-registry.h
@@ -19,7 +19,7 @@
namespace Fortran::runtime {
-using AllocFct = void *(*)(std::size_t, std::int64_t);
+using AllocFct = void *(*)(std::size_t, std::int64_t *);
using FreeFct = void (*)(void *);
typedef struct Allocator_t {
@@ -28,7 +28,7 @@ typedef struct Allocator_t {
} Allocator_t;
static RT_API_ATTRS void *MallocWrapper(
- std::size_t size, [[maybe_unused]] std::int64_t) {
+ std::size_t size, [[maybe_unused]] std::int64_t *) {
return std::malloc(size);
}
#ifdef RT_DEVICE_COMPILATION
diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h
index 9907e7866e7bf..c98e6b14850cb 100644
--- a/flang-rt/include/flang-rt/runtime/descriptor.h
+++ b/flang-rt/include/flang-rt/runtime/descriptor.h
@@ -29,8 +29,8 @@
#include <cstdio>
#include <cstring>
-/// Value used for asyncId when no specific stream is specified.
-static constexpr std::int64_t kNoAsyncId = -1;
+/// Value used for asyncObject when no specific stream is specified.
+static constexpr std::int64_t *kNoAsyncObject = nullptr;
namespace Fortran::runtime {
@@ -372,7 +372,7 @@ class Descriptor {
// before calling. It (re)computes the byte strides after
// allocation. Does not allocate automatic components or
// perform default component initialization.
- RT_API_ATTRS int Allocate(std::int64_t asyncId);
+ RT_API_ATTRS int Allocate(std::int64_t *asyncObject);
RT_API_ATTRS void SetByteStrides();
// Deallocates storage; does not call FINAL subroutines or
diff --git a/flang-rt/include/flang-rt/runtime/reduction-templates.h b/flang-rt/include/flang-rt/runtime/reduction-templates.h
index 77f77a592a476..18412708b02c5 100644
--- a/flang-rt/include/flang-rt/runtime/reduction-templates.h
+++ b/flang-rt/include/flang-rt/runtime/reduction-templates.h
@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
// as the element size of the source.
result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
CFI_attribute_allocatable);
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
diff --git a/flang-rt/lib/cuda/allocatable.cpp b/flang-rt/lib/cuda/allocatable.cpp
index 432974d18a3e3..c77819e9440d7 100644
--- a/flang-rt/lib/cuda/allocatable.cpp
+++ b/flang-rt/lib/cuda/allocatable.cpp
@@ -23,7 +23,7 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
-int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
@@ -41,7 +41,7 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
return stat;
}
-int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
if (desc.HasAddendum()) {
@@ -63,7 +63,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
}
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
@@ -76,7 +76,7 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
}
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocateSync)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index 51119ab251168..f4289c55bd8de 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -98,7 +98,7 @@ static unsigned findAllocation(void *ptr) {
return allocNotFound;
}
-static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
+static void insertAllocation(void *ptr, std::size_t size, cudaStream_t stream) {
CriticalSection critical{lock};
initAllocations();
if (numDeviceAllocations >= maxDeviceAllocations) {
@@ -106,7 +106,7 @@ static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
}
deviceAllocations[numDeviceAllocations].ptr = ptr;
deviceAllocations[numDeviceAllocations].size = size;
- deviceAllocations[numDeviceAllocations].stream = (cudaStream_t)stream;
+ deviceAllocations[numDeviceAllocations].stream = stream;
++numDeviceAllocations;
qsort(deviceAllocations, numDeviceAllocations, sizeof(DeviceAllocation),
compareDeviceAlloc);
@@ -136,7 +136,7 @@ void RTDEF(CUFRegisterAllocator)() {
}
void *CUFAllocPinned(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
return p;
@@ -144,18 +144,18 @@ void *CUFAllocPinned(
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
-void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
+void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t *asyncObject) {
void *p;
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
} else {
- if (asyncId == kNoAsyncId) {
+ if (asyncObject == kNoAsyncObject) {
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
} else {
CUDA_REPORT_IF_ERROR(
- cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
- insertAllocation(p, sizeInBytes, asyncId);
+ cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)*asyncObject));
+ insertAllocation(p, sizeInBytes, (cudaStream_t)*asyncObject);
}
}
return p;
@@ -174,7 +174,7 @@ void CUFFreeDevice(void *p) {
}
void *CUFAllocManaged(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
@@ -184,9 +184,9 @@ void *CUFAllocManaged(
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
void *CUFAllocUnified(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
// Call alloc managed for the time being.
- return CUFAllocManaged(sizeInBytes, asyncId);
+ return CUFAllocManaged(sizeInBytes, asyncObject);
}
void CUFFreeUnified(void *p) {
diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp
index 175e8c0ef8438..7b768f91af29d 100644
--- a/flang-rt/lib/cuda/descriptor.cpp
+++ b/flang-rt/lib/cuda/descriptor.cpp
@@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN
Descriptor *RTDEF(CUFAllocDescriptor)(
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
return reinterpret_cast<Descriptor *>(
- CUFAllocManaged(sizeInBytes, /*asyncId*/ -1));
+ CUFAllocManaged(sizeInBytes, /*asyncObject=*/nullptr));
}
void RTDEF(CUFFreeDescriptor)(
diff --git a/flang-rt/lib/runtime/allocatable.cpp b/flang-rt/lib/runtime/allocatable.cpp
index 6acce34eb9a9e..ef18da6ea0786 100644
--- a/flang-rt/lib/runtime/allocatable.cpp
+++ b/flang-rt/lib/runtime/allocatable.cpp
@@ -133,17 +133,17 @@ void RTDEF(AllocatableApplyMold)(
}
}
-int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+int RTDEF(AllocatableAllocate)(Descriptor &descriptor,
+ std::int64_t *asyncObject, bool hasStat, const Descriptor *errMsg,
+ const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
if (!descriptor.IsAllocatable()) {
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
} else if (descriptor.IsAllocated()) {
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
} else {
- int stat{
- ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
+ int stat{ReturnError(
+ terminator, descriptor.Allocate(asyncObject), errMsg, hasStat)};
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -162,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(AllocatableAllocate)(
- alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
+ alloc, /*asyncObject=*/nullptr, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
DoFromSourceAssign(alloc, source, terminator);
diff --git a/flang-rt/lib/runtime/array-constructor.cpp b/flang-rt/lib/runtime/array-constructor.cpp
index 67b3b5e1e0f50..858fac7bf2b39 100644
--- a/flang-rt/lib/runtime/array-constructor.cpp
+++ b/flang-rt/lib/runtime/array-constructor.cpp
@@ -50,7 +50,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
initialAllocationSize(fromElements, to.ElementBytes())};
to.GetDimension(0).SetBounds(1, allocationSize);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+ (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
to.GetDimension(0).SetBounds(1, fromElements);
vector.actualAllocationSize = allocationSize;
@@ -59,7 +59,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
// first value: there should be no reallocation.
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+ (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
vector.actualAllocationSize = previousToElements;
}
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index 4a813cd489022..8a4fa36c91479 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -99,7 +99,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS(
toDim.SetByteStride(stride);
stride *= toDim.Extent();
}
- int result{ReturnError(terminator, to.Allocate(kNoAsyncId))};
+ int result{ReturnError(terminator, to.Allocate(kNoAsyncObject))};
if (result == StatOk && derived && !derived->noInitializationNeeded()) {
result = ReturnError(terminator, Initialize(to, *derived, terminator));
}
@@ -277,7 +277,7 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
// entity, otherwise, the Deallocate() below will not
// free the descriptor memory.
newFrom.raw().attribute = CFI_attribute_allocatable;
- auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))};
+ auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncObject))};
if (stat == StatOk) {
if (HasDynamicComponent(from)) {
// If 'from' has allocatable/automatic component, we cannot
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index d1152ee1caefb..f140d202e118e 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -118,7 +118,7 @@ static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("Compare: could not allocate storage for result");
}
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
@@ -173,7 +173,7 @@ static RT_API_ATTRS void AdjustLRHelper(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
}
for (SubscriptValue resultAt{0}; elements-- > 0;
@@ -227,7 +227,7 @@ static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("LEN_TRIM: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -427,7 +427,7 @@ static RT_API_ATTRS void GeneralCharFunc(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -530,7 +530,8 @@ static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
- RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(
+ terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
}
for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
@@ -606,7 +607,7 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
- if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (accumulator.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash(
"CharacterConcatenate: could not allocate storage for result");
}
@@ -629,7 +630,8 @@ void RTDEF(CharacterConcatenateScalar1)(
accumulator.set_base_addr(nullptr);
std::size_t oldLen{accumulator.ElementBytes()};
accumulator.raw().elem_len += chars;
- RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(
+ terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
FreeMemory(old);
}
@@ -831,7 +833,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
std::size_t origBytes{string.ElementBytes()};
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
CFI_attribute_allocatable);
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("REPEAT could not allocate storage for result");
}
const char *from{string.OffsetElement()};
@@ -865,7 +867,7 @@ void RTDEF(Trim)(Descriptor &result, const Descriptor &string,
}
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
CFI_attribute_allocatable);
- RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
}
diff --git a/flang-rt/lib/runtime/copy.cpp b/flang-rt/lib/runtime/copy.cpp
index 3a0f98cf8d376..f990f46e0be66 100644
--- a/flang-rt/lib/runtime/copy.cpp
+++ b/flang-rt/lib/runtime/copy.cpp
@@ -171,8 +171,8 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
*reinterpret_cast<Descriptor *>(toPtr + component->offset())};
if (toDesc.raw().base_addr != nullptr) {
toDesc.set_base_addr(nullptr);
- RUNTIME_CHECK(
- terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
+ RUNTIME_CHECK(terminator,
+ toDesc.Allocate(/*asyncObject=*/nullptr) == CFI_SUCCESS);
const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
fromPtr + component->offset())};
copyStack.emplace(toDesc, fromDesc);
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index c46ea806a430a..35037036f63e7 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -52,7 +52,7 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
allocDesc.raw().attribute = CFI_attribute_allocatable;
if (comp.genre() == typeInfo::Component::Genre::Automatic) {
stat = ReturnError(
- terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat);
+ terminator, allocDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -153,7 +153,7 @@ RT_API_ATTRS int InitializeClone(const Descriptor &clone,
if (origDesc.IsAllocated()) {
cloneDesc.ApplyMold(origDesc, origDesc.rank());
stat = ReturnError(
- terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat);
+ terminator, cloneDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
if (const typeInfo::DerivedType *
@@ -260,7 +260,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
copy.raw().attribute = CFI_attribute_allocatable;
Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
- copy.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ copy.Allocate(kNoAsyncObject) == CFI_SUCCESS);
ShallowCopyDiscontiguousToContiguous(copy, descriptor);
argDescriptor = ©
}
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 3debf53bb5290..67336d01380e0 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -158,7 +158,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
#endif
}
-RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
+RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
std::size_t elementBytes{ElementBytes()};
if (static_cast<std::int64_t>(elementBytes) < 0) {
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
@@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
// result is implementation defined, always allocate at least one byte.
- void *p{alloc(byteSize ? byteSize : 1, asyncId)};
+ void *p{alloc(byteSize ? byteSize : 1, asyncObject)};
if (!p) {
return CFI_ERROR_MEM_ALLOCATION;
}
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/r...
[truncated]
|
EXPECT_TRUE(b->IsAllocated()); | ||
cudaDeviceSynchronize(); | ||
EXPECT_EQ(cudaSuccess, cudaGetLastError()); | ||
|
||
RTNAME(AllocatableAllocate) | ||
(*c, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); | ||
(*c, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Question: asyncId=-1
translates to asyncObject=nullptr
. Here asyncId=1
also translates to asyncObject=nullptr
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes since it's a pointer now. This is a unittest so we don't really care which stream is used.
This reverts commit 7f922f1.
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted yesterday.
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted yesterday.
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted yesterday.
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted yesterday.
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted yesterday.
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted yesterday.
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted yesterday.
Switch from
int64_t
toint64_t*
to fit with the rest of the implementation.New tentative with some fix. The previous was reverted yesterday.