Skip to content

[flang][OpenMP] Initialize allocatable members of derived types #120295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions flang/include/flang/Lower/AbstractConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ class AbstractConverter {
/// Get the mlir instance of a symbol.
virtual mlir::Value getSymbolAddress(SymbolRef sym) = 0;

virtual fir::ExtendedValue
symBoxToExtendedValue(const Fortran::lower::SymbolBox &symBox) = 0;

virtual fir::ExtendedValue
getSymbolExtendedValue(const Fortran::semantics::Symbol &sym,
Fortran::lower::SymMap *symMap = nullptr) = 0;
Expand Down
5 changes: 5 additions & 0 deletions flang/include/flang/Lower/ConvertVariable.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym,
Fortran::lower::SymMap &symMap);

/// Call clone initialization runtime routine to initialize \p sym's value.
void initializeCloneAtRuntime(Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym,
Fortran::lower::SymMap &symMap);

/// Create a fir::GlobalOp given a module variable definition. This is intended
/// to be used when lowering a module definition, not when lowering variables
/// used from a module. For used variables instantiateVariable must directly be
Expand Down
6 changes: 6 additions & 0 deletions flang/include/flang/Optimizer/Builder/Runtime/Derived.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ namespace fir::runtime {
void genDerivedTypeInitialize(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value box);

/// Generate call to derived type clone initialization runtime routine to
/// initialize \p newBox from \p box.
void genDerivedTypeInitializeClone(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value newBox,
mlir::Value box);

/// Generate call to derived type destruction runtime routine to
/// destroy \p box.
void genDerivedTypeDestroy(fir::FirOpBuilder &builder, mlir::Location loc,
Expand Down
7 changes: 7 additions & 0 deletions flang/include/flang/Runtime/derived-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ extern "C" {
void RTDECL(Initialize)(
const Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);

// Initializes an object clone from the original object.
// Each allocatable member of the clone is allocated with the same bounds as
// in the original object, if it is also allocated in it.
// The descriptor must be initialized and non-null.
void RTDECL(InitializeClone)(const Descriptor &, const Descriptor &,
const char *sourceFile = nullptr, int sourceLine = 0);

// Finalizes an object and its components. Deallocates any
// allocatable/automatic components. Does not deallocate the descriptor's
// storage.
Expand Down
4 changes: 2 additions & 2 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,8 +556,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
return lookupSymbol(sym).getAddr();
}

fir::ExtendedValue
symBoxToExtendedValue(const Fortran::lower::SymbolBox &symBox) {
fir::ExtendedValue symBoxToExtendedValue(
const Fortran::lower::SymbolBox &symBox) override final {
return symBox.match(
[](const Fortran::lower::SymbolBox::Intrinsic &box)
-> fir::ExtendedValue { return box.getAddr(); },
Expand Down
14 changes: 14 additions & 0 deletions flang/lib/Lower/ConvertVariable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,20 @@ void Fortran::lower::defaultInitializeAtRuntime(
}
}

/// Call clone initialization runtime routine to initialize \p sym's value.
void Fortran::lower::initializeCloneAtRuntime(
Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
mlir::Value newBox = builder.createBox(loc, exv);
lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(sym);
fir::ExtendedValue hexv = converter.symBoxToExtendedValue(hsb);
mlir::Value box = builder.createBox(loc, hexv);
fir::runtime::genDerivedTypeInitializeClone(builder, loc, newBox, box);
}

enum class VariableCleanUp { Finalize, Deallocate };
/// Check whether a local variable needs to be finalized according to clause
/// 7.5.6.3 point 3 or if it is an allocatable that must be deallocated. Note
Expand Down
21 changes: 19 additions & 2 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,23 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
*sym, /*skipDefaultInit=*/isFirstPrivate);
(void)success;
assert(success && "Privatization failed due to existing binding");

// Initialize clone from original object if it has any allocatable member.
auto needInitClone = [&] {
if (isFirstPrivate)
return false;

SymbolBox sb = symTable.lookupSymbol(sym);
assert(sb);
mlir::Value addr = sb.getAddr();
assert(addr);
return hlfir::mayHaveAllocatableComponent(addr.getType());
};

if (needInitClone()) {
Fortran::lower::initializeCloneAtRuntime(converter, *sym, symTable);
callsInitClone = true;
}
}

void DataSharingProcessor::copyFirstPrivateSymbol(
Expand Down Expand Up @@ -165,8 +182,8 @@ bool DataSharingProcessor::needBarrier() {
// variables.
// Emit implicit barrier for linear clause. Maybe on somewhere else.
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) &&
sym->test(semantics::Symbol::Flag::OmpLastPrivate))
if (sym->test(semantics::Symbol::Flag::OmpLastPrivate) &&
(sym->test(semantics::Symbol::Flag::OmpFirstPrivate) || callsInitClone))
Copy link
Contributor

@tblah tblah Jan 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@luporl Why don't we need this barrier in other cases, e.g. when FortranAInitialize (not the clone variant) is used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because FortranAInitializeClone receives an extra argument, that is the original variable.
When lastprivate is used, the original variable is modified by one of the running threads. Without the barrier, another thread may still be using it to initialize its copy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original variable is also used for other similar cases e.g. assumed shape arrays. Maybe for now we should always use the barrier?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will likely affect performance, but it is certainly better than having hard-to-debug race conditions.
I'm OK with it as a temporary fix.

Later, we could rename callsInitClone to needsBarrier and set it whenever a given operation may modify a variable that is also read by other threads.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I understand correctly that the barrier here isn't strictly necessary because v and v2 are different symbols?
https://github.com/llvm/llvm-project/pull/120295/files#diff-7f918db826ee2c02ce8eb509f6d8367ad741d2b1537396959d510b5fe6a29a37R52

I'm trying to add the extra barriers in my re-implementation of this patch in my changes for https://discourse.llvm.org/t/rfc-openmp-supporting-delayed-task-execution-with-firstprivate-variables/83084/7

Thanks for the help!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The barrier there is necessary. It prevents the original v variable from being updated by lastprivate before every thread has finished initializing its private copy of v (using the original variable v).

As v2 doesn't have an allocatable member, InitializeClone doesn't need to be called for it.
If it was the only symbol, as it is just written by lastprivate but not read by other threads, no barrier would be needed.
And if both v and v2 had allocatable members, a single barrier after the initialization of both would be enough, as their original variables are not read by any threads after that, but only written by lastprivate later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

return true;
}
return false;
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class DataSharingProcessor {
lower::pft::Evaluation &eval;
bool shouldCollectPreDeterminedSymbols;
bool useDelayedPrivatization;
bool callsInitClone = false;
lower::SymMap &symTable;
OMPConstructSymbolVisitor visitor;

Expand Down
15 changes: 15 additions & 0 deletions flang/lib/Optimizer/Builder/Runtime/Derived.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,21 @@ void fir::runtime::genDerivedTypeInitialize(fir::FirOpBuilder &builder,
builder.create<fir::CallOp>(loc, func, args);
}

void fir::runtime::genDerivedTypeInitializeClone(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value newBox,
mlir::Value box) {
auto func =
fir::runtime::getRuntimeFunc<mkRTKey(InitializeClone)>(loc, builder);
auto fTy = func.getFunctionType();
auto sourceFile = fir::factory::locationToFilename(builder, loc);
auto sourceLine =
fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
auto args = fir::runtime::createArguments(builder, loc, fTy, newBox, box,
sourceFile, sourceLine);
builder.create<fir::CallOp>(loc, func, args);
}

void fir::runtime::genDerivedTypeDestroy(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value box) {
auto func = fir::runtime::getRuntimeFunc<mkRTKey(Destroy)>(loc, builder);
Expand Down
10 changes: 10 additions & 0 deletions flang/runtime/derived-api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ void RTDEF(Initialize)(
}
}

void RTDEF(InitializeClone)(const Descriptor &clone, const Descriptor &orig,
const char *sourceFile, int sourceLine) {
if (const DescriptorAddendum * addendum{clone.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
Terminator terminator{sourceFile, sourceLine};
InitializeClone(clone, orig, *derived, terminator);
}
}
}

void RTDEF(Destroy)(const Descriptor &descriptor) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
Expand Down
78 changes: 78 additions & 0 deletions flang/runtime/derived.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,84 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
return stat;
}

RT_API_ATTRS int InitializeClone(const Descriptor &clone,
const Descriptor &orig, const typeInfo::DerivedType &derived,
Terminator &terminator, bool hasStat, const Descriptor *errMsg) {
const Descriptor &componentDesc{derived.component()};
std::size_t elements{orig.Elements()};
int stat{StatOk};

// Initialize each data component.
std::size_t components{componentDesc.Elements()};
for (std::size_t i{0}; i < components; ++i) {
const typeInfo::Component &comp{
*componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(i)};
SubscriptValue at[maxRank];
orig.GetLowerBounds(at);
// Allocate allocatable components that are also allocated in the original
// object.
if (comp.genre() == typeInfo::Component::Genre::Allocatable) {
// Initialize each element.
for (std::size_t j{0}; j < elements; ++j, orig.IncrementSubscripts(at)) {
Descriptor &origDesc{
*orig.ElementComponent<Descriptor>(at, comp.offset())};
Descriptor &cloneDesc{
*clone.ElementComponent<Descriptor>(at, comp.offset())};
if (origDesc.IsAllocated()) {
cloneDesc.ApplyMold(origDesc, origDesc.rank());
stat = ReturnError(terminator, cloneDesc.Allocate(), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
if (const typeInfo::DerivedType *
derived{addendum->derivedType()}) {
if (!derived->noInitializationNeeded()) {
// Perform default initialization for the allocated element.
stat = Initialize(
cloneDesc, *derived, terminator, hasStat, errMsg);
}
// Initialize derived type's allocatables.
if (stat == StatOk) {
stat = InitializeClone(cloneDesc, origDesc, *derived,
terminator, hasStat, errMsg);
}
}
}
}
}
if (stat != StatOk) {
break;
}
}
} else if (comp.genre() == typeInfo::Component::Genre::Data &&
comp.derivedType()) {
// Handle nested derived types.
const typeInfo::DerivedType &compType{*comp.derivedType()};
SubscriptValue extents[maxRank];
GetComponentExtents(extents, comp, orig);
// Data components don't have descriptors, allocate them.
StaticDescriptor<maxRank, true, 0> origStaticDesc;
StaticDescriptor<maxRank, true, 0> cloneStaticDesc;
Descriptor &origDesc{origStaticDesc.descriptor()};
Descriptor &cloneDesc{cloneStaticDesc.descriptor()};
// Initialize each element.
for (std::size_t j{0}; j < elements; ++j, orig.IncrementSubscripts(at)) {
origDesc.Establish(compType,
orig.ElementComponent<char>(at, comp.offset()), comp.rank(),
extents);
cloneDesc.Establish(compType,
clone.ElementComponent<char>(at, comp.offset()), comp.rank(),
extents);
stat = InitializeClone(
cloneDesc, origDesc, compType, terminator, hasStat, errMsg);
if (stat != StatOk) {
break;
}
}
}
}
return stat;
}

static RT_API_ATTRS const typeInfo::SpecialBinding *FindFinal(
const typeInfo::DerivedType &derived, int rank) {
if (const auto *ranked{derived.FindSpecialBinding(
Expand Down
8 changes: 8 additions & 0 deletions flang/runtime/derived.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ class Terminator;
RT_API_ATTRS int Initialize(const Descriptor &, const typeInfo::DerivedType &,
Terminator &, bool hasStat = false, const Descriptor *errMsg = nullptr);

// Initializes an object clone from the original object.
// Each allocatable member of the clone is allocated with the same bounds as
// in the original object, if it is also allocated in it.
// Returns a STAT= code (0 when all's well).
RT_API_ATTRS int InitializeClone(const Descriptor &, const Descriptor &,
const typeInfo::DerivedType &, Terminator &, bool hasStat = false,
const Descriptor *errMsg = nullptr);

// Call FINAL subroutines, if any
RT_API_ATTRS void Finalize(
const Descriptor &, const typeInfo::DerivedType &derived, Terminator *);
Expand Down
94 changes: 94 additions & 0 deletions flang/test/Lower/OpenMP/derived-type-allocatable.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
! Test that derived type allocatable members of private copies are properly
! initialized.
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s

module m1
type x
integer, allocatable :: x1(:)
end type

type y
integer :: y1(10)
end type

contains

!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_nested
!CHECK: fir.call @_FortranAInitializeClone
!CHECK-NEXT: omp.yield

!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_array_of_allocs
!CHECK: fir.call @_FortranAInitializeClone
!CHECK-NEXT: omp.yield

!CHECK-LABEL: omp.private {type = firstprivate} @_QMm1Ftest_array
!CHECK-NOT: fir.call @_FortranAInitializeClone
!CHECK: omp.yield

!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_array
!CHECK: fir.call @_FortranAInitializeClone
!CHECK-NEXT: omp.yield

!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_scalar
!CHECK: fir.call @_FortranAInitializeClone
!CHECK-NEXT: omp.yield

subroutine test_scalar()
type(x) :: v
allocate(v%x1(5))

!$omp parallel private(v)
!$omp end parallel
end subroutine

! Test omp sections lastprivate(v, v2)
! - InitializeClone must not be called for v2, that doesn't have an
! allocatable member.
! - InitializeClone must be called for v, that has an allocatable member.
! - To avoid race conditions between InitializeClone and lastprivate, a
! barrier must be present after the initializations.
!CHECK-LABEL: func @_QMm1Ptest_array
!CHECK: fir.call @_FortranAInitializeClone
!CHECK-NEXT: omp.barrier
subroutine test_array()
type(x) :: v(10)
type(y) :: v2(10)
allocate(v(1)%x1(5))

!$omp parallel private(v)
!$omp end parallel

!$omp parallel
!$omp sections lastprivate(v2, v)
!$omp end sections
!$omp end parallel

!$omp parallel firstprivate(v)
!$omp end parallel
end subroutine

subroutine test_array_of_allocs()
type(x), allocatable :: v(:)
allocate(v(10))
allocate(v(1)%x1(5))

!$omp parallel private(v)
!$omp end parallel
end subroutine

subroutine test_nested()
type dt1
integer, allocatable :: a(:)
end type

type dt2
type(dt1) :: d1
end type

type(dt2) :: d2
allocate(d2%d1%a(10))

!$omp parallel private(d2)
!$omp end parallel
end subroutine
end module
Loading