-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][OpenMP] Implement copyin for pointers and allocatables. #107425
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-flang-openmp Author: David Truby (DavidTruby) ChangesThe copyin clause currently forbids pointer and allocatable variables, For pointer variables it is sufficient to remove the check to get For allocatables we need to ensure that the master thread allocatable is Full diff: https://github.com/llvm/llvm-project/pull/107425.diff 4 Files Affected:
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index daded9091780e2..98eae365eb1170 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -326,6 +326,9 @@ class AbstractConverter {
virtual Fortran::lower::SymbolBox
lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+ virtual Fortran::lower::SymbolBox
+ shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
/// Return the mlir::SymbolTable associated to the ModuleOp.
/// Look-ups are faster using it than using module.lookup<>,
/// but the module op should be queried in case of failure
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 1f2724290b8852..125f91386ac9fd 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1175,7 +1175,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/// Find the symbol in the inner-most level of the local map or return null.
Fortran::lower::SymbolBox
- shallowLookupSymbol(const Fortran::semantics::Symbol &sym) {
+ shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override {
if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
return v;
return {};
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 6dee31ddb6963d..ca77edb0d600b0 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -14,6 +14,7 @@
#include "Clauses.h"
#include "flang/Lower/PFTBuilder.h"
+#include "flang/Lower/SymbolMap.h"
#include "flang/Parser/tools.h"
#include "flang/Semantics/tools.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
@@ -619,9 +620,28 @@ bool ClauseProcessor::processCopyin() const {
checkAndCopyHostAssociateVar(&*mem, &insPt);
break;
}
- if (semantics::IsAllocatableOrObjectPointer(&sym->GetUltimate()))
- TODO(converter.getCurrentLocation(),
- "pointer or allocatable variables in Copyin clause");
+ if (semantics::IsAllocatable(sym->GetUltimate())) {
+ // copyin should copy the association of the allocatable
+
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ const Fortran::semantics::Symbol &hsym = sym->GetUltimate();
+
+ Fortran::lower::SymbolBox hsb =
+ converter.lookupOneLevelUpSymbol(hsym);
+ assert(hsb && "Host symbol box not found");
+
+ Fortran::lower::SymbolBox sb = converter.shallowLookupSymbol(*sym);
+ assert(sb && "Host-associated symbol box not found");
+ assert(hsb.getAddr() != sb.getAddr() &&
+ "Host and associated symbol boxes are the same");
+
+ mlir::Location loc = converter.genLocation(sym->name());
+ mlir::OpBuilder::InsertionGuard ipGuard{builder};
+ builder.setInsertionPointAfter(sb.getAddr().getDefiningOp());
+ builder.create<hlfir::AssignOp>(loc, hsb.getAddr(), sb.getAddr(),
+ true, false, false);
+ }
+
assert(sym->has<semantics::HostAssocDetails>() &&
"No host-association found");
checkAndCopyHostAssociateVar(sym);
diff --git a/flang/test/Lower/OpenMP/copyin.f90 b/flang/test/Lower/OpenMP/copyin.f90
index 34c83fca464173..0ef52f2b824996 100644
--- a/flang/test/Lower/OpenMP/copyin.f90
+++ b/flang/test/Lower/OpenMP/copyin.f90
@@ -356,3 +356,70 @@ subroutine common_2()
end do
!$omp end parallel do
end subroutine
+
+! CHECK-LABEL: func.func @_QPpointer() {
+! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFpointerEp) : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {pinned}
+! CHECK: %[[VAL_5:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: fir.store %[[VAL_7]] to %[[VAL_6]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: omp.barrier
+! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.copy_in %[[VAL_8]] to %[[VAL_4]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.box<!fir.ptr<!fir.array<?xi32>>>, i1)
+! CHECK: %[[VAL_10:.*]] = fir.box_addr %[[VAL_9]]#0 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>>
+! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: fir.call @_QPsub7(%[[VAL_11]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
+! CHECK: hlfir.copy_out %[[VAL_4]], %[[VAL_9]]#1 to %[[VAL_8]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i1, !fir.box<!fir.ptr<!fir.array<?xi32>>>) -> ()
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+subroutine pointer()
+ integer, pointer, save :: p(:)
+ !$omp threadprivate(p)
+
+ !$omp parallel copyin(p)
+ call sub7(p)
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPallocatable() {
+! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFallocatableEp) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK: %[[VAL_9:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_9]] : i64
+! CHECK: fir.if %[[VAL_10]] {
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_6]] temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: }
+! CHECK: hlfir.assign %[[VAL_3]]#0 to %[[VAL_5]]#0 realloc : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: omp.barrier
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_13:.*]] = fir.box_addr %[[VAL_12]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: fir.call @_QPsub8(%[[VAL_14]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+subroutine allocatable()
+ integer, allocatable, save :: p(:)
+ !$omp threadprivate(p)
+
+ !$omp parallel copyin(p)
+ call sub8(p)
+ !$omp end parallel
+end subroutine
|
if (semantics::IsAllocatable(sym->GetUltimate())) { | ||
// copyin should copy the association of the allocatable | ||
|
||
fir::FirOpBuilder &builder = converter.getFirOpBuilder(); | ||
const Fortran::semantics::Symbol &hsym = sym->GetUltimate(); | ||
|
||
Fortran::lower::SymbolBox hsb = | ||
converter.lookupOneLevelUpSymbol(hsym); | ||
assert(hsb && "Host symbol box not found"); | ||
|
||
Fortran::lower::SymbolBox sb = converter.shallowLookupSymbol(*sym); | ||
assert(sb && "Host-associated symbol box not found"); | ||
assert(hsb.getAddr() != sb.getAddr() && | ||
"Host and associated symbol boxes are the same"); | ||
|
||
mlir::Location loc = converter.genLocation(sym->name()); | ||
mlir::OpBuilder::InsertionGuard ipGuard{builder}; | ||
builder.setInsertionPointAfter(sb.getAddr().getDefiningOp()); | ||
builder.create<hlfir::AssignOp>(loc, hsb.getAddr(), sb.getAddr(), | ||
true, false, false); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't copyHostAssociateVar
be used instead?
After #106559, it should be easier to use it to this end and avoid code duplication.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
copyHostAssociateVar
is already being called at line 647 but that wasn't sufficient in my testing to mean that the allocatable got copied in. I needed to add the hlfir.assign
to get the correct behaviour. I was testing with the following program:
program main
integer, allocatable, save :: a
!$omp threadprivate(a)
!$omp master
allocate(a)
a = 1
!$omp end master
!$omp parallel copyin(a)
if (allocated(a)) then
print *, a
endif
!$omp end parallel
end program
Without the copyin(a) this should only print once, whereas with the copyin(a) it should print OMP_NUM_THREADS times. Without the code block highlighted here, it only prints once.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test program works correctly with #106559, if I just remove the TODO:
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -619,9 +619,6 @@ bool ClauseProcessor::processCopyin() const {
checkAndCopyHostAssociateVar(&*mem, &insPt);
break;
}
- if (semantics::IsAllocatableOrObjectPointer(&sym->GetUltimate()))
- TODO(converter.getCurrentLocation(),
- "pointer or allocatable variables in Copyin clause");
assert(sym->has<semantics::HostAssocDetails>() &&
"No host-association found");
checkAndCopyHostAssociateVar(sym);
But line https://github.com/llvm/llvm-project/pull/106559/files#diff-0f70165af00b05acb5abeb11198079b793a20acf8f069edd2dbe7b34ac67b328R1262 should be chaged too, to allow copyin
with unallocated variables.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you saying, if we remove the todo and also add || flags.test(...::CopyIn
in the above place things should work?
When I just removed the TODO it didn't work for me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I didn't have the above patch though. I'll have a look at this tomorrow (it's getting late here)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you saying, if we remove the todo and also add
|| flags.test(...::CopyIn
in the above place things should work? When I just removed the TODO it didn't work for me.
Yes. I have just merged the patch to main.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right, it's sufficient to just remove the TODO with your patch. Thanks!
34bab84
to
006ebf1
Compare
The copyin clause currently forbids pointer and allocatable variables, which are allowed by the OpenMP 1.1 and 3.0 specifications respectively. Since llvm#106559 it is sufficient to remove the TODO check to get correct behaviour.
006ebf1
to
ee837f6
Compare
Looks good, but you also need to add that program main
integer, allocatable, save :: a
!$omp threadprivate(a)
!$omp parallel copyin(a)
a = 1
!$omp end parallel
end program |
d32975e
to
9bd09f5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
The copyin clause currently forbids pointer and allocatable variables,
which are allowed by the OpenMP 1.1 and 3.0 specifications respectively.
For pointer variables it is sufficient to remove the check to get
correct behaviour.
For allocatables we need to ensure that the master thread allocatable is
assigned to all of the threads, so we add an hlfir.assign.