-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][openacc] Add implicit copy for reduction in combined construct #70148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][openacc] Add implicit copy for reduction in combined construct #70148
Conversation
After PR#69417, lowering for combined constructs was updated to adhere to OpenACC 3.3, section 2.11: `A private or reduction clause on a combined construct is treated as if it appeared on the loop construct.` However, the second part of that paragraph notes `In addition, a reduction clause on a combined construct implies a copy clause`. Since the acc dialect decomposes combined constructs, it is important to distinguish between the case where an explicit data clause is required (as noted in section 2.6.2) and the case where an implicit data action must be generated by compiler.
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-flang-fir-hlfir Author: Razvan Lupusoru (razvanlupusoru) ChangesAfter PR#69417, lowering for combined constructs was updated to adhere to OpenACC 3.3, section 2.11: However, the second part of that paragraph notes Full diff: https://github.com/llvm/llvm-project/pull/70148.diff 5 Files Affected:
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 90644279d9e78ce..5c29c781417301b 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -336,7 +336,7 @@ static void genDeclareDataOperandOperationsWithModifier(
template <typename EntryOp, typename ExitOp>
static void genDataExitOperations(fir::FirOpBuilder &builder,
llvm::SmallVector<mlir::Value> operands,
- bool structured, bool implicit) {
+ bool structured) {
for (mlir::Value operand : operands) {
auto entryOp = mlir::dyn_cast_or_null<EntryOp>(operand.getDefiningOp());
assert(entryOp && "data entry op expected");
@@ -346,7 +346,7 @@ static void genDataExitOperations(fir::FirOpBuilder &builder,
varPtr = entryOp.getVarPtr();
builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
entryOp.getBounds(), entryOp.getDataClause(),
- structured, implicit,
+ structured, entryOp.getImplicit(),
builder.getStringAttr(*entryOp.getName()));
}
}
@@ -1872,9 +1872,24 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,
} else if (const auto *reductionClause =
std::get_if<Fortran::parser::AccClause::Reduction>(
&clause.u)) {
- if (!outerCombined)
+ // A reduction clause on a combined construct is treated as if it appeared
+ // on the loop construct. So don't generate a reduction clause when it is
+ // combined - delay it to the loop. However, a reduction clause on a
+ // combined construct implies a copy clause so issue an implicit copy
+ // instead.
+ if (!outerCombined) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes);
+ } else {
+ auto crtDataStart = dataClauseOperands.size();
+ genDataOperandOperations<mlir::acc::CopyinOp>(
+ std::get<Fortran::parser::AccObjectList>(reductionClause->v.t),
+ converter, semanticsContext, stmtCtx, dataClauseOperands,
+ mlir::acc::DataClause::acc_reduction,
+ /*structured=*/true, /*implicit=*/true);
+ copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
+ dataClauseOperands.end());
+ }
} else if (const auto *defaultClause =
std::get_if<Fortran::parser::AccClause::Default>(
&clause.u)) {
@@ -1944,13 +1959,13 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,
// Create the exit operations after the region.
genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
- builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, copyEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
- builder, copyoutEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, copyoutEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
- builder, attachEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, attachEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
- builder, createEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, createEntryOperands, /*structured=*/true);
builder.restoreInsertionPoint(insPt);
return computeOp;
@@ -2099,13 +2114,13 @@ static void genACCDataOp(Fortran::lower::AbstractConverter &converter,
// Create the exit operations after the region.
genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
- builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, copyEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
- builder, copyoutEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, copyoutEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
- builder, attachEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, attachEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
- builder, createEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, createEntryOperands, /*structured=*/true);
builder.restoreInsertionPoint(insPt);
}
@@ -2415,11 +2430,11 @@ genACCExitDataOp(Fortran::lower::AbstractConverter &converter,
exitDataOp.setFinalizeAttr(builder.getUnitAttr());
genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::CopyoutOp>(
- builder, copyoutOperands, /*structured=*/false, /*implicit=*/false);
+ builder, copyoutOperands, /*structured=*/false);
genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DeleteOp>(
- builder, deleteOperands, /*structured=*/false, /*implicit=*/false);
+ builder, deleteOperands, /*structured=*/false);
genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DetachOp>(
- builder, detachOperands, /*structured=*/false, /*implicit=*/false);
+ builder, detachOperands, /*structured=*/false);
}
template <typename Op>
@@ -2594,7 +2609,7 @@ genACCUpdateOp(Fortran::lower::AbstractConverter &converter,
builder, currentLocation, operands, operandSegments);
genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::UpdateHostOp>(
- builder, updateHostOperands, /*structured=*/false, /*implicit=*/false);
+ builder, updateHostOperands, /*structured=*/false);
if (addAsyncAttr)
updateOp.setAsyncAttr(builder.getUnitAttr());
@@ -3054,17 +3069,14 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
builder.setInsertionPointAfter(declareOp);
}
genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
- builder, createEntryOperands, /*structured=*/true,
- /*implicit=*/false);
+ builder, createEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::DeclareDeviceResidentOp,
mlir::acc::DeleteOp>(
- builder, deviceResidentEntryOperands, /*structured=*/true,
- /*implicit=*/false);
+ builder, deviceResidentEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
- builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
+ builder, copyEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
- builder, copyoutEntryOperands, /*structured=*/true,
- /*implicit=*/false);
+ builder, copyoutEntryOperands, /*structured=*/true);
});
}
diff --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
index e708ca65e7c14ae..8679e5f6ccd5f24 100644
--- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
@@ -751,12 +751,16 @@ subroutine acc_kernels_loop
reduction_i = 1
end do
-! CHECK: acc.kernels {
+! CHECK: %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK: %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
+! CHECK: acc.kernels dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}
+! CHECK: acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK: acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
end subroutine
diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
index eea4950b6d38f92..e0a29273bd783e7 100644
--- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -770,13 +770,17 @@ subroutine acc_parallel_loop
reduction_i = 1
end do
-! CHECK: acc.parallel {
+! CHECK: %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK: %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
+! CHECK: acc.parallel dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
+! CHECK: acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK: acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
!$acc parallel loop
do 10 i=0, n
diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90
index fb7e3615b698c1c..c94b5a577350a99 100644
--- a/flang/test/Lower/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90
@@ -705,12 +705,16 @@ subroutine acc_serial_loop
reduction_i = 1
end do
-! CHECK: acc.serial {
+! CHECK: %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK: %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
+! CHECK: acc.serial dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
+! CHECK: acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK: acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
end subroutine acc_serial_loop
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index b7e2aec6a4e6a92..98c800033cbe91c 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -131,7 +131,8 @@ LogicalResult acc::CopyinOp::verify() {
// Test for all clauses this operation can be decomposed from:
if (!getImplicit() && getDataClause() != acc::DataClause::acc_copyin &&
getDataClause() != acc::DataClause::acc_copyin_readonly &&
- getDataClause() != acc::DataClause::acc_copy)
+ getDataClause() != acc::DataClause::acc_copy &&
+ getDataClause() != acc::DataClause::acc_reduction)
return emitError(
"data clause associated with copyin operation must match its intent"
" or specify original clause this operation was decomposed from");
@@ -212,7 +213,8 @@ LogicalResult acc::CopyoutOp::verify() {
// Test for all clauses this operation can be decomposed from:
if (getDataClause() != acc::DataClause::acc_copyout &&
getDataClause() != acc::DataClause::acc_copyout_zero &&
- getDataClause() != acc::DataClause::acc_copy)
+ getDataClause() != acc::DataClause::acc_copy &&
+ getDataClause() != acc::DataClause::acc_reduction)
return emitError(
"data clause associated with copyout operation must match its intent"
" or specify original clause this operation was decomposed from");
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good! Thank you!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
After PR#69417, lowering for combined constructs was updated to adhere to OpenACC 3.3, section 2.11:
A private or reduction clause on a combined construct is treated as if it appeared on the loop construct.
However, the second part of that paragraph notes
In addition, a reduction clause on a combined construct implies a copy clause
. Since the acc dialect decomposes combined constructs, it is important to distinguish between the case where an explicit data clause is required (as noted in section 2.6.2) and the case where an implicit data action must be generated by compiler.