Skip to content

[flang][acc] Avoid implicitly privatizing IVs already privatized #136181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 111 additions & 101 deletions flang/lib/Lower/OpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1804,27 +1804,38 @@ static void privatizeIv(Fortran::lower::AbstractConverter &converter,
builder.restoreInsertionPoint(insPt);
}

std::string recipeName =
fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
Fortran::lower::privatizationRecipePrefix);
auto recipe = Fortran::lower::createOrGetPrivateRecipe(
builder, recipeName, loc, ivValue.getType());
mlir::Operation *privateOp = nullptr;
for (auto privateVal : privateOperands) {
if (mlir::acc::getVar(privateVal.getDefiningOp()) == ivValue) {
privateOp = privateVal.getDefiningOp();
break;
}
}

std::stringstream asFortran;
asFortran << Fortran::lower::mangle::demangleName(toStringRef(sym.name()));
auto op = createDataEntryOp<mlir::acc::PrivateOp>(
builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true,
mlir::acc::DataClause::acc_private, ivValue.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
if (privateOp == nullptr) {
std::string recipeName =
fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
Fortran::lower::privatizationRecipePrefix);
auto recipe = Fortran::lower::createOrGetPrivateRecipe(
builder, recipeName, loc, ivValue.getType());

std::stringstream asFortran;
asFortran << Fortran::lower::mangle::demangleName(toStringRef(sym.name()));
auto op = createDataEntryOp<mlir::acc::PrivateOp>(
builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true,
mlir::acc::DataClause::acc_private, ivValue.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
privateOp = op.getOperation();

privateOperands.push_back(op.getAccVar());
privatizations.push_back(mlir::SymbolRefAttr::get(builder.getContext(),
recipe.getSymName().str()));
privateOperands.push_back(op.getAccVar());
privatizations.push_back(mlir::SymbolRefAttr::get(
builder.getContext(), recipe.getSymName().str()));
}

// Map the new private iv to its symbol for the scope of the loop. bindSymbol
// might create a hlfir.declare op, if so, we map its result in order to
// use the sym value in the scope.
converter.bindSymbol(sym, op.getAccVar());
converter.bindSymbol(sym, mlir::acc::getAccVar(privateOp));
auto privateValue = converter.getSymbolAddress(sym);
if (auto declareOp =
mlir::dyn_cast<hlfir::DeclareOp>(privateValue.getDefiningOp()))
Expand Down Expand Up @@ -1863,92 +1874,6 @@ static mlir::acc::LoopOp createLoopOp(
crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
builder.getContext(), mlir::acc::DeviceType::None));

llvm::SmallVector<mlir::Type> ivTypes;
llvm::SmallVector<mlir::Location> ivLocs;
llvm::SmallVector<bool> inclusiveBounds;

llvm::SmallVector<mlir::Location> locs;
locs.push_back(currentLocation); // Location of the directive
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
if (isDoConcurrent) {
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
const Fortran::parser::LoopControl *loopControl =
&*outerDoConstruct.GetLoopControl();
const auto &concurrent =
std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
.empty())
TODO(currentLocation, "DO CONCURRENT with locality spec");

const auto &concurrentHeader =
std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
const auto &controls =
std::get<std::list<Fortran::parser::ConcurrentControl>>(
concurrentHeader.t);
for (const auto &control : controls) {
lowerbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
upperbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
if (const auto &expr =
std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
control.t))
steps.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(*expr), stmtCtx)));
else // If `step` is not present, assume it is `1`.
steps.push_back(builder.createIntegerConstant(
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));

const auto &name = std::get<Fortran::parser::Name>(control.t);
privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
privateOperands, ivPrivate, privatizations, isDoConcurrent);

inclusiveBounds.push_back(true);
}
} else {
int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
for (unsigned i = 0; i < collapseValue; ++i) {
const Fortran::parser::LoopControl *loopControl;
if (i == 0) {
loopControl = &*outerDoConstruct.GetLoopControl();
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
} else {
auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
assert(doCons && "expect do construct");
loopControl = &*doCons->GetLoopControl();
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(*doCons)));
}

const Fortran::parser::LoopControl::Bounds *bounds =
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
assert(bounds && "Expected bounds on the loop construct");
lowerbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
upperbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
if (bounds->step)
steps.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
else // If `step` is not present, assume it is `1`.
steps.push_back(builder.createIntegerConstant(
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));

Fortran::semantics::Symbol &ivSym =
bounds->name.thing.symbol->GetUltimate();
privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
privateOperands, ivPrivate, privatizations);

inclusiveBounds.push_back(true);

if (i < collapseValue - 1)
crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
}
}

for (const Fortran::parser::AccClause &clause : accClauseList.v) {
mlir::Location clauseLocation = converter.genLocation(clause.source);
if (const auto *gangClause =
Expand Down Expand Up @@ -2101,6 +2026,91 @@ static mlir::acc::LoopOp createLoopOp(
}
}

llvm::SmallVector<mlir::Type> ivTypes;
llvm::SmallVector<mlir::Location> ivLocs;
llvm::SmallVector<bool> inclusiveBounds;
llvm::SmallVector<mlir::Location> locs;
locs.push_back(currentLocation); // Location of the directive
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
if (isDoConcurrent) {
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
const Fortran::parser::LoopControl *loopControl =
&*outerDoConstruct.GetLoopControl();
const auto &concurrent =
std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
.empty())
TODO(currentLocation, "DO CONCURRENT with locality spec");

const auto &concurrentHeader =
std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
const auto &controls =
std::get<std::list<Fortran::parser::ConcurrentControl>>(
concurrentHeader.t);
for (const auto &control : controls) {
lowerbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
upperbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
if (const auto &expr =
std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
control.t))
steps.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(*expr), stmtCtx)));
else // If `step` is not present, assume it is `1`.
steps.push_back(builder.createIntegerConstant(
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));

const auto &name = std::get<Fortran::parser::Name>(control.t);
privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
privateOperands, ivPrivate, privatizations, isDoConcurrent);

inclusiveBounds.push_back(true);
}
} else {
int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
for (unsigned i = 0; i < collapseValue; ++i) {
const Fortran::parser::LoopControl *loopControl;
if (i == 0) {
loopControl = &*outerDoConstruct.GetLoopControl();
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
} else {
auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
assert(doCons && "expect do construct");
loopControl = &*doCons->GetLoopControl();
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(*doCons)));
}

const Fortran::parser::LoopControl::Bounds *bounds =
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
assert(bounds && "Expected bounds on the loop construct");
lowerbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
upperbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
if (bounds->step)
steps.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
else // If `step` is not present, assume it is `1`.
steps.push_back(builder.createIntegerConstant(
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));

Fortran::semantics::Symbol &ivSym =
bounds->name.thing.symbol->GetUltimate();
privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
privateOperands, ivPrivate, privatizations);

inclusiveBounds.push_back(true);

if (i < collapseValue - 1)
crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
}
}

// Prepare the operand segment size attribute and the operands value range.
llvm::SmallVector<mlir::Value> operands;
llvm::SmallVector<int32_t> operandSegments;
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Lower/OpenACC/acc-kernels-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ subroutine acc_kernels_loop

! CHECK: acc.kernels {{.*}} {
! CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32
! CHECK-NEXT: acc.loop {{.*}} gang({num=[[GANGNUM1]] : i32}) {{.*}} {
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM1]] : i32}) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
Expand All @@ -508,7 +508,7 @@ subroutine acc_kernels_loop

! CHECK: acc.kernels {{.*}} {
! CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK-NEXT: acc.loop {{.*}} gang({num=[[GANGNUM2]] : i32}) {{.*}} {
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM2]] : i32}) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
Expand Down
24 changes: 17 additions & 7 deletions flang/test/Lower/OpenACC/acc-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ program acc_loop
END DO

! CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32
! CHECK-NEXT: acc.loop gang({num=[[GANGNUM1]] : i32}) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.loop gang({num=[[GANGNUM1]] : i32}) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}

Expand All @@ -83,7 +83,7 @@ program acc_loop
END DO

! CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK-NEXT: acc.loop gang({num=[[GANGNUM2]] : i32}) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.loop gang({num=[[GANGNUM2]] : i32}) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}

Expand Down Expand Up @@ -145,29 +145,39 @@ program acc_loop
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}

!$acc loop private(c)
DO i = 1, n
c(:,i) = d(:,i)
END DO

! CHECK: acc.loop private(@privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}

! When the induction variable is explicitly private - only a single private entry should be created.
!$acc loop private(i)
DO i = 1, n
a(i) = b(i)
END DO

! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}

!$acc loop private(c, d)
DO i = 1, n
a(i) = b(i)
c(:,i) = d(:,i)
END DO

! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.loop private(@privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}

!$acc loop private(c) private(d)
DO i = 1, n
a(i) = b(i)
c(:,i) = d(:,i)
END DO

! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.loop private(@privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}

Expand Down
6 changes: 3 additions & 3 deletions flang/test/Lower/OpenACC/acc-parallel-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ subroutine acc_parallel_loop
! CHECK: %[[ACC_PRIVATE_B:.*]] = acc.firstprivate varPtr(%[[DECLB]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
! CHECK: acc.parallel {{.*}} firstprivate(@firstprivatization_ref_10xf32 -> %[[ACC_PRIVATE_B]] : !fir.ref<!fir.array<10xf32>>) {
! CHECK: %[[ACC_PRIVATE_A:.*]] = acc.private varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
! CHECK: acc.loop {{.*}} private({{.*}}@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>)
! CHECK: acc.loop {{.*}} private({{.*}}@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>{{.*}})
! CHECK-NOT: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
Expand Down Expand Up @@ -510,7 +510,7 @@ subroutine acc_parallel_loop

! CHECK: acc.parallel {{.*}} {
! CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32
! CHECK-NEXT: acc.loop {{.*}} gang({num=[[GANGNUM1]] : i32})
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM1]] : i32})
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
Expand All @@ -523,7 +523,7 @@ subroutine acc_parallel_loop

! CHECK: acc.parallel {{.*}} {
! CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK-NEXT: acc.loop {{.*}} gang({num=[[GANGNUM2]] : i32})
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM2]] : i32})
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
Expand Down
Loading
Loading