Skip to content

Commit 3c700d1

Browse files
authored
[flang] Extract hlfir.assign inlining from opt-bufferization. (#121544)
Optimized bufferization can transform hlfir.assign into a loop nest doing element-by-element assignment, but it avoids doing so when the RHS is an hlfir.expr. This is done to let the ElementalAssignBufferization pattern try to do a better job. This patch moves the hlfir.assign inlining after opt-bufferization, and enables it for hlfir.expr RHS. The hlfir.expr RHS cases are present in tonto, and this patch results in some nice improvements. Note that other compilers also handle those cases using array temporaries, so this patch seems to just get rid of the Assign runtime overhead/inefficiency.
1 parent b9482ce commit 3c700d1

File tree

12 files changed

+228
-178
lines changed

12 files changed

+228
-178
lines changed

flang/include/flang/Optimizer/HLFIR/Passes.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,8 @@ def InlineElementals : Pass<"inline-elementals"> {
4949
let summary = "Inline chained hlfir.elemental operations";
5050
}
5151

52+
def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
53+
let summary = "Inline hlfir.assign operations";
54+
}
55+
5256
#endif //FORTRAN_DIALECT_HLFIR_PASSES

flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_flang_library(HLFIRTransforms
44
BufferizeHLFIR.cpp
55
ConvertToFIR.cpp
66
InlineElementals.cpp
7+
InlineHLFIRAssign.cpp
78
LowerHLFIRIntrinsics.cpp
89
LowerHLFIROrderedAssignments.cpp
910
ScheduleOrderedAssignments.cpp
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
//===- InlineHLFIRAssign.cpp - Inline hlfir.assign ops --------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// Transform hlfir.assign array operations into loop nests performing element
9+
// per element assignments. The inlining is currently always done for trivial
10+
// data types, though we may add performance/code-size heuristics in the future.
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
14+
#include "flang/Optimizer/Builder/FIRBuilder.h"
15+
#include "flang/Optimizer/Builder/HLFIRTools.h"
16+
#include "flang/Optimizer/HLFIR/HLFIROps.h"
17+
#include "flang/Optimizer/HLFIR/Passes.h"
18+
#include "flang/Optimizer/OpenMP/Passes.h"
19+
#include "mlir/IR/PatternMatch.h"
20+
#include "mlir/Pass/Pass.h"
21+
#include "mlir/Support/LLVM.h"
22+
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
23+
24+
namespace hlfir {
25+
#define GEN_PASS_DEF_INLINEHLFIRASSIGN
26+
#include "flang/Optimizer/HLFIR/Passes.h.inc"
27+
} // namespace hlfir
28+
29+
#define DEBUG_TYPE "inline-hlfir-assign"
30+
31+
namespace {
32+
/// Expand hlfir.assign of array RHS to array LHS into a loop nest
33+
/// of element-by-element assignments:
34+
/// hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
35+
/// !fir.ref<!fir.array<3x3xf32>>
36+
/// into:
37+
/// fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
38+
/// fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
39+
/// %6 = hlfir.designate %4 (%arg2, %arg1) :
40+
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
41+
/// %7 = fir.load %6 : !fir.ref<f32>
42+
/// %8 = hlfir.designate %5 (%arg2, %arg1) :
43+
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
44+
/// hlfir.assign %7 to %8 : f32, !fir.ref<f32>
45+
/// }
46+
/// }
47+
///
48+
/// The transformation is correct only when LHS and RHS do not alias.
49+
/// When RHS is an array expression, then there is no aliasing.
50+
/// This transformation does not support runtime checking for
51+
/// non-conforming LHS/RHS arrays' shapes currently.
52+
class InlineHLFIRAssignConversion
53+
: public mlir::OpRewritePattern<hlfir::AssignOp> {
54+
public:
55+
using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
56+
57+
/// Try to replace \p assign with a loop nest of per-element hlfir.assign ops.
/// The match fails, leaving \p assign untouched, when:
///  - the assignment is an allocatable assignment;
///  - either side is not an array or has a non-trivial element type;
///  - the LHS and RHS element types differ;
///  - the RHS is not an hlfir.expr and alias analysis cannot prove that
///    the LHS and RHS do not alias.
/// On success the original \p assign is erased.
llvm::LogicalResult
58+
matchAndRewrite(hlfir::AssignOp assign,
59+
mlir::PatternRewriter &rewriter) const override {
60+
// Allocatable assignments are left alone: they may imply allocation.
if (assign.isAllocatableAssignment())
61+
return rewriter.notifyMatchFailure(assign,
62+
"AssignOp may imply allocation");
63+
64+
hlfir::Entity rhs{assign.getRhs()};
65+
66+
// Only array-to-array assignments of trivial element types are inlined.
if (!rhs.isArray())
67+
return rewriter.notifyMatchFailure(assign,
68+
"AssignOp's RHS is not an array");
69+
70+
mlir::Type rhsEleTy = rhs.getFortranElementType();
71+
if (!fir::isa_trivial(rhsEleTy))
72+
return rewriter.notifyMatchFailure(
73+
assign, "AssignOp's RHS data type is not trivial");
74+
75+
hlfir::Entity lhs{assign.getLhs()};
76+
if (!lhs.isArray())
77+
return rewriter.notifyMatchFailure(assign,
78+
"AssignOp's LHS is not an array");
79+
80+
mlir::Type lhsEleTy = lhs.getFortranElementType();
81+
if (!fir::isa_trivial(lhsEleTy))
82+
return rewriter.notifyMatchFailure(
83+
assign, "AssignOp's LHS data type is not trivial");
84+
85+
// Only identical element types on both sides are handled by this pattern.
if (lhsEleTy != rhsEleTy)
86+
return rewriter.notifyMatchFailure(assign,
87+
"RHS/LHS element types mismatch");
88+
89+
if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
90+
// If RHS is not an hlfir.expr, then we should prove that
91+
// LHS and RHS do not alias.
92+
// TODO: if they may alias, we can insert hlfir.as_expr for RHS,
93+
// and proceed with the inlining.
94+
fir::AliasAnalysis aliasAnalysis;
95+
mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
96+
// TODO: use areIdenticalOrDisjointSlices() from
97+
// OptimizedBufferization.cpp to check if we can still do the expansion.
98+
// Any result other than a definite "no alias" blocks the inlining.
if (!aliasRes.isNo()) {
99+
LLVM_DEBUG(llvm::dbgs() << "InlineHLFIRAssign:\n"
100+
<< "\tLHS: " << lhs << "\n"
101+
<< "\tRHS: " << rhs << "\n"
102+
<< "\tALIAS: " << aliasRes << "\n");
103+
return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias");
104+
}
105+
}
106+
107+
// All checks passed: emit the replacement code right before the original
// assign.
mlir::Location loc = assign->getLoc();
108+
fir::FirOpBuilder builder(rewriter, assign.getOperation());
109+
builder.setInsertionPoint(assign);
110+
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
111+
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
112+
// The loop extents are derived from the LHS shape only; no conformance
// check against the RHS shape is generated here.
mlir::Value shape = hlfir::genShape(loc, builder, lhs);
113+
llvm::SmallVector<mlir::Value> extents =
114+
hlfir::getIndexExtents(loc, builder, shape);
115+
// The loop nest is requested as unordered.
hlfir::LoopNest loopNest =
116+
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
117+
flangomp::shouldUseWorkshareLowering(assign));
118+
// Loop body: fetch the RHS element at the current indices, load it, and
// assign it to the LHS element at the same indices.
builder.setInsertionPointToStart(loopNest.body);
119+
auto rhsArrayElement =
120+
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
121+
rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
122+
auto lhsArrayElement =
123+
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
124+
builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement);
125+
// The whole-array assignment is now replaced by the loop nest.
rewriter.eraseOp(assign);
126+
return mlir::success();
127+
}
128+
};
129+
130+
/// Pass that greedily applies InlineHLFIRAssignConversion to the operation
/// it runs on, and signals pass failure if the greedy driver reports failure.
class InlineHLFIRAssignPass
131+
: public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
132+
public:
133+
/// Build the pattern set and run the greedy rewrite driver on the current
/// operation.
void runOnOperation() override {
134+
mlir::MLIRContext *context = &getContext();
135+
136+
mlir::GreedyRewriteConfig config;
137+
// Prevent the pattern driver from merging blocks.
138+
config.enableRegionSimplification =
139+
mlir::GreedySimplifyRegionLevel::Disabled;
140+
141+
// The hlfir.assign inlining pattern is the only pattern registered here.
mlir::RewritePatternSet patterns(context);
142+
patterns.insert<InlineHLFIRAssignConversion>(context);
143+
144+
// Emit an error on the operation and fail the pass if the driver fails.
if (mlir::failed(mlir::applyPatternsGreedily(
145+
getOperation(), std::move(patterns), config))) {
146+
mlir::emitError(getOperation()->getLoc(),
147+
"failure in hlfir.assign inlining");
148+
signalPassFailure();
149+
}
150+
}
151+
};
152+
} // namespace

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 3 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -772,108 +772,6 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
772772
return mlir::success();
773773
}
774774

775-
/// Expand hlfir.assign of array RHS to array LHS into a loop nest
776-
/// of element-by-element assignments:
777-
/// hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
778-
/// !fir.ref<!fir.array<3x3xf32>>
779-
/// into:
780-
/// fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
781-
/// fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
782-
/// %6 = hlfir.designate %4 (%arg2, %arg1) :
783-
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
784-
/// %7 = fir.load %6 : !fir.ref<f32>
785-
/// %8 = hlfir.designate %5 (%arg2, %arg1) :
786-
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
787-
/// hlfir.assign %7 to %8 : f32, !fir.ref<f32>
788-
/// }
789-
/// }
790-
///
791-
/// The transformation is correct only when LHS and RHS do not alias.
792-
/// This transformation does not support runtime checking for
793-
/// non-conforming LHS/RHS arrays' shapes currently.
794-
class VariableAssignBufferization
795-
: public mlir::OpRewritePattern<hlfir::AssignOp> {
796-
private:
797-
public:
798-
using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
799-
800-
llvm::LogicalResult
801-
matchAndRewrite(hlfir::AssignOp assign,
802-
mlir::PatternRewriter &rewriter) const override;
803-
};
804-
805-
llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
806-
hlfir::AssignOp assign, mlir::PatternRewriter &rewriter) const {
807-
if (assign.isAllocatableAssignment())
808-
return rewriter.notifyMatchFailure(assign, "AssignOp may imply allocation");
809-
810-
hlfir::Entity rhs{assign.getRhs()};
811-
812-
// To avoid conflicts with ElementalAssignBufferization pattern, we avoid
813-
// matching RHS when it is an `ExprType` defined by an `ElementalOp`; which is
814-
// among the main criteria matched by ElementalAssignBufferization.
815-
if (mlir::isa<hlfir::ExprType>(rhs.getType()) &&
816-
mlir::isa<hlfir::ElementalOp>(rhs.getDefiningOp()))
817-
return rewriter.notifyMatchFailure(
818-
assign, "RHS is an ExprType defined by ElementalOp");
819-
820-
if (!rhs.isArray())
821-
return rewriter.notifyMatchFailure(assign,
822-
"AssignOp's RHS is not an array");
823-
824-
mlir::Type rhsEleTy = rhs.getFortranElementType();
825-
if (!fir::isa_trivial(rhsEleTy))
826-
return rewriter.notifyMatchFailure(
827-
assign, "AssignOp's RHS data type is not trivial");
828-
829-
hlfir::Entity lhs{assign.getLhs()};
830-
if (!lhs.isArray())
831-
return rewriter.notifyMatchFailure(assign,
832-
"AssignOp's LHS is not an array");
833-
834-
mlir::Type lhsEleTy = lhs.getFortranElementType();
835-
if (!fir::isa_trivial(lhsEleTy))
836-
return rewriter.notifyMatchFailure(
837-
assign, "AssignOp's LHS data type is not trivial");
838-
839-
if (lhsEleTy != rhsEleTy)
840-
return rewriter.notifyMatchFailure(assign,
841-
"RHS/LHS element types mismatch");
842-
843-
fir::AliasAnalysis aliasAnalysis;
844-
mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
845-
// TODO: use areIdenticalOrDisjointSlices() to check if
846-
// we can still do the expansion.
847-
if (!aliasRes.isNo()) {
848-
LLVM_DEBUG(llvm::dbgs() << "VariableAssignBufferization:\n"
849-
<< "\tLHS: " << lhs << "\n"
850-
<< "\tRHS: " << rhs << "\n"
851-
<< "\tALIAS: " << aliasRes << "\n");
852-
return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias");
853-
}
854-
855-
mlir::Location loc = assign->getLoc();
856-
fir::FirOpBuilder builder(rewriter, assign.getOperation());
857-
builder.setInsertionPoint(assign);
858-
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
859-
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
860-
mlir::Value shape = hlfir::genShape(loc, builder, lhs);
861-
llvm::SmallVector<mlir::Value> extents =
862-
hlfir::getIndexExtents(loc, builder, shape);
863-
hlfir::LoopNest loopNest =
864-
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
865-
flangomp::shouldUseWorkshareLowering(assign));
866-
builder.setInsertionPointToStart(loopNest.body);
867-
auto rhsArrayElement =
868-
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
869-
rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
870-
auto lhsArrayElement =
871-
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
872-
builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement);
873-
rewriter.eraseOp(assign);
874-
return mlir::success();
875-
}
876-
877775
using GenBodyFn =
878776
std::function<mlir::Value(fir::FirOpBuilder &, mlir::Location, mlir::Value,
879777
const llvm::SmallVectorImpl<mlir::Value> &)>;
@@ -1280,9 +1178,9 @@ class ReductionMaskConversion : public mlir::OpRewritePattern<Op> {
12801178
loc, resultArr, builder.createBool(loc, false));
12811179

12821180
// Check all the users - the destroy is no longer required, and any assign
1283-
// can use resultArr directly so that VariableAssignBufferization in this
1284-
// pass can optimize the results. Other operations are replaces with an
1285-
// AsExpr for the temporary resultArr.
1181+
// can use resultArr directly so that InlineHLFIRAssign pass
1182+
// can optimize the results. Other operations are replaced with an AsExpr
1183+
// for the temporary resultArr.
12861184
llvm::SmallVector<hlfir::DestroyOp> destroys;
12871185
llvm::SmallVector<hlfir::AssignOp> assigns;
12881186
for (auto user : mloc->getUsers()) {
@@ -1430,7 +1328,6 @@ class OptimizedBufferizationPass
14301328
// This requires small code reordering in ElementalAssignBufferization.
14311329
patterns.insert<ElementalAssignBufferization>(context);
14321330
patterns.insert<BroadcastAssignBufferization>(context);
1433-
patterns.insert<VariableAssignBufferization>(context);
14341331
patterns.insert<EvaluateIntoMemoryAssignBufferization>(context);
14351332
patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
14361333
patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
234234
pm.addPass(mlir::createCSEPass());
235235
addNestedPassToAllTopLevelOperations<PassConstructor>(
236236
pm, hlfir::createOptimizedBufferization);
237+
addNestedPassToAllTopLevelOperations<PassConstructor>(
238+
pm, hlfir::createInlineHLFIRAssign);
237239
}
238240
pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
239241
pm.addPass(hlfir::createLowerHLFIRIntrinsics());

flang/test/Driver/mlir-pass-pipeline.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,16 @@
3636
! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
3737
! O2-NEXT: 'fir.global' Pipeline
3838
! O2-NEXT: OptimizedBufferization
39+
! O2-NEXT: InlineHLFIRAssign
3940
! O2-NEXT: 'func.func' Pipeline
4041
! O2-NEXT: OptimizedBufferization
42+
! O2-NEXT: InlineHLFIRAssign
4143
! O2-NEXT: 'omp.declare_reduction' Pipeline
4244
! O2-NEXT: OptimizedBufferization
45+
! O2-NEXT: InlineHLFIRAssign
4346
! O2-NEXT: 'omp.private' Pipeline
4447
! O2-NEXT: OptimizedBufferization
48+
! O2-NEXT: InlineHLFIRAssign
4549
! ALL: LowerHLFIROrderedAssignments
4650
! ALL-NEXT: LowerHLFIRIntrinsics
4751
! ALL-NEXT: BufferizeHLFIR

flang/test/Fir/basic-program.fir

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,16 @@ func.func @_QQmain() {
3737
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
3838
// PASSES-NEXT: 'fir.global' Pipeline
3939
// PASSES-NEXT: OptimizedBufferization
40+
// PASSES-NEXT: InlineHLFIRAssign
4041
// PASSES-NEXT: 'func.func' Pipeline
4142
// PASSES-NEXT: OptimizedBufferization
43+
// PASSES-NEXT: InlineHLFIRAssign
4244
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
4345
// PASSES-NEXT: OptimizedBufferization
46+
// PASSES-NEXT: InlineHLFIRAssign
4447
// PASSES-NEXT: 'omp.private' Pipeline
4548
// PASSES-NEXT: OptimizedBufferization
49+
// PASSES-NEXT: InlineHLFIRAssign
4650
// PASSES-NEXT: LowerHLFIROrderedAssignments
4751
// PASSES-NEXT: LowerHLFIRIntrinsics
4852
// PASSES-NEXT: BufferizeHLFIR

0 commit comments

Comments
 (0)