Skip to content

Commit 5344c60

Browse files
committed
[flang][OpenMP] Rewrite standalone loop directives to simd
Extends conversion support for `loop` directives. This PR handles standalone `loop` constructs by rewriting them to equivalent `simd` constructs. The reasoning behind that decision is documented in the rewrite function itself.
1 parent 42da120 commit 5344c60

File tree

3 files changed

+121
-22
lines changed

3 files changed

+121
-22
lines changed

flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,37 @@ class GenericLoopConversionPattern
3030
: public mlir::OpConversionPattern<mlir::omp::LoopOp> {
3131
public:
3232
enum class GenericLoopCombinedInfo {
33-
None,
33+
Standalone,
3434
TargetTeamsLoop,
3535
TargetParallelLoop
3636
};
3737

3838
using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
3939

40+
explicit GenericLoopConversionPattern(mlir::MLIRContext *ctx)
41+
: mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
42+
this->setHasBoundedRewriteRecursion(true);
43+
}
44+
4045
mlir::LogicalResult
4146
matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
4247
mlir::ConversionPatternRewriter &rewriter) const override {
4348
assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));
4449

45-
rewriteToDistributeParallelDo(loopOp, rewriter);
50+
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
51+
52+
switch (combinedInfo) {
53+
case GenericLoopCombinedInfo::Standalone:
54+
rewriteToSimdLoop(loopOp, rewriter);
55+
break;
56+
case GenericLoopCombinedInfo::TargetParallelLoop:
57+
assert(false);
58+
break;
59+
case GenericLoopCombinedInfo::TargetTeamsLoop:
60+
rewriteToDistributeParallelDo(loopOp, rewriter);
61+
break;
62+
}
63+
4664
rewriter.eraseOp(loopOp);
4765
return mlir::success();
4866
}
@@ -52,9 +70,8 @@ class GenericLoopConversionPattern
5270
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
5371

5472
switch (combinedInfo) {
55-
case GenericLoopCombinedInfo::None:
56-
return loopOp.emitError(
57-
"not yet implemented: Standalone `omp loop` directive");
73+
case GenericLoopCombinedInfo::Standalone:
74+
break;
5875
case GenericLoopCombinedInfo::TargetParallelLoop:
5976
return loopOp.emitError(
6077
"not yet implemented: Combined `omp target parallel loop` directive");
@@ -86,7 +103,7 @@ class GenericLoopConversionPattern
86103
static GenericLoopCombinedInfo
87104
findGenericLoopCombineInfo(mlir::omp::LoopOp loopOp) {
88105
mlir::Operation *parentOp = loopOp->getParentOp();
89-
GenericLoopCombinedInfo result = GenericLoopCombinedInfo::None;
106+
GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;
90107

91108
if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
92109
if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
@@ -100,6 +117,62 @@ class GenericLoopConversionPattern
100117
return result;
101118
}
102119

120+
/// Rewrites standalone `loop` directives to equivalent `simd` constructs.
121+
/// The reasoning behind this decision is that according to the spec (version
122+
/// 5.2, section 11.7.1):
123+
///
124+
/// "If the bind clause is not specified on a construct for which it may be
125+
/// specified and the construct is closely nested inside a teams or parallel
126+
/// construct, the effect is as if binding is teams or parallel. If none of
127+
/// those conditions hold, the binding region is not defined."
128+
///
129+
/// which means that standalone `loop` directives have undefined binding
130+
/// region. Moreover, the spec says (in the next paragraph):
131+
///
132+
/// "The specified binding region determines the binding thread set.
133+
/// Specifically, if the binding region is a teams region, then the binding
134+
/// thread set is the set of initial threads that are executing that region
135+
/// while if the binding region is a parallel region, then the binding thread
136+
/// set is the team of threads that are executing that region. If the binding
137+
/// region is not defined, then the binding thread set is the encountering
138+
/// thread."
139+
///
140+
/// which means that the binding thread set for a standalone `loop` directive
141+
/// is only the encountering thread.
142+
///
143+
/// Since the encountering thread is the binding thread (set) for a
144+
/// standalone `loop` directive, the best we can do in such case is to "simd"
145+
/// the directive.
146+
void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
147+
mlir::ConversionPatternRewriter &rewriter) const {
148+
loopOp.emitWarning("Detected standalone OpenMP `loop` directive, the "
149+
"associated loop will be rewritten to `simd`.");
150+
mlir::omp::SimdOperands simdClauseOps;
151+
simdClauseOps.privateVars = loopOp.getPrivateVars();
152+
153+
auto privateSyms = loopOp.getPrivateSyms();
154+
if (privateSyms)
155+
simdClauseOps.privateSyms.assign(privateSyms->begin(),
156+
privateSyms->end());
157+
158+
Fortran::common::openmp::EntryBlockArgs simdArgs;
159+
simdArgs.priv.vars = simdClauseOps.privateVars;
160+
161+
auto simdOp =
162+
rewriter.create<mlir::omp::SimdOp>(loopOp.getLoc(), simdClauseOps);
163+
mlir::Block *simdBlock =
164+
genEntryBlock(rewriter, simdArgs, simdOp.getRegion());
165+
166+
mlir::IRMapping mapper;
167+
mlir::Block &loopBlock = *loopOp.getRegion().begin();
168+
169+
for (auto [loopOpArg, simdopArg] :
170+
llvm::zip_equal(loopBlock.getArguments(), simdBlock->getArguments()))
171+
mapper.map(loopOpArg, simdopArg);
172+
173+
rewriter.clone(*loopOp.begin(), mapper);
174+
}
175+
103176
void rewriteToDistributeParallelDo(
104177
mlir::omp::LoopOp loopOp,
105178
mlir::ConversionPatternRewriter &rewriter) const {

flang/test/Lower/OpenMP/loop-directive.f90

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
subroutine test_no_clauses()
1212
integer :: i, j, dummy = 1
1313

14-
! CHECK: omp.loop private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
14+
! CHECK: omp.simd private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
1515
! CHECK-NEXT: omp.loop_nest (%[[IV:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
1616
! CHECK: %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]]
1717
! CHECK: fir.store %[[IV]] to %[[ARG_DECL]]#1 : !fir.ref<i32>
@@ -27,7 +27,7 @@ subroutine test_no_clauses()
2727
! CHECK-LABEL: func.func @_QPtest_collapse
2828
subroutine test_collapse()
2929
integer :: i, j, dummy = 1
30-
! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
30+
! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
3131
! CHECK-NEXT: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 {{.*}} {
3232
! CHECK: }
3333
! CHECK: }
@@ -43,7 +43,7 @@ subroutine test_collapse()
4343
! CHECK-LABEL: func.func @_QPtest_private
4444
subroutine test_private()
4545
integer :: i, dummy = 1
46-
! CHECK: omp.loop private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
46+
! CHECK: omp.simd private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
4747
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
4848
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_privateEdummy"}
4949
! CHECK: %{{.*}} = fir.load %[[DUMMY_DECL]]#0
@@ -100,3 +100,42 @@ subroutine test_bind()
100100
end do
101101
!$omp end loop
102102
end subroutine
103+
104+
! CHECK-LABEL: func.func @_QPtest_nested_directives
105+
subroutine test_nested_directives
106+
implicit none
107+
integer, parameter :: N = 100000
108+
integer a(N), b(N), c(N)
109+
integer j,i, num, flag;
110+
num = N
111+
112+
! CHECK: omp.teams {
113+
114+
! Verify the first `loop` directive was combined with `target teams` into
115+
! `target teams distribute parallel do`.
116+
! CHECK: omp.parallel {{.*}} {
117+
! CHECK: omp.distribute {
118+
! CHECK: omp.wsloop {
119+
! CHECK: omp.loop_nest {{.*}} {
120+
121+
! Verify the second `loop` directive was rewritten to `simd`.
122+
! CHECK: omp.simd {{.*}} {
123+
! CHECK: omp.loop_nest {{.*}} {
124+
! CHECK: }
125+
! CHECK: }
126+
127+
! CHECK: }
128+
! CHECK: } {omp.composite}
129+
! CHECK: } {omp.composite}
130+
! CHECK: } {omp.composite}
131+
! CHECK: }
132+
!$omp target teams map(to: a,b) map(from: c)
133+
!$omp loop
134+
do j=1,1000
135+
!$omp loop
136+
do i=1,N
137+
c(i) = a(i) * b(i)
138+
end do
139+
end do
140+
!$omp end target teams
141+
end subroutine

flang/test/Transforms/generic-loop-rewriting-todo.mlir

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,5 @@
11
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
22

3-
func.func @_QPtarget_loop() {
4-
%c0 = arith.constant 0 : i32
5-
%c10 = arith.constant 10 : i32
6-
%c1 = arith.constant 1 : i32
7-
// expected-error@below {{not yet implemented: Standalone `omp loop` directive}}
8-
omp.loop {
9-
omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
10-
omp.yield
11-
}
12-
}
13-
return
14-
}
15-
163
func.func @_QPtarget_parallel_loop() {
174
omp.target {
185
omp.parallel {

0 commit comments

Comments
 (0)