Skip to content

Commit 0698482

Browse files
authored
[flang][MLIR] Hoist do concurrent nest bounds/steps outside the nest (#114020)
If you have the following multi-range `do concurrent` loop:

```fortran
do concurrent(i=1:n, j=1:bar(n*m, n/m))
  a(i) = n
end do
```

Currently, flang generates the following IR:

```mlir
fir.do_loop %arg1 = %42 to %44 step %c1 unordered {
  ...
  %53:3 = hlfir.associate %49 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
  %54:3 = hlfir.associate %52 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
  %55 = fir.call @_QFPbar(%53#1, %54#1) fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
  hlfir.end_associate %53#1, %53#2 : !fir.ref<i32>, i1
  hlfir.end_associate %54#1, %54#2 : !fir.ref<i32>, i1
  %56 = fir.convert %55 : (i32) -> index
  ...
  fir.do_loop %arg2 = %46 to %56 step %c1_4 unordered {
    ...
  }
}
```

However, if `bar` is impure, then we have a direct violation of the standard:

```
C1143 A reference to an impure procedure shall not appear within a DO CONCURRENT construct.
```

Moreover, the standard describes the execution of a `do concurrent` construct in multiple stages:

```
11.1.7.4 Execution of a DO construct
...
11.1.7.4.2 DO CONCURRENT loop control
The concurrent-limit and concurrent-step expressions in the concurrent-control-list are evaluated.
...
11.1.7.4.3 The execution cycle
...
The block of a DO CONCURRENT construct is executed for every active combination of the index-name values. Each execution of the block is an iteration. The executions may occur in any order.
```

From the above two points, it seems to me that execution is divided into multiple consecutive stages: 11.1.7.4.2 is the stage where we evaluate all control expressions, including the step, and then 11.1.7.4.3 is the stage where we execute the block of the concurrent loop itself using the combination of possible iteration values.
1 parent 7cd2974 commit 0698482

File tree

2 files changed

+132
-11
lines changed

2 files changed

+132
-11
lines changed

flang/lib/Lower/Bridge.cpp

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,18 +2131,37 @@ class FirConverter : public Fortran::lower::AbstractConverter {
21312131
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
21322132
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
21332133
mlir::Location loc = toLocation();
2134+
mlir::Operation *boundsAndStepIP = nullptr;
2135+
21342136
for (IncrementLoopInfo &info : incrementLoopNestInfo) {
2135-
info.loopVariable =
2136-
genLoopVariableAddress(loc, *info.loopVariableSym, info.isUnordered);
2137-
mlir::Value lowerValue = genControlValue(info.lowerExpr, info);
2138-
mlir::Value upperValue = genControlValue(info.upperExpr, info);
2139-
bool isConst = true;
2140-
mlir::Value stepValue = genControlValue(
2141-
info.stepExpr, info, info.isStructured() ? nullptr : &isConst);
2142-
// Use a temp variable for unstructured loops with non-const step.
2143-
if (!isConst) {
2144-
info.stepVariable = builder->createTemporary(loc, stepValue.getType());
2145-
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
2137+
mlir::Value lowerValue;
2138+
mlir::Value upperValue;
2139+
mlir::Value stepValue;
2140+
2141+
{
2142+
mlir::OpBuilder::InsertionGuard guard(*builder);
2143+
2144+
// Set the IP before the first loop in the nest so that all nest bounds
2145+
// and step values are created outside the nest.
2146+
if (boundsAndStepIP)
2147+
builder->setInsertionPointAfter(boundsAndStepIP);
2148+
2149+
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
2150+
info.isUnordered);
2151+
lowerValue = genControlValue(info.lowerExpr, info);
2152+
upperValue = genControlValue(info.upperExpr, info);
2153+
bool isConst = true;
2154+
stepValue = genControlValue(info.stepExpr, info,
2155+
info.isStructured() ? nullptr : &isConst);
2156+
boundsAndStepIP = stepValue.getDefiningOp();
2157+
2158+
// Use a temp variable for unstructured loops with non-const step.
2159+
if (!isConst) {
2160+
info.stepVariable =
2161+
builder->createTemporary(loc, stepValue.getType());
2162+
boundsAndStepIP =
2163+
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
2164+
}
21462165
}
21472166

21482167
// Structured loop - generate fir.do_loop.

flang/test/Lower/do_concurrent.f90

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s
2+
3+
! Simple tests for structured concurrent loops with loop-control.
4+
5+
pure function bar(n, m)
6+
implicit none
7+
integer, intent(in) :: n, m
8+
integer :: bar
9+
bar = n + m
10+
end function
11+
12+
!CHECK-LABEL: sub1
13+
subroutine sub1(n)
14+
implicit none
15+
integer :: n, m, i, j, k
16+
integer, dimension(n) :: a
17+
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
18+
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
19+
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
20+
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
21+
22+
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
23+
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
24+
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
25+
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
26+
27+
!CHECK: %[[LB3:.*]] = arith.constant 5 : i32
28+
!CHECK: %[[LB3_CVT:.*]] = fir.convert %[[LB3]] : (i32) -> index
29+
!CHECK: %[[UB3:.*]] = arith.constant 10 : i32
30+
!CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index
31+
32+
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
33+
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
34+
!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered
35+
36+
do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
37+
a(i) = n
38+
end do
39+
end subroutine
40+
41+
!CHECK-LABEL: sub2
42+
subroutine sub2(n)
43+
implicit none
44+
integer :: n, m, i, j
45+
integer, dimension(n) :: a
46+
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
47+
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
48+
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
49+
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
50+
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
51+
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
52+
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
53+
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
54+
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
55+
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
56+
do concurrent(i=1:n)
57+
do concurrent(j=1:bar(n*m, n/m))
58+
a(i) = n
59+
end do
60+
end do
61+
end subroutine
62+
63+
64+
!CHECK-LABEL: unstructured
65+
subroutine unstructured(inner_step)
66+
integer(4) :: i, j, inner_step
67+
68+
!CHECK-NOT: cf.br
69+
!CHECK-NOT: cf.cond_br
70+
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
71+
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> i16
72+
!CHECK: %[[UB1:.*]] = arith.constant 5 : i32
73+
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> i16
74+
!CHECK: %[[STP1:.*]] = arith.constant 1 : i16
75+
76+
!CHECK-NOT: cf.br
77+
!CHECK-NOT: cf.cond_br
78+
!CHECK: %[[LB2:.*]] = arith.constant 3 : i32
79+
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> i16
80+
!CHECK: %[[UB2:.*]] = arith.constant 9 : i32
81+
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> i16
82+
!CHECK: %[[STP2:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
83+
!CHECK: %[[STP2_CVT:.*]] = fir.convert %[[STP2]] : (i32) -> i16
84+
!CHECK: fir.store %[[STP2_CVT]] to %{{.*}} : !fir.ref<i16>
85+
!CHECK: cf.br ^[[I_LOOP_HEADER:.*]]
86+
87+
!CHECK: ^[[I_LOOP_HEADER]]:
88+
!CHECK-NEXT: %{{.*}} = fir.load %{{.*}} : !fir.ref<i16>
89+
!CHECK-NEXT: %{{.*}} = arith.constant 0 : i16
90+
!CHECK-NEXT: %{{.*}} = arith.cmpi sgt, %{{.*}}, %{{.*}}: i16
91+
!CHECK-NEXT: cf.cond_br %{{.*}}, ^[[J_LOOP_HEADER:.*]], ^{{.*}}
92+
93+
!CHECK: ^[[J_LOOP_HEADER]]:
94+
!CHECK-NEXT: %[[RANGE:.*]] = arith.subi %[[UB2_CVT]], %[[LB2_CVT]] : i16
95+
!CHECK-NEXT: %{{.*}} = arith.addi %[[RANGE]], %[[STP2_CVT]] : i16
96+
!CHECK-NEXT: %{{.*}} = arith.divsi %{{.*}}, %[[STP2_CVT]] : i16
97+
do concurrent (integer(2)::i=1:5, j=3:9:inner_step, i.ne.3)
98+
goto (7, 7) i+1
99+
print*, 'E:', i, j
100+
7 continue
101+
enddo
102+
end subroutine unstructured

0 commit comments

Comments
 (0)