Skip to content

Commit 7224d09

Browse files
committed
[flang][fir] Extend locality specs lowering to support init and dealloc regions
Extends the lowering of `fir.do_concurrent` to `fir.do_loop ... unordered` by adding support for lowering/inlining non-empty `init` and `dealloc` regions.
1 parent 4268360 commit 7224d09

File tree

3 files changed

+261
-21
lines changed

3 files changed

+261
-21
lines changed

flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -180,41 +180,53 @@ class DoConcurrentConversion
180180

181181
std::optional<mlir::ArrayAttr> localSyms = loop.getLocalSyms();
182182

183-
for (auto [localVar, localArg, localizerSym] : llvm::zip_equal(
183+
for (auto localInfo : llvm::zip_equal(
184184
loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) {
185+
mlir::Value localVar = std::get<0>(localInfo);
186+
mlir::BlockArgument localArg = std::get<1>(localInfo);
187+
mlir::Attribute localizerSym = std::get<2>(localInfo);
185188
mlir::SymbolRefAttr localizerName =
186189
llvm::cast<mlir::SymbolRefAttr>(localizerSym);
187190
fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName);
188191

189-
if (!localizer.getInitRegion().empty() ||
190-
!localizer.getDeallocRegion().empty())
191-
TODO(localizer.getLoc(), "localizers with `init` and `dealloc` "
192-
"regions are not handled yet.");
193-
194192
// TODO Should this be a heap allocation instead? For now, we allocate
195193
// on the stack for each loop iteration.
196194
mlir::Value localAlloc =
197195
rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType());
198196

199-
if (localizer.getLocalitySpecifierType() ==
200-
fir::LocalitySpecifierType::LocalInit) {
197+
auto cloneLocalizerRegion = [&](mlir::Region &region,
198+
mlir::ValueRange regionArgs,
199+
mlir::Block::iterator insertionPoint) {
201200
// It is reasonable to make this assumption since, at this stage,
202201
// control-flow ops are not converted yet. Therefore, things like `if`
203202
// conditions will still be represented by their encapsulating `fir`
204203
// dialect ops.
205-
assert(localizer.getCopyRegion().hasOneBlock() &&
206-
"Expected localizer to have a single block.");
207-
mlir::Block *beforeLocalInit = rewriter.getInsertionBlock();
208-
mlir::Block *afterLocalInit = rewriter.splitBlock(
209-
rewriter.getInsertionBlock(), rewriter.getInsertionPoint());
210-
rewriter.cloneRegionBefore(localizer.getCopyRegion(), afterLocalInit);
211-
mlir::Block *copyRegionBody = beforeLocalInit->getNextNode();
212-
213-
rewriter.eraseOp(copyRegionBody->getTerminator());
214-
rewriter.mergeBlocks(afterLocalInit, copyRegionBody);
215-
rewriter.mergeBlocks(copyRegionBody, beforeLocalInit,
216-
{localVar, localArg});
217-
}
204+
assert(region.hasOneBlock() &&
205+
"Expected localizer region to have a single block.");
206+
mlir::Block *beforeLocalizerRegion = rewriter.getInsertionBlock();
207+
mlir::Block *afterLocalizerRegion =
208+
rewriter.splitBlock(rewriter.getInsertionBlock(), insertionPoint);
209+
rewriter.cloneRegionBefore(region, afterLocalizerRegion);
210+
mlir::Block *localizerRegion = beforeLocalizerRegion->getNextNode();
211+
212+
rewriter.eraseOp(localizerRegion->getTerminator());
213+
rewriter.mergeBlocks(afterLocalizerRegion, localizerRegion);
214+
rewriter.mergeBlocks(localizerRegion, beforeLocalizerRegion,
215+
regionArgs);
216+
};
217+
218+
if (!localizer.getInitRegion().empty())
219+
cloneLocalizerRegion(localizer.getInitRegion(), {localVar, localArg},
220+
rewriter.getInsertionPoint());
221+
222+
if (localizer.getLocalitySpecifierType() ==
223+
fir::LocalitySpecifierType::LocalInit)
224+
cloneLocalizerRegion(localizer.getCopyRegion(), {localVar, localArg},
225+
rewriter.getInsertionPoint());
226+
227+
if (!localizer.getDeallocRegion().empty())
228+
cloneLocalizerRegion(localizer.getDeallocRegion(), {localArg},
229+
rewriter.getInsertionBlock()->end());
218230

219231
rewriter.replaceAllUsesWith(localArg, localAlloc);
220232
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// Tests converting `fir.local` ops that have `dealloc` regions.
2+
3+
// RUN: fir-opt --split-input-file --simplify-fir-operations %s | FileCheck %s
4+
5+
fir.local {type = local} @_QFlocalizer_with_dealloc_regionEa_private_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
6+
^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>, %arg1: !fir.ref<!fir.box<!fir.array<?xi32>>>):
7+
%c0 = arith.constant 0 : index
8+
%0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
9+
%1:3 = fir.box_dims %0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
10+
%2 = fir.shape %1#1 : (index) -> !fir.shape<1>
11+
%3 = fir.allocmem !fir.array<?xi32>, %1#1 {bindc_name = ".tmp", uniq_name = ""}
12+
%4 = fir.declare %3(%2) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xi32>>
13+
%5 = fir.embox %4(%2) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
14+
%6 = fir.shape_shift %1#0, %1#1 : (index, index) -> !fir.shapeshift<1>
15+
%7 = fir.rebox %5(%6) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?xi32>>
16+
fir.store %7 to %arg1 : !fir.ref<!fir.box<!fir.array<?xi32>>>
17+
fir.yield(%arg1 : !fir.ref<!fir.box<!fir.array<?xi32>>>)
18+
} dealloc {
19+
^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>):
20+
%c0_i64 = arith.constant 0 : i64
21+
%0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
22+
%1 = fir.box_addr %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
23+
%2 = fir.convert %1 : (!fir.ref<!fir.array<?xi32>>) -> i64
24+
%3 = arith.cmpi ne, %2, %c0_i64 : i64
25+
fir.if %3 {
26+
%4 = fir.convert %1 : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
27+
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
28+
}
29+
fir.yield
30+
}
31+
32+
func.func @_QPlocalizer_with_dealloc_region(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}) {
33+
%c42_i32 = arith.constant 42 : i32
34+
%c1 = arith.constant 1 : index
35+
%c0 = arith.constant 0 : index
36+
%0 = fir.alloca !fir.box<!fir.array<?xi32>>
37+
%1 = fir.dummy_scope : !fir.dscope
38+
%2 = fir.declare %arg0 dummy_scope %1 {uniq_name = "_QFlocalizer_with_dealloc_regionEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
39+
%3 = fir.load %2 : !fir.ref<i32>
40+
%4 = fir.convert %3 : (i32) -> index
41+
%5 = arith.cmpi sgt, %4, %c0 : index
42+
%6 = arith.select %5, %4, %c0 : index
43+
%7 = fir.alloca !fir.array<?xi32>, %6 {bindc_name = "a", uniq_name = "_QFlocalizer_with_dealloc_regionEa"}
44+
%8 = fir.shape %6 : (index) -> !fir.shape<1>
45+
%9 = fir.declare %7(%8) {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xi32>>
46+
%10 = fir.embox %9(%8) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
47+
fir.store %10 to %0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
48+
fir.do_concurrent {
49+
%11 = fir.alloca i32 {bindc_name = "i"}
50+
%12 = fir.declare %11 {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
51+
fir.do_concurrent.loop (%arg1) = (%c1) to (%4) step (%c1) local(@_QFlocalizer_with_dealloc_regionEa_private_box_Uxi32 %0 -> %arg2 : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
52+
%13 = fir.convert %arg1 : (index) -> i32
53+
fir.store %13 to %12 : !fir.ref<i32>
54+
%14 = fir.declare %arg2 {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
55+
%15 = fir.load %14 : !fir.ref<!fir.box<!fir.array<?xi32>>>
56+
%16 = fir.load %12 : !fir.ref<i32>
57+
%17 = fir.convert %16 : (i32) -> i64
58+
%18:3 = fir.box_dims %15, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
59+
%19 = fir.shift %18#0 : (index) -> !fir.shift<1>
60+
%20 = fir.array_coor %15(%19) %17 : (!fir.box<!fir.array<?xi32>>, !fir.shift<1>, i64) -> !fir.ref<i32>
61+
fir.store %c42_i32 to %20 : !fir.ref<i32>
62+
}
63+
}
64+
return
65+
}
66+
67+
// CHECK-LABEL: func.func @_QPlocalizer_with_dealloc_region(
68+
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
69+
// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64
70+
// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
71+
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
72+
// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32
73+
// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i"}
74+
// CHECK: %[[VAL_5:.*]] = fir.declare %[[VAL_4]] {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
75+
// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
76+
// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
77+
// CHECK: %[[VAL_8:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFlocalizer_with_dealloc_regionEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
78+
// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]] : !fir.ref<i32>
79+
// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
80+
// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_1]] : index
81+
// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_1]] : index
82+
// CHECK: %[[VAL_13:.*]] = fir.alloca !fir.array<?xi32>, %[[VAL_12]] {bindc_name = "a", uniq_name = "_QFlocalizer_with_dealloc_regionEa"}
83+
// CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1>
84+
// CHECK: %[[VAL_15:.*]] = fir.declare %[[VAL_13]](%[[VAL_14]]) {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xi32>>
85+
// CHECK: %[[VAL_16:.*]] = fir.embox %[[VAL_15]](%[[VAL_14]]) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
86+
// CHECK: fir.store %[[VAL_16]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
87+
// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_10]] step %[[VAL_2]] unordered {
88+
89+
// Local allocation
90+
// CHECK: %[[VAL_18:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
91+
92+
// `init` region body
93+
// CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
94+
// CHECK: %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_19]], %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
95+
// CHECK: %[[VAL_21:.*]] = fir.shape %[[VAL_20]]#1 : (index) -> !fir.shape<1>
96+
// CHECK: %[[VAL_22:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_20]]#1 {bindc_name = ".tmp", uniq_name = ""}
97+
// CHECK: %[[VAL_23:.*]] = fir.declare %[[VAL_22]](%[[VAL_21]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xi32>>
98+
// CHECK: %[[VAL_24:.*]] = fir.embox %[[VAL_23]](%[[VAL_21]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
99+
// CHECK: %[[VAL_25:.*]] = fir.shape_shift %[[VAL_20]]#0, %[[VAL_20]]#1 : (index, index) -> !fir.shapeshift<1>
100+
// CHECK: %[[VAL_26:.*]] = fir.rebox %[[VAL_24]](%[[VAL_25]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?xi32>>
101+
// CHECK: fir.store %[[VAL_26]] to %[[VAL_18]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
102+
103+
// Loop body
104+
// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_17]] : (index) -> i32
105+
// CHECK: fir.store %[[VAL_27]] to %[[VAL_5]] : !fir.ref<i32>
106+
// CHECK: %[[VAL_28:.*]] = fir.declare %[[VAL_18]] {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
107+
// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
108+
// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
109+
// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> i64
110+
// CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_29]], %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
111+
// CHECK: %[[VAL_33:.*]] = fir.shift %[[VAL_32]]#0 : (index) -> !fir.shift<1>
112+
// CHECK: %[[VAL_34:.*]] = fir.array_coor %[[VAL_29]](%[[VAL_33]]) %[[VAL_31]] : (!fir.box<!fir.array<?xi32>>, !fir.shift<1>, i64) -> !fir.ref<i32>
113+
// CHECK: fir.store %[[VAL_3]] to %[[VAL_34]] : !fir.ref<i32>
114+
115+
// `dealloc` region
116+
// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_18]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
117+
// CHECK: %[[VAL_36:.*]] = fir.box_addr %[[VAL_35]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
118+
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.ref<!fir.array<?xi32>>) -> i64
119+
// CHECK: %[[VAL_38:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_0]] : i64
120+
// CHECK: fir.if %[[VAL_38]] {
121+
// CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_36]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
122+
// CHECK: fir.freemem %[[VAL_39]] : !fir.heap<!fir.array<?xi32>>
123+
// CHECK: }
124+
// CHECK: }
125+
// CHECK: return
126+
// CHECK: }

0 commit comments

Comments
 (0)