-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][fir] Extend locality specs lowering to support `init` and `dealloc` regions
#144027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][fir] Extend locality specs lowering to support `init` and `dealloc` regions
#144027
Conversation
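@llvm/pr-subscribers-flang-fir-hlfir Author: Kareem Ergawy (ergawy) Changes: Extending `fir.do_concurrent` to `fir.do_loop ... unordered` lowering by adding support for lowering/inlining non-empty `init` and `dealloc` regions. Patch is 20.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144027.diff 3 Files Affected: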
diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
index cb9e48cced2a1..099681bea0d83 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
@@ -180,41 +180,53 @@ class DoConcurrentConversion
std::optional<mlir::ArrayAttr> localSyms = loop.getLocalSyms();
- for (auto [localVar, localArg, localizerSym] : llvm::zip_equal(
+ for (auto localInfo : llvm::zip_equal(
loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) {
+ mlir::Value localVar = std::get<0>(localInfo);
+ mlir::BlockArgument localArg = std::get<1>(localInfo);
+ mlir::Attribute localizerSym = std::get<2>(localInfo);
mlir::SymbolRefAttr localizerName =
llvm::cast<mlir::SymbolRefAttr>(localizerSym);
fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName);
- if (!localizer.getInitRegion().empty() ||
- !localizer.getDeallocRegion().empty())
- TODO(localizer.getLoc(), "localizers with `init` and `dealloc` "
- "regions are not handled yet.");
-
// TODO Should this be a heap allocation instead? For now, we allocate
// on the stack for each loop iteration.
mlir::Value localAlloc =
rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType());
- if (localizer.getLocalitySpecifierType() ==
- fir::LocalitySpecifierType::LocalInit) {
+ auto cloneLocalizerRegion = [&](mlir::Region &region,
+ mlir::ValueRange regionArgs,
+ mlir::Block::iterator insertionPoint) {
// It is reasonable to make this assumption since, at this stage,
// control-flow ops are not converted yet. Therefore, things like `if`
// conditions will still be represented by their encapsulating `fir`
// dialect ops.
- assert(localizer.getCopyRegion().hasOneBlock() &&
- "Expected localizer to have a single block.");
- mlir::Block *beforeLocalInit = rewriter.getInsertionBlock();
- mlir::Block *afterLocalInit = rewriter.splitBlock(
- rewriter.getInsertionBlock(), rewriter.getInsertionPoint());
- rewriter.cloneRegionBefore(localizer.getCopyRegion(), afterLocalInit);
- mlir::Block *copyRegionBody = beforeLocalInit->getNextNode();
-
- rewriter.eraseOp(copyRegionBody->getTerminator());
- rewriter.mergeBlocks(afterLocalInit, copyRegionBody);
- rewriter.mergeBlocks(copyRegionBody, beforeLocalInit,
- {localVar, localArg});
- }
+ assert(region.hasOneBlock() &&
+ "Expected localizer region to have a single block.");
+ mlir::Block *beforeLocalizerRegion = rewriter.getInsertionBlock();
+ mlir::Block *afterLocalizerRegion =
+ rewriter.splitBlock(rewriter.getInsertionBlock(), insertionPoint);
+ rewriter.cloneRegionBefore(region, afterLocalizerRegion);
+ mlir::Block *localizerRegion = beforeLocalizerRegion->getNextNode();
+
+ rewriter.eraseOp(localizerRegion->getTerminator());
+ rewriter.mergeBlocks(afterLocalizerRegion, localizerRegion);
+ rewriter.mergeBlocks(localizerRegion, beforeLocalizerRegion,
+ regionArgs);
+ };
+
+ if (!localizer.getInitRegion().empty())
+ cloneLocalizerRegion(localizer.getInitRegion(), {localVar, localArg},
+ rewriter.getInsertionPoint());
+
+ if (localizer.getLocalitySpecifierType() ==
+ fir::LocalitySpecifierType::LocalInit)
+ cloneLocalizerRegion(localizer.getCopyRegion(), {localVar, localArg},
+ rewriter.getInsertionPoint());
+
+ if (!localizer.getDeallocRegion().empty())
+ cloneLocalizerRegion(localizer.getDeallocRegion(), {localArg},
+ rewriter.getInsertionBlock()->end());
rewriter.replaceAllUsesWith(localArg, localAlloc);
}
diff --git a/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir b/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir
new file mode 100644
index 0000000000000..b59ffdfb34adf
--- /dev/null
+++ b/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir
@@ -0,0 +1,126 @@
+// Tests converting `fir.local` ops that have `dealloc` regions.
+
+// RUN: fir-opt --split-input-file --simplify-fir-operations %s | FileCheck %s
+
+fir.local {type = local} @_QFlocalizer_with_dealloc_regionEa_private_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
+^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>, %arg1: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+ %c0 = arith.constant 0 : index
+ %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+ %1:3 = fir.box_dims %0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+ %2 = fir.shape %1#1 : (index) -> !fir.shape<1>
+ %3 = fir.allocmem !fir.array<?xi32>, %1#1 {bindc_name = ".tmp", uniq_name = ""}
+ %4 = fir.declare %3(%2) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xi32>>
+ %5 = fir.embox %4(%2) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+ %6 = fir.shape_shift %1#0, %1#1 : (index, index) -> !fir.shapeshift<1>
+ %7 = fir.rebox %5(%6) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?xi32>>
+ fir.store %7 to %arg1 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+ fir.yield(%arg1 : !fir.ref<!fir.box<!fir.array<?xi32>>>)
+} dealloc {
+^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+ %c0_i64 = arith.constant 0 : i64
+ %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+ %1 = fir.box_addr %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+ %2 = fir.convert %1 : (!fir.ref<!fir.array<?xi32>>) -> i64
+ %3 = arith.cmpi ne, %2, %c0_i64 : i64
+ fir.if %3 {
+ %4 = fir.convert %1 : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+ fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
+ }
+ fir.yield
+}
+
+func.func @_QPlocalizer_with_dealloc_region(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}) {
+ %c42_i32 = arith.constant 42 : i32
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %0 = fir.alloca !fir.box<!fir.array<?xi32>>
+ %1 = fir.dummy_scope : !fir.dscope
+ %2 = fir.declare %arg0 dummy_scope %1 {uniq_name = "_QFlocalizer_with_dealloc_regionEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ %3 = fir.load %2 : !fir.ref<i32>
+ %4 = fir.convert %3 : (i32) -> index
+ %5 = arith.cmpi sgt, %4, %c0 : index
+ %6 = arith.select %5, %4, %c0 : index
+ %7 = fir.alloca !fir.array<?xi32>, %6 {bindc_name = "a", uniq_name = "_QFlocalizer_with_dealloc_regionEa"}
+ %8 = fir.shape %6 : (index) -> !fir.shape<1>
+ %9 = fir.declare %7(%8) {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xi32>>
+ %10 = fir.embox %9(%8) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+ fir.store %10 to %0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+ fir.do_concurrent {
+ %11 = fir.alloca i32 {bindc_name = "i"}
+ %12 = fir.declare %11 {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ fir.do_concurrent.loop (%arg1) = (%c1) to (%4) step (%c1) local(@_QFlocalizer_with_dealloc_regionEa_private_box_Uxi32 %0 -> %arg2 : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
+ %13 = fir.convert %arg1 : (index) -> i32
+ fir.store %13 to %12 : !fir.ref<i32>
+ %14 = fir.declare %arg2 {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
+ %15 = fir.load %14 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+ %16 = fir.load %12 : !fir.ref<i32>
+ %17 = fir.convert %16 : (i32) -> i64
+ %18:3 = fir.box_dims %15, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+ %19 = fir.shift %18#0 : (index) -> !fir.shift<1>
+ %20 = fir.array_coor %15(%19) %17 : (!fir.box<!fir.array<?xi32>>, !fir.shift<1>, i64) -> !fir.ref<i32>
+ fir.store %c42_i32 to %20 : !fir.ref<i32>
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @_QPlocalizer_with_dealloc_region(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32
+// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_5:.*]] = fir.declare %[[VAL_4]] {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_8:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFlocalizer_with_dealloc_regionEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]] : !fir.ref<i32>
+// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_1]] : index
+// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_1]] : index
+// CHECK: %[[VAL_13:.*]] = fir.alloca !fir.array<?xi32>, %[[VAL_12]] {bindc_name = "a", uniq_name = "_QFlocalizer_with_dealloc_regionEa"}
+// CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_15:.*]] = fir.declare %[[VAL_13]](%[[VAL_14]]) {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xi32>>
+// CHECK: %[[VAL_16:.*]] = fir.embox %[[VAL_15]](%[[VAL_14]]) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: fir.store %[[VAL_16]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_10]] step %[[VAL_2]] unordered {
+
+// Local allocation
+// CHECK: %[[VAL_18:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+
+// `init` region body
+// CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+// CHECK: %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_19]], %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK: %[[VAL_21:.*]] = fir.shape %[[VAL_20]]#1 : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_22:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_20]]#1 {bindc_name = ".tmp", uniq_name = ""}
+// CHECK: %[[VAL_23:.*]] = fir.declare %[[VAL_22]](%[[VAL_21]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK: %[[VAL_24:.*]] = fir.embox %[[VAL_23]](%[[VAL_21]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: %[[VAL_25:.*]] = fir.shape_shift %[[VAL_20]]#0, %[[VAL_20]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_26:.*]] = fir.rebox %[[VAL_24]](%[[VAL_25]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: fir.store %[[VAL_26]] to %[[VAL_18]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+
+// Loop body
+// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_17]] : (index) -> i32
+// CHECK: fir.store %[[VAL_27]] to %[[VAL_5]] : !fir.ref<i32>
+// CHECK: %[[VAL_28:.*]] = fir.declare %[[VAL_18]] {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
+// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> i64
+// CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_29]], %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK: %[[VAL_33:.*]] = fir.shift %[[VAL_32]]#0 : (index) -> !fir.shift<1>
+// CHECK: %[[VAL_34:.*]] = fir.array_coor %[[VAL_29]](%[[VAL_33]]) %[[VAL_31]] : (!fir.box<!fir.array<?xi32>>, !fir.shift<1>, i64) -> !fir.ref<i32>
+// CHECK: fir.store %[[VAL_3]] to %[[VAL_34]] : !fir.ref<i32>
+
+// `dealloc` region
+// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_18]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+// CHECK: %[[VAL_36:.*]] = fir.box_addr %[[VAL_35]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.ref<!fir.array<?xi32>>) -> i64
+// CHECK: %[[VAL_38:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_0]] : i64
+// CHECK: fir.if %[[VAL_38]] {
+// CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_36]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK: fir.freemem %[[VAL_39]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: }
+// CHECK: }
+// CHECK: return
+// CHECK: }
diff --git a/flang/test/Transforms/do-concurrent-localizer-init-region.fir b/flang/test/Transforms/do-concurrent-localizer-init-region.fir
new file mode 100644
index 0000000000000..ebb56aec278f6
--- /dev/null
+++ b/flang/test/Transforms/do-concurrent-localizer-init-region.fir
@@ -0,0 +1,102 @@
+// Tests converting `fir.local` ops that have `init` regions.
+
+// RUN: fir-opt --split-input-file --simplify-fir-operations %s | FileCheck %s
+
+fir.local {type = local_init} @_QFlocalizer_with_init_regionEp_firstprivate_box_ptr_Uxi32 : !fir.box<!fir.ptr<!fir.array<?xi32>>> init {
+^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+ %c0 = arith.constant 0 : index
+ %0 = fir.shape %c0 : (index) -> !fir.shape<1>
+ %1 = fir.zero_bits !fir.ptr<!fir.array<?xi32>>
+ %2 = fir.embox %1(%0) : (!fir.ptr<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xi32>>>
+ fir.store %2 to %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+ fir.yield(%arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+} copy {
+^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+ %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+ fir.store %0 to %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+ fir.yield(%arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+}
+
+func.func @_QPlocalizer_with_init_region() {
+ %c42_i32 = arith.constant 42 : i32
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFlocalizer_with_init_regionEn"}
+ %2 = fir.declare %1 {uniq_name = "_QFlocalizer_with_init_regionEn"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.alloca !fir.box<!fir.ptr<!fir.array<?xi32>>> {bindc_name = "p", uniq_name = "_QFlocalizer_with_init_regionEp"}
+ %4 = fir.zero_bits !fir.ptr<!fir.array<?xi32>>
+ %5 = fir.shape %c0 : (index) -> !fir.shape<1>
+ %6 = fir.embox %4(%5) : (!fir.ptr<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xi32>>>
+ fir.store %6 to %3 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+ %7 = fir.declare %3 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFlocalizer_with_init_regionEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+ %8 = fir.load %2 : !fir.ref<i32>
+ %9 = fir.convert %8 : (i32) -> index
+
+ fir.do_concurrent {
+ %10 = fir.alloca i32 {bindc_name = "i"}
+ %11 = fir.declare %10 {uniq_name = "_QFlocalizer_with_init_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ fir.do_concurrent.loop (%arg0) = (%c1) to (%9) step (%c1) local(@_QFlocalizer_with_init_regionEp_firstprivate_box_ptr_Uxi32 %7 -> %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
+ %12 = fir.convert %arg0 : (index) -> i32
+ fir.store %12 to %11 : !fir.ref<i32>
+ %13 = fir.declare %arg1 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFlocalizer_with_init_regionEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+ %14 = fir.load %13 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+ %15 = fir.load %11 : !fir.ref<i32>
+ %16 = fir.convert %15 : (i32) -> i64
+ %17:3 = fir.box_dims %14, %c0 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+ %18 = fir.shift %17#0 : (index) -> !fir.shift<1>
+ %19 = fir.array_coor %14(%18) %16 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.shift<1>, i64) -> !fir.ref<i32>
+ fir.store %c42_i32 to %19 : !fir.ref<i32>
+ }
+ }
+
+ return
+}
+
+// CHECK-LABEL: func.func @_QPlocalizer_with_init_region() {
+// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 42 : i32
+// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFlocalizer_with_init_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFlocalizer_with_init_regionEn"}
+// CHECK: %[[VAL_7:.*]] = fir.declare %[[VAL_6]] {uniq_name = "_QFlocalizer_with_init_regionEn"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?xi32>>> {bindc_name = "p", uniq_name = "_QFlocalizer_with_init_regionEp"}
+// CHECK: %[[VAL_9:.*]] = fir.zero_bits !fir.ptr<!fir.array<?xi32>>
+// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_0]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_9]](%[[VAL_10]]) : (!fir.ptr<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xi32>>>
+// CHECK: fir.store %[[VAL_11]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK: %[[VAL_12:.*]] = fir.declare %[[VAL_8]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFlocalizer_with_init_regionEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index
+// CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_1]] to %[[VAL_14]] step %[[VAL_1]] unordered {
+
+// Local allocation
+// CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?xi32>>>
+
+// `init` region body
+// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_0]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_18:.*]] = fir.zero_bits !fir.ptr<!fir.array<?xi32>>
+// CHECK: %[[VAL_19:.*]] = fir.embox %[[VAL_18]](%[[VAL_17]]) : (!fir.ptr<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xi32>>>
+// CHECK: fir.store %[[VAL_19]] to %[[VAL_16]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+
+// `copy` region body
+// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_12]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK: fir.store %[[VAL_20]] to %[[VAL_16]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+
+// loop body
+// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK: fir.store %[[VAL_21]] to %[[VAL_4]] : !fir.ref<i32>
+// CHECK: %[[VAL_22:.*]] = fir.declare %[[VAL_16]] {fortran_attrs = #fir.var_attrs<pointer...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
mlir::Block *beforeLocalizerRegion = rewriter.getInsertionBlock(); | ||
mlir::Block *afterLocalizerRegion = | ||
rewriter.splitBlock(rewriter.getInsertionBlock(), insertionPoint); | ||
rewriter.cloneRegionBefore(region, afterLocalizerRegion); | ||
mlir::Block *localizerRegion = beforeLocalizerRegion->getNextNode(); | ||
|
||
rewriter.eraseOp(localizerRegion->getTerminator()); | ||
rewriter.mergeBlocks(afterLocalizerRegion, localizerRegion); | ||
rewriter.mergeBlocks(localizerRegion, beforeLocalizerRegion, | ||
regionArgs); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know this code was already here, but I find the loop and clone approach more readable/auditable than splitting, cloning and merging back blocks:
mlir::Block *beforeLocalizerRegion = rewriter.getInsertionBlock(); | |
mlir::Block *afterLocalizerRegion = | |
rewriter.splitBlock(rewriter.getInsertionBlock(), insertionPoint); | |
rewriter.cloneRegionBefore(region, afterLocalizerRegion); | |
mlir::Block *localizerRegion = beforeLocalizerRegion->getNextNode(); | |
rewriter.eraseOp(localizerRegion->getTerminator()); | |
rewriter.mergeBlocks(afterLocalizerRegion, localizerRegion); | |
rewriter.mergeBlocks(localizerRegion, beforeLocalizerRegion, | |
regionArgs); | |
mlir::OpBuilder::InsertionGuard guard(rewriter); | |
rewriter.setInsertionPoint(insertionPoint); | |
mlir::IRMapping mapper; | |
mapper.map(region.getArguments(), regionArgs); | |
for (mlir::Operation &op : region.front().without_terminator()) | |
(void)rewriter.clone(op, mapper); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the suggestion. Much simpler. Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…alloc` regions Extending `fir.do_concurrent` to `fir.do_loop ... unordered` lowering by adding support for lowering/inlining non-empty `init` and `dealloc` regions.
7224d09
to
efd3eb6
Compare
…alloc` regions (llvm#144027) Extending `fir.do_concurrent` to `fir.do_loop ... unordered` lowering by adding support for lowering/inlining non-empty `init` and `dealloc` regions. Resolves llvm#143897 (actually handles the todo).
…current` (#144074) Reintroduces changes from llvm/llvm-project#143897. A fix for the reported problem in llvm/llvm-project#143897 is hopefully resolved in llvm/llvm-project#144027. This PR aims to make it easier and more self-contained to revert the switch/flag if we discover any problems with enabling it by default.
…lvm#144074) Reintroduces changes from llvm#143897. A fix for the reported problem in llvm#143897 is hopefully resolved in llvm#144027. This PR aims to make it easier and more self-contained to revert the switch/flag if we discover any problems with enabling it by default.
…lvm#144074) Reintroduces changes from llvm#143897. A fix for the reported problem in llvm#143897 is hopefully resolved in llvm#144027. This PR aims to make it easier and more self-contained to revert the switch/flag if we discover any problems with enabling it by default.
Extending `fir.do_concurrent` to `fir.do_loop ... unordered` lowering by adding support for lowering/inlining non-empty `init` and `dealloc` regions.
Resolves #143897 (actually handles the todo).