Skip to content

Commit 6f068b9

Browse files
authored
[flang][OpenMP] Allocate array reduction variables on the heap (#87773)
Following up on a review comment (#84958 (comment)): reductions might be inlined inside of a loop, so stack allocations are not safe. Normally flang allocates arrays on the stack. Allocatable arrays have a different type: fir.box<fir.heap<fir.array<...>>> instead of fir.box<fir.array<...>>. This patch allocates all array reduction variables on the heap. Reductions on allocatable arrays still aren't supported (but I will get to this soon).
1 parent eef63d3 commit 6f068b9

File tree

7 files changed

+171
-35
lines changed

7 files changed

+171
-35
lines changed

flang/lib/Lower/OpenMP/ReductionProcessor.cpp

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "flang/Optimizer/Builder/Todo.h"
2121
#include "flang/Optimizer/Dialect/FIRType.h"
2222
#include "flang/Optimizer/HLFIR/HLFIROps.h"
23+
#include "flang/Optimizer/Support/FatalError.h"
2324
#include "flang/Parser/tools.h"
2425
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
2526
#include "llvm/Support/CommandLine.h"
@@ -391,8 +392,60 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
391392
TODO(loc, "OpenMP genCombiner for unsupported reduction variable type");
392393
}
393394

395+
static void
396+
createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc,
397+
mlir::omp::DeclareReductionOp &reductionDecl) {
398+
mlir::Type redTy = reductionDecl.getType();
399+
400+
mlir::Region &cleanupRegion = reductionDecl.getCleanupRegion();
401+
assert(cleanupRegion.empty());
402+
mlir::Block *block =
403+
builder.createBlock(&cleanupRegion, cleanupRegion.end(), {redTy}, {loc});
404+
builder.setInsertionPointToEnd(block);
405+
406+
auto typeError = [loc]() {
407+
fir::emitFatalError(loc,
408+
"Attempt to create an omp reduction cleanup region "
409+
"for a type that wasn't allocated",
410+
/*genCrashDiag=*/true);
411+
};
412+
413+
mlir::Type valTy = fir::unwrapRefType(redTy);
414+
if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
415+
mlir::Type innerTy = fir::extractSequenceType(boxTy);
416+
if (!mlir::isa<fir::SequenceType>(innerTy))
417+
typeError();
418+
419+
mlir::Value arg = block->getArgument(0);
420+
arg = builder.loadIfRef(loc, arg);
421+
assert(mlir::isa<fir::BaseBoxType>(arg.getType()));
422+
423+
// Deallocate box
424+
// The FIR type system doesn't necessarily know that this is a mutable box
425+
// if we allocated the thread local array on the heap to avoid looped stack
426+
// allocations.
427+
mlir::Value addr =
428+
hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg});
429+
mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr);
430+
fir::IfOp ifOp =
431+
builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false);
432+
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
433+
434+
mlir::Value cast = builder.createConvert(
435+
loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr);
436+
builder.create<fir::FreeMemOp>(loc, cast);
437+
438+
builder.setInsertionPointAfter(ifOp);
439+
builder.create<mlir::omp::YieldOp>(loc);
440+
return;
441+
}
442+
443+
typeError();
444+
}
445+
394446
static mlir::Value
395447
createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc,
448+
mlir::omp::DeclareReductionOp &reductionDecl,
396449
const ReductionProcessor::ReductionIdentifier redId,
397450
mlir::Type type, bool isByRef) {
398451
mlir::Type ty = fir::unwrapRefType(type);
@@ -419,11 +472,24 @@ createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc,
419472
// Create the private copy from the initial fir.box:
420473
hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)};
421474

422-
// TODO: if the whole reduction is nested inside of a loop, this alloca
423-
// could lead to a stack overflow (the memory is only freed at the end of
424-
// the stack frame). The reduction declare operation needs a deallocation
425-
// region to undo the init region.
426-
hlfir::Entity temp = createStackTempFromMold(loc, builder, source);
475+
// Allocating on the heap in case the whole reduction is nested inside of a
476+
// loop
477+
// TODO: compare performance here to using allocas - this could be made to
478+
// work by inserting stacksave/stackrestore around the reduction in
479+
// OpenMPIRBuilder
480+
auto [temp, needsDealloc] = createTempFromMold(loc, builder, source);
481+
// if needsDealloc isn't statically false, add cleanup region. TODO: always
482+
// do this for allocatable boxes because they might have been re-allocated
483+
// in the body of the loop/parallel region
484+
std::optional<int64_t> cstNeedsDealloc =
485+
fir::getIntIfConstant(needsDealloc);
486+
assert(cstNeedsDealloc.has_value() &&
487+
"createTempFromMold decides this statically");
488+
if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
489+
auto insPt = builder.saveInsertionPoint();
490+
createReductionCleanupRegion(builder, loc, reductionDecl);
491+
builder.restoreInsertionPoint(insPt);
492+
}
427493

428494
// Put the temporary inside of a box:
429495
hlfir::Entity box = hlfir::genVariableBox(loc, builder, temp);
@@ -462,7 +528,7 @@ mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction(
462528
builder.setInsertionPointToEnd(&decl.getInitializerRegion().back());
463529

464530
mlir::Value init =
465-
createReductionInitRegion(builder, loc, redId, type, isByRef);
531+
createReductionInitRegion(builder, loc, decl, redId, type, isByRef);
466532
builder.create<mlir::omp::YieldOp>(loc, init);
467533

468534
builder.createBlock(&decl.getReductionRegion(),

flang/test/Lower/OpenMP/parallel-reduction-array.f90

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ program reduce
1515

1616
! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> init {
1717
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
18-
! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<3xi32> {bindc_name = ".tmp"}
1918
! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
2019
! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
2120
! CHECK: %[[VAL_4:.*]] = arith.constant 3 : index
2221
! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
23-
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
24-
! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
22+
! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32> {bindc_name = ".tmp", uniq_name = ""}
23+
! CHECK: %[[TRUE:.*]] = arith.constant true
24+
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<3xi32>>,
25+
!fir.shape<1>) -> (!fir.heap<!fir.array<3xi32>>, !fir.heap<!fir.array<3xi32>>)
26+
! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.heap<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
2527
! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
2628
! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
2729
! CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
@@ -43,6 +45,18 @@ program reduce
4345
! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
4446
! CHECK: }
4547
! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
48+
! CHECK: } cleanup {
49+
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
50+
! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
51+
! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>) -> !fir.ref<!fir.array<3xi32>>
52+
! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> i64
53+
! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
54+
! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
55+
! CHECK: fir.if %[[VAL_5]] {
56+
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> !fir.heap<!fir.array<3xi32>>
57+
! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<3xi32>>
58+
! CHECK: }
59+
! CHECK: omp.yield
4660
! CHECK: }
4761

4862
! CHECK-LABEL: func.func @_QQmain()

flang/test/Lower/OpenMP/parallel-reduction-array2.f90

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ program reduce
1515

1616
! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> init {
1717
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
18-
! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<3xi32> {bindc_name = ".tmp"}
1918
! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
2019
! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
2120
! CHECK: %[[VAL_4:.*]] = arith.constant 3 : index
2221
! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
23-
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
24-
! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
22+
! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32>
23+
! CHECK: %[[TRUE:.*]] = arith.constant true
24+
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<3xi32>>,
25+
!fir.shape<1>) -> (!fir.heap<!fir.array<3xi32>>, !fir.heap<!fir.array<3xi32>>)
26+
! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.heap<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
2527
! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
2628
! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
2729
! CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
@@ -43,6 +45,18 @@ program reduce
4345
! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
4446
! CHECK: }
4547
! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
48+
! CHECK: } cleanup {
49+
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
50+
! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
51+
! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>) -> !fir.ref<!fir.array<3xi32>>
52+
! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> i64
53+
! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
54+
! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
55+
! CHECK: fir.if %[[VAL_5]] {
56+
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> !fir.heap<!fir.array<3xi32>>
57+
! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<3xi32>>
58+
! CHECK: }
59+
! CHECK: omp.yield
4660
! CHECK: }
4761

4862
! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {

flang/test/Lower/OpenMP/parallel-reduction3.f90

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,21 @@
1-
! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
2-
3-
! The script is designed to make adding checks to
4-
! a test case fast, it is *not* designed to be authoritative
5-
! about what constitutes a good test! The CHECK should be
6-
! minimized and named to reflect the test intent.
7-
81
! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
92
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
103

11-
12-
134
! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> init {
145
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
156
! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32
167
! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
178
! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
189
! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
1910
! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1>
20-
! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.array<?xi32>, %[[VAL_4]]#1 {bindc_name = ".tmp"}
21-
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
11+
! CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_4]]#1 {bindc_name = ".tmp", uniq_name = ""}
12+
! CHECK: %[[TRUE:.*]] = arith.constant true
13+
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
2214
! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_7]]#0 : i32, !fir.box<!fir.array<?xi32>>
2315
! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
2416
! CHECK: fir.store %[[VAL_7]]#0 to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
2517
! CHECK: omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
26-
27-
! CHECK-LABEL: } combiner {
18+
! CHECK: } combiner {
2819
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
2920
! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
3021
! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
@@ -41,6 +32,18 @@
4132
! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
4233
! CHECK: }
4334
! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
35+
! CHECK: } cleanup {
36+
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
37+
! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
38+
! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
39+
! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xi32>>) -> i64
40+
! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
41+
! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
42+
! CHECK: fir.if %[[VAL_5]] {
43+
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
44+
! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<?xi32>>
45+
! CHECK: }
46+
! CHECK: omp.yield
4447
! CHECK: }
4548

4649
! CHECK-LABEL: func.func @_QPs(
@@ -122,4 +125,4 @@ subroutine s(x)
122125
!$omp end parallel do
123126

124127
if (c(1) /= 5050) stop 1
125-
end subroutine s
128+
end subroutine s

flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ subroutine reduce(r)
2929
! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
3030
! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
3131
! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1>
32-
! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.array<?xf64>, %[[VAL_4]]#1 {bindc_name = ".tmp"}
33-
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.array<?xf64>>)
32+
! CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array<?xf64>, %[[VAL_4]]#1 {bindc_name = ".tmp", uniq_name = ""}
33+
! CHECK: %[[TRUE:.*]] = arith.constant true
34+
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.heap<!fir.array<?xf64>>)
3435
! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_7]]#0 : f64, !fir.box<!fir.array<?xf64>>
3536
! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
3637
! CHECK: fir.store %[[VAL_7]]#0 to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
@@ -53,6 +54,18 @@ subroutine reduce(r)
5354
! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<f64>
5455
! CHECK: }
5556
! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf64>>>)
57+
! CHECK: } cleanup {
58+
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf64>>>):
59+
! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
60+
! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
61+
! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xf64>>) -> i64
62+
! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
63+
! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
64+
! CHECK: fir.if %[[VAL_5]] {
65+
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xf64>>) -> !fir.heap<!fir.array<?xf64>>
66+
! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<?xf64>>
67+
! CHECK: }
68+
! CHECK: omp.yield
5669
! CHECK: }
5770

5871
! CHECK-LABEL: func.func private @_QFPreduce(

flang/test/Lower/OpenMP/wsloop-reduction-array.f90

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,14 @@ program reduce
1616

1717
! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref<!fir.box<!fir.array<2xi32>>> init {
1818
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>):
19-
! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<2xi32> {bindc_name = ".tmp"}
2019
! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
2120
! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
2221
! CHECK: %[[VAL_4:.*]] = arith.constant 2 : index
2322
! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
24-
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
25-
! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
23+
! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<2xi32> {bindc_name = ".tmp", uniq_name = ""}
24+
! CHECK: %[[TRUE:.*]] = arith.constant true
25+
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<2xi32>>, !fir.heap<!fir.array<2xi32>>)
26+
! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.heap<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
2627
! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<2xi32>>
2728
! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
2829
! CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
@@ -45,6 +46,18 @@ program reduce
4546
! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
4647
! CHECK: }
4748
! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)
49+
! CHECK: } cleanup {
50+
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>):
51+
! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
52+
! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.ref<!fir.array<2xi32>>
53+
! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<2xi32>>) -> i64
54+
! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
55+
! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
56+
! CHECK: fir.if %[[VAL_5]] {
57+
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<2xi32>>) -> !fir.heap<!fir.array<2xi32>>
58+
! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<2xi32>>
59+
! CHECK: }
60+
! CHECK: omp.yield
4861
! CHECK: }
4962

5063
! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {

0 commit comments

Comments
 (0)