Skip to content

Commit 5d38e6e

Browse files
committed
[flang] Introduce hlfir.elemental lowerings to omp.workshare_loop_nest (#104748)
This patch adds parallelization support for the following kind of expression in OpenMP workshare constructs:
* Elemental procedures in array expressions

(Reapplied with a linking fix.)
1 parent cbc7802 commit 5d38e6e

File tree

9 files changed

+431
-4
lines changed

9 files changed

+431
-4
lines changed

flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
2727
#include "flang/Optimizer/HLFIR/HLFIROps.h"
2828
#include "flang/Optimizer/HLFIR/Passes.h"
29+
#include "flang/Optimizer/OpenMP/Passes.h"
2930
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
3031
#include "mlir/IR/Dominance.h"
3132
#include "mlir/IR/PatternMatch.h"
@@ -792,7 +793,8 @@ struct ElementalOpConversion
792793
// Generate a loop nest looping around the hlfir.elemental shape and clone
793794
// hlfir.elemental region inside the inner loop.
794795
hlfir::LoopNest loopNest =
795-
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
796+
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
797+
flangomp::shouldUseWorkshareLowering(elemental));
796798
auto insPt = builder.saveInsertionPoint();
797799
builder.setInsertionPointToStart(loopNest.body);
798800
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,

flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ add_flang_library(HLFIRTransforms
2424
FIRDialectSupport
2525
FIRSupport
2626
FIRTransforms
27+
FlangOpenMPTransforms
2728
HLFIRDialect
2829
MLIRIR
2930
${dialect_libs}

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
2121
#include "flang/Optimizer/HLFIR/HLFIROps.h"
2222
#include "flang/Optimizer/HLFIR/Passes.h"
23+
#include "flang/Optimizer/OpenMP/Passes.h"
2324
#include "flang/Optimizer/Transforms/Utils.h"
2425
#include "mlir/Dialect/Func/IR/FuncOps.h"
2526
#include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
482483
// Generate a loop nest looping around the hlfir.elemental shape and clone
483484
// hlfir.elemental region inside the inner loop.
484485
hlfir::LoopNest loopNest =
485-
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
486+
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
487+
flangomp::shouldUseWorkshareLowering(elemental));
486488
builder.setInsertionPointToStart(loopNest.body);
487489
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
488490
loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
553555
llvm::SmallVector<mlir::Value> extents =
554556
hlfir::getIndexExtents(loc, builder, shape);
555557
hlfir::LoopNest loopNest =
556-
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
558+
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
559+
flangomp::shouldUseWorkshareLowering(assign));
557560
builder.setInsertionPointToStart(loopNest.body);
558561
auto arrayElement =
559562
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -651,7 +654,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
651654
llvm::SmallVector<mlir::Value> extents =
652655
hlfir::getIndexExtents(loc, builder, shape);
653656
hlfir::LoopNest loopNest =
654-
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
657+
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
658+
flangomp::shouldUseWorkshareLowering(assign));
655659
builder.setInsertionPointToStart(loopNest.body);
656660
auto rhsArrayElement =
657661
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
2+
3+
// CHECK-LABEL: func.func @simple(
4+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
5+
// CHECK: omp.parallel {
6+
// CHECK: omp.workshare {
7+
// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
8+
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
9+
// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
10+
// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
11+
// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
12+
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
13+
// CHECK: %[[VAL_7:.*]] = arith.constant true
14+
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
15+
// CHECK: omp.workshare.loop_wrapper {
16+
// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
17+
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
18+
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
19+
// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
20+
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
21+
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
22+
// CHECK: omp.yield
23+
// CHECK: }
24+
// CHECK: }
25+
// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
26+
// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
27+
// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
28+
// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
29+
// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
30+
// CHECK: omp.terminator
31+
// CHECK: }
32+
// CHECK: omp.terminator
33+
// CHECK: }
34+
// CHECK: return
35+
// CHECK: }
36+
func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
37+
omp.parallel {
38+
omp.workshare {
39+
%c42 = arith.constant 42 : index
40+
%c1_i32 = arith.constant 1 : i32
41+
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
42+
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
43+
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
44+
^bb0(%i: index):
45+
%ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
46+
%val = fir.load %ref : !fir.ref<i32>
47+
%sub = arith.subi %val, %c1_i32 : i32
48+
hlfir.yield_element %sub : i32
49+
}
50+
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
51+
hlfir.destroy %elemental : !hlfir.expr<42xi32>
52+
omp.terminator
53+
}
54+
omp.terminator
55+
}
56+
return
57+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
11+
12+
subroutine sb1(x, y)
13+
integer :: x(:)
14+
integer :: y(:)
15+
!$omp parallel workshare
16+
x = y
17+
!$omp end parallel workshare
18+
end subroutine
19+
20+
! HLFIR: omp.parallel {
21+
! HLFIR: omp.workshare {
22+
! HLFIR: hlfir.assign
23+
! HLFIR: omp.terminator
24+
! HLFIR: }
25+
! HLFIR: omp.terminator
26+
! HLFIR: }
27+
28+
! FIR: omp.parallel {
29+
! FIR: omp.wsloop nowait {
30+
! FIR: omp.loop_nest
31+
! FIR: }
32+
! FIR: omp.barrier
33+
! FIR: omp.terminator
34+
! FIR: }
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
11+
12+
subroutine sb1(a, x, y, z)
13+
integer :: a
14+
integer :: x(:)
15+
integer :: y(:)
16+
integer :: z(:)
17+
!$omp parallel workshare
18+
z = a * x + y
19+
!$omp end parallel workshare
20+
end subroutine
21+
22+
! HLFIR: func.func @_QPsb1
23+
! HLFIR: omp.parallel {
24+
! HLFIR: omp.workshare {
25+
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
26+
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
27+
! HLFIR: hlfir.assign
28+
! HLFIR: hlfir.destroy
29+
! HLFIR: hlfir.destroy
30+
! HLFIR-NOT: omp.barrier
31+
! HLFIR: omp.terminator
32+
! HLFIR: }
33+
! HLFIR-NOT: omp.barrier
34+
! HLFIR: omp.terminator
35+
! HLFIR: }
36+
! HLFIR: return
37+
! HLFIR: }
38+
! HLFIR:}
39+
40+
41+
! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
42+
! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
43+
44+
! FIR: func.func @_QPsb1
45+
! FIR: omp.parallel {
46+
! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref<i32>, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
47+
! FIR: fir.allocmem
48+
! FIR: omp.wsloop {
49+
! FIR: omp.loop_nest
50+
! FIR: omp.single nowait {
51+
! FIR: fir.call @_FortranAAssign
52+
! FIR: fir.freemem
53+
! FIR: omp.terminator
54+
! FIR: }
55+
! FIR: omp.barrier
56+
! FIR: omp.terminator
57+
! FIR: }
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
11+
12+
subroutine sb1(a, x)
13+
integer :: a
14+
integer :: x(:)
15+
!$omp parallel workshare
16+
x = a
17+
!$omp end parallel workshare
18+
end subroutine
19+
20+
! HLFIR: omp.parallel {
21+
! HLFIR: omp.workshare {
22+
! HLFIR: %[[SCALAR:.*]] = fir.load %1#0 : !fir.ref<i32>
23+
! HLFIR: hlfir.assign %[[SCALAR]] to
24+
! HLFIR: omp.terminator
25+
! HLFIR: }
26+
! HLFIR: omp.terminator
27+
! HLFIR: }
28+
29+
! FIR: omp.parallel {
30+
! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32
31+
! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) {
32+
! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32>
33+
! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32>
34+
! FIR: omp.terminator
35+
! FIR: }
36+
! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32>
37+
! FIR: %6:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
38+
! FIR: omp.wsloop nowait {
39+
! FIR: omp.loop_nest (%arg2) : index = (%c1) to (%6#1) inclusive step (%c1) {
40+
! FIR: fir.store %[[SCALAR_RELOAD]]
41+
! FIR: omp.yield
42+
! FIR: }
43+
! FIR: }
44+
! FIR: omp.barrier
45+
! FIR: omp.terminator
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3
11+
12+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0
13+
!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0
14+
15+
program test
16+
real :: arr_01(10)
17+
!$omp parallel workshare
18+
arr_01 = arr_01*2
19+
!$omp end parallel workshare
20+
end program
21+
22+
! HLFIR-O3: omp.parallel {
23+
! HLFIR-O3: omp.workshare {
24+
! HLFIR-O3: hlfir.elemental
25+
! HLFIR-O3: hlfir.assign
26+
! HLFIR-O3: hlfir.destroy
27+
! HLFIR-O3: omp.terminator
28+
! HLFIR-O3: omp.terminator
29+
30+
! FIR-O3: omp.parallel {
31+
! FIR-O3: omp.wsloop nowait {
32+
! FIR-O3: omp.loop_nest
33+
! FIR-O3: omp.barrier
34+
! FIR-O3: omp.terminator
35+
36+
! HLFIR-O0: omp.parallel {
37+
! HLFIR-O0: omp.workshare {
38+
! HLFIR-O0: hlfir.elemental
39+
! HLFIR-O0: hlfir.assign
40+
! HLFIR-O0: hlfir.destroy
41+
! HLFIR-O0: omp.terminator
42+
! HLFIR-O0: omp.terminator
43+
44+
! Check the copyprivate copy function
45+
! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
46+
! FIR-O0: fir.load %[[SRC]]
47+
! FIR-O0: fir.store {{.*}} to %[[DST]]
48+
49+
! Check that we properly handle the temporary array
50+
! FIR-O0: omp.parallel {
51+
! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>>
52+
! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_
53+
! FIR-O0: fir.allocmem
54+
! FIR-O0: fir.store
55+
! FIR-O0: omp.terminator
56+
! FIR-O0: fir.load %[[CP]]
57+
! FIR-O0: omp.wsloop {
58+
! FIR-O0: omp.loop_nest
59+
! FIR-O0: omp.yield
60+
! FIR-O0: omp.single nowait {
61+
! FIR-O0: fir.call @_FortranAAssign
62+
! FIR-O0: fir.freemem
63+
! FIR-O0: omp.terminator
64+
! FIR-O0: omp.barrier
65+
! FIR-O0: omp.terminator

0 commit comments

Comments
 (0)