Skip to content

Commit a0f2307

Browse files
committed
Add workshare loop wrapper lowerings
Squashed commits: Bufferize test; Bufferize test; Bufferize test; Add test for should-use-workshare lowering; Add integration test for workshare; One more integration test; Add test for cfg workshare bufferization; Fix tests; Test coverage for all changes; Integration tests bufferize fix.
1 parent fbd9ab0 commit a0f2307

File tree

8 files changed

+430
-4
lines changed

8 files changed

+430
-4
lines changed

flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
2727
#include "flang/Optimizer/HLFIR/HLFIROps.h"
2828
#include "flang/Optimizer/HLFIR/Passes.h"
29+
#include "flang/Optimizer/OpenMP/Passes.h"
2930
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
3031
#include "mlir/IR/Dominance.h"
3132
#include "mlir/IR/PatternMatch.h"
@@ -792,7 +793,8 @@ struct ElementalOpConversion
792793
// Generate a loop nest looping around the fir.elemental shape and clone
793794
// fir.elemental region inside the inner loop.
794795
hlfir::LoopNest loopNest =
795-
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
796+
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
797+
flangomp::shouldUseWorkshareLowering(elemental));
796798
auto insPt = builder.saveInsertionPoint();
797799
builder.setInsertionPointToStart(loopNest.body);
798800
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
2121
#include "flang/Optimizer/HLFIR/HLFIROps.h"
2222
#include "flang/Optimizer/HLFIR/Passes.h"
23+
#include "flang/Optimizer/OpenMP/Passes.h"
2324
#include "flang/Optimizer/Transforms/Utils.h"
2425
#include "mlir/Dialect/Func/IR/FuncOps.h"
2526
#include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
482483
// Generate a loop nest looping around the hlfir.elemental shape and clone
483484
// hlfir.elemental region inside the inner loop
484485
hlfir::LoopNest loopNest =
485-
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
486+
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
487+
flangomp::shouldUseWorkshareLowering(elemental));
486488
builder.setInsertionPointToStart(loopNest.body);
487489
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
488490
loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
553555
llvm::SmallVector<mlir::Value> extents =
554556
hlfir::getIndexExtents(loc, builder, shape);
555557
hlfir::LoopNest loopNest =
556-
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
558+
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
559+
flangomp::shouldUseWorkshareLowering(assign));
557560
builder.setInsertionPointToStart(loopNest.body);
558561
auto arrayElement =
559562
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -651,7 +654,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
651654
llvm::SmallVector<mlir::Value> extents =
652655
hlfir::getIndexExtents(loc, builder, shape);
653656
hlfir::LoopNest loopNest =
654-
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
657+
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
658+
flangomp::shouldUseWorkshareLowering(assign));
655659
builder.setInsertionPointToStart(loopNest.body);
656660
auto rhsArrayElement =
657661
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
2+
3+
// CHECK-LABEL: func.func @simple(
4+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
5+
// CHECK: omp.parallel {
6+
// CHECK: omp.workshare {
7+
// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
8+
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
9+
// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
10+
// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
11+
// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
12+
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
13+
// CHECK: %[[VAL_7:.*]] = arith.constant true
14+
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
15+
// CHECK: omp.workshare.loop_wrapper {
16+
// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
17+
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
18+
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
19+
// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
20+
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
21+
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
22+
// CHECK: omp.yield
23+
// CHECK: }
24+
// CHECK: }
25+
// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
26+
// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
27+
// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
28+
// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
29+
// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
30+
// CHECK: omp.terminator
31+
// CHECK: }
32+
// CHECK: omp.terminator
33+
// CHECK: }
34+
// CHECK: return
35+
// CHECK: }
36+
// Test input for the --bufferize-hlfir run line: an unordered hlfir.elemental
// nested in omp.workshare inside omp.parallel, followed by the assignment of
// its result back to %array and the destruction of the temporary expression.
// The FileCheck expectations above require the elemental to become an
// omp.workshare.loop_wrapper wrapping an omp.loop_nest.
func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
37+
omp.parallel {
38+
omp.workshare {
39+
%c42 = arith.constant 42 : index
40+
%c1_i32 = arith.constant 1 : i32
41+
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
42+
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
43+
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
44+
^bb0(%i: index):
45+
%ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
46+
%val = fir.load %ref : !fir.ref<i32>
47+
%sub = arith.subi %val, %c1_i32 : i32
48+
hlfir.yield_element %sub : i32
49+
}
50+
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
51+
hlfir.destroy %elemental : !hlfir.expr<42xi32>
52+
omp.terminator
53+
}
54+
omp.terminator
55+
}
56+
return
57+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
11+
12+
! Test input: copies the assumed-shape array y into x inside a combined
! "parallel workshare" construct. The expectations below require the -emit-fir
! output to contain an omp.wsloop nowait wrapping an omp.loop_nest, followed by
! an omp.barrier.
subroutine sb1(x, y)
13+
integer :: x(:)
14+
integer :: y(:)
15+
!$omp parallel workshare
16+
x = y
17+
!$omp end parallel workshare
18+
end subroutine
19+
20+
! HLFIR: omp.parallel {
21+
! HLFIR: omp.workshare {
22+
! HLFIR: hlfir.assign
23+
! HLFIR: omp.terminator
24+
! HLFIR: }
25+
! HLFIR: omp.terminator
26+
! HLFIR: }
27+
28+
! FIR: omp.parallel {
29+
! FIR: omp.wsloop nowait {
30+
! FIR: omp.loop_nest
31+
! FIR: }
32+
! FIR: omp.barrier
33+
! FIR: omp.terminator
34+
! FIR: }
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
11+
12+
! Test input: element-wise z = a * x + y on assumed-shape arrays with a scalar
! multiplier, inside a combined "parallel workshare" construct. The expectations
! below require two unordered hlfir.elemental ops in the -emit-hlfir output.
subroutine sb1(a, x, y, z)
13+
integer :: a
14+
integer :: x(:)
15+
integer :: y(:)
16+
integer :: z(:)
17+
!$omp parallel workshare
18+
z = a * x + y
19+
!$omp end parallel workshare
20+
end subroutine
21+
22+
! HLFIR: func.func @_QPsb1
23+
! HLFIR: omp.parallel {
24+
! HLFIR: omp.workshare {
25+
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
26+
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
27+
! HLFIR: hlfir.assign
28+
! HLFIR: hlfir.destroy
29+
! HLFIR: hlfir.destroy
30+
! HLFIR-NOT: omp.barrier
31+
! HLFIR: omp.terminator
32+
! HLFIR: }
33+
! HLFIR-NOT: omp.barrier
34+
! HLFIR: omp.terminator
35+
! HLFIR: }
36+
! HLFIR: return
37+
! HLFIR: }
38+
! HLFIR:}
39+
40+
41+
! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
42+
! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
43+
44+
! Expected -emit-fir output for sb1. The copyprivate operands are matched with
! wildcards instead of literal SSA numbers, which are not stable across
! unrelated compiler changes.
! FIR: func.func @_QPsb1
45+
! FIR: omp.parallel {
46+
! FIR: omp.single copyprivate(%{{.*}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{.*}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
47+
! FIR: fir.allocmem
48+
! FIR: omp.wsloop {
49+
! FIR: omp.loop_nest
50+
! FIR: omp.single nowait {
51+
! FIR: fir.call @_FortranAAssign
52+
! FIR: fir.freemem
53+
! FIR: omp.terminator
54+
! FIR: }
55+
! FIR: omp.barrier
56+
! FIR: omp.terminator
57+
! FIR: }
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
11+
12+
! Test input: assigns the scalar a to the whole assumed-shape array x inside a
! combined "parallel workshare" construct.
subroutine sb1(a, x)
13+
integer :: a
14+
integer :: x(:)
15+
!$omp parallel workshare
16+
x = a
17+
!$omp end parallel workshare
18+
end subroutine
19+
20+
! Expected -emit-hlfir output for sb1. The operand of the scalar load is matched
! with a wildcard instead of a literal SSA number, which is not stable across
! unrelated compiler changes.
! HLFIR: omp.parallel {
21+
! HLFIR: omp.workshare {
22+
! HLFIR: %[[SCALAR:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
23+
! HLFIR: hlfir.assign %[[SCALAR]] to
24+
! HLFIR: omp.terminator
25+
! HLFIR: }
26+
! HLFIR: omp.terminator
27+
! HLFIR: }
28+
29+
! Expected -emit-fir output for sb1. The scalar is loaded once inside an
! omp.single, passed to the other threads through copyprivate, then reloaded
! and stored in the work-shared loop. The box dimensions and the loop bounds
! are captured as FileCheck variables instead of literal SSA names, which are
! not stable across unrelated compiler changes; ONE keeps the original
! requirement that the lower bound and the step are the same value.
! FIR: omp.parallel {
30+
! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32
31+
! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) {
32+
! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32>
33+
! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32>
34+
! FIR: omp.terminator
35+
! FIR: }
36+
! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32>
37+
! FIR: %[[DIMS:.*]]:3 = fir.box_dims %{{.*}}, %{{.*}} : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
38+
! FIR: omp.wsloop nowait {
39+
! FIR: omp.loop_nest (%{{.*}}) : index = (%[[ONE:.*]]) to (%[[DIMS]]#1) inclusive step (%[[ONE]]) {
40+
! FIR: fir.store %[[SCALAR_RELOAD]]
41+
! FIR: omp.yield
42+
! FIR: }
43+
! FIR: }
44+
! FIR: omp.barrier
45+
! FIR: omp.terminator
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g., from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3
10+
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3
11+
12+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0
13+
!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0
14+
15+
! Test input: doubles every element of a fixed-size real array in place inside
! a combined "parallel workshare" construct. The run lines above compile it at
! both -O0 and -O3.
program test
16+
real :: arr_01(10)
17+
!$omp parallel workshare
18+
arr_01 = arr_01*2
19+
!$omp end parallel workshare
20+
end program
21+
22+
! HLFIR-O3: omp.parallel {
23+
! HLFIR-O3: omp.workshare {
24+
! HLFIR-O3: hlfir.elemental
25+
! HLFIR-O3: hlfir.assign
26+
! HLFIR-O3: hlfir.destroy
27+
! HLFIR-O3: omp.terminator
28+
! HLFIR-O3: omp.terminator
29+
30+
! FIR-O3: omp.parallel {
31+
! FIR-O3: omp.wsloop nowait {
32+
! FIR-O3: omp.loop_nest
33+
! FIR-O3: omp.barrier
34+
! FIR-O3: omp.terminator
35+
36+
! HLFIR-O0: omp.parallel {
37+
! HLFIR-O0: omp.workshare {
38+
! HLFIR-O0: hlfir.elemental
39+
! HLFIR-O0: hlfir.assign
40+
! HLFIR-O0: hlfir.destroy
41+
! HLFIR-O0: omp.terminator
42+
! HLFIR-O0: omp.terminator
43+
44+
! Check the copyprivate copy function
45+
! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
46+
! FIR-O0: fir.load %[[SRC]]
47+
! FIR-O0: fir.store {{.*}} to %[[DST]]
48+
49+
! Check that we properly handle the temporary array
50+
! FIR-O0: omp.parallel {
51+
! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>>
52+
! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_
53+
! FIR-O0: fir.allocmem
54+
! FIR-O0: fir.store
55+
! FIR-O0: omp.terminator
56+
! FIR-O0: fir.load %[[CP]]
57+
! FIR-O0: omp.wsloop {
58+
! FIR-O0: omp.loop_nest
59+
! FIR-O0: omp.yield
60+
! FIR-O0: omp.single nowait {
61+
! FIR-O0: fir.call @_FortranAAssign
62+
! FIR-O0: fir.freemem
63+
! FIR-O0: omp.terminator
64+
! FIR-O0: omp.barrier
65+
! FIR-O0: omp.terminator

0 commit comments

Comments
 (0)