
Commit 57726c4

[Flang][OpenMP] DISTRIBUTE PARALLEL DO SIMD lowering (#106211)
This patch adds PFT-to-MLIR lowering support for `distribute parallel do simd` composite constructs.
1 parent 9c8ce5f commit 57726c4
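As a quick illustration of what this lowering handles, here is a minimal sketch (not part of the commit; the subroutine name and loop variable are made up) of the construct and the wrapper-op nesting it produces, following the FileCheck patterns in the new test added below:

! Minimal sketch, assuming an enclosing TEAMS region as in the new test.
! Expected op nesting (per the CHECK lines in the test):
!   omp.parallel { omp.distribute { omp.wsloop { omp.simd { omp.loop_nest ... } } } }
subroutine sketch_distribute_parallel_do_simd()
  integer :: i
  !$omp teams
  !$omp distribute parallel do simd
  do i = 1, 10
  end do
  !$omp end distribute parallel do simd
  !$omp end teams
end subroutine sketch_distribute_parallel_do_simd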

4 files changed: +587 −9 lines changed

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 71 additions & 1 deletion
@@ -2150,8 +2150,78 @@ static void genCompositeDistributeParallelDoSimd(
     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
     mlir::Location loc, const ConstructQueue &queue,
     ConstructQueue::const_iterator item) {
+  lower::StatementContext stmtCtx;
+
   assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
-  TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
+  ConstructQueue::const_iterator distributeItem = item;
+  ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
+  ConstructQueue::const_iterator doItem = std::next(parallelItem);
+  ConstructQueue::const_iterator simdItem = std::next(doItem);
+
+  // Create parent omp.parallel first.
+  mlir::omp::ParallelOperands parallelClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
+  llvm::SmallVector<mlir::Type> parallelReductionTypes;
+  genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
+                     parallelClauseOps, parallelReductionTypes,
+                     parallelReductionSyms);
+
+  DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
+                           /*shouldCollectPreDeterminedSymbols=*/true,
+                           /*useDelayedPrivatization=*/true, &symTable);
+  dsp.processStep1(&parallelClauseOps);
+
+  genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
+                parallelClauseOps, parallelReductionSyms,
+                parallelReductionTypes, &dsp, /*isComposite=*/true);
+
+  // Clause processing.
+  mlir::omp::DistributeOperands distributeClauseOps;
+  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
+                       loc, distributeClauseOps);
+
+  mlir::omp::WsloopOperands wsloopClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
+  llvm::SmallVector<mlir::Type> wsloopReductionTypes;
+  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
+                   wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms);
+
+  mlir::omp::SimdOperands simdClauseOps;
+  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
+
+  mlir::omp::LoopNestOperands loopNestClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> iv;
+  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
+                     loopNestClauseOps, iv);
+
+  // Operation creation.
+  // TODO: Populate entry block arguments with private variables.
+  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
+      converter, loc, distributeClauseOps, /*blockArgTypes=*/{});
+  distributeOp.setComposite(/*val=*/true);
+
+  // TODO: Add private variables to entry block arguments.
+  auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
+      converter, loc, wsloopClauseOps, wsloopReductionTypes);
+  wsloopOp.setComposite(/*val=*/true);
+
+  // TODO: Populate entry block arguments with reduction and private variables.
+  auto simdOp = genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps,
+                                                /*blockArgTypes=*/{});
+  simdOp.setComposite(/*val=*/true);
+
+  // Construct wrapper entry block list and associated symbols. It is important
+  // that the symbol order and the block argument order match, so that the
+  // symbol-value bindings created are correct.
+  auto &wrapperSyms = wsloopReductionSyms;
+
+  auto wrapperArgs = llvm::to_vector(llvm::concat<mlir::BlockArgument>(
+      distributeOp.getRegion().getArguments(),
+      wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments()));
+
+  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
+                loopNestClauseOps, iv, wrapperSyms, wrapperArgs,
+                llvm::omp::Directive::OMPD_distribute_parallel_do_simd, dsp);
 }
 
 static void genCompositeDistributeSimd(lower::AbstractConverter &converter,
Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+! This test checks lowering of OpenMP DISTRIBUTE PARALLEL DO SIMD composite
+! constructs.
+
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_num_threads(
+subroutine distribute_parallel_do_simd_num_threads()
+  !$omp teams
+
+  ! CHECK: omp.parallel num_threads({{.*}}) private({{.*}}) {
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd num_threads(10)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_num_threads
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_dist_schedule(
+subroutine distribute_parallel_do_simd_dist_schedule()
+  !$omp teams
+
+  ! CHECK: omp.parallel private({{.*}}) {
+  ! CHECK: omp.distribute dist_schedule_static dist_schedule_chunk_size({{.*}}) {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd dist_schedule(static, 4)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_dist_schedule
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_schedule(
+subroutine distribute_parallel_do_simd_schedule()
+  !$omp teams
+
+  ! CHECK: omp.parallel private({{.*}}) {
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop schedule(static = {{.*}}) {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd schedule(static, 4)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_schedule
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_simdlen(
+subroutine distribute_parallel_do_simd_simdlen()
+  !$omp teams
+
+  ! CHECK: omp.parallel private({{.*}}) {
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd simdlen(4) {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd simdlen(4)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_simdlen
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_private(
+subroutine distribute_parallel_do_simd_private()
+  ! CHECK: %[[INDEX_ALLOC:.*]] = fir.alloca i32
+  ! CHECK: %[[INDEX:.*]]:2 = hlfir.declare %[[INDEX_ALLOC]]
+  ! CHECK: %[[X_ALLOC:.*]] = fir.alloca i64
+  ! CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_ALLOC]]
+  integer(8) :: x
+
+  ! CHECK: omp.teams {
+  !$omp teams
+
+  ! CHECK: omp.parallel private(@{{.*}} %[[X]]#0 -> %[[X_ARG:.*]] : !fir.ref<i64>,
+  ! CHECK-SAME: @{{.*}} %[[INDEX]]#0 -> %[[INDEX_ARG:.*]] : !fir.ref<i32>) {
+  ! CHECK: %[[X_PRIV:.*]]:2 = hlfir.declare %[[X_ARG]]
+  ! CHECK: %[[INDEX_PRIV:.*]]:2 = hlfir.declare %[[INDEX_ARG]]
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd private(x)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_private
