Commit e16c878

[Flang][OpenMP] DISTRIBUTE PARALLEL DO SIMD lowering
This patch adds PFT-to-MLIR lowering support for the `distribute parallel do simd` composite construct.
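For illustration, the operation nesting this lowering produces looks roughly as follows. This is an abbreviated sketch, not verbatim IR: clause operands, entry block arguments, region terminators, and the composite markers set via setComposite are omitted, and the enclosing omp.teams comes from the TEAMS construct wrapping the loop in the new tests.

    omp.teams {
      omp.parallel private(...) {
        omp.distribute {
          omp.wsloop {
            omp.simd {
              omp.loop_nest (...) {
                // loop body
              }
            }
          }
        }
      }
    }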
1 parent 5b654d9

4 files changed: +587 -9 lines changed

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 71 additions & 1 deletion
@@ -2144,8 +2144,78 @@ static void genCompositeDistributeParallelDoSimd(
     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
     mlir::Location loc, const ConstructQueue &queue,
     ConstructQueue::const_iterator item) {
+  lower::StatementContext stmtCtx;
+
   assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
-  TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
+  ConstructQueue::const_iterator distributeItem = item;
+  ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
+  ConstructQueue::const_iterator doItem = std::next(parallelItem);
+  ConstructQueue::const_iterator simdItem = std::next(doItem);
+
+  // Create parent omp.parallel first.
+  mlir::omp::ParallelOperands parallelClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
+  llvm::SmallVector<mlir::Type> parallelReductionTypes;
+  genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
+                     parallelClauseOps, parallelReductionTypes,
+                     parallelReductionSyms);
+
+  DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
+                           /*shouldCollectPreDeterminedSymbols=*/true,
+                           /*useDelayedPrivatization=*/true, &symTable);
+  dsp.processStep1(&parallelClauseOps);
+
+  genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
+                parallelClauseOps, parallelReductionSyms,
+                parallelReductionTypes, &dsp, /*isComposite=*/true);
+
+  // Clause processing.
+  mlir::omp::DistributeOperands distributeClauseOps;
+  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
+                       loc, distributeClauseOps);
+
+  mlir::omp::WsloopOperands wsloopClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
+  llvm::SmallVector<mlir::Type> wsloopReductionTypes;
+  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
+                   wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms);
+
+  mlir::omp::SimdOperands simdClauseOps;
+  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
+
+  mlir::omp::LoopNestOperands loopNestClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> iv;
+  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
+                     loopNestClauseOps, iv);
+
+  // Operation creation.
+  // TODO: Populate entry block arguments with private variables.
+  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
+      converter, loc, distributeClauseOps, /*blockArgTypes=*/{});
+  distributeOp.setComposite(/*val=*/true);
+
+  // TODO: Add private variables to entry block arguments.
+  auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
+      converter, loc, wsloopClauseOps, wsloopReductionTypes);
+  wsloopOp.setComposite(/*val=*/true);
+
+  // TODO: Populate entry block arguments with reduction and private variables.
+  auto simdOp = genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps,
+                                                /*blockArgTypes=*/{});
+  simdOp.setComposite(/*val=*/true);
+
+  // Construct wrapper entry block list and associated symbols. It is important
+  // that the symbol order and the block argument order match, so that the
+  // symbol-value bindings created are correct.
+  auto &wrapperSyms = wsloopReductionSyms;
+
+  auto wrapperArgs = llvm::to_vector(llvm::concat<mlir::BlockArgument>(
+      distributeOp.getRegion().getArguments(),
+      wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments()));
+
+  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
+                loopNestClauseOps, iv, wrapperSyms, wrapperArgs,
+                llvm::omp::Directive::OMPD_distribute_parallel_do_simd, dsp);
 }
 
 static void genCompositeDistributeSimd(lower::AbstractConverter &converter,

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+! This test checks lowering of OpenMP DISTRIBUTE PARALLEL DO SIMD composite
+! constructs.
+
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_num_threads(
+subroutine distribute_parallel_do_simd_num_threads()
+  !$omp teams
+
+  ! CHECK: omp.parallel num_threads({{.*}}) private({{.*}}) {
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd num_threads(10)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_num_threads
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_dist_schedule(
+subroutine distribute_parallel_do_simd_dist_schedule()
+  !$omp teams
+
+  ! CHECK: omp.parallel private({{.*}}) {
+  ! CHECK: omp.distribute dist_schedule_static dist_schedule_chunk_size({{.*}}) {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd dist_schedule(static, 4)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_dist_schedule
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_schedule(
+subroutine distribute_parallel_do_simd_schedule()
+  !$omp teams
+
+  ! CHECK: omp.parallel private({{.*}}) {
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop schedule(static = {{.*}}) {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd schedule(static, 4)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_schedule
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_simdlen(
+subroutine distribute_parallel_do_simd_simdlen()
+  !$omp teams
+
+  ! CHECK: omp.parallel private({{.*}}) {
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd simdlen(4) {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd simdlen(4)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_simdlen
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_private(
+subroutine distribute_parallel_do_simd_private()
+  ! CHECK: %[[INDEX_ALLOC:.*]] = fir.alloca i32
+  ! CHECK: %[[INDEX:.*]]:2 = hlfir.declare %[[INDEX_ALLOC]]
+  ! CHECK: %[[X_ALLOC:.*]] = fir.alloca i64
+  ! CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_ALLOC]]
+  integer(8) :: x
+
+  ! CHECK: omp.teams {
+  !$omp teams
+
+  ! CHECK: omp.parallel private(@{{.*}} %[[X]]#0 -> %[[X_ARG:.*]] : !fir.ref<i64>,
+  ! CHECK-SAME: @{{.*}} %[[INDEX]]#0 -> %[[INDEX_ARG:.*]] : !fir.ref<i32>) {
+  ! CHECK: %[[X_PRIV:.*]]:2 = hlfir.declare %[[X_ARG]]
+  ! CHECK: %[[INDEX_PRIV:.*]]:2 = hlfir.declare %[[INDEX_ARG]]
+  ! CHECK: omp.distribute {
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.simd {
+  ! CHECK-NEXT: omp.loop_nest
+  !$omp distribute parallel do simd private(x)
+  do index_ = 1, 10
+  end do
+  !$omp end distribute parallel do simd
+
+  !$omp end teams
+end subroutine distribute_parallel_do_simd_private
