Skip to content

Commit 8f264db

Browse files
DavidTruby authored and yuxuanchen1997 committed
[flang][OpenMP] Implement lastprivate with collapse (#99500)
This patch enables the lastprivate clause to be used in the presence of the collapse clause. Note: because of the way lastprivate is currently implemented, this adds a large number of compare instructions to the end of every iteration of the loop. This is clearly not optimal, but lastprivate in general will need to be re-implemented to prevent it; that is planned as part of the delayed privatization work. The current implementation is just a stop-gap measure, since generating sub-optimal but working code is better than crashing out.
1 parent 11fca5a commit 8f264db

File tree

4 files changed

+276
-32
lines changed

4 files changed

+276
-32
lines changed

flang/lib/Lower/OpenMP/DataSharingProcessor.cpp

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ void DataSharingProcessor::collectOmpObjectListSymbol(
139139
}
140140

141141
void DataSharingProcessor::collectSymbolsForPrivatization() {
142-
bool hasCollapse = false;
143142
for (const omp::Clause &clause : clauses) {
144143
if (const auto &privateClause =
145144
std::get_if<omp::clause::Private>(&clause.u)) {
@@ -153,16 +152,11 @@ void DataSharingProcessor::collectSymbolsForPrivatization() {
153152
const ObjectList &objects = std::get<ObjectList>(lastPrivateClause->t);
154153
collectOmpObjectListSymbol(objects, explicitlyPrivatizedSymbols);
155154
hasLastPrivateOp = true;
156-
} else if (std::get_if<omp::clause::Collapse>(&clause.u)) {
157-
hasCollapse = true;
158155
}
159156
}
160157

161158
for (auto *sym : explicitlyPrivatizedSymbols)
162159
allPrivatizedSymbols.insert(sym);
163-
164-
if (hasCollapse && hasLastPrivateOp)
165-
TODO(converter.getCurrentLocation(), "Collapse clause with lastprivate");
166160
}
167161

168162
bool DataSharingProcessor::needBarrier() {
@@ -225,28 +219,39 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
225219
mlir::Operation *lastOper = loopOp.getRegion().back().getTerminator();
226220
firOpBuilder.setInsertionPoint(lastOper);
227221

228-
mlir::Value iv = loopOp.getIVs()[0];
229-
mlir::Value ub = loopOp.getUpperBound()[0];
230-
mlir::Value step = loopOp.getStep()[0];
231-
232-
// v = iv + step
233-
// cmp = step < 0 ? v < ub : v > ub
234-
mlir::Value v = firOpBuilder.create<mlir::arith::AddIOp>(loc, iv, step);
235-
mlir::Value zero =
236-
firOpBuilder.createIntegerConstant(loc, step.getType(), 0);
237-
mlir::Value negativeStep = firOpBuilder.create<mlir::arith::CmpIOp>(
238-
loc, mlir::arith::CmpIPredicate::slt, step, zero);
239-
mlir::Value vLT = firOpBuilder.create<mlir::arith::CmpIOp>(
240-
loc, mlir::arith::CmpIPredicate::slt, v, ub);
241-
mlir::Value vGT = firOpBuilder.create<mlir::arith::CmpIOp>(
242-
loc, mlir::arith::CmpIPredicate::sgt, v, ub);
243-
mlir::Value cmpOp = firOpBuilder.create<mlir::arith::SelectOp>(
244-
loc, negativeStep, vLT, vGT);
222+
mlir::Value cmpOp;
223+
llvm::SmallVector<mlir::Value> vs;
224+
vs.reserve(loopOp.getIVs().size());
225+
for (auto [iv, ub, step] : llvm::zip_equal(
226+
loopOp.getIVs(), loopOp.getUpperBound(), loopOp.getStep())) {
227+
// v = iv + step
228+
// cmp = step < 0 ? v < ub : v > ub
229+
mlir::Value v = firOpBuilder.create<mlir::arith::AddIOp>(loc, iv, step);
230+
vs.push_back(v);
231+
mlir::Value zero =
232+
firOpBuilder.createIntegerConstant(loc, step.getType(), 0);
233+
mlir::Value negativeStep = firOpBuilder.create<mlir::arith::CmpIOp>(
234+
loc, mlir::arith::CmpIPredicate::slt, step, zero);
235+
mlir::Value vLT = firOpBuilder.create<mlir::arith::CmpIOp>(
236+
loc, mlir::arith::CmpIPredicate::slt, v, ub);
237+
mlir::Value vGT = firOpBuilder.create<mlir::arith::CmpIOp>(
238+
loc, mlir::arith::CmpIPredicate::sgt, v, ub);
239+
mlir::Value icmpOp = firOpBuilder.create<mlir::arith::SelectOp>(
240+
loc, negativeStep, vLT, vGT);
241+
242+
if (cmpOp) {
243+
cmpOp = firOpBuilder.create<mlir::arith::AndIOp>(loc, cmpOp, icmpOp);
244+
} else {
245+
cmpOp = icmpOp;
246+
}
247+
}
245248

246249
auto ifOp = firOpBuilder.create<fir::IfOp>(loc, cmpOp, /*else*/ false);
247250
firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
248-
assert(loopIV && "loopIV was not set");
249-
firOpBuilder.createStoreWithConvert(loc, v, loopIV);
251+
for (auto [v, loopIV] : llvm::zip_equal(vs, loopIVs)) {
252+
assert(loopIV && "loopIV was not set");
253+
firOpBuilder.createStoreWithConvert(loc, v, loopIV);
254+
}
250255
lastPrivIP = firOpBuilder.saveInsertionPoint();
251256
} else if (mlir::isa<mlir::omp::SectionsOp>(op)) {
252257
// Already handled by genOMP()

flang/lib/Lower/OpenMP/DataSharingProcessor.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class DataSharingProcessor {
7171
bool hasLastPrivateOp;
7272
mlir::OpBuilder::InsertPoint lastPrivIP;
7373
mlir::OpBuilder::InsertPoint insPt;
74-
mlir::Value loopIV;
74+
llvm::SmallVector<mlir::Value> loopIVs;
7575
// Symbols in private, firstprivate, and/or lastprivate clauses.
7676
llvm::SetVector<const semantics::Symbol *> explicitlyPrivatizedSymbols;
7777
llvm::SetVector<const semantics::Symbol *> defaultSymbols;
@@ -147,10 +147,7 @@ class DataSharingProcessor {
147147
void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr);
148148
void processStep2(mlir::Operation *op, bool isLoop);
149149

150-
void setLoopIV(mlir::Value iv) {
151-
assert(!loopIV && "Loop iteration variable already set");
152-
loopIV = iv;
153-
}
150+
void pushLoopIV(mlir::Value iv) { loopIVs.push_back(iv); }
154151

155152
const llvm::SetVector<const semantics::Symbol *> &
156153
getAllSymbolsToPrivatize() const {

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -677,8 +677,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
677677
assert(tempDsp.has_value());
678678
tempDsp->processStep2(privatizationTopLevelOp, isLoop);
679679
} else {
680-
if (isLoop && regionArgs.size() > 0)
681-
info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0]));
680+
if (isLoop && regionArgs.size() > 0) {
681+
for (const auto &regionArg : regionArgs) {
682+
info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg));
683+
}
684+
}
682685
info.dsp->processStep2(privatizationTopLevelOp, isLoop);
683686
}
684687
}
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
! This test checks lowering of OpenMP parallel DO, with the loop bound being
2+
! a lastprivate variable
3+
4+
! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
5+
6+
! CHECK: func @_QPomp_do_lastprivate(%[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "a"})
7+
subroutine omp_do_lastprivate(a)
8+
! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFomp_do_lastprivateEa"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
9+
integer::a
10+
integer::n
11+
n = a+1
12+
!$omp parallel do lastprivate(a)
13+
! CHECK: omp.parallel {
14+
15+
! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_do_lastprivateEa"}
16+
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
17+
18+
! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
19+
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
20+
21+
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
22+
! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
23+
! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
24+
! CHECK-NEXT: omp.wsloop {
25+
! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
26+
! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
27+
! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
28+
! CHECK: %[[NEXT_ARG1:.*]] = arith.addi %[[ARG1]], %[[STEP]] : i32
29+
! CHECK: %[[ZERO:.*]] = arith.constant 0 : i32
30+
! CHECK: %[[STEP_DIR:.*]] = arith.cmpi slt, %[[STEP]], %[[ZERO]] : i32
31+
! CHECK: %[[LT_UB:.*]] = arith.cmpi slt, %[[NEXT_ARG1]], %[[UB]] : i32
32+
! CHECK: %[[GT_UB:.*]] = arith.cmpi sgt, %[[NEXT_ARG1]], %[[UB]] : i32
33+
! CHECK: %[[SEL:.*]] = arith.select %[[STEP_DIR]], %[[LT_UB]], %[[GT_UB]] : i1
34+
! CHECK: fir.if %[[SEL]] {
35+
! CHECK: fir.store %[[NEXT_ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
36+
! CHECK: %[[A_PVT_LOAD:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
37+
! CHECK: hlfir.assign %[[A_PVT_LOAD]] to %[[ARG0_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
38+
! CHECK: }
39+
40+
! CHECK-NEXT: omp.yield
41+
! CHECK-NEXT: }
42+
! CHECK-NEXT: omp.terminator
43+
! CHECK-NEXT: }
44+
do i=1, a
45+
call foo(i, a)
46+
end do
47+
!$omp end parallel do
48+
!CHECK: fir.call @_QPbar(%[[ARG0_DECL]]#1) {{.*}}: (!fir.ref<i32>) -> ()
49+
call bar(a)
50+
end subroutine omp_do_lastprivate
51+
52+
! CHECK: func @_QPomp_do_lastprivate2(%[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"})
53+
subroutine omp_do_lastprivate2(a, n)
54+
! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFomp_do_lastprivate2Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
55+
! CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFomp_do_lastprivate2En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
56+
integer::a
57+
integer::n
58+
n = a+1
59+
!$omp parallel do lastprivate(a, n)
60+
! CHECK: omp.parallel {
61+
62+
! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, {{.*}}}
63+
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
64+
65+
! CHECK: %[[N_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "n", pinned, uniq_name = "_QFomp_do_lastprivate2En"}
66+
! CHECK: %[[N_PVT_DECL:.*]]:2 = hlfir.declare %[[N_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
67+
68+
! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
69+
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
70+
71+
! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
72+
! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref<i32>
73+
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
74+
! CHECK: omp.wsloop {
75+
! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
76+
! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
77+
! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
78+
! CHECK: %[[NEXT_ARG2:.*]] = arith.addi %[[ARG2]], %[[STEP]] : i32
79+
! CHECK: %[[ZERO:.*]] = arith.constant 0 : i32
80+
! CHECK: %[[STEP_DIR:.*]] = arith.cmpi slt, %[[STEP]], %[[ZERO]] : i32
81+
! CHECK: %[[LT_UB:.*]] = arith.cmpi slt, %[[NEXT_ARG2]], %[[UB]] : i32
82+
! CHECK: %[[GT_UB:.*]] = arith.cmpi sgt, %[[NEXT_ARG2]], %[[UB]] : i32
83+
! CHECK: %[[SEL:.*]] = arith.select %[[STEP_DIR]], %[[LT_UB]], %[[GT_UB]] : i1
84+
! CHECK: fir.if %[[SEL]] {
85+
! CHECK: fir.store %[[NEXT_ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
86+
! CHECK: %[[A_PVT_LOAD:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
87+
! CHECK: hlfir.assign %[[A_PVT_LOAD]] to %[[ARG0_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
88+
! CHECK: %[[N_PVT_LOAD:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref<i32>
89+
! CHECK: hlfir.assign %[[N_PVT_LOAD]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
90+
! CHECK: }
91+
92+
! CHECK: omp.yield
93+
! CHECK: omp.terminator
94+
do i= a, n
95+
call foo(i, a)
96+
end do
97+
!$omp end parallel do
98+
!CHECK: fir.call @_QPbar(%[[ARG1_DECL]]#1) {{.*}}: (!fir.ref<i32>) -> ()
99+
call bar(n)
100+
end subroutine omp_do_lastprivate2
101+
102+
! CHECK: func @_QPomp_do_lastprivate_collapse2(%[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "a"})
103+
subroutine omp_do_lastprivate_collapse2(a)
104+
! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
105+
integer::a
106+
!$omp parallel do lastprivate(a) collapse(2)
107+
! CHECK: omp.parallel {
108+
109+
! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_do_lastprivate_collapse2Ea"}
110+
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
111+
112+
! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
113+
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
114+
!
115+
! CHECK: %[[J_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "j", pinned, {{.*}}}
116+
! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
117+
118+
! CHECK: %[[LB1:.*]] = arith.constant 1 : i32
119+
! CHECK-NEXT: %[[UB1:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
120+
! CHECK-NEXT: %[[STEP1:.*]] = arith.constant 1 : i32
121+
! CHECK: %[[LB2:.*]] = arith.constant 1 : i32
122+
! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
123+
! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32
124+
! CHECK-NEXT: omp.wsloop {
125+
! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) {
126+
! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
127+
! CHECK-NEXT: fir.store %[[ARG2]] to %[[J_PVT_DECL]]#1 : !fir.ref<i32>
128+
! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
129+
! CHECK: %[[NEXT_ARG1:.*]] = arith.addi %[[ARG1]], %[[STEP1]] : i32
130+
! CHECK: %[[ZERO1:.*]] = arith.constant 0 : i32
131+
! CHECK: %[[STEP1_END:.*]] = arith.cmpi slt, %[[STEP1]], %[[ZERO1]] : i32
132+
! CHECK: %[[LT_UB1:.*]] = arith.cmpi slt, %[[NEXT_ARG1]], %[[UB1]] : i32
133+
! CHECK: %[[GT_UB1:.*]] = arith.cmpi sgt, %[[NEXT_ARG1]], %[[UB1]] : i32
134+
! CHECK: %[[SEL1:.*]] = arith.select %[[STEP1_END]], %[[LT_UB1]], %[[GT_UB1]] : i1
135+
! CHECK: %[[NEXT_ARG2:.*]] = arith.addi %[[ARG2]], %[[STEP2]] : i32
136+
! CHECK: %[[ZERO2:.*]] = arith.constant 0 : i32
137+
! CHECK: %[[STEP2_END:.*]] = arith.cmpi slt, %[[STEP2]], %[[ZERO2]] : i32
138+
! CHECK: %[[LT_UB2:.*]] = arith.cmpi slt, %[[NEXT_ARG2]], %[[UB2]] : i32
139+
! CHECK: %[[GT_UB2:.*]] = arith.cmpi sgt, %[[NEXT_ARG2]], %[[UB2]] : i32
140+
! CHECK: %[[SEL2:.*]] = arith.select %[[STEP2_END]], %[[LT_UB2]], %[[GT_UB2]] : i1
141+
! CHECK: %[[AND:.*]] = arith.andi %[[SEL1]], %[[SEL2]] : i1
142+
! CHECK: fir.if %[[AND]] {
143+
! CHECK: fir.store %[[NEXT_ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
144+
! CHECK: fir.store %[[NEXT_ARG2]] to %[[J_PVT_DECL]]#1 : !fir.ref<i32>
145+
! CHECK: %[[A_PVT_LOAD:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
146+
! CHECK: hlfir.assign %[[A_PVT_LOAD]] to %[[ARG0_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
147+
! CHECK: }
148+
149+
! CHECK-NEXT: omp.yield
150+
! CHECK-NEXT: }
151+
! CHECK-NEXT: omp.terminator
152+
! CHECK-NEXT: }
153+
do i=1, a
154+
do j=1, a
155+
call foo(i, a)
156+
end do
157+
end do
158+
!$omp end parallel do
159+
!CHECK: fir.call @_QPbar(%[[ARG0_DECL]]#1) {{.*}}: (!fir.ref<i32>) -> ()
160+
call bar(a)
161+
end subroutine omp_do_lastprivate_collapse2
162+
163+
! CHECK: func @_QPomp_do_lastprivate_collapse3(%[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "a"})
164+
subroutine omp_do_lastprivate_collapse3(a)
165+
! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
166+
integer::a
167+
!$omp parallel do lastprivate(a) collapse(3)
168+
! CHECK: omp.parallel {
169+
170+
! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_do_lastprivate_collapse3Ea"}
171+
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
172+
173+
! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
174+
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
175+
176+
! CHECK: %[[J_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "j", pinned, {{.*}}}
177+
! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
178+
179+
! CHECK: %[[K_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "k", pinned, {{.*}}}
180+
! CHECK: %[[K_PVT_DECL:.*]]:2 = hlfir.declare %[[K_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
181+
182+
! CHECK: %[[LB1:.*]] = arith.constant 1 : i32
183+
! CHECK-NEXT: %[[UB1:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
184+
! CHECK-NEXT: %[[STEP1:.*]] = arith.constant 1 : i32
185+
! CHECK: %[[LB2:.*]] = arith.constant 1 : i32
186+
! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
187+
! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32
188+
! CHECK: %[[LB3:.*]] = arith.constant 1 : i32
189+
! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
190+
! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32
191+
! CHECK-NEXT: omp.wsloop {
192+
! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) {
193+
! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
194+
! CHECK-NEXT: fir.store %[[ARG2]] to %[[J_PVT_DECL]]#1 : !fir.ref<i32>
195+
! CHECK-NEXT: fir.store %[[ARG3]] to %[[K_PVT_DECL]]#1 : !fir.ref<i32>
196+
! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
197+
! CHECK: %[[NEXT_ARG1:.*]] = arith.addi %[[ARG1]], %[[STEP1]] : i32
198+
! CHECK: %[[ZERO1:.*]] = arith.constant 0 : i32
199+
! CHECK: %[[STEP1_END:.*]] = arith.cmpi slt, %[[STEP1]], %[[ZERO1]] : i32
200+
! CHECK: %[[LT_UB1:.*]] = arith.cmpi slt, %[[NEXT_ARG1]], %[[UB1]] : i32
201+
! CHECK: %[[GT_UB1:.*]] = arith.cmpi sgt, %[[NEXT_ARG1]], %[[UB1]] : i32
202+
! CHECK: %[[SEL1:.*]] = arith.select %[[STEP1_END]], %[[LT_UB1]], %[[GT_UB1]] : i1
203+
! CHECK: %[[NEXT_ARG2:.*]] = arith.addi %[[ARG2]], %[[STEP2]] : i32
204+
! CHECK: %[[ZERO2:.*]] = arith.constant 0 : i32
205+
! CHECK: %[[STEP2_END:.*]] = arith.cmpi slt, %[[STEP2]], %[[ZERO2]] : i32
206+
! CHECK: %[[LT_UB2:.*]] = arith.cmpi slt, %[[NEXT_ARG2]], %[[UB2]] : i32
207+
! CHECK: %[[GT_UB2:.*]] = arith.cmpi sgt, %[[NEXT_ARG2]], %[[UB2]] : i32
208+
! CHECK: %[[SEL2:.*]] = arith.select %[[STEP2_END]], %[[LT_UB2]], %[[GT_UB2]] : i1
209+
! CHECK: %[[AND1:.*]] = arith.andi %[[SEL1]], %[[SEL2]] : i1
210+
! CHECK: %[[NEXT_ARG3:.*]] = arith.addi %[[ARG3]], %[[STEP3]] : i32
211+
! CHECK: %[[ZERO3:.*]] = arith.constant 0 : i32
212+
! CHECK: %[[STEP3_END:.*]] = arith.cmpi slt, %[[STEP3]], %[[ZERO3]] : i32
213+
! CHECK: %[[LT_UB3:.*]] = arith.cmpi slt, %[[NEXT_ARG3]], %[[UB3]] : i32
214+
! CHECK: %[[GT_UB3:.*]] = arith.cmpi sgt, %[[NEXT_ARG3]], %[[UB3]] : i32
215+
! CHECK: %[[SEL3:.*]] = arith.select %[[STEP3_END]], %[[LT_UB3]], %[[GT_UB3]] : i1
216+
! CHECK: %[[AND2:.*]] = arith.andi %[[AND1]], %[[SEL3]] : i1
217+
! CHECK: fir.if %[[AND2]] {
218+
! CHECK: fir.store %[[NEXT_ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
219+
! CHECK: fir.store %[[NEXT_ARG2]] to %[[J_PVT_DECL]]#1 : !fir.ref<i32>
220+
! CHECK: fir.store %[[NEXT_ARG3]] to %[[K_PVT_DECL]]#1 : !fir.ref<i32>
221+
! CHECK: %[[A_PVT_LOAD:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
222+
! CHECK: hlfir.assign %[[A_PVT_LOAD]] to %[[ARG0_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
223+
! CHECK: }
224+
225+
! CHECK-NEXT: omp.yield
226+
! CHECK-NEXT: }
227+
! CHECK-NEXT: omp.terminator
228+
! CHECK-NEXT: }
229+
do i=1, a
230+
do j=1, a
231+
do k=1, a
232+
call foo(i, a)
233+
end do
234+
end do
235+
end do
236+
!$omp end parallel do
237+
!CHECK: fir.call @_QPbar(%[[ARG0_DECL]]#1) {{.*}}: (!fir.ref<i32>) -> ()
238+
call bar(a)
239+
end subroutine omp_do_lastprivate_collapse3

0 commit comments

Comments
 (0)