Skip to content

Commit 185a82f

Browse files
committed
Hoist allocas
1 parent 7074354 commit 185a82f

File tree

2 files changed

+41
-38
lines changed

2 files changed

+41
-38
lines changed

flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
163163
OpBuilder copyFuncBuilder(m.getBodyRegion());
164164
fir::FirOpBuilder firCopyFuncBuilder(copyFuncBuilder, m);
165165

166-
// TODO need to copyprivate the alloca's
167166
auto mapReloadedValue =
168167
[&](Value v, OpBuilder allocaBuilder, OpBuilder singleBuilder,
169168
OpBuilder parallelBuilder, IRMapping singleMapping) -> Value {
@@ -202,10 +201,17 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
202201
SmallVector<Value> copyPrivate;
203202

204203
for (Operation &op : llvm::make_range(sr.begin, sr.end)) {
205-
singleBuilder.clone(op, singleMapping);
206204
if (isSafeToParallelize(&op)) {
205+
singleBuilder.clone(op, singleMapping);
207206
parallelBuilder.clone(op, rootMapping);
207+
} else if (auto alloca = dyn_cast<fir::AllocaOp>(&op)) {
208+
auto hoisted =
209+
cast<fir::AllocaOp>(allocaBuilder.clone(*alloca, singleMapping));
210+
rootMapping.map(&*alloca, &*hoisted);
211+
rootMapping.map(alloca.getResult(), hoisted.getResult());
212+
copyPrivate.push_back(hoisted);
208213
} else {
214+
singleBuilder.clone(op, singleMapping);
209215
// Prepare reloaded values for results of operations that cannot be
210216
// safely parallelized and which are used after the region `sr`
211217
for (auto res : op.getResults()) {

flang/test/Transforms/OpenMP/lower-workshare.mlir

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s
22

3+
// checks:
4+
// nowait on final omp.single
35
func.func @wsfunc(%arg0: !fir.ref<!fir.array<42xi32>>) {
46
omp.parallel {
57
omp.workshare {
@@ -37,6 +39,8 @@ func.func @wsfunc(%arg0: !fir.ref<!fir.array<42xi32>>) {
3739

3840
// -----
3941

42+
// checks:
43+
// fir.alloca hoisted out and copyprivate'd
4044
func.func @wsfunc(%arg0: !fir.ref<!fir.array<42xi32>>) {
4145
omp.workshare {
4246
%c1_i32 = arith.constant 1 : i32
@@ -73,7 +77,6 @@ func.func @wsfunc(%arg0: !fir.ref<!fir.array<42xi32>>) {
7377
return
7478
}
7579

76-
7780
// CHECK-LABEL: func.func private @_workshare_copy_heap_42xi32(
7881
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.heap<!fir.array<42xi32>>>,
7982
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.heap<!fir.array<42xi32>>>) {
@@ -130,9 +133,9 @@ func.func @wsfunc(%arg0: !fir.ref<!fir.array<42xi32>>) {
130133
// CHECK: return
131134
// CHECK: }
132135

133-
// CHECK-LABEL: func.func private @_workshare_copy_llvm_ptr(
134-
// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr,
135-
// CHECK-SAME: %[[VAL_1:.*]]: !llvm.ptr) {
136+
// CHECK-LABEL: func.func private @_workshare_copy_i32(
137+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<i32>,
138+
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32>) {
136139
// CHECK: return
137140
// CHECK: }
138141

@@ -141,46 +144,40 @@ func.func @wsfunc(%arg0: !fir.ref<!fir.array<42xi32>>) {
141144
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
142145
// CHECK: %[[VAL_2:.*]] = arith.constant 42 : index
143146
// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
144-
// CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i32) : i32
145-
// CHECK: %[[VAL_5:.*]] = arith.constant true
146-
// CHECK: fir.if %[[VAL_5]] {
147-
// CHECK: %[[VAL_6:.*]] = llvm.alloca %[[VAL_4]] x !llvm.ptr : (i32) -> !llvm.ptr
148-
// CHECK: %[[VAL_7:.*]] = fir.alloca !fir.heap<!fir.array<42xi32>>
149-
// CHECK: omp.single copyprivate(%[[VAL_6]] -> @_workshare_copy_llvm_ptr : !llvm.ptr, %[[VAL_7]] -> @_workshare_copy_heap_42xi32 : !fir.ref<!fir.heap<!fir.array<42xi32>>>) {
150-
// CHECK: %[[VAL_8:.*]] = fir.alloca i32
151-
// CHECK: %[[VAL_9:.*]] = builtin.unrealized_conversion_cast %[[VAL_8]] : !fir.ref<i32> to !llvm.ptr
152-
// CHECK: llvm.store %[[VAL_9]], %[[VAL_6]] : !llvm.ptr, !llvm.ptr
153-
// CHECK: fir.store %[[VAL_3]] to %[[VAL_8]] : !fir.ref<i32>
154-
// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
155-
// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_10]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
156-
// CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
157-
// CHECK: fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<!fir.heap<!fir.array<42xi32>>>
158-
// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_10]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
147+
// CHECK: %[[VAL_4:.*]] = arith.constant true
148+
// CHECK: fir.if %[[VAL_4]] {
149+
// CHECK: %[[VAL_5:.*]] = fir.alloca i32
150+
// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.heap<!fir.array<42xi32>>
151+
// CHECK: omp.single copyprivate(%[[VAL_5]] -> @_workshare_copy_i32 : !fir.ref<i32>, %[[VAL_6]] -> @_workshare_copy_heap_42xi32 : !fir.ref<!fir.heap<!fir.array<42xi32>>>) {
152+
// CHECK: fir.store %[[VAL_3]] to %[[VAL_5]] : !fir.ref<i32>
153+
// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
154+
// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_7]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
155+
// CHECK: %[[VAL_9:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
156+
// CHECK: fir.store %[[VAL_9]] to %[[VAL_6]] : !fir.ref<!fir.heap<!fir.array<42xi32>>>
157+
// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_7]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
159158
// CHECK: omp.terminator
160159
// CHECK: }
161-
// CHECK: %[[VAL_14:.*]] = llvm.load %[[VAL_6]] : !llvm.ptr -> !llvm.ptr
162-
// CHECK: %[[VAL_15:.*]] = builtin.unrealized_conversion_cast %[[VAL_14]] : !llvm.ptr to !fir.ref<i32>
163-
// CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
164-
// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_16]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
165-
// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_7]] : !fir.ref<!fir.heap<!fir.array<42xi32>>>
166-
// CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_16]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
160+
// CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
161+
// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_11]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
162+
// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]] : !fir.ref<!fir.heap<!fir.array<42xi32>>>
163+
// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]](%[[VAL_11]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
167164
// CHECK: omp.wsloop {
168-
// CHECK: omp.loop_nest (%[[VAL_20:.*]]) : index = (%[[VAL_1]]) to (%[[VAL_2]]) inclusive step (%[[VAL_1]]) {
169-
// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
170-
// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<i32>
171-
// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_15]] : !fir.ref<i32>
172-
// CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_22]], %[[VAL_23]] : i32
173-
// CHECK: %[[VAL_25:.*]] = arith.subi %[[VAL_24]], %[[VAL_3]] : i32
174-
// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19]]#0 (%[[VAL_20]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
175-
// CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_26]] temporary_lhs : i32, !fir.ref<i32>
165+
// CHECK: omp.loop_nest (%[[VAL_15:.*]]) : index = (%[[VAL_1]]) to (%[[VAL_2]]) inclusive step (%[[VAL_1]]) {
166+
// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_15]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
167+
// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
168+
// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
169+
// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_17]], %[[VAL_18]] : i32
170+
// CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_19]], %[[VAL_3]] : i32
171+
// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_14]]#0 (%[[VAL_15]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
172+
// CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_21]] temporary_lhs : i32, !fir.ref<i32>
176173
// CHECK: omp.yield
177174
// CHECK: }
178175
// CHECK: omp.terminator
179176
// CHECK: }
180177
// CHECK: omp.single nowait {
181-
// CHECK: "test.test1"(%[[VAL_15]]) : (!fir.ref<i32>) -> ()
182-
// CHECK: hlfir.assign %[[VAL_19]]#0 to %[[VAL_17]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
183-
// CHECK: fir.freemem %[[VAL_19]]#0 : !fir.heap<!fir.array<42xi32>>
178+
// CHECK: "test.test1"(%[[VAL_5]]) : (!fir.ref<i32>) -> ()
179+
// CHECK: hlfir.assign %[[VAL_14]]#0 to %[[VAL_12]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
180+
// CHECK: fir.freemem %[[VAL_14]]#0 : !fir.heap<!fir.array<42xi32>>
184181
// CHECK: omp.terminator
185182
// CHECK: }
186183
// CHECK: omp.barrier

0 commit comments

Comments
 (0)