Skip to content

Commit d291e45

Browse files
authored
[flang] Teach omp-map-info-finalization to reuse descriptor allocas (#122507)
Internal testing shows improvements in some SPEC HPC benchmarks with this change.
1 parent 1d58699 commit d291e45

File tree

2 files changed

+58
-5
lines changed

2 files changed

+58
-5
lines changed

flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -66,10 +66,12 @@ class MapInfoFinalizationPass
6666
/// Tracks any intermediate function/subroutine local allocations we
6767
/// generate for the descriptors of box type dummy arguments, so that
6868
/// we can retrieve it for subsequent reuses within the functions
69-
/// scope
70-
std::map</*descriptor opaque pointer=*/void *,
71-
/*corresponding local alloca=*/fir::AllocaOp>
72-
localBoxAllocas;
69+
/// scope.
70+
///
71+
/// descriptor defining op
72+
/// | corresponding local alloca
73+
/// | |
74+
std::map<mlir::Operation *, mlir::Value> localBoxAllocas;
7375

7476
/// getMemberUserList gathers all users of a particular MapInfoOp that are
7577
/// other MapInfoOp's and places them into the mapMemberUsers list, which
@@ -132,6 +134,11 @@ class MapInfoFinalizationPass
132134
if (!mlir::isa<fir::BaseBoxType>(descriptor.getType()))
133135
return descriptor;
134136

137+
mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()];
138+
if (slot) {
139+
return slot;
140+
}
141+
135142
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
136143
// allowing it to access non-reference box operations can cause some
137144
// problematic SSA IR. However, in the case of assumed shapes the type
@@ -147,7 +154,7 @@ class MapInfoFinalizationPass
147154
auto alloca = builder.create<fir::AllocaOp>(loc, descriptor.getType());
148155
builder.restoreInsertionPoint(insPt);
149156
builder.create<fir::StoreOp>(loc, descriptor, alloca);
150-
return alloca;
157+
return slot = alloca;
151158
}
152159

153160
/// Function that generates a FIR operation accessing the descriptor's

flang/test/Transforms/omp-map-info-finalization.fir

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,3 +296,49 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi
296296
// CHECK: %[[DESC_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {{.*}}
297297
// CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.type<[[REC_TY]]>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.type<[[REC_TY]]>> {{{.*}} partial_map = true}
298298
// CHECK: omp.target map_entries(%[[TOP_PARENT_MAP]] -> %{{.*}}, %[[DESC_MAP_1]] -> %{{.*}}, %[[BASE_ADDR_MAP_1]] -> %{{.*}}, %[[DESC_MAP_2]] -> %{{.*}}, %[[BASE_ADDR_MAP_2]] -> %{{.*}} : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
299+
300+
// -----
301+
302+
func.func @_QPreuse_alloca(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}) {
303+
%0 = fir.dummy_scope : !fir.dscope
304+
%1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFreuse_allocaEa"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
305+
%c1 = arith.constant 1 : index
306+
%c0 = arith.constant 0 : index
307+
%2:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
308+
%c0_0 = arith.constant 0 : index
309+
%3 = arith.subi %2#1, %c1 : index
310+
%4 = omp.map.bounds lower_bound(%c0_0 : index) upper_bound(%3 : index) extent(%2#1 : index) stride(%2#2 : index) start_idx(%c1 : index) {stride_in_bytes = true}
311+
%5 = fir.box_addr %1#1 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
312+
%6 = omp.map.info var_ptr(%5 : !fir.ref<!fir.array<?xf64>>, f64) map_clauses(to) capture(ByRef) bounds(%4) -> !fir.ref<!fir.array<?xf64>> {name = "a"}
313+
omp.target_data map_entries(%6 : !fir.ref<!fir.array<?xf64>>) {
314+
%cst = arith.constant 0.000000e+00 : f64
315+
%c0_1 = arith.constant 0 : index
316+
%7 = hlfir.designate %1#0 (%c0_1) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
317+
hlfir.assign %cst to %7 : f64, !fir.ref<f64>
318+
%c1_2 = arith.constant 1 : index
319+
%c0_3 = arith.constant 0 : index
320+
%8:3 = fir.box_dims %1#0, %c0_3 : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
321+
%c0_4 = arith.constant 0 : index
322+
%9 = arith.subi %8#1, %c1_2 : index
323+
%10 = omp.map.bounds lower_bound(%c0_4 : index) upper_bound(%9 : index) extent(%8#1 : index) stride(%8#2 : index) start_idx(%c1_2 : index) {stride_in_bytes = true}
324+
%11 = fir.box_addr %1#1 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
325+
%12 = omp.map.info var_ptr(%11 : !fir.ref<!fir.array<?xf64>>, f64) map_clauses(from) capture(ByRef) bounds(%10) -> !fir.ref<!fir.array<?xf64>> {name = "a"}
326+
omp.target_update map_entries(%12 : !fir.ref<!fir.array<?xf64>>)
327+
omp.terminator
328+
}
329+
return
330+
}
331+
332+
// CHECK-LABEL: @_QPreuse_alloca
333+
// CHECK-NEXT: %[[ALLOCA:[0-9]+]] = fir.alloca !fir.box<!fir.array<?xf64>>
334+
// CHECK-NOT: fir.alloca
335+
// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
336+
// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
337+
// CHECK: omp.target_data map_entries
338+
// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
339+
// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
340+
// CHECK: omp.target_update map_entries
341+
// CHECK: omp.terminator
342+
// CHECK: }
343+
// CHECK: return
344+

0 commit comments

Comments
 (0)