-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang] Teach omp-map-info-finalization to reuse descriptor allocas #122507
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-openmp Author: None (macurtis-amd) ChangesInternal testing shows improvements in some SPEC HPC benchmarks with this change. Full diff: https://github.com/llvm/llvm-project/pull/122507.diff 2 Files Affected:
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index e8234439587144..c63d2f4531a6f1 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -66,10 +66,12 @@ class MapInfoFinalizationPass
/// Tracks any intermediate function/subroutine local allocations we
/// generate for the descriptors of box type dummy arguments, so that
/// we can retrieve it for subsequent reuses within the functions
- /// scope
- std::map</*descriptor opaque pointer=*/void *,
- /*corresponding local alloca=*/fir::AllocaOp>
- localBoxAllocas;
+ /// scope.
+ ///
+ /// descriptor defining op
+ /// | corresponding local alloca
+ /// | |
+ std::map<mlir::Operation *, mlir::Value> localBoxAllocas;
/// getMemberUserList gathers all users of a particular MapInfoOp that are
/// other MapInfoOp's and places them into the mapMemberUsers list, which
@@ -132,6 +134,11 @@ class MapInfoFinalizationPass
if (!mlir::isa<fir::BaseBoxType>(descriptor.getType()))
return descriptor;
+ mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()];
+ if (slot) {
+ return slot;
+ }
+
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
// allowing it to access non-reference box operations can cause some
// problematic SSA IR. However, in the case of assumed shape's the type
@@ -147,7 +154,7 @@ class MapInfoFinalizationPass
auto alloca = builder.create<fir::AllocaOp>(loc, descriptor.getType());
builder.restoreInsertionPoint(insPt);
builder.create<fir::StoreOp>(loc, descriptor, alloca);
- return alloca;
+ return slot = alloca;
}
/// Function that generates a FIR operation accessing the descriptor's
diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir
index 19e6dcad068cd8..a7254bcddd5232 100644
--- a/flang/test/Transforms/omp-map-info-finalization.fir
+++ b/flang/test/Transforms/omp-map-info-finalization.fir
@@ -296,3 +296,49 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi
// CHECK: %[[DESC_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {{.*}}
// CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.type<[[REC_TY]]>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.type<[[REC_TY]]>> {{{.*}} partial_map = true}
// CHECK: omp.target map_entries(%[[TOP_PARENT_MAP]] -> %{{.*}}, %[[DESC_MAP_1]] -> %{{.*}}, %[[BASE_ADDR_MAP_1]] -> %{{.*}}, %[[DESC_MAP_2]] -> %{{.*}}, %[[BASE_ADDR_MAP_2]] -> %{{.*}} : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
+
+// -----
+
+func.func @_QPreuse_alloca(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}) {
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFreuse_allocaEa"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %2:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+ %c0_0 = arith.constant 0 : index
+ %3 = arith.subi %2#1, %c1 : index
+ %4 = omp.map.bounds lower_bound(%c0_0 : index) upper_bound(%3 : index) extent(%2#1 : index) stride(%2#2 : index) start_idx(%c1 : index) {stride_in_bytes = true}
+ %5 = fir.box_addr %1#1 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %6 = omp.map.info var_ptr(%5 : !fir.ref<!fir.array<?xf64>>, f64) map_clauses(to) capture(ByRef) bounds(%4) -> !fir.ref<!fir.array<?xf64>> {name = "a"}
+ omp.target_data map_entries(%6 : !fir.ref<!fir.array<?xf64>>) {
+ %cst = arith.constant 0.000000e+00 : f64
+ %c0_1 = arith.constant 0 : index
+ %7 = hlfir.designate %1#0 (%c0_1) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+ hlfir.assign %cst to %7 : f64, !fir.ref<f64>
+ %c1_2 = arith.constant 1 : index
+ %c0_3 = arith.constant 0 : index
+ %8:3 = fir.box_dims %1#0, %c0_3 : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+ %c0_4 = arith.constant 0 : index
+ %9 = arith.subi %8#1, %c1_2 : index
+ %10 = omp.map.bounds lower_bound(%c0_4 : index) upper_bound(%9 : index) extent(%8#1 : index) stride(%8#2 : index) start_idx(%c1_2 : index) {stride_in_bytes = true}
+ %11 = fir.box_addr %1#1 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %12 = omp.map.info var_ptr(%11 : !fir.ref<!fir.array<?xf64>>, f64) map_clauses(from) capture(ByRef) bounds(%10) -> !fir.ref<!fir.array<?xf64>> {name = "a"}
+ omp.target_update map_entries(%12 : !fir.ref<!fir.array<?xf64>>)
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @_QPreuse_alloca
+// CHECK-NEXT: %[[ALLOCA:[0-9]+]] = fir.alloca !fir.box<!fir.array<?xf64>>
+// CHECK-NOT: fir.alloca
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: omp.target_data map_entries
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: omp.target_update map_entries
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+
|
@llvm/pr-subscribers-flang-fir-hlfir Author: None (macurtis-amd) ChangesInternal testing shows improvements in some SPEC HPC benchmarks with this change. Full diff: https://github.com/llvm/llvm-project/pull/122507.diff 2 Files Affected:
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index e8234439587144..c63d2f4531a6f1 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -66,10 +66,12 @@ class MapInfoFinalizationPass
/// Tracks any intermediate function/subroutine local allocations we
/// generate for the descriptors of box type dummy arguments, so that
/// we can retrieve it for subsequent reuses within the functions
- /// scope
- std::map</*descriptor opaque pointer=*/void *,
- /*corresponding local alloca=*/fir::AllocaOp>
- localBoxAllocas;
+ /// scope.
+ ///
+ /// descriptor defining op
+ /// | corresponding local alloca
+ /// | |
+ std::map<mlir::Operation *, mlir::Value> localBoxAllocas;
/// getMemberUserList gathers all users of a particular MapInfoOp that are
/// other MapInfoOp's and places them into the mapMemberUsers list, which
@@ -132,6 +134,11 @@ class MapInfoFinalizationPass
if (!mlir::isa<fir::BaseBoxType>(descriptor.getType()))
return descriptor;
+ mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()];
+ if (slot) {
+ return slot;
+ }
+
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
// allowing it to access non-reference box operations can cause some
// problematic SSA IR. However, in the case of assumed shape's the type
@@ -147,7 +154,7 @@ class MapInfoFinalizationPass
auto alloca = builder.create<fir::AllocaOp>(loc, descriptor.getType());
builder.restoreInsertionPoint(insPt);
builder.create<fir::StoreOp>(loc, descriptor, alloca);
- return alloca;
+ return slot = alloca;
}
/// Function that generates a FIR operation accessing the descriptor's
diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir
index 19e6dcad068cd8..a7254bcddd5232 100644
--- a/flang/test/Transforms/omp-map-info-finalization.fir
+++ b/flang/test/Transforms/omp-map-info-finalization.fir
@@ -296,3 +296,49 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi
// CHECK: %[[DESC_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {{.*}}
// CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.type<[[REC_TY]]>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.type<[[REC_TY]]>> {{{.*}} partial_map = true}
// CHECK: omp.target map_entries(%[[TOP_PARENT_MAP]] -> %{{.*}}, %[[DESC_MAP_1]] -> %{{.*}}, %[[BASE_ADDR_MAP_1]] -> %{{.*}}, %[[DESC_MAP_2]] -> %{{.*}}, %[[BASE_ADDR_MAP_2]] -> %{{.*}} : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
+
+// -----
+
+func.func @_QPreuse_alloca(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}) {
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFreuse_allocaEa"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %2:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+ %c0_0 = arith.constant 0 : index
+ %3 = arith.subi %2#1, %c1 : index
+ %4 = omp.map.bounds lower_bound(%c0_0 : index) upper_bound(%3 : index) extent(%2#1 : index) stride(%2#2 : index) start_idx(%c1 : index) {stride_in_bytes = true}
+ %5 = fir.box_addr %1#1 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %6 = omp.map.info var_ptr(%5 : !fir.ref<!fir.array<?xf64>>, f64) map_clauses(to) capture(ByRef) bounds(%4) -> !fir.ref<!fir.array<?xf64>> {name = "a"}
+ omp.target_data map_entries(%6 : !fir.ref<!fir.array<?xf64>>) {
+ %cst = arith.constant 0.000000e+00 : f64
+ %c0_1 = arith.constant 0 : index
+ %7 = hlfir.designate %1#0 (%c0_1) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+ hlfir.assign %cst to %7 : f64, !fir.ref<f64>
+ %c1_2 = arith.constant 1 : index
+ %c0_3 = arith.constant 0 : index
+ %8:3 = fir.box_dims %1#0, %c0_3 : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+ %c0_4 = arith.constant 0 : index
+ %9 = arith.subi %8#1, %c1_2 : index
+ %10 = omp.map.bounds lower_bound(%c0_4 : index) upper_bound(%9 : index) extent(%8#1 : index) stride(%8#2 : index) start_idx(%c1_2 : index) {stride_in_bytes = true}
+ %11 = fir.box_addr %1#1 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %12 = omp.map.info var_ptr(%11 : !fir.ref<!fir.array<?xf64>>, f64) map_clauses(from) capture(ByRef) bounds(%10) -> !fir.ref<!fir.array<?xf64>> {name = "a"}
+ omp.target_update map_entries(%12 : !fir.ref<!fir.array<?xf64>>)
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @_QPreuse_alloca
+// CHECK-NEXT: %[[ALLOCA:[0-9]+]] = fir.alloca !fir.box<!fir.array<?xf64>>
+// CHECK-NOT: fir.alloca
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: omp.target_data map_entries
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: omp.target_update map_entries
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thank you for the fix :-)
…lvm#122507) Internal testing shows improvements in some SPEC HPC benchmarks with this change.
Internal testing shows improvements in some SPEC HPC benchmarks with this change.