Skip to content

Commit ba8077c

Browse files
authored
[flang] Use optimal shape for assign expansion as a loop. (#143050)
During `hlfir.assign` inlining and `ElementalAssignBufferization` we can deduce the optimal shape from the `lhs` and `rhs` shapes. This would probably be better done in a separate pass that propagates constant shapes, but I have not seen any benchmarks that would benefit from this yet, so consider this a workaround for a bigger TODO issue. The `ElementalAssignBufferization` case is from 465.tonto, but I do not have performance results yet (I do not expect much of an improvement).
1 parent e16f603 commit ba8077c

File tree

4 files changed

+75
-11
lines changed

4 files changed

+75
-11
lines changed

flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,14 @@ class InlineHLFIRAssignConversion
109109
builder.setInsertionPoint(assign);
110110
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
111111
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
112-
mlir::Value shape = hlfir::genShape(loc, builder, lhs);
112+
mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs);
113+
llvm::SmallVector<mlir::Value> lhsExtents =
114+
hlfir::getIndexExtents(loc, builder, lhsShape);
115+
mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
116+
llvm::SmallVector<mlir::Value> rhsExtents =
117+
hlfir::getIndexExtents(loc, builder, rhsShape);
113118
llvm::SmallVector<mlir::Value> extents =
114-
hlfir::getIndexExtents(loc, builder, shape);
119+
fir::factory::deduceOptimalExtents(lhsExtents, rhsExtents);
115120
hlfir::LoopNest loopNest =
116121
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
117122
flangomp::shouldUseWorkshareLowering(assign));

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -700,10 +700,17 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
700700

701701
mlir::Location loc = elemental->getLoc();
702702
fir::FirOpBuilder builder(rewriter, elemental.getOperation());
703-
auto extents = hlfir::getIndexExtents(loc, builder, elemental.getShape());
703+
auto rhsExtents = hlfir::getIndexExtents(loc, builder, elemental.getShape());
704704

705705
// create the loop at the assignment
706706
builder.setInsertionPoint(match->assign);
707+
hlfir::Entity lhs{match->array};
708+
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
709+
mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs);
710+
llvm::SmallVector<mlir::Value> lhsExtents =
711+
hlfir::getIndexExtents(loc, builder, lhsShape);
712+
llvm::SmallVector<mlir::Value> extents =
713+
fir::factory::deduceOptimalExtents(rhsExtents, lhsExtents);
707714

708715
// Generate a loop nest looping around the hlfir.elemental shape and clone
709716
// hlfir.elemental region inside the inner loop
@@ -717,8 +724,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
717724
rewriter.eraseOp(yield);
718725

719726
// Assign the element value to the array element for this iteration.
720-
auto arrayElement = hlfir::getElementAt(
721-
loc, builder, hlfir::Entity{match->array}, loopNest.oneBasedIndices);
727+
auto arrayElement =
728+
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
722729
builder.create<hlfir::AssignOp>(
723730
loc, elementValue, arrayElement, /*realloc=*/false,
724731
/*keep_lhs_length_if_realloc=*/false, match->assign.getTemporaryLhs());

flang/test/HLFIR/inline-hlfir-assign.fir

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,18 +145,16 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"})
145145
// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_10]]#1, %[[VAL_1]] : index
146146
// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_12]], %[[VAL_14]] : (index, index) -> !fir.shape<2>
147147
// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_2]]:%[[VAL_9]]#1:%[[VAL_2]], %[[VAL_2]]:%[[VAL_10]]#1:%[[VAL_2]]) shape %[[VAL_15]] : (!fir.box<!fir.array<?x?xf32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box<!fir.array<?x?xf32>>
148-
// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_14]] step %[[VAL_2]] unordered {
149-
// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_12]] step %[[VAL_2]] unordered {
148+
// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
149+
// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
150150
// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
151151
// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<f32>
152152
// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
153153
// CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_21]] : f32, !fir.ref<f32>
154154
// CHECK: }
155155
// CHECK: }
156-
// CHECK: %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_1]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
157-
// CHECK: %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
158-
// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_23]]#1 step %[[VAL_2]] unordered {
159-
// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_22]]#1 step %[[VAL_2]] unordered {
156+
// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
157+
// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
160158
// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
161159
// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f32>
162160
// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// RUN: fir-opt --opt-bufferization %s | FileCheck %s
2+
3+
// Check that the elemental+assign are rewritten into a loop
4+
// with "optimal" loop bounds, e.g. that we use constants
5+
// when possible.
6+
7+
// CHECK-LABEL: func.func @_QPtest1(
8+
// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
9+
// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
10+
// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered {
11+
// CHECK-NOT: hlfir.assign{{.*}}array
12+
func.func @_QPtest1(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<!fir.array<3xf32>> {fir.bindc_name = "y"}) {
13+
%c0 = arith.constant 0 : index
14+
%c3 = arith.constant 3 : index
15+
%0 = fir.dummy_scope : !fir.dscope
16+
%1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
17+
%2 = fir.shape %c3 : (index) -> !fir.shape<1>
18+
%3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest1Ey"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
19+
%4:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
20+
%5 = fir.shape %4#1 : (index) -> !fir.shape<1>
21+
%6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
22+
^bb0(%arg2: index):
23+
%7 = hlfir.designate %1#0 (%arg2) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
24+
%8 = fir.load %7 : !fir.ref<f32>
25+
%9 = arith.addf %8, %8 fastmath<contract> : f32
26+
hlfir.yield_element %9 : f32
27+
}
28+
hlfir.assign %6 to %3#0 : !hlfir.expr<?xf32>, !fir.ref<!fir.array<3xf32>>
29+
hlfir.destroy %6 : !hlfir.expr<?xf32>
30+
return
31+
}
32+
33+
// CHECK-LABEL: func.func @_QPtest2(
34+
// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
35+
// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
36+
// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered {
37+
// CHECK-NOT: hlfir.assign{{.*}}array
38+
func.func @_QPtest2(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<!fir.array<3xf32>> {fir.bindc_name = "y"}) {
39+
%c3 = arith.constant 3 : index
40+
%0 = fir.dummy_scope : !fir.dscope
41+
%1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest2Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
42+
%2 = fir.shape %c3 : (index) -> !fir.shape<1>
43+
%3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest2Ey"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
44+
%4 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
45+
^bb0(%arg2: index):
46+
%5 = hlfir.designate %3#0 (%arg2) : (!fir.ref<!fir.array<3xf32>>, index) -> !fir.ref<f32>
47+
%6 = fir.load %5 : !fir.ref<f32>
48+
%7 = arith.addf %6, %6 fastmath<contract> : f32
49+
hlfir.yield_element %7 : f32
50+
}
51+
hlfir.assign %4 to %1#0 : !hlfir.expr<3xf32>, !fir.box<!fir.array<?xf32>>
52+
hlfir.destroy %4 : !hlfir.expr<3xf32>
53+
return
54+
}

0 commit comments

Comments
 (0)