Skip to content

Commit 3f0cc06

Browse files
authored
[flang] Assume matching shapes in elemental assignment with non-realloc lhs. (#118552)
The optimized bufferization pass cannot optimize even very simple cases of elemental assignments, because of a suboptimal order of checks. This patch relies on the fact that, in a legal program, the lhs and rhs of an assignment have matching shapes when the lhs is not an allocatable and the rhs is the result of an elemental array operation.
1 parent b8c4fb0 commit 3f0cc06

File tree

4 files changed

+124
-90
lines changed

4 files changed

+124
-90
lines changed

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -338,34 +338,20 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
338338
if (!fir::isa_trivial(eleTy))
339339
return std::nullopt;
340340

341-
// the array must have the same shape as the elemental. CSE should have
342-
// deduplicated the fir.shape operations where they are provably the same
343-
// so we just have to check for the same ssa value
344-
// TODO: add more ways of getting the shape of the array
345-
mlir::Value arrayShape;
346-
if (match.array.getDefiningOp())
347-
arrayShape =
348-
mlir::TypeSwitch<mlir::Operation *, mlir::Value>(
349-
match.array.getDefiningOp())
350-
.Case([](hlfir::DesignateOp designate) {
351-
return designate.getShape();
352-
})
353-
.Case([](hlfir::DeclareOp declare) { return declare.getShape(); })
354-
.Default([](mlir::Operation *) { return mlir::Value{}; });
355-
if (!arrayShape) {
356-
LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at "
357-
<< elemental->getLoc() << "\n");
341+
// The array must have the same shape as the elemental.
342+
//
343+
// f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
344+
// conformable unless the lhs is an allocatable array. In HLFIR we can
345+
// see this from the presence or absence of the realloc attribute on
346+
// hlfir.assign. If it is not a realloc assignment, we can trust that
347+
// the shapes do conform.
348+
//
349+
// TODO: the lhs's shape is dynamic, so it is hard to prove that
350+
// there is no reallocation of the lhs due to the assignment.
351+
// We can probably try generating multiple versions of the code
352+
// with checking for the shape match, length parameters match, etc.
353+
if (match.assign.getRealloc())
358354
return std::nullopt;
359-
}
360-
if (arrayShape != elemental.getShape()) {
361-
// f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
362-
// conformable unless the lhs is an allocatable array. In HLFIR we can
363-
// see this from the presence or absence of the realloc attribute on
364-
// hlfir.assign. If it is not a realloc assignment, we can trust that
365-
// the shapes do conform
366-
if (match.assign.getRealloc())
367-
return std::nullopt;
368-
}
369355

370356
// the transformation wants to apply the elemental in a do-loop at the
371357
// hlfir.assign, check there are no effects which make this unsafe

flang/test/HLFIR/minloc-elemental.fir

Lines changed: 59 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -188,67 +188,65 @@ func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_n
188188
hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
189189
return
190190
}
191-
// The minloc has other uses, not an assign that gets optimized out.
192-
// CHECK-LABEL: _QPtest_kind2_convert
193-
// CHECK-SAME: (%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
194-
// CHECK-NEXT: %false = arith.constant false
195-
// CHECK-NEXT: %true = arith.constant true
196-
// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32
197-
// CHECK-NEXT: %c1_i16 = arith.constant 1 : i16
198-
// CHECK-NEXT: %c0 = arith.constant 0 : index
199-
// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16
200-
// CHECK-NEXT: %c1 = arith.constant 1 : index
201-
// CHECK-NEXT: %[[V0:.*]] = fir.alloca i16
202-
// CHECK-NEXT: %[[V1:.*]] = fir.alloca !fir.array<1xi16>
203-
// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
204-
// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
205-
// CHECK-NEXT: %[[V4:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
206-
// CHECK-NEXT: %[[V5:.*]] = fir.load %[[V4]]#0 : !fir.ref<i32>
207-
// CHECK-NEXT: %[[V6:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
208-
// CHECK-NEXT: fir.store %c0_i16 to %[[V6]] : !fir.ref<i16>
209-
// CHECK-NEXT: fir.store %c0_i16 to %[[V0]] : !fir.ref<i16>
210-
// CHECK-NEXT: %[[V7:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
211-
// CHECK-NEXT: %[[V8:.*]] = arith.subi %[[V7]]#1, %c1 : index
212-
// CHECK-NEXT: %[[V9:.*]] = fir.do_loop %arg3 = %c0 to %[[V8]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
213-
// CHECK-NEXT: %[[V15:.*]] = arith.addi %arg3, %c1 : index
214-
// CHECK-NEXT: %[[V16:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
215-
// CHECK-NEXT: %[[V17:.*]] = fir.load %[[V16]] : !fir.ref<i32>
216-
// CHECK-NEXT: %[[V18:.*]] = arith.cmpi sge, %[[V17]], %[[V5]] : i32
217-
// CHECK-NEXT: %[[V19:.*]] = fir.if %[[V18]] -> (i32) {
218-
// CHECK-NEXT: %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i16>
219-
// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
220-
// CHECK-NEXT: %[[V24:.*]] = fir.load %[[V23]] : !fir.ref<i32>
221-
// CHECK-NEXT: %[[V25:.*]] = arith.cmpi slt, %[[V24]], %arg4 : i32
222-
// CHECK-NEXT: %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i16) -> i1
223-
// CHECK-NEXT: %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
224-
// CHECK-NEXT: %[[ORCOND:.*]] = arith.ori %[[V25]], %[[ISFIRSTNOT]] : i1
225-
// CHECK-NEXT: %[[V26:.*]] = fir.if %[[ORCOND]] -> (i32) {
226-
// CHECK-NEXT: fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
227-
// CHECK-NEXT: %[[V27:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
228-
// CHECK-NEXT: %[[V28:.*]] = fir.convert %[[V15]] : (index) -> i16
229-
// CHECK-NEXT: fir.store %[[V28]] to %[[V27]] : !fir.ref<i16>
230-
// CHECK-NEXT: fir.result %[[V24]] : i32
231-
// CHECK-NEXT: } else {
232-
// CHECK-NEXT: fir.result %arg4 : i32
233-
// CHECK-NEXT: }
234-
// CHECK-NEXT: fir.result %[[V26]] : i32
235-
// CHECK-NEXT: } else {
236-
// CHECK-NEXT: fir.result %arg4 : i32
237-
// CHECK-NEXT: }
238-
// CHECK-NEXT: fir.result %[[V19]] : i32
239-
// CHECK-NEXT: }
240-
// CHECK-NEXT: %[[V12:.*]] = hlfir.as_expr %[[V1]] move %false : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
241-
// CHECK-NEXT: %[[V13:.*]] = fir.shape %c1 : (index) -> !fir.shape<1>
242-
// CHECK-NEXT: %[[V14:.*]] = hlfir.elemental %[[V13]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
243-
// CHECK-NEXT: ^bb0(%arg3: index):
244-
// CHECK-NEXT: %[[V15:.*]] = hlfir.apply %[[V12]], %arg3 : (!hlfir.expr<1xi16>, index) -> i16
245-
// CHECK-NEXT: %[[V16:.*]] = fir.convert %[[V15]] : (i16) -> i32
246-
// CHECK-NEXT: hlfir.yield_element %[[V16]] : i32
247-
// CHECK-NEXT: }
248-
// CHECK-NEXT: hlfir.assign %[[V14]] to %[[V3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
249-
// CHECK-NEXT: hlfir.destroy %[[V14]] : !hlfir.expr<?xi32>
250-
// CHECK-NEXT: return
251-
191+
// CHECK-LABEL: func.func @_QPtest_kind2_convert(
192+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"},
193+
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "val"},
194+
// CHECK-SAME: %[[VAL_2:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
195+
// CHECK: %[[VAL_3:.*]] = arith.constant false
196+
// CHECK: %[[VAL_4:.*]] = arith.constant true
197+
// CHECK: %[[VAL_5:.*]] = arith.constant 2147483647 : i32
198+
// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i16
199+
// CHECK: %[[VAL_7:.*]] = arith.constant 0 : index
200+
// CHECK: %[[VAL_8:.*]] = arith.constant 0 : i16
201+
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
202+
// CHECK: %[[VAL_10:.*]] = fir.alloca i16
203+
// CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<1xi16>
204+
// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
205+
// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
206+
// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
207+
// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
208+
// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]]) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
209+
// CHECK: fir.store %[[VAL_8]] to %[[VAL_16]] : !fir.ref<i16>
210+
// CHECK: fir.store %[[VAL_8]] to %[[VAL_10]] : !fir.ref<i16>
211+
// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_12]]#0, %[[VAL_7]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
212+
// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#1, %[[VAL_9]] : index
213+
// CHECK: %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_7]] to %[[VAL_18]] step %[[VAL_9]] iter_args(%[[VAL_21:.*]] = %[[VAL_5]]) -> (i32) {
214+
// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_9]] : index
215+
// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
216+
// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<i32>
217+
// CHECK: %[[VAL_25:.*]] = arith.cmpi sge, %[[VAL_24]], %[[VAL_15]] : i32
218+
// CHECK: %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (i32) {
219+
// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_10]] : !fir.ref<i16>
220+
// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
221+
// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<i32>
222+
// CHECK: %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_29]], %[[VAL_21]] : i32
223+
// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (i16) -> i1
224+
// CHECK: %[[VAL_32:.*]] = arith.xori %[[VAL_31]], %[[VAL_4]] : i1
225+
// CHECK: %[[VAL_33:.*]] = arith.ori %[[VAL_30]], %[[VAL_32]] : i1
226+
// CHECK: %[[VAL_34:.*]] = fir.if %[[VAL_33]] -> (i32) {
227+
// CHECK: fir.store %[[VAL_6]] to %[[VAL_10]] : !fir.ref<i16>
228+
// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]]) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
229+
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_22]] : (index) -> i16
230+
// CHECK: fir.store %[[VAL_36]] to %[[VAL_35]] : !fir.ref<i16>
231+
// CHECK: fir.result %[[VAL_29]] : i32
232+
// CHECK: } else {
233+
// CHECK: fir.result %[[VAL_21]] : i32
234+
// CHECK: }
235+
// CHECK: fir.result %[[VAL_34]] : i32
236+
// CHECK: } else {
237+
// CHECK: fir.result %[[VAL_21]] : i32
238+
// CHECK: }
239+
// CHECK: fir.result %[[VAL_26]] : i32
240+
// CHECK: }
241+
// CHECK: %[[VAL_37:.*]] = hlfir.as_expr %[[VAL_11]] move %[[VAL_3]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
242+
// CHECK: fir.do_loop %[[VAL_38:.*]] = %[[VAL_9]] to %[[VAL_9]] step %[[VAL_9]] unordered {
243+
// CHECK: %[[VAL_39:.*]] = hlfir.apply %[[VAL_37]], %[[VAL_38]] : (!hlfir.expr<1xi16>, index) -> i16
244+
// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i16) -> i32
245+
// CHECK: %[[VAL_41:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
246+
// CHECK: hlfir.assign %[[VAL_40]] to %[[VAL_41]] : i32, !fir.ref<i32>
247+
// CHECK: }
248+
// CHECK: return
249+
// CHECK: }
252250

253251

254252
func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// RUN: fir-opt --opt-bufferization %s | FileCheck %s
2+
3+
// Verify that the shape match is not required for optimizing
4+
// elemental assignment when the lhs is not an allocatable.
5+
// The shapes of lhs and rhs must conform in a legal program.
6+
//
7+
// Example:
8+
// subroutine test(a,b)
9+
// integer :: a(:), b(:)
10+
// a = b + 1
11+
// end subroutine test
12+
13+
func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
14+
%c0 = arith.constant 0 : index
15+
%c1_i32 = arith.constant 1 : i32
16+
%0 = fir.dummy_scope : !fir.dscope
17+
%1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEa"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
18+
%2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEb"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
19+
%3:3 = fir.box_dims %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
20+
%4 = fir.shape %3#1 : (index) -> !fir.shape<1>
21+
%5 = hlfir.elemental %4 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
22+
^bb0(%arg2: index):
23+
%6 = hlfir.designate %2#0 (%arg2) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
24+
%7 = fir.load %6 : !fir.ref<i32>
25+
%8 = arith.addi %7, %c1_i32 : i32
26+
hlfir.yield_element %8 : i32
27+
}
28+
hlfir.assign %5 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
29+
hlfir.destroy %5 : !hlfir.expr<?xi32>
30+
return
31+
}
32+
// CHECK-LABEL: func.func @_QPtest(
33+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
34+
// CHECK-SAME: %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
35+
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
36+
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
37+
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
38+
// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
39+
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFtestEa"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
40+
// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_5]] {uniq_name = "_QFtestEb"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
41+
// CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_7]]#0, %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
42+
// CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_2]] to %[[VAL_8]]#1 step %[[VAL_2]] unordered {
43+
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_9]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
44+
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
45+
// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_4]] : i32
46+
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
47+
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] : i32, !fir.ref<i32>
48+
// CHECK: }
49+
// CHECK: return
50+
// CHECK: }

flang/test/Integration/OpenMP/workshare-axpy.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ subroutine sb1(a, x, y, z)
1313
integer :: a
1414
integer :: x(:)
1515
integer :: y(:)
16-
integer :: z(:)
16+
integer, allocatable :: z(:)
1717
!$omp parallel workshare
1818
z = a * x + y
1919
!$omp end parallel workshare
@@ -43,7 +43,7 @@ subroutine sb1(a, x, y, z)
4343

4444
! FIR: func.func @_QPsb1
4545
! FIR: omp.parallel {
46-
! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref<i32>, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
46+
! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
4747
! FIR: fir.allocmem
4848
! FIR: omp.wsloop {
4949
! FIR: omp.loop_nest

0 commit comments

Comments
 (0)