Skip to content

[flang] Assume matching shapes in elemental assignment with non-realloc lhs. #118552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 13 additions & 27 deletions flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,34 +338,20 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
if (!fir::isa_trivial(eleTy))
return std::nullopt;

// the array must have the same shape as the elemental. CSE should have
// deduplicated the fir.shape operations where they are provably the same
// so we just have to check for the same ssa value
// TODO: add more ways of getting the shape of the array
mlir::Value arrayShape;
if (match.array.getDefiningOp())
arrayShape =
mlir::TypeSwitch<mlir::Operation *, mlir::Value>(
match.array.getDefiningOp())
.Case([](hlfir::DesignateOp designate) {
return designate.getShape();
})
.Case([](hlfir::DeclareOp declare) { return declare.getShape(); })
.Default([](mlir::Operation *) { return mlir::Value{}; });
if (!arrayShape) {
LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at "
<< elemental->getLoc() << "\n");
// The array must have the same shape as the elemental.
//
// f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
// conformable unless the lhs is an allocatable array. In HLFIR we can
// see this from the presence or absence of the realloc attribute on
// hlfir.assign. If it is not a realloc assignment, we can trust that
// the shapes do conform.
//
// TODO: the lhs's shape is dynamic, so it is hard to prove that
// there is no reallocation of the lhs due to the assignment.
// We can probably try generating multiple versions of the code
// with checking for the shape match, length parameters match, etc.
if (match.assign.getRealloc())
return std::nullopt;
}
if (arrayShape != elemental.getShape()) {
// f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
// conformable unless the lhs is an allocatable array. In HLFIR we can
// see this from the presence or absence of the realloc attribute on
// hlfir.assign. If it is not a realloc assignment, we can trust that
// the shapes do conform
if (match.assign.getRealloc())
return std::nullopt;
}

// the transformation wants to apply the elemental in a do-loop at the
// hlfir.assign, check there are no effects which make this unsafe
Expand Down
120 changes: 59 additions & 61 deletions flang/test/HLFIR/minloc-elemental.fir
Original file line number Diff line number Diff line change
Expand Up @@ -188,67 +188,65 @@ func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_n
hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
return
}
// The minloc has other uses, not an assign that gets optimized out.
// CHECK-LABEL: _QPtest_kind2_convert
// CHECK-SAME: (%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
// CHECK-NEXT: %false = arith.constant false
// CHECK-NEXT: %true = arith.constant true
// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32
// CHECK-NEXT: %c1_i16 = arith.constant 1 : i16
// CHECK-NEXT: %c0 = arith.constant 0 : index
// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16
// CHECK-NEXT: %c1 = arith.constant 1 : index
// CHECK-NEXT: %[[V0:.*]] = fir.alloca i16
// CHECK-NEXT: %[[V1:.*]] = fir.alloca !fir.array<1xi16>
// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
// CHECK-NEXT: %[[V4:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
// CHECK-NEXT: %[[V5:.*]] = fir.load %[[V4]]#0 : !fir.ref<i32>
// CHECK-NEXT: %[[V6:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
// CHECK-NEXT: fir.store %c0_i16 to %[[V6]] : !fir.ref<i16>
// CHECK-NEXT: fir.store %c0_i16 to %[[V0]] : !fir.ref<i16>
// CHECK-NEXT: %[[V7:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
// CHECK-NEXT: %[[V8:.*]] = arith.subi %[[V7]]#1, %c1 : index
// CHECK-NEXT: %[[V9:.*]] = fir.do_loop %arg3 = %c0 to %[[V8]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
// CHECK-NEXT: %[[V15:.*]] = arith.addi %arg3, %c1 : index
// CHECK-NEXT: %[[V16:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK-NEXT: %[[V17:.*]] = fir.load %[[V16]] : !fir.ref<i32>
// CHECK-NEXT: %[[V18:.*]] = arith.cmpi sge, %[[V17]], %[[V5]] : i32
// CHECK-NEXT: %[[V19:.*]] = fir.if %[[V18]] -> (i32) {
// CHECK-NEXT: %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i16>
// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK-NEXT: %[[V24:.*]] = fir.load %[[V23]] : !fir.ref<i32>
// CHECK-NEXT: %[[V25:.*]] = arith.cmpi slt, %[[V24]], %arg4 : i32
// CHECK-NEXT: %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i16) -> i1
// CHECK-NEXT: %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
// CHECK-NEXT: %[[ORCOND:.*]] = arith.ori %[[V25]], %[[ISFIRSTNOT]] : i1
// CHECK-NEXT: %[[V26:.*]] = fir.if %[[ORCOND]] -> (i32) {
// CHECK-NEXT: fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
// CHECK-NEXT: %[[V27:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
// CHECK-NEXT: %[[V28:.*]] = fir.convert %[[V15]] : (index) -> i16
// CHECK-NEXT: fir.store %[[V28]] to %[[V27]] : !fir.ref<i16>
// CHECK-NEXT: fir.result %[[V24]] : i32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : i32
// CHECK-NEXT: }
// CHECK-NEXT: fir.result %[[V26]] : i32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : i32
// CHECK-NEXT: }
// CHECK-NEXT: fir.result %[[V19]] : i32
// CHECK-NEXT: }
// CHECK-NEXT: %[[V12:.*]] = hlfir.as_expr %[[V1]] move %false : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
// CHECK-NEXT: %[[V13:.*]] = fir.shape %c1 : (index) -> !fir.shape<1>
// CHECK-NEXT: %[[V14:.*]] = hlfir.elemental %[[V13]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
// CHECK-NEXT: ^bb0(%arg3: index):
// CHECK-NEXT: %[[V15:.*]] = hlfir.apply %[[V12]], %arg3 : (!hlfir.expr<1xi16>, index) -> i16
// CHECK-NEXT: %[[V16:.*]] = fir.convert %[[V15]] : (i16) -> i32
// CHECK-NEXT: hlfir.yield_element %[[V16]] : i32
// CHECK-NEXT: }
// CHECK-NEXT: hlfir.assign %[[V14]] to %[[V3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
// CHECK-NEXT: hlfir.destroy %[[V14]] : !hlfir.expr<?xi32>
// CHECK-NEXT: return

// CHECK-LABEL: func.func @_QPtest_kind2_convert(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"},
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "val"},
// CHECK-SAME: %[[VAL_2:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
// CHECK: %[[VAL_3:.*]] = arith.constant false
// CHECK: %[[VAL_4:.*]] = arith.constant true
// CHECK: %[[VAL_5:.*]] = arith.constant 2147483647 : i32
// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i16
// CHECK: %[[VAL_7:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_8:.*]] = arith.constant 0 : i16
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_10:.*]] = fir.alloca i16
// CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<1xi16>
// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]]) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
// CHECK: fir.store %[[VAL_8]] to %[[VAL_16]] : !fir.ref<i16>
// CHECK: fir.store %[[VAL_8]] to %[[VAL_10]] : !fir.ref<i16>
// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_12]]#0, %[[VAL_7]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#1, %[[VAL_9]] : index
// CHECK: %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_7]] to %[[VAL_18]] step %[[VAL_9]] iter_args(%[[VAL_21:.*]] = %[[VAL_5]]) -> (i32) {
// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_9]] : index
// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<i32>
// CHECK: %[[VAL_25:.*]] = arith.cmpi sge, %[[VAL_24]], %[[VAL_15]] : i32
// CHECK: %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (i32) {
// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_10]] : !fir.ref<i16>
// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<i32>
// CHECK: %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_29]], %[[VAL_21]] : i32
// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (i16) -> i1
// CHECK: %[[VAL_32:.*]] = arith.xori %[[VAL_31]], %[[VAL_4]] : i1
// CHECK: %[[VAL_33:.*]] = arith.ori %[[VAL_30]], %[[VAL_32]] : i1
// CHECK: %[[VAL_34:.*]] = fir.if %[[VAL_33]] -> (i32) {
// CHECK: fir.store %[[VAL_6]] to %[[VAL_10]] : !fir.ref<i16>
// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]]) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_22]] : (index) -> i16
// CHECK: fir.store %[[VAL_36]] to %[[VAL_35]] : !fir.ref<i16>
// CHECK: fir.result %[[VAL_29]] : i32
// CHECK: } else {
// CHECK: fir.result %[[VAL_21]] : i32
// CHECK: }
// CHECK: fir.result %[[VAL_34]] : i32
// CHECK: } else {
// CHECK: fir.result %[[VAL_21]] : i32
// CHECK: }
// CHECK: fir.result %[[VAL_26]] : i32
// CHECK: }
// CHECK: %[[VAL_37:.*]] = hlfir.as_expr %[[VAL_11]] move %[[VAL_3]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
// CHECK: fir.do_loop %[[VAL_38:.*]] = %[[VAL_9]] to %[[VAL_9]] step %[[VAL_9]] unordered {
// CHECK: %[[VAL_39:.*]] = hlfir.apply %[[VAL_37]], %[[VAL_38]] : (!hlfir.expr<1xi16>, index) -> i16
// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i16) -> i32
// CHECK: %[[VAL_41:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %[[VAL_40]] to %[[VAL_41]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: return
// CHECK: }


func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
Expand Down
50 changes: 50 additions & 0 deletions flang/test/HLFIR/opt-bufferization-non-realloc-assignment.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// RUN: fir-opt --opt-bufferization %s | FileCheck %s

// Verify that a shape match is not required for optimizing an
// elemental assignment when the lhs is not allocatable.
// The shapes of the lhs and rhs must conform in a legal program.
//
// Example:
// subroutine test(a,b)
// integer :: a(:), b(:)
// a = b + 1
// end subroutine test

// Test input: HLFIR for the `a = b + 1` example above. Both dummy arguments
// are boxed arrays of unknown extent (!fir.box<!fir.array<?xi32>>).
func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
%c0 = arith.constant 0 : index
%c1_i32 = arith.constant 1 : i32
%0 = fir.dummy_scope : !fir.dscope
// Neither declare carries a shape operand.
%1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEa"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
%2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEb"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
// The shape of the elemental is built from the dimension-0 box_dims of `b`
// (the rhs); nothing here relates it to the shape of `a`.
%3:3 = fir.box_dims %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
%4 = fir.shape %3#1 : (index) -> !fir.shape<1>
// Element-wise body: load b(i) and add 1.
%5 = hlfir.elemental %4 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
^bb0(%arg2: index):
%6 = hlfir.designate %2#0 (%arg2) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
%7 = fir.load %6 : !fir.ref<i32>
%8 = arith.addi %7, %c1_i32 : i32
hlfir.yield_element %8 : i32
}
// Whole-array assignment to `a`. There is no `realloc` attribute on this
// assign, which is the case this test exercises.
hlfir.assign %5 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
hlfir.destroy %5 : !hlfir.expr<?xi32>
return
}
// CHECK-LABEL: func.func @_QPtest(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
// CHECK-SAME: %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFtestEa"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_5]] {uniq_name = "_QFtestEb"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
// CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_7]]#0, %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
// CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_2]] to %[[VAL_8]]#1 step %[[VAL_2]] unordered {
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_9]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_4]] : i32
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: return
// CHECK: }
4 changes: 2 additions & 2 deletions flang/test/Integration/OpenMP/workshare-axpy.f90
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ subroutine sb1(a, x, y, z)
integer :: a
integer :: x(:)
integer :: y(:)
integer :: z(:)
integer, allocatable :: z(:)
!$omp parallel workshare
z = a * x + y
!$omp end parallel workshare
Expand Down Expand Up @@ -43,7 +43,7 @@ subroutine sb1(a, x, y, z)

! FIR: func.func @_QPsb1
! FIR: omp.parallel {
! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref<i32>, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
! FIR: fir.allocmem
! FIR: omp.wsloop {
! FIR: omp.loop_nest
Expand Down
Loading