Skip to content

Commit 8f1671c

Browse files
committed
[flang][hlfir] Allow hlfir.assign expansion for array slices.
This case is important for `Polyhedron/channel2`: ``` u(2:M-1,1:N,new) = u(2:M-1,1:N,old) & +2.d0*dt*f(2:M-1,1:N)*v(2:M-1,1:N,mid) & -2.d0*dt/(2.d0*dx)*g*dhdx(2:M-1,1:N) ``` The slices of `u` on the left and the right hand sides are completely disjoint, but `old` and `new` are unknown runtime values. So the slices may also be identical rather than disjoint. For the purpose of hlfir.assign expansion we do not care whether they are identical or disjoint. This kind of answer does not fit well into the alias analysis definition, so I added a very simplified check to handle this case. This drops icelake execution time from 120 to 70 seconds. Reviewed By: tblah Differential Revision: https://reviews.llvm.org/D159323
1 parent 460bba3 commit 8f1671c

File tree

2 files changed

+211
-2
lines changed

2 files changed

+211
-2
lines changed

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,83 @@ containsReadOrWriteEffectOn(const mlir::MemoryEffects::EffectInstance &effect,
157157
return mlir::AliasResult::NoAlias;
158158
}
159159

160+
// Returns true if the given array references represent identical
161+
// or completely disjoint array slices. The callers may use this
162+
// method when the alias analysis reports an alias of some kind,
163+
// so that we can run Fortran specific analysis on the array slices
164+
// to see if they are identical or disjoint. Note that the alias
165+
// analysis are not able to give such an answer about the references.
166+
static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) {
167+
if (ref1 == ref2)
168+
return true;
169+
170+
auto des1 = ref1.getDefiningOp<hlfir::DesignateOp>();
171+
auto des2 = ref2.getDefiningOp<hlfir::DesignateOp>();
172+
// We only support a pair of designators right now.
173+
if (!des1 || !des2)
174+
return false;
175+
176+
if (des1.getMemref() != des2.getMemref()) {
177+
// If the bases are different, then there is unknown overlap.
178+
LLVM_DEBUG(llvm::dbgs() << "No identical base for:\n"
179+
<< des1 << "and:\n"
180+
<< des2 << "\n");
181+
return false;
182+
}
183+
184+
// Require all components of the designators to be the same.
185+
// It might be too strict, e.g. we may probably allow for
186+
// different type parameters.
187+
if (des1.getComponent() != des2.getComponent() ||
188+
des1.getComponentShape() != des2.getComponentShape() ||
189+
des1.getSubstring() != des2.getSubstring() ||
190+
des1.getComplexPart() != des2.getComplexPart() ||
191+
des1.getShape() != des2.getShape() ||
192+
des1.getTypeparams() != des2.getTypeparams()) {
193+
LLVM_DEBUG(llvm::dbgs() << "Different designator specs for:\n"
194+
<< des1 << "and:\n"
195+
<< des2 << "\n");
196+
return false;
197+
}
198+
199+
if (des1.getIsTriplet() != des2.getIsTriplet()) {
200+
LLVM_DEBUG(llvm::dbgs() << "Different sections for:\n"
201+
<< des1 << "and:\n"
202+
<< des2 << "\n");
203+
return false;
204+
}
205+
206+
// Analyze the subscripts.
207+
// For example:
208+
// hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0) shape %9
209+
// hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %1) shape %9
210+
//
211+
// If all the triplets (section speficiers) are the same, then
212+
// we do not care if %0 is equal to %1 - the slices are either
213+
// identical or completely disjoint.
214+
//
215+
// TODO: if we can prove that all non-triplet subscripts are different
216+
// (by value), then we may return true regardless of the triplet
217+
// values - the sections must be completely disjoint.
218+
auto des1It = des1.getIndices().begin();
219+
auto des2It = des2.getIndices().begin();
220+
for (bool isTriplet : des1.getIsTriplet()) {
221+
if (isTriplet) {
222+
for (int i = 0; i < 3; ++i)
223+
if (*des1It++ != *des2It++) {
224+
LLVM_DEBUG(llvm::dbgs() << "Triplet mismatch for:\n"
225+
<< des1 << "and:\n"
226+
<< des2 << "\n");
227+
return false;
228+
}
229+
} else {
230+
++des1It;
231+
++des2It;
232+
}
233+
}
234+
return true;
235+
}
236+
160237
std::optional<ElementalAssignBufferization::MatchInfo>
161238
ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
162239
mlir::Operation::user_range users = elemental->getUsers();
@@ -274,7 +351,7 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
274351
if (!res.isPartial()) {
275352
if (auto designate =
276353
effect.getValue().getDefiningOp<hlfir::DesignateOp>()) {
277-
if (designate.getMemref() != match.array) {
354+
if (!areIdenticalOrDisjointSlices(match.array, designate.getMemref())) {
278355
LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate
279356
<< " at " << elemental.getLoc() << "\n");
280357
return std::nullopt;
@@ -291,7 +368,7 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
291368
continue;
292369
}
293370
}
294-
LLVM_DEBUG(llvm::dbgs() << "diasllowed side-effect: " << effect.getValue()
371+
LLVM_DEBUG(llvm::dbgs() << "disallowed side-effect: " << effect.getValue()
295372
<< " for " << elemental.getLoc() << "\n");
296373
return std::nullopt;
297374
}
@@ -484,6 +561,8 @@ mlir::LogicalResult VariableAssignBufferization::matchAndRewrite(
484561

485562
fir::AliasAnalysis aliasAnalysis;
486563
mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
564+
// TODO: use areIdenticalOrDisjointSlices() to check if
565+
// we can still do the expansion.
487566
if (!aliasRes.isNo()) {
488567
LLVM_DEBUG(llvm::dbgs() << "VariableAssignBufferization:\n"
489568
<< "\tLHS: " << lhs << "\n"
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Test optimized bufferization for hlfir.assign of array
2+
// slices, e.g.:
3+
// x(2:7999,1:120,new) = (x(2:7999,1:120,old))
4+
// We can expand hlfir.assign if the slices are either identical
5+
// or completely disjoint. In case they are identical, we still
6+
// need to make sure that the one-based indices are used
7+
// uniformly for both LHS and RHS.
8+
// RUN: fir-opt --opt-bufferization %s | FileCheck %s
9+
10+
func.func @_QPtest1(%arg0: !fir.ref<!fir.array<8000x120x3xf32>> {fir.bindc_name = "x"}) {
11+
%c7998 = arith.constant 7998 : index
12+
%c1 = arith.constant 1 : index
13+
%c7999 = arith.constant 7999 : index
14+
%c2 = arith.constant 2 : index
15+
%c3 = arith.constant 3 : index
16+
%c120 = arith.constant 120 : index
17+
%c8000 = arith.constant 8000 : index
18+
%0 = fir.alloca i32 {bindc_name = "new", uniq_name = "_QFtest1Enew"}
19+
%1:2 = hlfir.declare %0 {uniq_name = "_QFtest1Enew"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
20+
%2 = fir.alloca i32 {bindc_name = "old", uniq_name = "_QFtest1Eold"}
21+
%3:2 = hlfir.declare %2 {uniq_name = "_QFtest1Eold"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
22+
%4 = fir.shape %c8000, %c120, %c3 : (index, index, index) -> !fir.shape<3>
23+
%5:2 = hlfir.declare %arg0(%4) {uniq_name = "_QFtest1Ex"} : (!fir.ref<!fir.array<8000x120x3xf32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<8000x120x3xf32>>, !fir.ref<!fir.array<8000x120x3xf32>>)
24+
%6 = fir.load %3#0 : !fir.ref<i32>
25+
%7 = fir.convert %6 : (i32) -> i64
26+
%8 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2>
27+
%9 = hlfir.designate %5#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %7) shape %8 : (!fir.ref<!fir.array<8000x120x3xf32>>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
28+
%10 = hlfir.elemental %8 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> {
29+
^bb0(%arg1: index, %arg2: index):
30+
%14 = hlfir.designate %9 (%arg1, %arg2) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
31+
%15 = fir.load %14 : !fir.ref<f32>
32+
%16 = hlfir.no_reassoc %15 : f32
33+
hlfir.yield_element %16 : f32
34+
}
35+
%11 = fir.load %1#0 : !fir.ref<i32>
36+
%12 = fir.convert %11 : (i32) -> i64
37+
%13 = hlfir.designate %5#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %12) shape %8 : (!fir.ref<!fir.array<8000x120x3xf32>>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
38+
hlfir.assign %10 to %13 : !hlfir.expr<7998x120xf32>, !fir.box<!fir.array<7998x120xf32>>
39+
hlfir.destroy %10 : !hlfir.expr<7998x120xf32>
40+
return
41+
}
42+
// CHECK-LABEL: func.func @_QPtest1(
43+
// CHECK: fir.do_loop %[[VAL_21:.*]] =
44+
// CHECK: fir.do_loop %[[VAL_22:.*]] =
45+
// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_17:.*]] (%[[VAL_22]], %[[VAL_21]]) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
46+
// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
47+
// CHECK: %[[VAL_25:.*]] = hlfir.no_reassoc %[[VAL_24]] : f32
48+
// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_20:.*]] (%[[VAL_22]], %[[VAL_21]]) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
49+
// CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_26]] : f32, !fir.ref<f32>
50+
// CHECK: }
51+
// CHECK: }
52+
53+
func.func @_QPtest2(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "x"}) {
54+
%c120 = arith.constant 120 : index
55+
%c7998 = arith.constant 7998 : index
56+
%c1 = arith.constant 1 : index
57+
%c7999 = arith.constant 7999 : index
58+
%c2 = arith.constant 2 : index
59+
%0:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest2Ex"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>)
60+
%1 = fir.load %0#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
61+
%2 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2>
62+
%3 = hlfir.designate %1 (%c2:%c7999:%c1, %c1:%c120:%c1, %c2) shape %2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
63+
%4 = hlfir.elemental %2 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> {
64+
^bb0(%arg1: index, %arg2: index):
65+
%6 = hlfir.designate %3 (%arg1, %arg2) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
66+
%7 = fir.load %6 : !fir.ref<f32>
67+
%8 = hlfir.no_reassoc %7 : f32
68+
hlfir.yield_element %8 : f32
69+
}
70+
%5 = hlfir.designate %1 (%c2:%c7999:%c1, %c1:%c120:%c1, %c1) shape %2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
71+
hlfir.assign %4 to %5 : !hlfir.expr<7998x120xf32>, !fir.box<!fir.array<7998x120xf32>>
72+
hlfir.destroy %4 : !hlfir.expr<7998x120xf32>
73+
return
74+
}
75+
// CHECK-LABEL: func.func @_QPtest2(
76+
// CHECK: fir.do_loop %[[VAL_11:.*]] =
77+
// CHECK: fir.do_loop %[[VAL_12:.*]] =
78+
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_9:.*]] (%[[VAL_12]], %[[VAL_11]]) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
79+
// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
80+
// CHECK: %[[VAL_15:.*]] = hlfir.no_reassoc %[[VAL_14]] : f32
81+
// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10:.*]] (%[[VAL_12]], %[[VAL_11]]) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
82+
// CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref<f32>
83+
// CHECK: }
84+
// CHECK: }
85+
86+
func.func @_QPtest3(%arg0: !fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>> {fir.bindc_name = "x"}) {
87+
%c7998 = arith.constant 7998 : index
88+
%c7999 = arith.constant 7999 : index
89+
%c2 = arith.constant 2 : index
90+
%c3 = arith.constant 3 : index
91+
%c120 = arith.constant 120 : index
92+
%c8000 = arith.constant 8000 : index
93+
%c1 = arith.constant 1 : index
94+
%c10 = arith.constant 10 : index
95+
%0 = fir.alloca i32 {bindc_name = "new", uniq_name = "_QFtest3Enew"}
96+
%1:2 = hlfir.declare %0 {uniq_name = "_QFtest3Enew"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
97+
%2 = fir.alloca i32 {bindc_name = "old", uniq_name = "_QFtest3Eold"}
98+
%3:2 = hlfir.declare %2 {uniq_name = "_QFtest3Eold"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
99+
%4 = fir.shape %c10 : (index) -> !fir.shape<1>
100+
%5:2 = hlfir.declare %arg0(%4) {uniq_name = "_QFtest3Ex"} : (!fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>, !fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>)
101+
%6 = hlfir.designate %5#0 (%c1) : (!fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>, index) -> !fir.ref<!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>
102+
%7 = fir.shape %c8000, %c120, %c3 : (index, index, index) -> !fir.shape<3>
103+
%8 = fir.load %3#0 : !fir.ref<i32>
104+
%9 = fir.convert %8 : (i32) -> i64
105+
%10 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2>
106+
%11 = hlfir.designate %6{"x"} <%7> (%c2:%c7999:%c1, %c1:%c120:%c1, %9) shape %10 : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>, !fir.shape<3>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
107+
%12 = hlfir.elemental %10 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> {
108+
^bb0(%arg1: index, %arg2: index):
109+
%16 = hlfir.designate %11 (%arg1, %arg2) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
110+
%17 = fir.load %16 : !fir.ref<f32>
111+
%18 = hlfir.no_reassoc %17 : f32
112+
hlfir.yield_element %18 : f32
113+
}
114+
%13 = fir.load %1#0 : !fir.ref<i32>
115+
%14 = fir.convert %13 : (i32) -> i64
116+
%15 = hlfir.designate %6{"x"} <%7> (%c2:%c7999:%c1, %c1:%c120:%c1, %14) shape %10 : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>, !fir.shape<3>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
117+
hlfir.assign %12 to %15 : !hlfir.expr<7998x120xf32>, !fir.box<!fir.array<7998x120xf32>>
118+
hlfir.destroy %12 : !hlfir.expr<7998x120xf32>
119+
return
120+
}
121+
// CHECK-LABEL: func.func @_QPtest3(
122+
// CHECK: fir.do_loop %[[VAL_24:.*]] =
123+
// CHECK: fir.do_loop %[[VAL_25:.*]] =
124+
// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_20:.*]] (%[[VAL_25]], %[[VAL_24]]) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
125+
// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f32>
126+
// CHECK: %[[VAL_28:.*]] = hlfir.no_reassoc %[[VAL_27]] : f32
127+
// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_23:.*]] (%[[VAL_25]], %[[VAL_24]]) : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
128+
// CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_29]] : f32, !fir.ref<f32>
129+
// CHECK: }
130+
// CHECK: }

0 commit comments

Comments
 (0)