Skip to content

Commit c50feca

Browse files
authored
[mlir] Fix region simplification bug when later blocks use prior block argument values (#97960)
This fixes #94520 by ensuring that if any of a block's arguments are used outside of the original block, the block is not considered a candidate for merging. More details: the root cause of the issue described in #94520 was that `^bb2` and `^bb5` were being merged even though `%4` (an argument to `^bb2`) was being used later in `^bb7`. When the block merge occurred, it unintentionally changed the value of `%4` for all downstream code. This change prevents that from happening.
1 parent f574b9c commit c50feca

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

mlir/lib/Transforms/Utils/RegionUtils.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,15 @@ static LogicalResult mergeIdenticalBlocks(RewriterBase &rewriter,
877877
if (hasNonEmptyRegion)
878878
continue;
879879

880+
// Don't allow merging if this block's arguments are used outside of the
881+
// original block.
882+
bool argHasExternalUsers = llvm::any_of(
883+
block->getArguments(), [block](mlir::BlockArgument &arg) {
884+
return arg.isUsedOutsideOfBlock(block);
885+
});
886+
if (argHasExternalUsers)
887+
continue;
888+
880889
// Try to add this block to an existing cluster.
881890
bool addedToCluster = false;
882891
for (auto &cluster : clusters)

mlir/test/Transforms/canonicalize-block-merge.mlir

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,31 @@ func.func @dead_dealloc_fold_multi_use(%cond : i1) {
290290
memref.dealloc %a: memref<4xf32>
291291
return
292292
}
293+
294+
// CHECK-LABEL: func @nested_loop
295+
func.func @nested_loop(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i1) {
296+
// Irreducible control-flow: enter the middle of the loop in LoopBody_entry here.
297+
"test.foo_br"(%arg0, %arg4)[^LoopBody_entry] : (i32, i32) -> ()
298+
299+
// Loop exit condition: jump to EXIT or to one of the LoopBody blocks.
300+
^Loop_header: // 2 preds: ^LoopBody_entry, ^LoopBody_other
301+
// Consumes the block arg from LoopBody_entry
302+
// Because of this use here, we can't merge the two blocks below.
303+
"test.foo_br2"(%0)[^EXIT, ^LoopBody_entry, ^LoopBody_other] : (i32) -> ()
304+
305+
// LoopBody_entry is branched to from both the entry block (bb0) and Loop_header.
306+
// It **dominates** the Loop_header.
307+
^LoopBody_entry(%0: i32): // 2 preds: ^bb0, ^Loop_header
308+
// CHECK: test.bar
309+
%1 = "test.bar"(%0) : (i32) -> i32
310+
cf.br ^Loop_header
311+
312+
// Other block inside the loop, not dominating the header
313+
^LoopBody_other(%2: i32): // pred: ^Loop_header
314+
// CHECK: test.bar
315+
%3 = "test.bar"(%2) : (i32) -> i32
316+
cf.br ^Loop_header
317+
318+
^EXIT: // pred: ^Loop_header
319+
return
320+
}

0 commit comments

Comments
 (0)