Skip to content

Commit ac8b5a9

Browse files
authored
[mlir][scf]-Fix reverse iterator overflow in loop traversal (#128421)
Fix a bug in `getUntiledProducerFromSliceSource` where AddressSanitizer reports a heap-buffer-overflow because the loop traversal could dereference the reverse iterator over `loops` past its end. This PR makes the traversal stop once the iterator reaches `loops.rend()` and adds a lit test that reproduces the issue.
1 parent ba7e273 commit ac8b5a9

File tree

2 files changed

+57
-1
lines changed

2 files changed

+57
-1
lines changed

mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1119,8 +1119,10 @@ static std::tuple<OpResult, std::optional<OpOperand *>>
11191119
getUntiledProducerFromSliceSource(OpOperand *source,
11201120
ArrayRef<LoopLikeOpInterface> loops) {
11211121
std::optional<OpOperand *> destinationIterArg;
1122+
assert(!loops.empty() && "expected non empty loops container");
11221123
auto loopIt = loops.rbegin();
1123-
while (auto iterArg = dyn_cast<BlockArgument>(source->get())) {
1124+
while (loopIt != loops.rend() && isa<BlockArgument>(source->get())) {
1125+
auto iterArg = cast<BlockArgument>(source->get());
11241126
auto loop = *loopIt;
11251127
if (iterArg.getOwner()->getParentOp() != loop)
11261128
break;

mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,3 +634,57 @@ module attributes {transform.with_named_sequence} {
634634
// CHECK: %[[INSERT_SLICE:.+]] = tensor.insert_slice %[[GENERIC]] into %[[ITER_ARG]]
635635
// CHECK: scf.yield %[[INSERT_SLICE]]
636636
// CHECK: return %[[FOR_RESULT]]
637+
638+
// -----
639+
640+
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
641+
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2, d1)>
642+
module {
643+
func.func private @tile_one_consumer_using_tile_and_fuse(%arg0: tensor<16x128x48x96xf32>, %arg1: tensor<16x96x48x128xf32>) -> tensor<16x96x48x128xf32> {
644+
%0 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<16x128x48x96xf32>) outs(%arg1 : tensor<16x96x48x128xf32>) {
645+
^bb0(%in: f32, %out: f32):
646+
linalg.yield %in : f32
647+
} -> tensor<16x96x48x128xf32>
648+
return %0 : tensor<16x96x48x128xf32>
649+
}
650+
}
651+
module attributes {transform.with_named_sequence} {
652+
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
653+
%generic = transform.structured.match ops{["linalg.generic"]} in %arg1
654+
: (!transform.any_op) -> !transform.any_op
655+
%a, %loops:4 = transform.structured.fuse %generic {tile_sizes = [1, 16, 16, 16], tile_interchange = [0, 1, 2, 3], apply_cleanup = false}
656+
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
657+
transform.yield
658+
}
659+
}
660+
661+
// CHECK: func.func private @tile_one_consumer_using_tile_and_fuse(%[[VAL_0:.*]]: tensor<16x128x48x96xf32>, %[[VAL_1:.*]]: tensor<16x96x48x128xf32>) -> tensor<16x96x48x128xf32> {
662+
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
663+
// CHECK: %[[VAL_3:.*]] = arith.constant 16 : index
664+
// CHECK: %[[VAL_4:.*]] = arith.constant 128 : index
665+
// CHECK: %[[VAL_5:.*]] = arith.constant 48 : index
666+
// CHECK: %[[VAL_6:.*]] = arith.constant 96 : index
667+
// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
668+
// CHECK: %[[VAL_8:.*]] = scf.for %[[VAL_9:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_7]] iter_args(%[[VAL_10:.*]] = %[[VAL_1]]) -> (tensor<16x96x48x128xf32>) {
669+
// CHECK: %[[VAL_11:.*]] = scf.for %[[VAL_12:.*]] = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_3]] iter_args(%[[VAL_13:.*]] = %[[VAL_10]]) -> (tensor<16x96x48x128xf32>) {
670+
// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (tensor<16x96x48x128xf32>) {
671+
// CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_3]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (tensor<16x96x48x128xf32>) {
672+
// CHECK: %[[VAL_20:.*]] = tensor.extract_slice %[[VAL_0]]{{\[}}%[[VAL_9]], %[[VAL_12]], %[[VAL_15]], %[[VAL_18]]] [1, 16, 16, 16] [1, 1, 1, 1] : tensor<16x128x48x96xf32> to tensor<1x16x16x16xf32>
673+
// CHECK: %[[VAL_21:.*]] = tensor.extract_slice %[[VAL_19]]{{\[}}%[[VAL_9]], %[[VAL_18]], %[[VAL_15]], %[[VAL_12]]] [1, 16, 16, 16] [1, 1, 1, 1] : tensor<16x96x48x128xf32> to tensor<1x16x16x16xf32>
674+
// CHECK: %[[VAL_22:.*]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[VAL_20]] : tensor<1x16x16x16xf32>) outs(%[[VAL_21]] : tensor<1x16x16x16xf32>) {
675+
// CHECK: ^bb0(%[[VAL_23:.*]]: f32, %[[VAL_24:.*]]: f32):
676+
// CHECK: linalg.yield %[[VAL_23]] : f32
677+
// CHECK: } -> tensor<1x16x16x16xf32>
678+
// CHECK: %[[VAL_25:.*]] = tensor.insert_slice %[[VAL_26:.*]] into %[[VAL_19]]{{\[}}%[[VAL_9]], %[[VAL_18]], %[[VAL_15]], %[[VAL_12]]] [1, 16, 16, 16] [1, 1, 1, 1] : tensor<1x16x16x16xf32> into tensor<16x96x48x128xf32>
679+
// CHECK: scf.yield %[[VAL_25]] : tensor<16x96x48x128xf32>
680+
// CHECK: }
681+
// CHECK: scf.yield %[[VAL_27:.*]] : tensor<16x96x48x128xf32>
682+
// CHECK: }
683+
// CHECK: scf.yield %[[VAL_28:.*]] : tensor<16x96x48x128xf32>
684+
// CHECK: }
685+
// CHECK: scf.yield %[[VAL_29:.*]] : tensor<16x96x48x128xf32>
686+
// CHECK: }
687+
// CHECK: return %[[VAL_30:.*]] : tensor<16x96x48x128xf32>
688+
// CHECK: }
689+
// CHECK: }
690+

0 commit comments

Comments
 (0)