Commit ad2f9f6
[mlir] Fix subtensor_insert bufferization.
The bufferization of subtensor_insert was incorrect in the presence of a tensor argument with multiple uses: it wrote into the converted memref operand, but there is no guarantee that the converted memref for that operand is safe to write into. When the destination tensor has other uses, the in-place write to that shared converted memref violates the tensor-level semantics.

I left some comments in a TODO about ways forward on this. I will be working actively on this problem in the coming days.

Differential Revision: https://reviews.llvm.org/D91371
1 parent d0ba6c4 commit ad2f9f6
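
To make the fix concrete, here is a hand-written sketch (not verbatim pass output; the value names are invented and the subview result type is simplified) of what the fixed bufferization emits for a single subtensor_insert of a tensor<1xf32> into a tensor<2xf32>: the write lands in a freshly allocated copy of the destination buffer rather than in the converted operand itself.

  // Schematic sketch only; %dst_memref and %src_memref stand for the
  // converted destination and source memrefs of the subtensor_insert.
  %copy = alloc() : memref<2xf32>
  linalg.copy(%dst_memref, %copy) : memref<2xf32>, memref<2xf32>
  %view = subview %copy[0] [1] [1] : memref<2xf32> to memref<1xf32>
  linalg.copy(%src_memref, %view) : memref<1xf32>, memref<1xf32>
  %result = tensor_load %copy : memref<2xf32>

Because the write goes into %copy, any other use of the original destination tensor keeps observing the unmodified buffer, at the cost of an extra allocation and copy (see the new comment in SubTensorInsertOpConverter below).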

File tree

3 files changed: +73 -18 lines changed
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -func-bufferize \
+// RUN: -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @main() {
+  %const = constant dense<10.0> : tensor<2xf32>
+  %insert_val = constant dense<20.0> : tensor<1xf32>
+
+  // Both of these subtensor_insert ops insert into the same original tensor
+  // value `%const`. This can easily cause bugs if at the memref level
+  // we attempt to write in-place into the memref that %const has been
+  // converted into.
+  %inserted_at_position_0 = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
+  %inserted_at_position_1 = subtensor_insert %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32>
+
+  %unranked_at_position_0 = tensor_cast %inserted_at_position_0 : tensor<2xf32> to tensor<*xf32>
+  call @print_memref_f32(%unranked_at_position_0) : (tensor<*xf32>) -> ()
+
+  // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
+  // CHECK-SAME: rank = 1 offset = 0 sizes = [2] strides = [1] data =
+  // CHECK-NEXT: [20, 10]
+
+  %unranked_at_position_1 = tensor_cast %inserted_at_position_1 : tensor<2xf32> to tensor<*xf32>
+  call @print_memref_f32(%unranked_at_position_1) : (tensor<*xf32>) -> ()
+
+  // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
+  // CHECK-SAME: rank = 1 offset = 0 sizes = [2] strides = [1] data =
+  // CHECK-NEXT: [10, 20]
+
+  return
+}
+
+func @print_memref_f32(%ptr : tensor<*xf32>)

mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp

Lines changed: 21 additions & 14 deletions
@@ -40,6 +40,19 @@ static Value maybeConvertToIndex(Location loc, Value val, OpBuilder &b) {
   return b.create<IndexCastOp>(loc, val, b.getIndexType());
 }
 
+static Value cloneMemref(Location loc, Value memref, OpBuilder &b) {
+  auto memrefType = memref.getType().cast<MemRefType>();
+  SmallVector<Value, 4> dynOperands;
+  for (auto dim : llvm::enumerate(memrefType.getShape())) {
+    if (dim.value() == TensorType::kDynamicSize) {
+      dynOperands.push_back(b.create<DimOp>(loc, memref, dim.index()));
+    }
+  }
+  auto alloc = b.create<AllocOp>(loc, memrefType, dynOperands);
+  b.create<linalg::CopyOp>(loc, memref, alloc);
+  return alloc;
+}
+
 static LogicalResult
 allocateBuffersForResults(Location loc, LinalgOp linalgOp,
                           linalg::GenericOpAdaptor &adaptor,
@@ -65,19 +78,10 @@ allocateBuffersForResults(Location loc, LinalgOp linalgOp,
     // results.
     // TODO: update this assumption because the reality is more complex
    // under linalg on tensor based transformations.
-    bool foldedInitTensor = resultIndex < linalgOp.getNumInitTensors();
-    if (foldedInitTensor) {
-      Value initTensor = linalgOp.getInitTensor(resultIndex);
-      Value initBuffer = adaptor.init_tensors()[resultIndex];
-      SmallVector<Value, 4> dynOperands;
-      for (auto dim : llvm::enumerate(tensorShape)) {
-        if (dim.value() == TensorType::kDynamicSize) {
-          dynOperands.push_back(b.create<DimOp>(loc, initTensor, dim.index()));
-        }
-      }
-      auto alloc = b.create<AllocOp>(loc, memrefType, dynOperands);
-      b.create<linalg::CopyOp>(loc, initBuffer, alloc);
-      resultBuffers.push_back(alloc);
+    bool hasInitTensor = resultIndex < linalgOp.getNumInitTensors();
+    if (hasInitTensor) {
+      resultBuffers.push_back(
+          cloneMemref(loc, adaptor.init_tensors()[resultIndex], b));
       continue;
     }
 
@@ -303,7 +307,10 @@ class SubTensorInsertOpConverter
     Value sourceMemRef = adaptor.source();
     assert(sourceMemRef.getType().isa<MemRefType>());
 
-    Value destMemRef = adaptor.dest();
+    // For now, be conservative and copy the converted input memref.
+    // In general, the converted input memref here could be aliased or could
+    // point into constant memory, so mutating it would lead to miscompilations.
+    Value destMemRef = cloneMemref(op.getLoc(), adaptor.dest(), rewriter);
     assert(destMemRef.getType().isa<MemRefType>());
 
     // Take a subview to copy the small memref.

mlir/test/Dialect/Linalg/bufferize.mlir

Lines changed: 17 additions & 4 deletions
@@ -199,20 +199,33 @@ func @bufferize_subtensor_insert(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %
   // CHECK: %[[IDX:.*]] = call @make_index() : () -> index
   %i0 = call @make_index() : () -> index
 
+
   // CHECK-DAG: %[[M0:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
   // CHECK-DAG: %[[SM0:.*]] = tensor_to_memref %[[ST0]] : memref<2x3xf32>
-  // CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0]][0, 0] [2, 3] [1, 1]
+  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
+  // CHECK-NEXT: %[[DIM0:.*]] = dim %[[M0]], %[[C0]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
+  // CHECK-NEXT: %[[DIM1:.*]] = dim %[[M0]], %[[C1]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[M0_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+  // CHECK-NEXT: linalg.copy(%[[M0]], %[[M0_COPY]]) : memref<?x?xf32>, memref<?x?xf32>
+  // CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0_COPY]][0, 0] [2, 3] [1, 1]
   // CHECK-SAME: memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
   // CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]>
-  // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0_COPY]] : memref<?x?xf32>
   %t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
 
   // CHECK-DAG: %[[M1:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
   // CHECK-DAG: %[[SM1:.*]] = tensor_to_memref %[[ST1]] : memref<2x?xf32>
-  // CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
+  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
+  // CHECK-NEXT: %[[DIM0:.*]] = dim %[[M1]], %[[C0]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
+  // CHECK-NEXT: %[[DIM1:.*]] = dim %[[M1]], %[[C1]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[M1_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+  // CHECK-NEXT: linalg.copy(%[[M1]], %[[M1_COPY]]) : memref<?x?xf32>, memref<?x?xf32>
+  // CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1_COPY]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
   // CHECK-SAME: memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
   // CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]>
-  // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1_COPY]] : memref<?x?xf32>
   %t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>
 
   // CHECK: return %[[RT0]], %[[RT1]]
