
Commit 645ed9d

[mlir][bufferization] Fix OneShotBufferize when defaultMemorySpaceFn is used
As described in issue #91518, a previous PR (#78484) introduced `defaultMemorySpaceFn` into the bufferization options, allowing one to inform OneShotBufferize that it should use a specified function to derive the memory space attribute from the encoding attribute attached to tensor types. However, introducing this feature exposed unhandled edge cases, examples of which are introduced by this change in the new test under `test/Dialect/Bufferization/Transforms/one-shot-bufferize-encodings.mlir`.

Fixing the inconsistencies introduced by `defaultMemorySpaceFn` is pretty simple. This change:

- updates the `bufferization.to_memref` and `bufferization.to_tensor` operations to explicitly include operand and destination types, whereas previously they relied on type inference to deduce the tensor types. Since type inference cannot recover the correct tensor encoding/memory space, the operand and result types must be explicitly included.
- makes minor updates to other bufferization functions to handle the changes in building the above ops.
- updates bufferization of `tensor.from_elements` to handle memory space.
1 parent 2ed8c5d commit 645ed9d
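Concretely, both cast ops now spell out the tensor type next to the memref type. A before/after sketch assembled from the updated tests (the %t/%m value names are illustrative):

    // Before: the tensor type was inferred from the memref type.
    %m = bufferization.to_memref %t : memref<5xf32>

    // After: operand and result types are explicit, so encodings survive.
    %m = bufferization.to_memref %t : tensor<5xf32> -> memref<5xf32>
    %t2 = bufferization.to_tensor %m : memref<5xf32> -> tensor<5xf32>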

68 files changed: +514 −346 lines


mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h

Lines changed: 6 additions & 0 deletions
@@ -12,10 +12,16 @@
 #include "mlir/Bytecode/BytecodeOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/IR/BuiltinTypeInterfaces.h"
 #include "mlir/Interfaces/CopyOpInterface.h"
 #include "mlir/Interfaces/DestinationStyleOpInterface.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SubsetOpInterface.h"
+#include "llvm/Support/Debug.h"
+
+namespace mlir::bufferization::detail {
+bool tensorTypesMatchUpToEncoding(Type lhs, Type rhs);
+} // namespace mlir::bufferization::detail

 //===----------------------------------------------------------------------===//
 // Bufferization Dialect

mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td

Lines changed: 12 additions & 6 deletions
@@ -387,9 +387,7 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [
     BufferizableOpInterface,
     SameOperandsAndResultShape,
     SameOperandsAndResultElementType,
-    TypesMatchWith<"result type matches tensor equivalent of 'memref'",
-                   "memref", "result",
-                   "memref::getTensorTypeFromMemRefType($_self)">
+    AllElementTypesMatch<["memref", "result"]>
   ]> {
   let summary = "create a tensor from a `memref`";
   let description = [{
@@ -476,9 +474,16 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [

   let assemblyFormat = [{
     $memref (`restrict` $restrict^)? (`writable` $writable^)? attr-dict
-      `:` type($memref)
+      `:` type($memref) `->` type($result)
   }];

+  let builders = [
+    OpBuilder<(ins "Value":$memref, CArg<"bool", "false">:$restrict, CArg<"bool", "false">:$writeable), [{
+      auto rtt = memref::getTensorTypeFromMemRefType(memref.getType());
+      build($_builder, $_state, rtt, memref, restrict, writeable);
+    }]>
+  ];
+
   let hasCanonicalizer = 1;
   let hasFolder = 1;
 }
@@ -495,7 +500,8 @@ def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref", [
     Pure,
     TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'",
                    "memref", "tensor",
-                   "memref::getTensorTypeFromMemRefType($_self)">
+                   "memref::getTensorTypeFromMemRefType($_self)",
+                   "bufferization::detail::tensorTypesMatchUpToEncoding">
   ]> {
   let summary = "cast a tensor to memref";
   let description = [{
@@ -550,7 +556,7 @@ def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref", [
   }];

   let assemblyFormat = [{
-    $tensor (`read_only` $read_only^)? attr-dict `:` type($memref)
+    $tensor (`read_only` $read_only^)? attr-dict `:` type($tensor) `->` type($memref)
   }];

   let hasFolder = 1;
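The added builder keeps old type-inferring call sites compiling: when only a memref value is given, the result tensor type is derived with memref::getTensorTypeFromMemRefType. A usage sketch (rewriter, loc, and buffer are assumed to be in scope; the explicit-type form mirrors the call added in BufferizableOpInterface.cpp below):

    // Result tensor type inferred from the memref type (encoding is lost).
    Value t = rewriter.create<bufferization::ToTensorOp>(loc, buffer);
    // Result tensor type supplied explicitly (encoding/memory space preserved).
    Value t2 = rewriter.create<bufferization::ToTensorOp>(loc, resultTensorType, buffer);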

mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td

Lines changed: 4 additions & 0 deletions
@@ -510,6 +510,10 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
            /*default=*/"false",
            "The memory space of an memref types must always be inferred. If "
            "unset, a default memory space of 0 is used otherwise.">,
+    Option<"useEncodingForMemorySpace", "use-encoding-for-memory-space", "bool",
+           /*default=*/"false",
+           "Use the Tensor encoding attribute for the memory space. Exclusive to"
+           " the 'must-infer-memory-space option'">,
     Option<"testAnalysisOnly", "test-analysis-only", "bool",
            /*default=*/"false",
            "Test only: Only run inplaceability analysis and annotate IR">,

mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp

Lines changed: 1 addition & 1 deletion
@@ -718,7 +718,7 @@ void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,
       // loose all of its users and eventually DCE away.
       rewriter.setInsertionPointAfter(op);
       replacement = rewriter.create<bufferization::ToTensorOp>(
-          replacement.getLoc(), replacement);
+          replacement.getLoc(), opResult.getType(), replacement);
     }
     replacements.push_back(replacement);
   }

mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp

Lines changed: 10 additions & 0 deletions
@@ -23,6 +23,16 @@ using namespace mlir::bufferization;
 // Helper functions
 //===----------------------------------------------------------------------===//

+bool bufferization::detail::tensorTypesMatchUpToEncoding(Type lhs, Type rhs) {
+  auto lhsType = cast<ShapedType>(lhs);
+  auto rhsType = cast<ShapedType>(rhs);
+  if (lhsType.getElementType() != rhsType.getElementType())
+    return false;
+  if (lhsType.hasRank() && rhsType.hasRank())
+    return lhsType.getShape() == rhsType.getShape();
+  return true;
+}
+
 FailureOr<Value> mlir::bufferization::castOrReallocMemRefValue(
     OpBuilder &b, Value value, MemRefType destType,
     const BufferizationOptions &options) {
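With this helper installed as the comparator in the TypesMatchWith trait above, the to_memref verifier accepts a tensor operand whose encoding the memref-to-tensor inference cannot reconstruct, as long as shape and element type agree. One accepted example, taken from the new test (%m is an illustrative name):

    %m = bufferization.to_memref %arg0 : tensor<128xf32, 1 : i64> -> memref<128xf32, strided<[?], offset: ?>, 1>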

mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp

Lines changed: 12 additions & 1 deletion
@@ -66,10 +66,14 @@ BufferizeTypeConverter::BufferizeTypeConverter() {
          ValueRange inputs, Location loc) -> Value {
         assert(inputs.size() == 1 && "expected exactly one input");

+        // Unranked to ranked casts must be explicit.
+        if (auto inputType = dyn_cast<UnrankedMemRefType>(inputs[0].getType()))
+          return nullptr;
+
         if (auto inputType = dyn_cast<MemRefType>(inputs[0].getType())) {
           // MemRef to MemRef cast.
           assert(inputType != type && "expected different types");
-          // Unranked to ranked and ranked to unranked casts must be explicit.
+          // Ranked to unranked casts must be explicit.
           auto rankedDestType = dyn_cast<MemRefType>(type);
           if (!rankedDestType)
             return nullptr;
@@ -152,6 +156,13 @@ struct OneShotBufferizePass
           [](TensorType t) -> std::optional<Attribute> {
             return std::nullopt;
           };
+    } else if (useEncodingForMemorySpace) {
+      opt.defaultMemorySpaceFn =
+          [](TensorType t) -> std::optional<Attribute> {
+            if (auto rtt = dyn_cast<RankedTensorType>(t))
+              return rtt.getEncoding();
+            return std::nullopt;
+          };
     }
     opt.printConflicts = printConflicts;
     opt.bufferAlignment = bufferAlignment;
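For clients driving bufferization programmatically instead of through the pass option, the same function can be installed on the options struct directly. A minimal sketch mirroring the lambda above (the surrounding setup is assumed, not part of this commit):

    OneShotBufferizationOptions options;
    // Use the tensor's encoding attribute as the memory space, if ranked.
    options.defaultMemorySpaceFn = [](TensorType t) -> std::optional<Attribute> {
      if (auto rtt = dyn_cast<RankedTensorType>(t))
        return rtt.getEncoding();
      return std::nullopt;
    };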

mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp

Lines changed: 5 additions & 7 deletions
@@ -480,10 +480,6 @@ struct FromElementsOpInterface
     auto fromElementsOp = cast<tensor::FromElementsOp>(op);
     auto tensorType = cast<RankedTensorType>(fromElementsOp.getType());

-    // TODO: Implement memory space for this op.
-    if (options.defaultMemorySpaceFn(tensorType) != Attribute())
-      return op->emitError("memory space not implemented yet");
-
     // Allocate a buffer for the result.
     Location loc = op->getLoc();
     auto shape = tensorType.getShape();
@@ -493,10 +489,12 @@ struct FromElementsOpInterface
                                  /*copy=*/false);
     if (failed(tensorAlloc))
       return failure();
-    auto memrefType =
-        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+    FailureOr<BaseMemRefType> memrefType =
+        bufferization::getBufferType(*tensorAlloc, options);
+    if (failed(memrefType))
+      return failure();
     Value buffer = rewriter.create<bufferization::ToMemrefOp>(
-        op->getLoc(), memrefType, *tensorAlloc);
+        op->getLoc(), *memrefType, *tensorAlloc);

     // Case: tensor<0xelem_type>.
     if (fromElementsOp.getElements().empty()) {
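The net effect is that tensor.from_elements no longer hard-codes memory space 0 or rejects non-default spaces: the buffer type now comes from bufferization::getBufferType on the allocated tensor, so an encoding-derived space propagates to the allocation. Condensed from the new @from_elements test below:

    // Input: an encoding of 1 on the result tensor type...
    %t = tensor.from_elements %fill, %fill, %fill : tensor<3xf32, 1>
    // ...now bufferizes to stores into an allocation in memory space 1:
    %alloc = memref.alloc() : memref<3xf32, 1>

Previously this case failed with "memory space not implemented yet".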

mlir/test/Dialect/Arith/bufferize.mlir

Lines changed: 3 additions & 3 deletions
@@ -7,7 +7,7 @@ func.func @index_cast(%tensor: tensor<i32>, %scalar: i32) -> (tensor<index>, ind
   %index_scalar = arith.index_cast %scalar : i32 to index
   return %index_tensor, %index_scalar : tensor<index>, index
 }
-// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<i32>
+// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor<i32>
 // CHECK-NEXT: %[[INDEX_MEMREF:.*]] = arith.index_cast %[[MEMREF]]
 // CHECK-SAME: memref<i32> to memref<index>
 // CHECK-NEXT: %[[INDEX_TENSOR:.*]] = bufferization.to_tensor %[[INDEX_MEMREF]]
@@ -83,8 +83,8 @@ func.func @non_tensor() {
 // CHECK-SAME: %[[PRED:.*]]: i1,
 // CHECK-SAME: %[[TRUE_VAL:.*]]: tensor<f32>,
 // CHECK-SAME: %[[FALSE_VAL:.*]]: tensor<f32>) -> tensor<f32> {
-// CHECK-DAG: %[[TRUE_VAL_MEMREF:.*]] = bufferization.to_memref %[[TRUE_VAL]] : memref<f32>
-// CHECK-DAG: %[[FALSE_VAL_MEMREF:.*]] = bufferization.to_memref %[[FALSE_VAL]] : memref<f32>
+// CHECK-DAG: %[[TRUE_VAL_MEMREF:.*]] = bufferization.to_memref %[[TRUE_VAL]] : tensor<f32>
+// CHECK-DAG: %[[FALSE_VAL_MEMREF:.*]] = bufferization.to_memref %[[FALSE_VAL]] : tensor<f32>
 // CHECK: %[[RET_MEMREF:.*]] = arith.select %[[PRED]], %[[TRUE_VAL_MEMREF]], %[[FALSE_VAL_MEMREF]] : memref<f32>
 // CHECK: %[[RET:.*]] = bufferization.to_tensor %[[RET_MEMREF]] : memref<f32>
 // CHECK: return %[[RET]] : tensor<f32>

mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-other.mlir

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 // CHECK-NEXT: %[[clone:.*]] = bufferization.clone %[[m]]
 // CHECK-NEXT: return %[[clone]]
 func.func private @no_interface_no_operands(%t : tensor<?x?x?xf16>) -> memref<?x?x?xf16> {
-  %0 = bufferization.to_memref %t : memref<?x?x?xf16>
+  %0 = bufferization.to_memref %t : tensor<?x?x?xf16> -> memref<?x?x?xf16>
   return %0 : memref<?x?x?xf16>
 }

mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir

Lines changed: 2 additions & 2 deletions
@@ -96,7 +96,7 @@ func.func @to_memref_not_read_only(%idx : index, %f: f32) -> f32 {
   // Some op may write into the result of to_memref later.
   // CHECK: bufferization.to_memref
   // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
-  %m = bufferization.to_memref %t : memref<5xf32>
+  %m = bufferization.to_memref %t : tensor<5xf32> -> memref<5xf32>
   %2 = tensor.extract %t[%idx] : tensor<5xf32>
   return %2 : f32
 }
@@ -112,7 +112,7 @@ func.func @to_memref_read_only(%idx : index, %f: f32) -> f32 {
   // Some op may write into the result of to_memref later.
   // CHECK: bufferization.to_memref
   // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
-  %m = bufferization.to_memref %t {read_only} : memref<5xf32>
+  %m = bufferization.to_memref %t {read_only} : tensor<5xf32> -> memref<5xf32>
   %2 = tensor.extract %t[%idx] : tensor<5xf32>
   return %2 : f32
 }
mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encodings.mlir

Lines changed: 133 additions & 0 deletions

@@ -0,0 +1,133 @@
+// RUN: mlir-opt %s -one-shot-bufferize="use-encoding-for-memory-space" -split-input-file | FileCheck %s
+
+// TODO: move to tensor dialect tests
+func.func @from_elements(%fill: f32, %f: f32, %idx: index) -> tensor<3xf32, 1> {
+  %t = tensor.from_elements %fill, %fill, %fill : tensor<3xf32, 1>
+  %i = tensor.insert %f into %t[%idx] : tensor<3xf32, 1>
+  return %i : tensor<3xf32, 1>
+}
+
+// CHECK-LABEL: @from_elements
+// CHECK-SAME: (%[[arg0:.+]]: f32, %[[arg1:.+]]: f32, %[[arg2:.+]]: index) -> tensor<3xf32, 1 : i64>
+// CHECK: %[[alloc:.+]] = memref.alloc() {{.*}} : memref<3xf32, 1>
+// CHECK-DAG: %[[c0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index
+// CHECK: memref.store %[[arg0]], %[[alloc]][%[[c0]]] : memref<3xf32, 1>
+// CHECK: memref.store %[[arg0]], %[[alloc]][%[[c1]]] : memref<3xf32, 1>
+// CHECK: memref.store %[[arg0]], %[[alloc]][%[[c2]]] : memref<3xf32, 1>
+// CHECK: memref.store %[[arg1]], %[[alloc]][%[[arg2]]] : memref<3xf32, 1>
+// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<3xf32, 1> -> tensor<3xf32, 1 : i64>
+// CHECK: return %[[v0]] : tensor<3xf32, 1 : i64>
+
+// -----
+
+func.func @alloc_tesor_with_space_no_encoding() -> tensor<128xf32> {
+  %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<128xf32>
+  return %0 : tensor<128xf32>
+}
+
+// CHECK-LABEL: @alloc_tesor_with_space_no_encoding
+// CHECK-SAME: () -> tensor<128xf32> {
+// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1>
+// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> -> tensor<128xf32>
+// CHECK: return %[[v0]] : tensor<128xf32>
+
+// -----
+
+func.func @alloc_tesor_with_space_and_cast() -> tensor<128xf32, 1> {
+  %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<128xf32>
+  %1 = tensor.cast %0 : tensor<128xf32> to tensor<128xf32, 1>
+  return %1 : tensor<128xf32, 1>
+}
+
+// CHECK-LABEL: @alloc_tesor_with_space_and_cast
+// CHECK-SAME: () -> tensor<128xf32, 1 : i64> {
+// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1>
+// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> -> tensor<128xf32, 1 : i64>
+// CHECK: return %[[v0]] : tensor<128xf32, 1 : i64>
+
+// -----
+
+func.func @alloc_tesor_with_space_with_encoding() -> tensor<128xf32, 1 : i64> {
+  %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<128xf32, 1 : i64>
+  return %0 : tensor<128xf32, 1 : i64>
+}
+
+// CHECK-LABEL: @alloc_tesor_with_space_with_encoding
+// CHECK-SAME: () -> tensor<128xf32, 1 : i64> {
+// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1>
+// CHECK: %[[v0:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> -> tensor<128xf32, 1 : i64>
+// CHECK: return %[[v0]] : tensor<128xf32, 1 : i64>
+
+// -----
+
+func.func @alloc_tesor_copy_from_default_space(%arg0: tensor<128xf32>) -> tensor<128xf32> {
+  %0 = bufferization.alloc_tensor() copy(%arg0) {memory_space = 1 : i64} : tensor<128xf32>
+  return %0 : tensor<128xf32>
+}
+
+// CHECK-LABEL: @alloc_tesor_copy_from_default_space
+// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32>) -> tensor<128xf32> {
+// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32> -> memref<128xf32, strided<[?], offset: ?>>
+// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1>
+// CHECK: memref.copy %[[v0]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>> to memref<128xf32, 1>
+// CHECK: %[[v1:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 1> -> tensor<128xf32>
+// CHECK: return %[[v1]] : tensor<128xf32>
+
+// -----
+
+func.func @alloc_tesor_copy_from_non_default_space(%arg0: tensor<128xf32, 1>) -> tensor<128xf32, 2> {
+  %0 = bufferization.alloc_tensor() copy(%arg0) {memory_space = 2 : i64} : tensor<128xf32, 1>
+  %1 = tensor.cast %0 : tensor<128xf32, 1> to tensor<128xf32, 2>
+  return %1 : tensor<128xf32, 2>
+}
+
+// CHECK-LABEL: @alloc_tesor_copy_from_non_default_space
+// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>) -> tensor<128xf32, 2 : i64> {
+// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> -> memref<128xf32, strided<[?], offset: ?>, 1>
+// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 2>
+// CHECK: memref.copy %[[v0]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 2>
+// CHECK: %[[v1:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 2> -> tensor<128xf32, 2 : i64>
+// CHECK: return %[[v1]] : tensor<128xf32, 2 : i64>
+
+// -----
+
+// TODO: this should be illegal since ultimately we can not eliminate the `bufferization.to_tensor` when we
+// bufferize function boundaries.
+func.func @alloc_tesor_copy_from_non_default_space_no_cast(%arg0: tensor<128xf32, 1>,
+    %arg1: tensor<4xf32, 1>) -> tensor<128xf32, 1> {
+  %0 = bufferization.alloc_tensor() copy(%arg0) {memory_space = 2 : i64} : tensor<128xf32, 1>
+  %1 = tensor.insert_slice %arg1 into %arg0 [0][4][1] : tensor<4xf32, 1> into tensor<128xf32, 1>
+  return %0 : tensor<128xf32, 1>
+}
+
+// CHECK-LABEL: @alloc_tesor_copy_from_non_default_space_no_cast
+// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>, %[[arg1:.+]]: tensor<4xf32, 1 : i64>) -> tensor<128xf32, 1 : i64> {
+// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg1]] : tensor<4xf32, 1 : i64> -> memref<4xf32, strided<[?], offset: ?>, 1>
+// CHECK: %[[v1:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> -> memref<128xf32, strided<[?], offset: ?>, 1>
+// CHECK: %[[v2:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> -> memref<128xf32, strided<[?], offset: ?>, 1>
+// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 2>
+// CHECK: memref.copy %[[v2]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 2>
+// CHECK: %[[v3:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 2> -> tensor<128xf32, 1 : i64>
+// CHECK: %[[alloc_0:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 1>
+// CHECK: memref.copy %[[v1]], %[[alloc_0]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 1>
+// CHECK: %[[subview:.+]] = memref.subview %[[alloc_0]][0] [4] [1] : memref<128xf32, 1> to memref<4xf32, strided<[1]>, 1>
+// CHECK: memref.copy %[[v0]], %[[subview]] : memref<4xf32, strided<[?], offset: ?>, 1> to memref<4xf32, strided<[1]>, 1>
+// CHECK: return %[[v3]] : tensor<128xf32, 1 : i64>
+
+// -----
+
+func.func @materialize_in_destination(%arg0: tensor<128xf32, 1>) -> tensor<128xf32, 2> {
+  %0 = bufferization.alloc_tensor () {memory_space = 2 : i64} : tensor<128xf32, 2>
+  %1 = bufferization.materialize_in_destination %arg0 in %0 : (tensor<128xf32, 1>, tensor<128xf32, 2>) -> tensor<128xf32, 2>
+  return %1 : tensor<128xf32, 2>
+}
+
+// CHECK-LABEL: @materialize_in_destination
+// CHECK-SAME: (%[[arg0:.+]]: tensor<128xf32, 1 : i64>) -> tensor<128xf32, 2 : i64> {
+// CHECK: %[[v0:.+]] = bufferization.to_memref %[[arg0]] : tensor<128xf32, 1 : i64> -> memref<128xf32, strided<[?], offset: ?>, 1>
+// CHECK: %[[alloc:.+]] = memref.alloc() {alignment = 64 : i64} : memref<128xf32, 2>
+// CHECK: memref.copy %[[v0]], %[[alloc]] : memref<128xf32, strided<[?], offset: ?>, 1> to memref<128xf32, 2>
+// CHECK: %[[v1:.+]] = bufferization.to_tensor %[[alloc]] : memref<128xf32, 2> -> tensor<128xf32, 2 : i64>
+// CHECK: return %[[v1]] : tensor<128xf32, 2 : i64>
mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir

Lines changed: 3 additions & 3 deletions
@@ -25,9 +25,9 @@ func.func @use_of_unknown_op_1(%t1: tensor<?xf32>)

   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref<?xf32, strided<[?], offset: ?>>
+  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : tensor<?xf32> -> memref<?xf32, strided<[?], offset: ?>>
   // CHECK: vector.transfer_read %[[dummy_memref]][%{{.*}}], %{{.*}} : memref<?xf32, strided<[?], offset: ?>>
-  // CHECK-NO-LAYOUT-MAP: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref<?xf32>
+  // CHECK-NO-LAYOUT-MAP: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : tensor<?xf32> -> memref<?xf32>
   // CHECK-NO-LAYOUT-MAP: vector.transfer_read %[[dummy_memref]][%{{.*}}], %{{.*}} : memref<?xf32>
   %1 = vector.transfer_read %0[%idx], %cst : tensor<?xf32>, vector<5xf32>
   return %1 : vector<5xf32>
@@ -61,7 +61,7 @@ func.func @use_of_unknown_op_3(%t1: tensor<?xf32>)

   // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]])
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>
-  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref<?xf32, strided<[?], offset: ?>>
+  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : tensor<?xf32> -> memref<?xf32, strided<[?], offset: ?>>
   // CHECK: %[[v2:.*]] = vector.transfer_read %[[dummy_memref]]
   %2 = vector.transfer_read %0[%idx], %cst : tensor<?xf32>, vector<5xf32>