
Commit 2791162

[mlir] make memref.subview produce strided layout
The memref.subview operation was initially designed to work only on memrefs with strided layouts and has never supported anything else. Port it to use the recently added StridedLayoutAttr instead of extracting the strided form implicitly from affine maps.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D133938
1 parent 57c7bb3 commit 2791162
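
For illustration only (not part of the commit), a minimal MLIR sketch of the user-visible effect: the inferred subview result type now carries a strided<> layout rather than an equivalent affine_map layout. The function name and shapes below are made up.

// Hypothetical example: a subview of a contiguous 10x20 buffer. With this
// change the inferred result type uses a StridedLayoutAttr
// (strided<[20, 1], offset: 40>) instead of the equivalent
// affine_map<(d0, d1) -> (d0 * 20 + d1 + 40)>.
func.func @subview_strided(%buf: memref<10x20xf32>)
    -> memref<2x5xf32, strided<[20, 1], offset: 40>> {
  %0 = memref.subview %buf[2, 0] [2, 5] [1, 1]
      : memref<10x20xf32> to memref<2x5xf32, strided<[20, 1], offset: 40>>
  return %0 : memref<2x5xf32, strided<[20, 1], offset: 40>>
}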

19 files changed (+266, -335 lines)

mlir/include/mlir/IR/BuiltinTypes.h

Lines changed: 6 additions & 16 deletions
@@ -413,28 +413,18 @@ inline bool TensorType::classof(Type type) {
 /// MemRefs with a layout map in strided form include:
 /// 1. empty or identity layout map, in which case the stride information is
 ///    the canonical form computed from sizes;
-/// 2. single affine map layout of the form `K + k0 * d0 + ... kn * dn`,
-///    where K and ki's are constants or symbols.
+/// 2. a StridedLayoutAttr layout;
+/// 3. any other layout that be converted into a single affine map layout of
+///    the form `K + k0 * d0 + ... kn * dn`, where K and ki's are constants or
+///    symbols.
 ///
 /// A stride specification is a list of integer values that are either static
-/// or dynamic (encoded with getDynamicStrideOrOffset()). Strides encode the
-/// distance in the number of elements between successive entries along a
+/// or dynamic (encoded with ShapedType::kDynamicStrideOrOffset). Strides encode
+/// the distance in the number of elements between successive entries along a
 /// particular dimension.
-///
-/// For example, `memref<42x16xf32, (64 * d0 + d1)>` specifies a view into a
-/// non-contiguous memory region of `42` by `16` `f32` elements in which the
-/// distance between two consecutive elements along the outer dimension is `1`
-/// and the distance between two consecutive elements along the inner dimension
-/// is `64`.
-///
-/// The convention is that the strides for dimensions d0, .. dn appear in
-/// order to make indexing intuitive into the result.
 LogicalResult getStridesAndOffset(MemRefType t,
                                   SmallVectorImpl<int64_t> &strides,
                                   int64_t &offset);
-LogicalResult getStridesAndOffset(MemRefType t,
-                                  SmallVectorImpl<AffineExpr> &strides,
-                                  AffineExpr &offset);

 /// Return a version of `t` with identity layout if it can be determined
 /// statically that the layout is the canonical contiguous strided layout.
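
As a sketch of the layout forms enumerated in the updated comment (illustrative declarations, not from the commit), both of the following parameter types are strided with strides [64, 1] and offset 0; the int64_t overload of getStridesAndOffset accepts either, the first directly and the second through the affine-map analysis.

// Hypothetical declarations. The first uses the StridedLayoutAttr form, the
// second an equivalent single affine-map layout `64 * d0 + d1`.
func.func private @strided_attr_layout(memref<42x16xf32, strided<[64, 1]>>)
func.func private @affine_map_layout(
    memref<42x16xf32, affine_map<(d0, d1) -> (d0 * 64 + d1)>>)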

mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp

Lines changed: 36 additions & 23 deletions
@@ -2184,11 +2184,10 @@ Type SubViewOp::inferResultType(MemRefType sourceMemRefType,
   }

   // The type is now known.
-  return MemRefType::get(
-      staticSizes, sourceMemRefType.getElementType(),
-      makeStridedLinearLayoutMap(targetStrides, targetOffset,
-                                 sourceMemRefType.getContext()),
-      sourceMemRefType.getMemorySpace());
+  return MemRefType::get(staticSizes, sourceMemRefType.getElementType(),
+                         StridedLayoutAttr::get(sourceMemRefType.getContext(),
+                                                targetOffset, targetStrides),
+                         sourceMemRefType.getMemorySpace());
 }

 Type SubViewOp::inferResultType(MemRefType sourceMemRefType,
@@ -2224,14 +2223,19 @@ Type SubViewOp::inferRankReducedResultType(ArrayRef<int64_t> resultShape,
   Optional<llvm::SmallDenseSet<unsigned>> dimsToProject =
       computeRankReductionMask(inferredType.getShape(), resultShape);
   assert(dimsToProject.has_value() && "invalid rank reduction");
-  llvm::SmallBitVector dimsToProjectVector(inferredType.getRank());
-  for (unsigned dim : *dimsToProject)
-    dimsToProjectVector.set(dim);
-
-  // Compute layout map and result type.
-  AffineMap map = getProjectedMap(inferredType.getLayout().getAffineMap(),
-                                  dimsToProjectVector);
-  return MemRefType::get(resultShape, inferredType.getElementType(), map,
+
+  // Compute the layout and result type.
+  auto inferredLayout = inferredType.getLayout().cast<StridedLayoutAttr>();
+  SmallVector<int64_t> rankReducedStrides;
+  rankReducedStrides.reserve(resultShape.size());
+  for (auto [idx, value] : llvm::enumerate(inferredLayout.getStrides())) {
+    if (!dimsToProject->contains(idx))
+      rankReducedStrides.push_back(value);
+  }
+  return MemRefType::get(resultShape, inferredType.getElementType(),
+                         StridedLayoutAttr::get(inferredLayout.getContext(),
+                                                inferredLayout.getOffset(),
+                                                rankReducedStrides),
                          inferredType.getMemorySpace());
 }

@@ -2363,8 +2367,8 @@ Value SubViewOp::getViewSource() { return getSource(); }
 /// Return true if t1 and t2 have equal offsets (both dynamic or of same
 /// static value).
 static bool haveCompatibleOffsets(MemRefType t1, MemRefType t2) {
-  AffineExpr t1Offset, t2Offset;
-  SmallVector<AffineExpr> t1Strides, t2Strides;
+  int64_t t1Offset, t2Offset;
+  SmallVector<int64_t> t1Strides, t2Strides;
   auto res1 = getStridesAndOffset(t1, t1Strides, t1Offset);
   auto res2 = getStridesAndOffset(t2, t2Strides, t2Offset);
   return succeeded(res1) && succeeded(res2) && t1Offset == t2Offset;
@@ -2506,16 +2510,25 @@ static MemRefType getCanonicalSubViewResultType(
   // Return nullptr as failure mode.
   if (!unusedDims)
     return nullptr;
-  SmallVector<int64_t> shape;
-  for (const auto &sizes : llvm::enumerate(nonRankReducedType.getShape())) {
-    if (unusedDims->test(sizes.index()))
+
+  auto layout = nonRankReducedType.getLayout().cast<StridedLayoutAttr>();
+  SmallVector<int64_t> shape, strides;
+  unsigned numDimsAfterReduction =
+      nonRankReducedType.getRank() - unusedDims->count();
+  shape.reserve(numDimsAfterReduction);
+  strides.reserve(numDimsAfterReduction);
+  for (const auto &[idx, size, stride] :
+       llvm::zip(llvm::seq<unsigned>(0, nonRankReducedType.getRank()),
+                 nonRankReducedType.getShape(), layout.getStrides())) {
+    if (unusedDims->test(idx))
       continue;
-    shape.push_back(sizes.value());
+    shape.push_back(size);
+    strides.push_back(stride);
   }
-  AffineMap layoutMap = nonRankReducedType.getLayout().getAffineMap();
-  if (!layoutMap.isIdentity())
-    layoutMap = getProjectedMap(layoutMap, *unusedDims);
-  return MemRefType::get(shape, nonRankReducedType.getElementType(), layoutMap,
+
+  return MemRefType::get(shape, nonRankReducedType.getElementType(),
+                         StridedLayoutAttr::get(sourceType.getContext(),
+                                                layout.getOffset(), strides),
                          nonRankReducedType.getMemorySpace());
 }
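
To illustrate the rank-reduced inference above (a made-up example, not from the commit): dropping a unit dimension removes its stride from the inferred strided layout while the offset is preserved.

// Hypothetical rank-reducing subview: the unit middle dimension of the
// inferred memref<4x1x4xf32, strided<[64, 4, 1]>> is dropped, so its stride
// (4) is removed and the result keeps strides [64, 1].
func.func @rank_reduced(%src: memref<8x16x4xf32>)
    -> memref<4x4xf32, strided<[64, 1]>> {
  %0 = memref.subview %src[0, 0, 0] [4, 1, 4] [1, 1, 1]
      : memref<8x16x4xf32> to memref<4x4xf32, strided<[64, 1]>>
  return %0 : memref<4x4xf32, strided<[64, 1]>>
}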

mlir/lib/IR/BuiltinTypes.cpp

Lines changed: 31 additions & 9 deletions
@@ -766,9 +766,22 @@ static LogicalResult extractStrides(AffineExpr e,
   llvm_unreachable("unexpected binary operation");
 }

-LogicalResult mlir::getStridesAndOffset(MemRefType t,
-                                        SmallVectorImpl<AffineExpr> &strides,
-                                        AffineExpr &offset) {
+/// A stride specification is a list of integer values that are either static
+/// or dynamic (encoded with ShapedType::kDynamicStrideOrOffset). Strides encode
+/// the distance in the number of elements between successive entries along a
+/// particular dimension.
+///
+/// For example, `memref<42x16xf32, (64 * d0 + d1)>` specifies a view into a
+/// non-contiguous memory region of `42` by `16` `f32` elements in which the
+/// distance between two consecutive elements along the outer dimension is `1`
+/// and the distance between two consecutive elements along the inner dimension
+/// is `64`.
+///
+/// The convention is that the strides for dimensions d0, .. dn appear in
+/// order to make indexing intuitive into the result.
+static LogicalResult getStridesAndOffset(MemRefType t,
+                                         SmallVectorImpl<AffineExpr> &strides,
+                                         AffineExpr &offset) {
   AffineMap m = t.getLayout().getAffineMap();

   if (m.getNumResults() != 1 && !m.isIdentity())
@@ -807,12 +820,12 @@ LogicalResult mlir::getStridesAndOffset(MemRefType t,
   for (auto &stride : strides)
     stride = simplifyAffineExpr(stride, numDims, numSymbols);

-  /// In practice, a strided memref must be internally non-aliasing. Test
-  /// against 0 as a proxy.
-  /// TODO: static cases can have more advanced checks.
-  /// TODO: dynamic cases would require a way to compare symbolic
-  /// expressions and would probably need an affine set context propagated
-  /// everywhere.
+  // In practice, a strided memref must be internally non-aliasing. Test
+  // against 0 as a proxy.
+  // TODO: static cases can have more advanced checks.
+  // TODO: dynamic cases would require a way to compare symbolic
+  // expressions and would probably need an affine set context propagated
+  // everywhere.
   if (llvm::any_of(strides, [](AffineExpr e) {
         return e == getAffineConstantExpr(0, e.getContext());
       })) {
@@ -827,6 +840,15 @@ LogicalResult mlir::getStridesAndOffset(MemRefType t,
 LogicalResult mlir::getStridesAndOffset(MemRefType t,
                                         SmallVectorImpl<int64_t> &strides,
                                         int64_t &offset) {
+  // Happy path: the type uses the strided layout directly.
+  if (auto strided = t.getLayout().dyn_cast<StridedLayoutAttr>()) {
+    llvm::append_range(strides, strided.getStrides());
+    offset = strided.getOffset();
+    return success();
+  }
+
+  // Otherwise, defer to the affine fallback as layouts are supposed to be
+  // convertible to affine maps.
   AffineExpr offsetExpr;
   SmallVector<AffineExpr, 4> strideExprs;
   if (failed(::getStridesAndOffset(t, strideExprs, offsetExpr)))
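
A hypothetical pair of types illustrating the two paths above: the first layout is consumed directly by the new happy path, while the second, the equivalent affine-map form that appears in the updated tests, goes through the AffineExpr-based fallback. Both decompose to strides [20, 1] and a dynamic offset.

// Hypothetical declarations; getStridesAndOffset reports the same strides
// and (dynamic) offset for both parameter types.
func.func private @happy_path(memref<2x5xf32, strided<[20, 1], offset: ?>>)
func.func private @affine_fallback(
    memref<2x5xf32, affine_map<(d0, d1)[s0] -> (d0 * 20 + s0 + d1)>>)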

mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir

Lines changed: 2 additions & 4 deletions
@@ -74,12 +74,11 @@ func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
 // -----

 // CHECK: #[[$map2a:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
-// CHECK: #[[$map2b:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 20 + s0 + d1)>
 // CHECK-LABEL: func @callee(
 // CHECK-SAME: %{{.*}}: index,
 // CHECK-SAME: %[[r:.*]]: memref<2x5xf32, #[[$map2a]]>) {
 // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
-// CHECK: %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, #[[$map2b]]>
+// CHECK: %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, strided<[20, 1], offset: ?>>
 // CHECK: %[[casted:.*]] = memref.cast %[[subview]]
 // CHECK: memref.copy %[[casted]], %[[r]]
 // CHECK: memref.dealloc %[[alloc]]
@@ -98,9 +97,8 @@ func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
 // CHECK-NO-LAYOUT: memref.copy %[[alloc2]], %[[r]]
 // CHECK-NO-LAYOUT: memref.dealloc %[[alloc2]]

-// CHECK-BASELINE: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 20 + s0 + d1)>
 // CHECK-BASELINE-LABEL: func @callee(
-// CHECK-BASELINE-SAME: %{{.*}}: index) -> memref<2x5xf32, #[[$map2]]> {
+// CHECK-BASELINE-SAME: %{{.*}}: index) -> memref<2x5xf32, strided<[20, 1], offset: ?>> {
 // CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
 // CHECK-BASELINE: %[[subview:.*]] = memref.subview %[[alloc]]
 // CHECK-BASELINE: return %[[subview]]

mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir

Lines changed: 5 additions & 9 deletions
@@ -48,24 +48,20 @@ func.func private @external_func_with_return_val(tensor<4xi32>) -> f32

 // A function that returns a non-equivalent tensor with layout map.

-// CHECK: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
-// CHECK-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
-// CHECK-SAME: #[[$map2]]> {
+// CHECK-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32, strided<[10, 1], offset: ?>>
 // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
-// CHECK: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, #[[$map2]]>
+// CHECK: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, strided<[10, 1], offset: ?>>
 // CHECK: return %[[subview]]

-// CHECK-NO-LAYOUT-MAP: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
 // CHECK-NO-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32>
 // CHECK-NO-LAYOUT-MAP: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
-// CHECK-NO-LAYOUT-MAP: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, #[[$map2]]>
+// CHECK-NO-LAYOUT-MAP: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, strided<[10, 1], offset: ?>>
 // CHECK-NO-LAYOUT-MAP: %[[alloc_no_layout:.*]] = memref.alloc(%{{.*}}) : memref<2x?xf32>
 // CHECK-NO-LAYOUT-MAP: memref.copy %[[subview]], %[[alloc_no_layout]]
 // CHECK-NO-LAYOUT-MAP: memref.dealloc %[[alloc]]
 // CHECK-NO-LAYOUT-MAP: return %[[alloc_no_layout]]

 // CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map2a:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
-// CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map2b:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
 // CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
 // CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map2a]]> {
 func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>)
@@ -375,11 +371,11 @@ func.func @scf_for_with_tensor_insert_slice(
     -> (tensor<?xf32>, tensor<?xf32>)
 {
   // CHECK-NEXT: %[[SVA:.*]] = memref.subview %[[A]]
-  // CHECK-NEXT: memref.copy %[[C]], %[[SVA]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT: memref.copy %[[C]], %[[SVA]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, strided<[?], offset: ?>>
   %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>

   // CHECK-NEXT: %[[SVB:.*]] = memref.subview %[[B]]
-  // CHECK-NEXT: memref.copy %[[C]], %[[SVB]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT: memref.copy %[[C]], %[[SVB]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, strided<[?], offset: ?>>
   %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>

   // scf.yield is empty and is elided

mlir/test/Dialect/Linalg/promote.mlir

Lines changed: 14 additions & 16 deletions
@@ -4,8 +4,6 @@
 #map2 = affine_map<(d0) -> (d0 + 4)>
 #map3 = affine_map<(d0) -> (d0 + 3)>

-// CHECK-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
-
 func.func @matmul_f32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
   %c4 = arith.constant 4 : index
   %c3 = arith.constant 3 : index
@@ -44,24 +42,24 @@ func.func @matmul_f32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
 ///
 // CHECK: %[[tmpA:.*]] = memref.alloca() : memref<32xi8>
 // CHECK: %[[fullA:.*]] = memref.view %[[tmpA]][{{.*}}][{{.*}}] : memref<32xi8> to memref<?x?xf32>
-// CHECK: %[[partialA:.*]] = memref.subview %[[fullA]]{{.*}} : memref<?x?xf32> to memref<?x?xf32, #[[$strided2D]]>
+// CHECK: %[[partialA:.*]] = memref.subview %[[fullA]]{{.*}} : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1], offset: ?>>
 ///
 // CHECK: %[[tmpB:.*]] = memref.alloca() : memref<48xi8>
 // CHECK: %[[fullB:.*]] = memref.view %[[tmpB]][{{.*}}][{{.*}}] : memref<48xi8> to memref<?x?xf32>
-// CHECK: %[[partialB:.*]] = memref.subview %[[fullB]]{{.*}} : memref<?x?xf32> to memref<?x?xf32, #[[$strided2D]]>
+// CHECK: %[[partialB:.*]] = memref.subview %[[fullB]]{{.*}} : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1], offset: ?>>
 ///
 // CHECK: %[[tmpC:.*]] = memref.alloca() : memref<24xi8>
 // CHECK: %[[fullC:.*]] = memref.view %[[tmpC]][{{.*}}][{{.*}}] : memref<24xi8> to memref<?x?xf32>
-// CHECK: %[[partialC:.*]] = memref.subview %[[fullC]]{{.*}} : memref<?x?xf32> to memref<?x?xf32, #[[$strided2D]]>
+// CHECK: %[[partialC:.*]] = memref.subview %[[fullC]]{{.*}} : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1], offset: ?>>

-// CHECK: memref.copy %[[vA]], %[[partialA]] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, #[[$strided2D]]>
-// CHECK: memref.copy %[[vB]], %[[partialB]] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, #[[$strided2D]]>
-// CHECK: memref.copy %[[vC]], %[[partialC]] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, #[[$strided2D]]>
+// CHECK: memref.copy %[[vA]], %[[partialA]] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
+// CHECK: memref.copy %[[vB]], %[[partialB]] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
+// CHECK: memref.copy %[[vC]], %[[partialC]] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
 //
 // CHECK: linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} outs(%[[partialC]]
 //
 // CHECK: memref.copy %[[partialC]], %[[vC]] :
-// CHECK: memref<?x?xf32, #[[$strided2D]]> to
+// CHECK: memref<?x?xf32, strided<[?, 1], offset: ?>> to
 // CHECK: memref<?x?xf32, strided<[?, 1], offset: ?>>
 //
 // CHECK-NOT: memref.dealloc %[[tmpA]] : memref<32xi8>
@@ -117,24 +115,24 @@ func.func @matmul_f64(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
 ///
 // CHECK: %[[tmpA_f64:.*]] = memref.alloc() : memref<64xi8>
 // CHECK: %[[fullA_f64:.*]] = memref.view %[[tmpA_f64]][{{.*}}][{{.*}}] : memref<64xi8> to memref<?x?xf64>
-// CHECK: %[[partialA_f64:.*]] = memref.subview %[[fullA_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xf64> to memref<?x?xf64, #[[$strided2D]]>
+// CHECK: %[[partialA_f64:.*]] = memref.subview %[[fullA_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xf64> to memref<?x?xf64, strided<[?, 1], offset: ?>>
 ///
 // CHECK: %[[tmpB_f64:.*]] = memref.alloc() : memref<96xi8>
 // CHECK: %[[fullB_f64:.*]] = memref.view %[[tmpB_f64]][{{.*}}][{{.*}}] : memref<96xi8> to memref<?x?xf64>
-// CHECK: %[[partialB_f64:.*]] = memref.subview %[[fullB_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xf64> to memref<?x?xf64, #[[$strided2D]]>
+// CHECK: %[[partialB_f64:.*]] = memref.subview %[[fullB_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xf64> to memref<?x?xf64, strided<[?, 1], offset: ?>>
 ///
 // CHECK: %[[tmpC_f64:.*]] = memref.alloc() : memref<48xi8>
 // CHECK: %[[fullC_f64:.*]] = memref.view %[[tmpC_f64]][{{.*}}][{{.*}}] : memref<48xi8> to memref<?x?xf64>
-// CHECK: %[[partialC_f64:.*]] = memref.subview %[[fullC_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xf64> to memref<?x?xf64, #[[$strided2D]]>
+// CHECK: %[[partialC_f64:.*]] = memref.subview %[[fullC_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xf64> to memref<?x?xf64, strided<[?, 1], offset: ?>>

-// CHECK: memref.copy %[[vA_f64]], %[[partialA_f64]] : memref<?x?xf64, strided<[?, 1], offset: ?>> to memref<?x?xf64, #[[$strided2D]]>
-// CHECK: memref.copy %[[vB_f64]], %[[partialB_f64]] : memref<?x?xf64, strided<[?, 1], offset: ?>> to memref<?x?xf64, #[[$strided2D]]>
-// CHECK: memref.copy %[[vC_f64]], %[[partialC_f64]] : memref<?x?xf64, strided<[?, 1], offset: ?>> to memref<?x?xf64, #[[$strided2D]]>
+// CHECK: memref.copy %[[vA_f64]], %[[partialA_f64]] : memref<?x?xf64, strided<[?, 1], offset: ?>> to memref<?x?xf64, strided<[?, 1], offset: ?>>
+// CHECK: memref.copy %[[vB_f64]], %[[partialB_f64]] : memref<?x?xf64, strided<[?, 1], offset: ?>> to memref<?x?xf64, strided<[?, 1], offset: ?>>
+// CHECK: memref.copy %[[vC_f64]], %[[partialC_f64]] : memref<?x?xf64, strided<[?, 1], offset: ?>> to memref<?x?xf64, strided<[?, 1], offset: ?>>
 //
 // CHECK: linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} outs(%[[partialC_f64]]
 //
 // CHECK: memref.copy %[[partialC_f64]], %[[vC_f64]] :
-// CHECK: memref<?x?xf64, #[[$strided2D]]> to
+// CHECK: memref<?x?xf64, strided<[?, 1], offset: ?>> to
 // CHECK: memref<?x?xf64, strided<[?, 1], offset: ?>>
 //
 // CHECK: memref.dealloc %[[tmpA_f64]] : memref<64xi8>
