Address comment and fix subsection size bug in the sparse tensor loop emitter

Peiming Liu · PeimingLiu · commit 5b15e44600b5 · 2024-02-02T13:46:17.000-06:00
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp
@@ -313,16 +313,16 @@ void LoopEmitter::initSubSectIterator(OpBuilder &builder, Location loc) {
         // Compute the subsection size.
         Value size = c0;
         for (auto [loop, stride] : remDepStack[t][lvl]) {
-          Value loopHi = loopHighs[loop];
-          size = ADDI(size, MULI(loopHi, C_IDX(stride)));
+          Value idxMax = SUBI(loopHighs[loop], C_IDX(1));
+          size = ADDI(size, ADDI(MULI(idxMax, C_IDX(stride)), C_IDX(1)));
         }
         it = makeNonEmptySubSectIterator(builder, loc, parent, loopHighs[loop],
                                          std::move(lvlIt), size, curDep.second);
       } else {
-        Value size = loopHighs[loop];
         const SparseIterator &subSectIter = *iters[t][lvl].back();
         it = makeTraverseSubSectIterator(builder, loc, subSectIter, *parent,
-                                         std::move(lvlIt), size, curDep.second);
+                                         std::move(lvlIt), loopHighs[loop],
+                                         curDep.second);
       }
       lastIter[t] = it.get();
       iters[t][lvl].emplace_back(std::move(it));
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.cpp
@@ -665,11 +665,10 @@ class SubSectIterator : public SparseIterator {
 public:
   SubSectIterator(const NonEmptySubSectIterator &subSect,
                   const SparseIterator &parent,
-                  std::unique_ptr<SparseIterator> &&wrap, Value size)
+                  std::unique_ptr<SparseIterator> &&wrap)
       : SparseIterator(IterKind::kSubSect, *wrap,
                        /*extraCursorCnt=*/wrap->randomAccessible() ? 0 : 1),
-        subSect(subSect), wrap(std::move(wrap)), parent(parent), size(size),
-        helper(*this) {
+        subSect(subSect), wrap(std::move(wrap)), parent(parent), helper(*this) {
     assert(subSect.tid == tid && subSect.lvl == lvl);
     assert(parent.kind != IterKind::kSubSect || parent.lvl + 1 == lvl);
   };
@@ -691,7 +690,9 @@ class SubSectIterator : public SparseIterator {
 
   bool randomAccessible() const override { return wrap->randomAccessible(); };
   bool iteratableByFor() const override { return randomAccessible(); };
-  Value upperBound(OpBuilder &b, Location l) const override { return size; }
+  Value upperBound(OpBuilder &b, Location l) const override {
+    return subSect.subSectSz;
+  }
   std::pair<Value, Value> getCurPosition() const override {
     return wrap->getCurPosition();
   };
@@ -709,7 +710,7 @@ class SubSectIterator : public SparseIterator {
         assert(p->lvl + 1 == lvl);
         wrap->genInit(b, l, p);
         // Linearize the dense subsection index.
-        nxLvlTupleStart = MULI(size, p->getNxLvlTupleId(b, l));
+        nxLvlTupleStart = MULI(subSect.subSectSz, p->getNxLvlTupleId(b, l));
       } else {
         assert(subSect.lvl == lvl && subSect.isSubSectRoot());
         wrap->deserialize(subSect.delegate->serialize());
@@ -763,7 +764,6 @@ class SubSectIterator : public SparseIterator {
   std::unique_ptr<SparseIterator> wrap;
   const SparseIterator &parent;
 
-  Value size;
   SubSectIterHelper helper;
 };
 
@@ -1354,17 +1354,18 @@ std::unique_ptr<SparseIterator> sparse_tensor::makeNonEmptySubSectIterator(
 std::unique_ptr<SparseIterator> sparse_tensor::makeTraverseSubSectIterator(
     OpBuilder &b, Location l, const SparseIterator &subSectIter,
     const SparseIterator &parent, std::unique_ptr<SparseIterator> &&wrap,
-    Value size, unsigned stride) {
+    Value loopBound, unsigned stride) {
 
   // This must be a subsection iterator or a filtered subsection iterator.
   auto &subSect =
       llvm::cast<NonEmptySubSectIterator>(*tryUnwrapFilter(&subSectIter));
 
   auto it = std::make_unique<SubSectIterator>(
-      subSect, *tryUnwrapFilter(&parent), std::move(wrap), size);
+      subSect, *tryUnwrapFilter(&parent), std::move(wrap));
+
   if (stride != 1) {
     return std::make_unique<FilterIterator>(std::move(it), /*offset=*/C_IDX(0),
-                                            C_IDX(stride), /*size=*/size);
+                                            C_IDX(stride), /*size=*/loopBound);
   }
   return it;
 }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.h
@@ -301,8 +301,8 @@ std::unique_ptr<SparseIterator> makeNonEmptySubSectIterator(
 /// non-empty subsection created by NonEmptySubSectIterator.
 std::unique_ptr<SparseIterator> makeTraverseSubSectIterator(
     OpBuilder &b, Location l, const SparseIterator &subsectIter,
-    const SparseIterator &parent, std::unique_ptr<SparseIterator> &&delegate,
-    Value size, unsigned stride);
+    const SparseIterator &parent, std::unique_ptr<SparseIterator> &&wrap,
+    Value loopBound, unsigned stride);
 
 } // namespace sparse_tensor
 } // namespace mlir
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dilated_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dilated_conv_2d_nhwc_hwcf.mlir
@@ -90,8 +90,8 @@ func.func @entry() {
   %zero = arith.constant 0.00000e+00 : f32
 
   %filter2D_nhwc = call @alloc_4d_filled_f32(%c3, %c3, %c3, %c1, %val) :(index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
-  %in2D_tmp = call @alloc_4d_filled_f32(%c3, %c7, %c7, %c3, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
-  %in2D_nhwc = tensor.insert %f10 into %in2D_tmp[%c0, %c1, %c1, %c0] : tensor<?x?x?x?xf32>
+  %in2D_tmp = call @alloc_4d_filled_f32(%c3, %c7, %c7, %c3, %f10) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
+  %in2D_nhwc = tensor.insert %zero into %in2D_tmp[%c0, %c1, %c1, %c0] : tensor<?x?x?x?xf32>
   %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c3, %c3, %c1, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
 
   %in2D_nhwc_CCCC = sparse_tensor.convert %in2D_nhwc
@@ -108,35 +108,35 @@ func.func @entry() {
   %dual_CDCC_ret = call @conv_2d_nhwc_hwcf_dual_CDCC(%in2D_nhwc_CDCC, %filter2D_nhwc_CDCC, %out2D_nhwc)
     : (tensor<?x?x?x?xf32, #CDCC>, tensor<?x?x?x?xf32, #CDCC>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
 
-  // CHECK:      ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ) )
+  // CHECK-NEXT: ( ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 520 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ) )
   %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0], %zero
       : tensor<?x?x?x?xf32>, vector<3x3x3x1xf32>
   vector.print %dense_v : vector<3x3x3x1xf32>
 
-  // CHECK-NEXT: ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ) )
+  // CHECK-NEXT: ( ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 520 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ) )
   %v_dual = vector.transfer_read %dual_CDCC_ret[%c0, %c0, %c0, %c0], %zero
       : tensor<?x?x?x?xf32>, vector<3x3x3x1xf32>
   vector.print %v_dual : vector<3x3x3x1xf32>
 
-  // CHECK-NEXT: ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ) )
+  // CHECK-NEXT: ( ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 520 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ) )
   %v1 = vector.transfer_read %CCCC_ret[%c0, %c0, %c0, %c0], %zero
       : tensor<?x?x?x?xf32>, vector<3x3x3x1xf32>
   vector.print %v1 : vector<3x3x3x1xf32>
 
-  // CHECK-NEXT: ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
-  // CHECK-SAME:   ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ) )
+  // CHECK-NEXT: ( ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 520 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ),
+  // CHECK-SAME:   ( ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ), ( ( 540 ), ( 540 ), ( 540 ) ) ) )
   %v2 = vector.transfer_read %CDCC_ret[%c0, %c0, %c0, %c0], %zero
       : tensor<?x?x?x?xf32>, vector<3x3x3x1xf32>
   vector.print %v1 : vector<3x3x3x1xf32>
 
-  // Free the resources
+  // Free the resources.
   bufferization.dealloc_tensor %in2D_nhwc : tensor<?x?x?x?xf32>
   bufferization.dealloc_tensor %filter2D_nhwc : tensor<?x?x?x?xf32>
   bufferization.dealloc_tensor %out2D_nhwc : tensor<?x?x?x?xf32>