[mlir][sparse] support 2:4 structured sparsity and loose compressed #69968
Conversation
This adds library support for these two new level formats.
@llvm/pr-subscribers-mlir-execution-engine @llvm/pr-subscribers-mlir

Author: Aart Bik (aartbik)

Changes: This adds library support for these two new level formats.

Full diff: https://github.com/llvm/llvm-project/pull/69968.diff

5 Files Affected:
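As context for the diff below, here is a toy standalone sketch (hypothetical code, not the library's internals) of what a 2:4 level stores: every aligned block of four entries keeps exactly two values plus each value's offset within the block, so the coordinates fit in a narrow integer type (the test below uses crdWidth = 8). The loose compressed format, by contrast, keeps an explicit (lo, hi) position pair per parent segment instead of CSR's shared fenceposts; a sketch of that appears further down, next to finalizeSegment.

```cpp
// Toy sketch of 2:4 extraction for row 0 of the test matrix below; the
// names and layout here are illustrative, not the library's internals.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  double row[8] = {0, 0, 1, 2, 0, 3, 0, 4}; // row 0 of ds.mtx below
  std::vector<double> values;
  std::vector<uint8_t> offsets; // j % 4, fits the test's crdWidth = 8
  for (int j = 0; j < 8; j++) {
    if (row[j] != 0) {
      values.push_back(row[j]);
      offsets.push_back(static_cast<uint8_t>(j % 4));
    }
  }
  // Prints offsets 2, 3, 1, 3: the first four entries of the
  // coordinates array the test checks for #NV_24.
  for (size_t k = 0; k < values.size(); k++)
    std::printf("offset %d -> value %g\n", offsets[k], values[k]);
  return 0;
}
```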
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index c65a27567d59d9a..1e9aa2bdf45dbdb 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -277,7 +277,7 @@ constexpr bool isCompressedDLT(DimLevelType dlt) {
static_cast<uint8_t>(DimLevelType::Compressed);
}
-/// Check if the `DimLevelType` is compressed (regardless of properties).
+/// Check if the `DimLevelType` is loose compressed (regardless of properties).
constexpr bool isLooseCompressedDLT(DimLevelType dlt) {
return (static_cast<uint8_t>(dlt) & ~3) ==
static_cast<uint8_t>(DimLevelType::LooseCompressed);
@@ -289,6 +289,12 @@ constexpr bool isSingletonDLT(DimLevelType dlt) {
static_cast<uint8_t>(DimLevelType::Singleton);
}
+/// Check if the `DimLevelType` is 2OutOf4 (regardless of properties).
+constexpr bool is2OutOf4DLT(DimLevelType dlt) {
+ return (static_cast<uint8_t>(dlt) & ~3) ==
+ static_cast<uint8_t>(DimLevelType::TwoOutOfFour);
+}
+
/// Check if the `DimLevelType` is ordered (regardless of storage format).
constexpr bool isOrderedDLT(DimLevelType dlt) {
return !(static_cast<uint8_t>(dlt) & 2);
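The ~3 masks in these predicates rely on the low two bits of DimLevelType carrying the per-level properties (the ordered check above masks bit 1; the unique check in the same header presumably masks bit 0), so clearing both bits leaves the base format. A minimal sketch of that scheme, with an illustrative enum value rather than the real one:

```cpp
// Minimal sketch of the property-bit masking; Fmt and its value are
// hypothetical stand-ins for the DimLevelType cases in Enums.h.
#include <cassert>
#include <cstdint>

enum class Fmt : uint8_t { Compressed = 8 }; // illustrative value only

constexpr bool isCompressedLike(uint8_t dlt) {
  return (dlt & ~3) == static_cast<uint8_t>(Fmt::Compressed);
}

int main() {
  // All four property combinations in the low two bits still match the
  // base format once those bits are masked off.
  for (uint8_t props = 0; props < 4; ++props)
    assert(isCompressedLike(static_cast<uint8_t>(Fmt::Compressed) | props));
  return 0;
}
```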
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index ad92ee1f89fc153..460549726356370 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -115,11 +115,19 @@ class SparseTensorStorageBase {
return isCompressedDLT(getLvlType(l));
}
+ /// Safely checks if the level uses loose compressed storage.
+ bool isLooseCompressedLvl(uint64_t l) const {
+ return isLooseCompressedDLT(getLvlType(l));
+ }
+
/// Safely checks if the level uses singleton storage.
bool isSingletonLvl(uint64_t l) const {
return isSingletonDLT(getLvlType(l));
}
+ /// Safely checks if the level uses 2 out of 4 storage.
+ bool is2OutOf4Lvl(uint64_t l) const { return is2OutOf4DLT(getLvlType(l)); }
+
/// Safely checks if the level is ordered.
bool isOrderedLvl(uint64_t l) const { return isOrderedDLT(getLvlType(l)); }
@@ -138,9 +146,6 @@ class SparseTensorStorageBase {
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES)
#undef DECL_GETCOORDINATES
- /// Gets the coordinate-value stored at the given level and position.
- virtual uint64_t getCrd(uint64_t lvl, uint64_t pos) const = 0;
-
/// Gets primary storage.
#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
@@ -280,13 +285,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
*out = &values;
}
- /// Returns coordinate at given position.
- uint64_t getCrd(uint64_t lvl, uint64_t pos) const final {
- assert(isCompressedDLT(getLvlType(lvl)) || isSingletonDLT(getLvlType(lvl)));
- assert(pos < coordinates[lvl].size());
- return coordinates[lvl][pos]; // Converts the stored `C` into `uint64_t`.
- }
-
/// Partially specialize forwarding insertions based on template types.
void forwardingInsert(const uint64_t *dimCoords, V val) final {
assert(dimCoords && coo);
@@ -302,7 +300,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
if (allDense) {
uint64_t lvlRank = getLvlRank();
uint64_t valIdx = 0;
- // Linearize the address
+ // Linearize the address.
for (uint64_t lvl = 0; lvl < lvlRank; lvl++)
valIdx = valIdx * getLvlSize(lvl) + lvlCoords[lvl];
values[valIdx] = val;
@@ -441,16 +439,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
}
private:
- /// Appends an arbitrary new position to `positions[lvl]`. This method
- /// checks that `pos` is representable in the `P` type; however, it
- /// does not check that `pos` is semantically valid (i.e., larger than
- /// the previous position and smaller than `coordinates[lvl].capacity()`).
- void appendPos(uint64_t lvl, uint64_t pos, uint64_t count = 1) {
- assert(isCompressedLvl(lvl));
- positions[lvl].insert(positions[lvl].end(), count,
- detail::checkOverflowCast<P>(pos));
- }
-
/// Appends coordinate `crd` to level `lvl`, in the semantically
/// general sense. For non-dense levels, that means appending to the
/// `coordinates[lvl]` array, checking that `crd` is representable in
@@ -461,11 +449,11 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
/// `full` is the number of "entries" already written to `values` for this
/// segment (aka one after the highest coordinate previously appended).
void appendCrd(uint64_t lvl, uint64_t full, uint64_t crd) {
- const auto dlt = getLvlType(lvl); // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) {
+ if (!isDenseLvl(lvl)) {
+ assert(isCompressedLvl(lvl) || isLooseCompressedLvl(lvl) ||
+ isSingletonLvl(lvl) || is2OutOf4Lvl(lvl));
coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
} else { // Dense level.
- assert(isDenseDLT(dlt));
assert(crd >= full && "Coordinate was already filled");
if (crd == full)
return; // Short-circuit, since it'll be a nop.
@@ -482,15 +470,13 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
/// storage, as opposed to "level-sizes" which are the cardinality
/// of possible coordinates for that level.
uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
- const auto dlt = getLvlType(l); // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt))
+ if (isCompressedLvl(l))
return positions[l][parentSz];
- if (isSingletonDLT(dlt))
+ if (isSingletonLvl(l))
return parentSz; // New size is same as the parent.
- if (isDenseDLT(dlt))
- return parentSz * getLvlSize(l);
- MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n",
- static_cast<uint8_t>(dlt));
+ // TODO: support levels assignment for loose/2:4?
+ assert(isDenseLvl(l));
+ return parentSz * getLvlSize(l);
}
/// Initializes sparse tensor storage scheme from a memory-resident sparse
@@ -514,7 +500,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
uint64_t seg = lo + 1;
if (isUniqueLvl(l))
while (seg < hi && lvlElements[seg].coords[l] == c)
- ++seg;
+ seg++;
// Handle segment in interval for sparse or dense level.
appendCrd(l, full, c);
full = c + 1;
@@ -529,14 +515,22 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
/// Finalizes the sparse position structure at this level.
void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
if (count == 0)
- return; // Short-circuit, since it'll be a nop.
- const auto dlt = getLvlType(l); // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt)) {
- appendPos(l, coordinates[l].size(), count);
- } else if (isSingletonDLT(dlt)) {
+ return; // Short-circuit, since it'll be a nop.
+ if (isCompressedLvl(l)) {
+ uint64_t pos = coordinates[l].size();
+ positions[l].insert(positions[l].end(), count,
+ detail::checkOverflowCast<P>(pos));
+ } else if (isLooseCompressedLvl(l)) {
+ // Finish this level, and push pairs for the empty ones, and one
+ // more for next level. Note that this always leaves one extra
+ // unused element at the end.
+ uint64_t pos = coordinates[l].size();
+ positions[l].insert(positions[l].end(), 2 * count,
+ detail::checkOverflowCast<P>(pos));
+ } else if (isSingletonLvl(l) || is2OutOf4Lvl(l)) {
return; // Nothing to finalize.
} else { // Dense dimension.
- assert(isDenseDLT(dlt));
+ assert(isDenseLvl(l));
const uint64_t sz = getLvlSizes()[l];
assert(sz >= full && "Segment is overfull");
count = detail::checkedMul(count, sz - full);
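To make the loose compressed branch above concrete, here is a small standalone illustration (assumed semantics, not the library code) of the pairs it pushes: one (lo, hi) pair per parent position rather than CSR's shared fenceposts, which is what the code comment about the extra unused trailing element refers to.

```cpp
// Builds the loose compressed positions for the test matrix below, which
// has four nonzeros in each of its three rows. Prints 0 4 4 8 8 12, the
// exact positions the #CSR_hi FileCheck line expects.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint64_t> rowCounts = {4, 4, 4}; // nonzeros per row
  std::vector<uint64_t> loosePos;
  uint64_t p = 0;
  for (uint64_t n : rowCounts) {
    loosePos.push_back(p);     // lo of this row's segment
    loosePos.push_back(p + n); // hi of this row's segment
    p += n;
  }
  for (uint64_t v : loosePos)
    std::printf("%llu ", static_cast<unsigned long long>(v));
  std::printf("\n");
  return 0;
}
```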
@@ -589,7 +583,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
(crd < cur && !isOrderedLvl(l))) {
return l;
}
-
if (crd < cur) {
assert(false && "non-lexicographic insertion");
return -1u;
@@ -609,27 +602,37 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
return;
}
if (isCompressedLvl(l)) {
- // Look up the bounds of the `l`-level segment determined by the
- // `(l - 1)`-level position `parentPos`.
const std::vector<P> &positionsL = positions[l];
assert(parentPos + 1 < positionsL.size());
const uint64_t pstart = static_cast<uint64_t>(positionsL[parentPos]);
const uint64_t pstop = static_cast<uint64_t>(positionsL[parentPos + 1]);
- // Loop-invariant code for looking up the `l`-level coordinates.
const std::vector<C> &coordinatesL = coordinates[l];
assert(pstop <= coordinatesL.size());
- for (uint64_t pos = pstart; pos < pstop; ++pos) {
+ for (uint64_t pos = pstart; pos < pstop; pos++) {
lvlCursor[l] = static_cast<uint64_t>(coordinatesL[pos]);
toCOO(pos, l + 1, dimCoords);
}
- } else if (isSingletonLvl(l)) {
- lvlCursor[l] = getCrd(l, parentPos);
+ } else if (isLooseCompressedLvl(l)) {
+ const std::vector<P> &positionsL = positions[l];
+ assert(2 * parentPos + 1 < positionsL.size());
+ const uint64_t pstart = static_cast<uint64_t>(positionsL[2 * parentPos]);
+ const uint64_t pstop =
+ static_cast<uint64_t>(positionsL[2 * parentPos + 1]);
+ const std::vector<C> &coordinatesL = coordinates[l];
+ assert(pstop <= coordinatesL.size());
+ for (uint64_t pos = pstart; pos < pstop; pos++) {
+ lvlCursor[l] = static_cast<uint64_t>(coordinatesL[pos]);
+ toCOO(pos, l + 1, dimCoords);
+ }
+ } else if (isSingletonLvl(l) || is2OutOf4Lvl(l)) {
+ assert(parentPos < coordinates[l].size());
+ lvlCursor[l] = static_cast<uint64_t>(coordinates[l][parentPos]);
toCOO(parentPos, l + 1, dimCoords);
} else { // Dense level.
assert(isDenseLvl(l));
const uint64_t sz = getLvlSizes()[l];
const uint64_t pstart = parentPos * sz;
- for (uint64_t c = 0; c < sz; ++c) {
+ for (uint64_t c = 0; c < sz; c++) {
lvlCursor[l] = c;
toCOO(pstart + c, l + 1, dimCoords);
}
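As a concrete reading of the 2 * parentPos indexing in the loose compressed branch above: with the positions array ( 0, 4, 4, 8, 8, 12 ) that the test below produces for #CSR_hi, a parent position of 1 (the second row) reads pstart = positions[2] = 4 and pstop = positions[3] = 8, so coordinates 4 through 7, namely ( 1, 2, 4, 7 ), enumerate that row's columns.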
@@ -706,19 +709,30 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
bool allDense = true;
uint64_t sz = 1;
for (uint64_t l = 0; l < lvlRank; l++) {
- const DimLevelType dlt = lvlTypes[l]; // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt)) {
+ if (isCompressedLvl(l)) {
positions[l].reserve(sz + 1);
positions[l].push_back(0);
coordinates[l].reserve(sz);
sz = 1;
allDense = false;
- } else if (isSingletonDLT(dlt)) {
+ } else if (isLooseCompressedLvl(l)) {
+ positions[l].reserve(2 * sz + 1); // last one unused
+ positions[l].push_back(0);
coordinates[l].reserve(sz);
sz = 1;
allDense = false;
+ } else if (isSingletonLvl(l)) {
+ coordinates[l].reserve(sz);
+ sz = 1;
+ allDense = false;
+ } else if (is2OutOf4Lvl(l)) {
+ assert(allDense && l == lvlRank - 1 && "unexpected 2:4 usage");
+ sz = detail::checkedMul(sz, lvlSizes[l]) / 2;
+ coordinates[l].reserve(sz);
+ values.reserve(sz);
+ allDense = false;
} else { // Dense level.
- assert(isDenseDLT(dlt));
+ assert(isDenseLvl(l));
sz = detail::checkedMul(sz, lvlSizes[l]);
}
}
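A quick check of the 2:4 reservation arithmetic above against the new test: the #NV_24 map yields level sizes (3, 2, 4); the two dense levels accumulate sz = 3 * 2 = 6, and the 2:4 level then reserves checkedMul(6, 4) / 2 = 12 coordinates and 12 values, exactly the 12 entries the #NV_24 FileCheck lines expect.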
@@ -773,6 +787,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
positions[l].assign(posPtr, posPtr + parentSz + 1);
coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
} else {
+ // TODO: support levels assignment for loose/2:4?
assert(isDenseLvl(l));
}
parentSz = assembledSize(parentSz, l);
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
index 40805a179d4b385..ea7e3125b7f47d9 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
@@ -36,11 +36,8 @@ SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT
assert(lvlRank > 0 && "Trivial shape is unsupported");
for (uint64_t l = 0; l < lvlRank; ++l) {
assert(lvlSizes[l] > 0 && "Level size zero has trivial storage");
- const auto dlt = lvlTypes[l];
- if (!(isDenseDLT(dlt) || isCompressedDLT(dlt) || isSingletonDLT(dlt))) {
- MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n",
- static_cast<uint8_t>(dlt));
- }
+ assert(isDenseLvl(l) || isCompressedLvl(l) || isLooseCompressedLvl(l) ||
+ isSingletonLvl(l) || is2OutOf4Lvl(l));
}
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_ds.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_ds.mlir
new file mode 100644
index 000000000000000..773c34e1f3dabca
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_ds.mlir
@@ -0,0 +1,120 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+// do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/ds.mtx"
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// TODO: enable!
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// R_UN: %{compile} | env %{env} %{run} | FileCheck %s
+
+!Filename = !llvm.ptr<i8>
+
+#CSR = #sparse_tensor.encoding<{
+ map = (i, j) -> ( i : dense, j : compressed)
+}>
+
+#CSR_hi = #sparse_tensor.encoding<{
+ map = (i, j) -> ( i : dense, j : loose_compressed)
+}>
+
+#NV_24 = #sparse_tensor.encoding<{
+ map = ( i, j ) -> ( i : dense,
+ j floordiv 4 : dense,
+ j mod 4 : block2_4),
+ crdWidth = 8
+}>
+
+module {
+
+ func.func private @getTensorFilename(index) -> (!Filename)
+
+ //
+ // Input matrix:
+ //
+ // [[0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 4.0],
+ // [0.0, 5.0, 6.0, 0.0, 7.0, 0.0, 0.0, 8.0],
+ // [9.0, 0.0, 10.0, 0.0, 11.0, 12.0, 0.0, 0.0]]
+ //
+ func.func @entry() {
+ %u0 = arith.constant 0 : i8
+ %c0 = arith.constant 0 : index
+ %f0 = arith.constant 0.0 : f64
+
+ %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+ %A1 = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #CSR>
+ %A2 = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #CSR_hi>
+ %A3 = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #NV_24>
+
+ //
+ // CSR:
+ //
+ // CHECK: ( 0, 4, 8, 12 )
+ // CHECK-NEXT: ( 2, 3, 5, 7, 1, 2, 4, 7, 0, 2, 4, 5 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 )
+ //
+ %pos1 = sparse_tensor.positions %A1 {level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
+ %vecp1 = vector.transfer_read %pos1[%c0], %c0 : memref<?xindex>, vector<4xindex>
+ vector.print %vecp1 : vector<4xindex>
+ %crd1 = sparse_tensor.coordinates %A1 {level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
+ %vecc1 = vector.transfer_read %crd1[%c0], %c0 : memref<?xindex>, vector<12xindex>
+ vector.print %vecc1 : vector<12xindex>
+ %val1 = sparse_tensor.values %A1 : tensor<?x?xf64, #CSR> to memref<?xf64>
+ %vecv1 = vector.transfer_read %val1[%c0], %f0 : memref<?xf64>, vector<12xf64>
+ vector.print %vecv1 : vector<12xf64>
+
+ //
+ // CSR_hi:
+ //
+ // CHECK-NEXT: ( 0, 4, 4, 8, 8, 12 )
+ // CHECK-NEXT: ( 2, 3, 5, 7, 1, 2, 4, 7, 0, 2, 4, 5 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 )
+ //
+ %pos2 = sparse_tensor.positions %A2 {level = 1 : index } : tensor<?x?xf64, #CSR_hi> to memref<?xindex>
+ %vecp2 = vector.transfer_read %pos2[%c0], %c0 : memref<?xindex>, vector<6xindex>
+ vector.print %vecp2 : vector<6xindex>
+ %crd2 = sparse_tensor.coordinates %A2 {level = 1 : index } : tensor<?x?xf64, #CSR_hi> to memref<?xindex>
+ %vecc2 = vector.transfer_read %crd2[%c0], %c0 : memref<?xindex>, vector<12xindex>
+ vector.print %vecc2 : vector<12xindex>
+ %val2 = sparse_tensor.values %A2 : tensor<?x?xf64, #CSR_hi> to memref<?xf64>
+ %vecv2 = vector.transfer_read %val2[%c0], %f0 : memref<?xf64>, vector<12xf64>
+ vector.print %vecv2 : vector<12xf64>
+
+ //
+ // NV_24
+ //
+ // CHECK-NEXT: ( 2, 3, 1, 3, 1, 2, 0, 3, 0, 2, 0, 1 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 )
+ //
+ %crd3 = sparse_tensor.coordinates %A3 {level = 2 : index } : tensor<?x?xf64, #NV_24> to memref<?xi8>
+ %vecc3 = vector.transfer_read %crd3[%c0], %u0 : memref<?xi8>, vector<12xi8>
+ vector.print %vecc3 : vector<12xi8>
+ %val3 = sparse_tensor.values %A3 : tensor<?x?xf64, #NV_24> to memref<?xf64>
+ %vecv3 = vector.transfer_read %val3[%c0], %f0 : memref<?xf64>, vector<12xf64>
+ vector.print %vecv3 : vector<12xf64>
+
+ // Release the resources.
+ bufferization.dealloc_tensor %A1: tensor<?x?xf64, #CSR>
+ bufferization.dealloc_tensor %A2: tensor<?x?xf64, #CSR_hi>
+ bufferization.dealloc_tensor %A3: tensor<?x?xf64, #NV_24>
+
+ return
+ }
+}
diff --git a/mlir/test/Integration/data/ds.mtx b/mlir/test/Integration/data/ds.mtx
new file mode 100755
index 000000000000000..8acc2ce081b6b35
--- /dev/null
+++ b/mlir/test/Integration/data/ds.mtx
@@ -0,0 +1,14 @@
+%%MatrixMarket matrix coordinate real general
+3 8 12
+1 3 1.0
+1 4 2.0
+1 6 3.0
+1 8 4.0
+2 2 5.0
+2 3 6.0
+2 5 7.0
+2 8 8.0
+3 1 9.0
+3 3 10.0
+3 5 11.0
+3 6 12.0
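A note on the #NV_24 expectations above: the coordinates ( 2, 3, 1, 3, 1, 2, 0, 3, 0, 2, 0, 1 ) are the j mod 4 offsets within each 4-wide block, with exactly two entries per block as the 2:4 constraint requires. For the first row, the nonzeros sit at 0-based columns 2, 3, 5, 7, so block {2, 3} contributes offsets (2, 3) and block {5, 7} contributes (1, 3). With crdWidth = 8 these offsets fit in an i8 memref, which is why the test reads the coordinates as vector<12xi8>.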
Review comment on finalizeSegment in Storage.h:

    positions[l].insert(positions[l].end(), count,
                        detail::checkOverflowCast<P>(pos));
  } else if (isLooseCompressedLvl(l)) {
    // Finish this level, and push pairs for the empty ones, and one
nit: finishes and pushes