[mlir][sparse] add asCOO() functionality to sparse tensor object

This prepares general sparse-to-sparse conversions. The code that needs to be
generated using this new feature is now simply:

  (1) coo = sparse_tensor_1->asCOO();          // source format1
  (2) sparse_tensor_2 = newSparseTensor(coo);  // destination format2

By using COO as an intermediate, we can do *all* conversions without having to
implement the full O(N^2) conversion matrix. Note that we can always improve
particular conversions individually if a faster solution is required.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D108681
Commit 6b26857 (1 parent: a45d72e)
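The pattern generalizes: with COO as the hub, each storage format needs only one converter to COO and one from COO, so N formats cost about 2N converters rather than N(N-1) pairwise ones. Below is a minimal standalone sketch of that idea; the two formats (`Dense2D`, `MapSparse2D`) and all names are illustrative stand-ins, not the MLIR runtime types.

```cpp
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

// Shared hub: a list of (coordinates, value) pairs.
using COO = std::vector<std::pair<std::vector<uint64_t>, double>>;

// Illustrative format 1: dense row-major buffer; only knows how to leave COO.
struct Dense2D {
  uint64_t rows, cols;
  std::vector<double> data;
  COO asCOO() const {
    COO coo;
    for (uint64_t i = 0; i < rows; i++)
      for (uint64_t j = 0; j < cols; j++)
        if (data[i * cols + j] != 0.0)
          coo.push_back({{i, j}, data[i * cols + j]});
    return coo;
  }
};

// Illustrative format 2: coordinate map; only knows how to enter from COO.
struct MapSparse2D {
  std::map<std::pair<uint64_t, uint64_t>, double> cells;
  static MapSparse2D fromCOO(const COO &coo) {
    MapSparse2D t;
    for (const auto &e : coo)
      t.cells[{e.first[0], e.first[1]}] = e.second;
    return t;
  }
};

int main() {
  Dense2D a{2, 2, {1.0, 0.0, 0.0, 2.0}};
  // Mirrors steps (1) and (2): source -> COO -> destination.
  MapSparse2D b = MapSparse2D::fromCOO(a.asCOO());
  return b.cells.size() == 2 ? 0 : 1;
}
```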

mlir/lib/ExecutionEngine/SparseUtils.cpp (83 additions, 24 deletions)
```diff
@@ -94,14 +94,17 @@ struct SparseTensor {
   /// Getter for elements array.
   const std::vector<Element<V>> &getElements() const { return elements; }
 
-  /// Factory method.
+  /// Factory method. Permutes the original dimensions according to
+  /// the given ordering and expects subsequent add() calls to honor
+  /// that same ordering for the given indices. The result is a
+  /// fully permuted coordinate scheme.
   static SparseTensor<V> *newSparseTensor(uint64_t size, uint64_t *sizes,
                                           uint64_t *perm,
                                           uint64_t capacity = 0) {
-    std::vector<uint64_t> indices(size);
+    std::vector<uint64_t> permsz(size);
     for (uint64_t r = 0; r < size; r++)
-      indices[perm[r]] = sizes[r];
-    return new SparseTensor<V>(indices, capacity);
+      permsz[perm[r]] = sizes[r];
+    return new SparseTensor<V>(permsz, capacity);
   }
 
 private:
```
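As a sanity check of the size permutation above: dimension r of the original tensor lands at position perm[r] in the permuted scheme. A tiny standalone snippet with made-up sizes and ordering:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint64_t> sizes = {10, 20, 30}; // original dimension sizes
  std::vector<uint64_t> perm = {2, 0, 1};     // dim 0 -> pos 2, 1 -> 0, 2 -> 1
  std::vector<uint64_t> permsz(3);
  for (uint64_t r = 0; r < 3; r++)
    permsz[perm[r]] = sizes[r];
  // Permuted order: position 0 holds dim 1, position 1 holds dim 2, etc.
  assert(permsz == (std::vector<uint64_t>{20, 30, 10}));
  return 0;
}
```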
```diff
@@ -168,8 +171,13 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// Constructs a sparse tensor storage scheme from the given sparse
   /// tensor in coordinate scheme following the given per-rank dimension
   /// dense/sparse annotations.
-  SparseTensorStorage(SparseTensor<V> *tensor, uint8_t *sparsity)
-      : sizes(tensor->getSizes()), pointers(getRank()), indices(getRank()) {
+  SparseTensorStorage(SparseTensor<V> *tensor, uint8_t *sparsity,
+                      uint64_t *perm)
+      : sizes(tensor->getSizes()), rev(getRank()), pointers(getRank()),
+        indices(getRank()) {
+    // Store "reverse" permutation.
+    for (uint64_t d = 0, rank = getRank(); d < rank; d++)
+      rev[perm[d]] = d;
     // Provide hints on capacity.
     // TODO: needs fine-tuning based on sparsity
     uint64_t nnz = tensor->getElements().size();
@@ -184,8 +192,12 @@ class SparseTensorStorage : public SparseTensorStorageBase {
         assert(sparsity[d] == kDense && "singleton not yet supported");
       }
     }
+    // Prepare sparse pointer structures for all dimensions.
+    for (uint64_t d = 0, rank = getRank(); d < rank; d++)
+      if (sparsity[d] == kCompressed)
+        pointers[d].push_back(0);
     // Then setup the tensor.
-    traverse(tensor, sparsity, 0, nnz, 0);
+    fromCOO(tensor, sparsity, 0, nnz, 0);
   }
 
   virtual ~SparseTensorStorage() {}
```
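The stored rev array is the inverse of perm, so it maps a storage position back to its original dimension; composed with the factory's permutation it restores the original sizes, as this small sketch with hypothetical values verifies:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint64_t> perm = {2, 0, 1};
  std::vector<uint64_t> rev(3);
  for (uint64_t d = 0; d < 3; d++)
    rev[perm[d]] = d; // position perm[d] came from original dimension d
  assert(rev == (std::vector<uint64_t>{1, 2, 0}));
  // orgsz[rev[r]] = sizes[r] in asCOO() then restores the original sizes:
  std::vector<uint64_t> sizes = {20, 30, 10}; // permuted sizes from before
  std::vector<uint64_t> orgsz(3);
  for (uint64_t r = 0; r < 3; r++)
    orgsz[rev[r]] = sizes[r];
  assert(orgsz == (std::vector<uint64_t>{10, 20, 30}));
  return 0;
}
```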
```diff
@@ -203,11 +215,35 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   }
   void getValues(std::vector<V> **out) override { *out = &values; }
 
-  /// Factory method.
-  static SparseTensorStorage<P, I, V> *newSparseTensor(SparseTensor<V> *t,
-                                                       uint8_t *s) {
+  /// Returns this sparse tensor storage scheme as a new memory-resident
+  /// sparse tensor in coordinate scheme with the given dimension order.
+  SparseTensor<V> *asCOO(uint64_t *perm) {
+    // Restore original order of the dimension sizes and allocate coordinate
+    // scheme with desired new ordering specified in perm.
+    uint64_t size = getRank();
+    std::vector<uint64_t> orgsz(size);
+    for (uint64_t r = 0; r < size; r++)
+      orgsz[rev[r]] = sizes[r];
+    SparseTensor<V> *tensor = SparseTensor<V>::newSparseTensor(
+        size, orgsz.data(), perm, values.size());
+    // Populate coordinate scheme restored from old ordering and changed with
+    // new ordering. Rather than applying both reorderings during the
+    // recursion, we compute the combined permutation in advance.
+    std::vector<uint64_t> reord(size);
+    for (uint64_t r = 0; r < size; r++)
+      reord[r] = perm[rev[r]];
+    std::vector<uint64_t> idx(size);
+    toCOO(tensor, reord, idx, 0, 0);
+    return tensor;
+  }
+
+  /// Factory method. Expects a coordinate scheme that respects the same
+  /// permutation as is desired for the new sparse storage scheme.
+  static SparseTensorStorage<P, I, V> *
+  newSparseTensor(SparseTensor<V> *t, uint8_t *sparsity, uint64_t *perm) {
     t->sort(); // sort lexicographically
-    SparseTensorStorage<P, I, V> *n = new SparseTensorStorage<P, I, V>(t, s);
+    SparseTensorStorage<P, I, V> *n =
+        new SparseTensorStorage<P, I, V>(t, sparsity, perm);
     delete t;
     return n;
   }
```
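The combined reordering computed in asCOO() deserves a worked check: storage position r holds original dimension rev[r], which the requested COO ordering wants at position perm[rev[r]]. A standalone sketch with made-up orderings:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint64_t> oldPerm = {2, 0, 1}; // ordering used by this storage
  std::vector<uint64_t> newPerm = {1, 2, 0}; // ordering requested for COO
  uint64_t size = 3;
  std::vector<uint64_t> rev(size);
  for (uint64_t d = 0; d < size; d++)
    rev[oldPerm[d]] = d;
  std::vector<uint64_t> reord(size);
  for (uint64_t r = 0; r < size; r++)
    reord[r] = newPerm[rev[r]];
  // Applying reord in one pass equals undoing oldPerm, then applying newPerm.
  for (uint64_t d = 0; d < size; d++)
    assert(reord[oldPerm[d]] == newPerm[d]);
  return 0;
}
```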
```diff
@@ -216,17 +252,14 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// Initializes sparse tensor storage scheme from a memory-resident sparse
   /// tensor in coordinate scheme. This method prepares the pointers and indices
   /// arrays under the given per-rank dimension dense/sparse annotations.
-  void traverse(SparseTensor<V> *tensor, uint8_t *sparsity, uint64_t lo,
-                uint64_t hi, uint64_t d) {
+  void fromCOO(SparseTensor<V> *tensor, uint8_t *sparsity, uint64_t lo,
+               uint64_t hi, uint64_t d) {
     const std::vector<Element<V>> &elements = tensor->getElements();
     // Once dimensions are exhausted, insert the numerical values.
     if (d == getRank()) {
       values.push_back(lo < hi ? elements[lo].value : 0);
       return;
     }
-    // Prepare a sparse pointer structure at this dimension.
-    if (sparsity[d] == kCompressed && pointers[d].empty())
-      pointers[d].push_back(0);
     // Visit all elements in this interval.
     uint64_t full = 0;
     while (lo < hi) {
@@ -240,10 +273,10 @@ class SparseTensorStorage : public SparseTensorStorageBase {
         indices[d].push_back(idx);
       } else {
         for (; full < idx; full++)
-          traverse(tensor, sparsity, 0, 0, d + 1); // pass empty
+          fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty
         full++;
       }
-      traverse(tensor, sparsity, lo, seg, d + 1);
+      fromCOO(tensor, sparsity, lo, seg, d + 1);
       // And move on to next segment in interval.
       lo = seg;
     }
@@ -252,12 +285,34 @@ class SparseTensorStorage : public SparseTensorStorageBase {
       pointers[d].push_back(indices[d].size());
     } else {
       for (uint64_t sz = tensor->getSizes()[d]; full < sz; full++)
-        traverse(tensor, sparsity, 0, 0, d + 1); // pass empty
+        fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty
+    }
+  }
+
+  /// Stores the sparse tensor storage scheme into a memory-resident sparse
+  /// tensor in coordinate scheme.
+  void toCOO(SparseTensor<V> *tensor, std::vector<uint64_t> &reord,
+             std::vector<uint64_t> &idx, uint64_t pos, uint64_t d) {
+    if (d == getRank()) {
+      tensor->add(idx, values[pos]);
+    } else if (pointers[d].empty()) {
+      // Dense dimension.
+      for (uint64_t i = 0; i < sizes[d]; i++) {
+        idx[reord[d]] = i;
+        toCOO(tensor, reord, idx, pos * sizes[d] + i, d + 1);
+      }
+    } else {
+      // Sparse dimension.
+      for (uint64_t ii = pointers[d][pos]; ii < pointers[d][pos + 1]; ii++) {
+        idx[reord[d]] = indices[d][ii];
+        toCOO(tensor, reord, idx, ii, d + 1);
+      }
     }
   }
 
 private:
   std::vector<uint64_t> sizes; // per-rank dimension sizes
+  std::vector<uint64_t> rev;   // "reverse" permutation
   std::vector<std::vector<P>> pointers;
   std::vector<std::vector<I>> indices;
   std::vector<V> values;
```
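To see the toCOO() recursion concretely, here is a minimal standalone rendition for a rank-2 tensor with a dense dimension 0 and a compressed dimension 1 under the identity ordering, essentially expanding CSR back into coordinates; all data is made up:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

static const uint64_t sizes[2] = {3, 4};
// CSR-style storage for dim 1: row r owns entries [ptr[r], ptr[r+1]).
static const std::vector<uint64_t> pointersDim1 = {0, 2, 2, 3};
static const std::vector<uint64_t> indicesDim1 = {0, 3, 1};
static const std::vector<double> values = {1.0, 2.0, 3.0};

void toCOO(std::vector<uint64_t> &idx, uint64_t pos, uint64_t d) {
  if (d == 2) {
    std::printf("(%llu, %llu) = %g\n", (unsigned long long)idx[0],
                (unsigned long long)idx[1], values[pos]);
  } else if (d == 0) {
    // Dense dimension: enumerate all coordinates, linearizing the position.
    for (uint64_t i = 0; i < sizes[d]; i++) {
      idx[d] = i;
      toCOO(idx, pos * sizes[d] + i, d + 1);
    }
  } else {
    // Sparse dimension: visit stored entries only; ii is the next position.
    for (uint64_t ii = pointersDim1[pos]; ii < pointersDim1[pos + 1]; ii++) {
      idx[d] = indicesDim1[ii];
      toCOO(idx, ii, d + 1);
    }
  }
}

int main() {
  std::vector<uint64_t> idx(2);
  toCOO(idx, 0, 0); // prints (0,0)=1, (0,3)=2, (2,1)=3
  return 0;
}
```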
```diff
@@ -437,9 +492,12 @@ char *getTensorFilename(uint64_t id) {
       tensor = openTensor<V>(static_cast<char *>(ptr), asize, sizes, perm);   \
     else if (action == 1)                                                     \
       tensor = static_cast<SparseTensor<V> *>(ptr);                           \
-    else                                                                      \
+    else if (action == 2)                                                     \
       return SparseTensor<V>::newSparseTensor(asize, sizes, perm);            \
-    return SparseTensorStorage<P, I, V>::newSparseTensor(tensor, sparsity);   \
+    else                                                                      \
+      return static_cast<SparseTensorStorage<P, I, V> *>(ptr)->asCOO(perm);   \
+    return SparseTensorStorage<P, I, V>::newSparseTensor(tensor, sparsity,    \
+                                                         perm);               \
   }
 
 #define IMPL1(RET, NAME, TYPE, LIB)                                           \
```
```diff
@@ -498,9 +556,10 @@ enum PrimaryTypeEnum : uint64_t {
 /// Constructs a new sparse tensor. This is the "swiss army knife"
 /// method for materializing sparse tensors into the computation.
 /// action
-/// 0 : ptr contains filename to read into storage
-/// 1 : ptr contains coordinate scheme to assign to storage
-/// 2 : returns coordinate scheme to fill (call back later with 1)
+/// 0 : ptr contains filename to read into storage
+/// 1 : ptr contains coordinate scheme to assign to new storage
+/// 2 : returns empty coordinate scheme to fill (call back 1 to setup)
+/// 3 : returns coordinate scheme from storage in ptr (call back 1 to convert)
 void *newSparseTensor(uint8_t *abase, uint8_t *adata, uint64_t aoff,
                       uint64_t asize, uint64_t astride, uint64_t *sbase,
                       uint64_t *sdata, uint64_t soff, uint64_t ssize,
```
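Putting the action protocol together, a sparse-to-sparse conversion issues action 3 to pull a COO scheme out of the existing storage and then action 1 to build the destination storage from it. The following toy dispatcher only models that control flow; its types are stand-ins, not the runtime's:

```cpp
#include <cassert>
#include <cstdint>

struct COO {};   // stand-in for SparseTensor<V>
struct Storage { // stand-in for SparseTensorStorage<P, I, V>
  COO *asCOO() { return new COO(); }
  static Storage *newSparseTensor(COO *coo) {
    delete coo; // the runtime also consumes the coordinate scheme
    return new Storage();
  }
};

void *dispatch(uint64_t action, void *ptr) {
  if (action == 1) // ptr is a COO scheme: build storage from it
    return Storage::newSparseTensor(static_cast<COO *>(ptr));
  if (action == 3) // ptr is existing storage: return its COO scheme
    return static_cast<Storage *>(ptr)->asCOO();
  return nullptr;
}

int main() {
  Storage *src = new Storage();
  // Sparse-to-sparse conversion: (3) storage -> COO, then (1) COO -> storage.
  void *coo = dispatch(3, src);
  Storage *dst = static_cast<Storage *>(dispatch(1, coo));
  assert(dst != nullptr);
  delete src;
  delete dst;
  return 0;
}
```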
