Skip to content

Commit cfa82f7

Browse files
author
K-Wu
committed
[mlir][sparse][gpu] introduce flag that controls host to device copy strategies (regular dma default)
Differential Revision: https://reviews.llvm.org/D155352
1 parent 9a80655 commit cfa82f7

File tree

10 files changed

+281
-104
lines changed

10 files changed

+281
-104
lines changed

mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,21 @@ struct SparseCompilerOptions
5252
mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
5353
"any-storage-any-loop",
5454
"Enable sparse parallelization for any storage and loop."))};
55+
PassOptions::Option<mlir::GPUDataTransferStrategy> gpuDataTransfer{
56+
*this, "gpu-data-transfer-strategy",
57+
::llvm::cl::desc(
58+
"Set the data transfer strategy between the host and the GPUs"),
59+
::llvm::cl::init(mlir::GPUDataTransferStrategy::kRegularDMA),
60+
llvm::cl::values(
61+
clEnumValN(mlir::GPUDataTransferStrategy::kRegularDMA, "regular-dma",
62+
"Default option: malloc on host without additional "
63+
"options or care and then use DMA to copy the data"),
64+
clEnumValN(mlir::GPUDataTransferStrategy::kPinnedDMA, "pinned-dma",
65+
"Based on the default option, pin the host memory to "
66+
"accelerate the data transfer"),
67+
clEnumValN(mlir::GPUDataTransferStrategy::kZeroCopy, "zero-copy",
68+
"Use zero-copy to perform the data transfer from the host "
69+
"to the GPU"))};
5570

5671
PassOptions::Option<bool> enableIndexReduction{
5772
*this, "enable-index-reduction",
@@ -138,8 +153,9 @@ struct SparseCompilerOptions
138153

139154
/// Projects out the options for `createSparsificationPass`.
140155
SparsificationOptions sparsificationOptions() const {
141-
return SparsificationOptions(parallelization, enableIndexReduction,
142-
enableGPULibgen, enableRuntimeLibrary);
156+
return SparsificationOptions(parallelization, gpuDataTransfer,
157+
enableIndexReduction, enableGPULibgen,
158+
enableRuntimeLibrary);
143159
}
144160

145161
/// Projects out the options for `createSparseTensorConversionPass`.

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,26 @@ enum class SparseParallelizationStrategy {
4444
// TODO: support reduction parallelization too?
4545
};
4646

47+
// TODO: Zero copy is disabled due to correctness bugs. Tracker #64316
48+
/// Defines the data transfer strategy between the host and the GPUs.
/// The enumerators correspond to the `gpu-data-transfer-strategy` option
/// values as follows:
///   kRegularDMA ("regular-dma"): malloc on host without additional options
///       or care, then use DMA to copy the data. This is the default.
///   kZeroCopy ("zero-copy"): use zero-copy to perform the data transfer
///       from the host to the GPU.
///   kPinnedDMA ("pinned-dma"): like regular DMA, but pin the host memory
///       to accelerate the data transfer.
enum class GPUDataTransferStrategy { kRegularDMA, kZeroCopy, kPinnedDMA };
49+
4750
#define GEN_PASS_DECL
4851
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h.inc"
4952

5053
/// Options for the Sparsification pass.
5154
struct SparsificationOptions {
52-
SparsificationOptions(SparseParallelizationStrategy p, bool idxReduc,
55+
SparsificationOptions(SparseParallelizationStrategy p,
56+
GPUDataTransferStrategy t, bool idxReduc,
5357
bool gpuLibgen, bool enableRT)
54-
: parallelizationStrategy(p), enableIndexReduction(idxReduc),
55-
enableGPULibgen(gpuLibgen), enableRuntimeLibrary(enableRT) {}
58+
: parallelizationStrategy(p), gpuDataTransferStrategy(t),
59+
enableIndexReduction(idxReduc), enableGPULibgen(gpuLibgen),
60+
enableRuntimeLibrary(enableRT) {}
5661
SparsificationOptions()
57-
: SparsificationOptions(SparseParallelizationStrategy::kNone, false,
62+
: SparsificationOptions(SparseParallelizationStrategy::kNone,
63+
GPUDataTransferStrategy::kRegularDMA, false,
5864
false, true) {}
5965
SparseParallelizationStrategy parallelizationStrategy;
66+
GPUDataTransferStrategy gpuDataTransferStrategy;
6067
bool enableIndexReduction;
6168
bool enableGPULibgen;
6269
bool enableRuntimeLibrary;
@@ -211,8 +218,8 @@ std::unique_ptr<Pass> createSparseVectorizationPass(unsigned vectorLength,
211218
void populateSparseGPUCodegenPatterns(RewritePatternSet &patterns,
212219
unsigned numThreads);
213220

214-
void populateSparseGPULibgenPatterns(RewritePatternSet &patterns,
215-
bool enableRT);
221+
void populateSparseGPULibgenPatterns(RewritePatternSet &patterns, bool enableRT,
222+
GPUDataTransferStrategy gpuDataTransfer);
216223

217224
std::unique_ptr<Pass> createSparseGPUCodegenPass();
218225
std::unique_ptr<Pass> createSparseGPUCodegenPass(unsigned numThreads);

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,19 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
102102
clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
103103
"any-storage-any-loop",
104104
"Enable sparse parallelization for any storage and loop."))}]>,
105+
Option<"gpuDataTransfer", "gpu-data-transfer-strategy", "mlir::GPUDataTransferStrategy",
106+
"mlir::GPUDataTransferStrategy::kRegularDMA",
107+
"Set the data transfer strategy", [{llvm::cl::values(
108+
clEnumValN(mlir::GPUDataTransferStrategy::kRegularDMA,
109+
"regular-dma",
110+
"Default option: malloc on host without additional "
111+
"options or care and then use DMA to copy the data"),
112+
clEnumValN(mlir::GPUDataTransferStrategy::kPinnedDMA, "pinned-dma",
113+
"Based on the default option, pin the host memory to "
114+
"accelerate the data transfer"),
115+
clEnumValN(mlir::GPUDataTransferStrategy::kZeroCopy, "zero-copy",
116+
"Use zero-copy to perform the data transfer from the host "
117+
"to the GPU"))}]>,
105118
Option<"enableGPULibgen", "enable-gpu-libgen", "bool",
106119
"false",
107120
"Enable GPU acceleration by means of direct library calls (like cuSPARSE)">,
@@ -110,6 +123,7 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
110123
];
111124
}
112125

126+
113127
def PostSparsificationRewrite : Pass<"post-sparsification-rewrite", "ModuleOp"> {
114128
let summary = "Applies sparse tensor rewriting rules after sparsification";
115129
let description = [{

0 commit comments

Comments
 (0)