Commit 5ef4467

[mlir][sparse][gpu] cleanup GPUDataTransferStrategy (llvm#71615)
The flag seems to do practically the same thing for zero-copy and pinned DMA. In addition, registering host memory is not truly the right zero-copy mechanism, according to Thomas. So we are simplifying the setup for now, until we have a better definition of what to implement and test. See llvm#64316.
1 parent 048ece4 commit 5ef4467
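
For orientation, a minimal sketch (not part of the commit; the helper name is made up) of how a caller builds the simplified options after this change, using only the constructor shown in the diffs below:

// Hypothetical helper, for illustration only. After this commit the
// SparsificationOptions constructor no longer takes a GPUDataTransferStrategy.
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"

mlir::SparsificationOptions makeDefaultLibgenOptions() {
  // Parallelization strategy, index reduction, GPU libgen, runtime library.
  return mlir::SparsificationOptions(
      mlir::SparseParallelizationStrategy::kNone,
      /*idxReduc=*/false, /*gpuLibgen=*/true, /*enableRT=*/true);
}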

14 files changed (+119 −302 lines)

mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h

Lines changed: 2 additions & 18 deletions
@@ -52,21 +52,6 @@ struct SparseCompilerOptions
               mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
               "any-storage-any-loop",
               "Enable sparse parallelization for any storage and loop."))};
-  PassOptions::Option<mlir::GPUDataTransferStrategy> gpuDataTransfer{
-      *this, "gpu-data-transfer-strategy",
-      ::llvm::cl::desc(
-          "Set the data transfer strategy between the host and the GPUs"),
-      ::llvm::cl::init(mlir::GPUDataTransferStrategy::kRegularDMA),
-      llvm::cl::values(
-          clEnumValN(mlir::GPUDataTransferStrategy::kRegularDMA, "regular-dma",
-                     "Default option: malloc on host without additional "
-                     "options or care and then use DMA to copy the data"),
-          clEnumValN(mlir::GPUDataTransferStrategy::kPinnedDMA, "pinned-dma",
-                     "Based on the default option, pin the host memory to "
-                     "accelerate the data transfer"),
-          clEnumValN(mlir::GPUDataTransferStrategy::kZeroCopy, "zero-copy",
-                     "Use zero-copy to perform the data transfer from the host "
-                     "to the GPU"))};
 
   PassOptions::Option<bool> enableIndexReduction{
       *this, "enable-index-reduction",
@@ -166,9 +151,8 @@ struct SparseCompilerOptions
 
   /// Projects out the options for `createSparsificationPass`.
   SparsificationOptions sparsificationOptions() const {
-    return SparsificationOptions(parallelization, gpuDataTransfer,
-                                 enableIndexReduction, enableGPULibgen,
-                                 enableRuntimeLibrary);
+    return SparsificationOptions(parallelization, enableIndexReduction,
+                                 enableGPULibgen, enableRuntimeLibrary);
   }
 
   /// Projects out the options for `createConvertVectorToLLVMPass`.
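
A short usage sketch (illustrative only; assumes the createSparsificationPass(const SparsificationOptions &) overload declared in the Transforms header) of how the projected pipeline options now feed the pass:

// Illustrative only: the projection drops the data-transfer strategy.
#include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"

void addSparsification(mlir::OpPassManager &pm,
                       const mlir::sparse_tensor::SparseCompilerOptions &opts) {
  // sparsificationOptions() now forwards only parallelization, index
  // reduction, GPU libgen, and runtime-library settings.
  pm.addPass(mlir::createSparsificationPass(opts.sparsificationOptions()));
}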

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h

Lines changed: 6 additions & 14 deletions
@@ -47,10 +47,6 @@ enum class ReinterpretMapScope {
   kExceptGeneric, // reinterprets operation other than linalg.generic
 };
 
-/// Defines data movement strategy between host and device for GPU.
-// TODO : Zero copy is disabled due to correctness bugs (tracker #64316)
-enum class GPUDataTransferStrategy { kRegularDMA, kZeroCopy, kPinnedDMA };
-
 #define GEN_PASS_DECL
 #include "mlir/Dialect/SparseTensor/Transforms/Passes.h.inc"
 
@@ -78,18 +74,14 @@ std::unique_ptr<Pass> createPreSparsificationRewritePass();
 
 /// Options for the Sparsification pass.
 struct SparsificationOptions {
-  SparsificationOptions(SparseParallelizationStrategy p,
-                        GPUDataTransferStrategy t, bool idxReduc,
+  SparsificationOptions(SparseParallelizationStrategy p, bool idxReduc,
                         bool gpuLibgen, bool enableRT)
-      : parallelizationStrategy(p), gpuDataTransferStrategy(t),
-        enableIndexReduction(idxReduc), enableGPULibgen(gpuLibgen),
-        enableRuntimeLibrary(enableRT) {}
+      : parallelizationStrategy(p), enableIndexReduction(idxReduc),
+        enableGPULibgen(gpuLibgen), enableRuntimeLibrary(enableRT) {}
   SparsificationOptions()
-      : SparsificationOptions(SparseParallelizationStrategy::kNone,
-                              GPUDataTransferStrategy::kRegularDMA, false,
+      : SparsificationOptions(SparseParallelizationStrategy::kNone, false,
                               false, true) {}
   SparseParallelizationStrategy parallelizationStrategy;
-  GPUDataTransferStrategy gpuDataTransferStrategy;
   bool enableIndexReduction;
   bool enableGPULibgen;
   bool enableRuntimeLibrary;
@@ -201,8 +193,8 @@ std::unique_ptr<Pass> createSparseVectorizationPass(unsigned vectorLength,
 void populateSparseGPUCodegenPatterns(RewritePatternSet &patterns,
                                       unsigned numThreads);
 
-void populateSparseGPULibgenPatterns(RewritePatternSet &patterns, bool enableRT,
-                                     GPUDataTransferStrategy gpuDataTransfer);
+void populateSparseGPULibgenPatterns(RewritePatternSet &patterns,
+                                     bool enableRT);
 
 std::unique_ptr<Pass> createSparseGPUCodegenPass();
 std::unique_ptr<Pass> createSparseGPUCodegenPass(unsigned numThreads);
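
For reference, a hedged sketch (hypothetical call site, not from this commit) of populating the libgen patterns with the new two-argument signature:

// Hypothetical call site: the GPUDataTransferStrategy argument is gone,
// so callers pass only the pattern set and the runtime-library flag.
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/IR/PatternMatch.h"

void collectLibgenPatterns(mlir::RewritePatternSet &patterns, bool enableRT) {
  mlir::populateSparseGPULibgenPatterns(patterns, enableRT);
}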

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td

Lines changed: 0 additions & 13 deletions
@@ -134,19 +134,6 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
              clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
                         "any-storage-any-loop",
                         "Enable sparse parallelization for any storage and loop."))}]>,
-    Option<"gpuDataTransfer", "gpu-data-transfer-strategy", "mlir::GPUDataTransferStrategy",
-           "mlir::GPUDataTransferStrategy::kRegularDMA",
-           "Set the data transfer strategy", [{llvm::cl::values(
-             clEnumValN(mlir::GPUDataTransferStrategy::kRegularDMA,
-                        "regular-dma",
-                        "Default option: malloc on host without additional "
-                        "options or care and then use DMA to copy the data"),
-             clEnumValN(mlir::GPUDataTransferStrategy::kPinnedDMA, "pinned-dma",
-                        "Based on the default option, pin the host memory to "
-                        "accelerate the data transfer"),
-             clEnumValN(mlir::GPUDataTransferStrategy::kZeroCopy, "zero-copy",
-                        "Use zero-copy to perform the data transfer from the host "
-                        "to the GPU"))}]>,
     Option<"enableGPULibgen", "enable-gpu-libgen", "bool",
            "false",
           "Enable GPU acceleration by means of direct library calls (like cuSPARSE)">,
