[mlir][sparse][gpu] cleanup GPUDataTransferStrategy #71615

Merged
4 commits merged on Nov 8, 2023
20 changes: 2 additions & 18 deletions mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
@@ -52,21 +52,6 @@ struct SparseCompilerOptions
mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
"any-storage-any-loop",
"Enable sparse parallelization for any storage and loop."))};
-  PassOptions::Option<mlir::GPUDataTransferStrategy> gpuDataTransfer{
-      *this, "gpu-data-transfer-strategy",
-      ::llvm::cl::desc(
-          "Set the data transfer strategy between the host and the GPUs"),
-      ::llvm::cl::init(mlir::GPUDataTransferStrategy::kRegularDMA),
-      llvm::cl::values(
-          clEnumValN(mlir::GPUDataTransferStrategy::kRegularDMA, "regular-dma",
-                     "Default option: malloc on host without additional "
-                     "options or care and then use DMA to copy the data"),
-          clEnumValN(mlir::GPUDataTransferStrategy::kPinnedDMA, "pinned-dma",
-                     "Based on the default option, pin the host memory to "
-                     "accelerate the data transfer"),
-          clEnumValN(mlir::GPUDataTransferStrategy::kZeroCopy, "zero-copy",
-                     "Use zero-copy to perform the data transfer from the host "
-                     "to the GPU"))};

PassOptions::Option<bool> enableIndexReduction{
*this, "enable-index-reduction",
@@ -166,9 +151,8 @@ struct SparseCompilerOptions

/// Projects out the options for `createSparsificationPass`.
SparsificationOptions sparsificationOptions() const {
-    return SparsificationOptions(parallelization, gpuDataTransfer,
-                                 enableIndexReduction, enableGPULibgen,
-                                 enableRuntimeLibrary);
+    return SparsificationOptions(parallelization, enableIndexReduction,
+                                 enableGPULibgen, enableRuntimeLibrary);
}

/// Projects out the options for `createConvertVectorToLLVMPass`.
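For reference, a minimal sketch of how a pipeline driver might consume the slimmed-down projection (assumed usage, not part of this diff; `SparseCompilerOptions` is assumed to sit in the `mlir::sparse_tensor` namespace of the header above, and the wrapper function is purely illustrative):

```cpp
#include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"

// Assumed driver-side sketch: the projected pass options no longer carry any
// GPU data-transfer field, only the four remaining sparsification settings.
mlir::SparsificationOptions
projectOptions(const mlir::sparse_tensor::SparseCompilerOptions &opts) {
  return opts.sparsificationOptions();
}
```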
20 changes: 6 additions & 14 deletions mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
@@ -47,10 +47,6 @@ enum class ReinterpretMapScope {
kExceptGeneric, // reinterprets operation other than linalg.generic
};

-/// Defines data movement strategy between host and device for GPU.
-// TODO : Zero copy is disabled due to correctness bugs (tracker #64316)
-enum class GPUDataTransferStrategy { kRegularDMA, kZeroCopy, kPinnedDMA };

#define GEN_PASS_DECL
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h.inc"

@@ -78,18 +74,14 @@ std::unique_ptr<Pass> createPreSparsificationRewritePass();

/// Options for the Sparsification pass.
struct SparsificationOptions {
-  SparsificationOptions(SparseParallelizationStrategy p,
-                        GPUDataTransferStrategy t, bool idxReduc,
+  SparsificationOptions(SparseParallelizationStrategy p, bool idxReduc,
                         bool gpuLibgen, bool enableRT)
-      : parallelizationStrategy(p), gpuDataTransferStrategy(t),
-        enableIndexReduction(idxReduc), enableGPULibgen(gpuLibgen),
-        enableRuntimeLibrary(enableRT) {}
+      : parallelizationStrategy(p), enableIndexReduction(idxReduc),
+        enableGPULibgen(gpuLibgen), enableRuntimeLibrary(enableRT) {}
SparsificationOptions()
-      : SparsificationOptions(SparseParallelizationStrategy::kNone,
-                              GPUDataTransferStrategy::kRegularDMA, false,
+      : SparsificationOptions(SparseParallelizationStrategy::kNone, false,
false, true) {}
SparseParallelizationStrategy parallelizationStrategy;
-  GPUDataTransferStrategy gpuDataTransferStrategy;
bool enableIndexReduction;
bool enableGPULibgen;
bool enableRuntimeLibrary;
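A minimal caller-side sketch of the trimmed constructor (assumed usage, not part of this diff); the parameter names and values mirror the lines above, everything else is illustrative:

```cpp
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"

// Assumed usage sketch: the GPUDataTransferStrategy argument is gone, so a
// caller now passes only parallelization, index reduction, GPU libgen, and
// runtime-library settings (values below match the delegated defaults above).
mlir::SparsificationOptions makeDefaultLikeOptions() {
  return mlir::SparsificationOptions(mlir::SparseParallelizationStrategy::kNone,
                                     /*idxReduc=*/false, /*gpuLibgen=*/false,
                                     /*enableRT=*/true);
}
```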
@@ -201,8 +193,8 @@ std::unique_ptr<Pass> createSparseVectorizationPass(unsigned vectorLength,
void populateSparseGPUCodegenPatterns(RewritePatternSet &patterns,
unsigned numThreads);

-void populateSparseGPULibgenPatterns(RewritePatternSet &patterns, bool enableRT,
-                                     GPUDataTransferStrategy gpuDataTransfer);
+void populateSparseGPULibgenPatterns(RewritePatternSet &patterns,
+                                     bool enableRT);

std::unique_ptr<Pass> createSparseGPUCodegenPass();
std::unique_ptr<Pass> createSparseGPUCodegenPass(unsigned numThreads);
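Similarly, a call-site sketch for the narrowed libgen entry point (assumed usage; `RewritePatternSet` comes from mlir/IR/PatternMatch.h, and the wrapper function here is illustrative only):

```cpp
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/IR/PatternMatch.h"

// Assumed call-site sketch: the trailing GPUDataTransferStrategy parameter is
// dropped, so only the runtime-library flag is forwarded.
void collectSparseGPULibgenPatterns(mlir::MLIRContext &context, bool enableRT) {
  mlir::RewritePatternSet patterns(&context);
  mlir::populateSparseGPULibgenPatterns(patterns, enableRT);
}
```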
13 changes: 0 additions & 13 deletions mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -134,19 +134,6 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
"any-storage-any-loop",
"Enable sparse parallelization for any storage and loop."))}]>,
Option<"gpuDataTransfer", "gpu-data-transfer-strategy", "mlir::GPUDataTransferStrategy",
"mlir::GPUDataTransferStrategy::kRegularDMA",
"Set the data transfer strategy", [{llvm::cl::values(
clEnumValN(mlir::GPUDataTransferStrategy::kRegularDMA,
"regular-dma",
"Default option: malloc on host without additional "
"options or care and then use DMA to copy the data"),
clEnumValN(mlir::GPUDataTransferStrategy::kPinnedDMA, "pinned-dma",
"Based on the default option, pin the host memory to "
"accelerate the data transfer"),
clEnumValN(mlir::GPUDataTransferStrategy::kZeroCopy, "zero-copy",
"Use zero-copy to perform the data transfer from the host "
"to the GPU"))}]>,
Option<"enableGPULibgen", "enable-gpu-libgen", "bool",
"false",
"Enable GPU acceleration by means of direct library calls (like cuSPARSE)">,
Expand Down
Loading