Skip to content

[mlir][gpu][sparse] gracefully accept zero size allocation #66127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 12, 2023

Conversation

aartbik
Copy link
Contributor

@aartbik aartbik commented Sep 12, 2023

This cleans up a unnecessary code that changes zero size allocation to avoid the following error message

'cuMemAlloc(&ptr, sizeBytes)' failed with 'CUDA_ERROR_INVALID_VALUE'

This cleans up a unnecessary code that changes zero size
allocation to avoid the following error message

'cuMemAlloc(&ptr, sizeBytes)' failed with 'CUDA_ERROR_INVALID_VALUE'
@llvmbot
Copy link
Member

llvmbot commented Sep 12, 2023

@llvm/pr-subscribers-mlir-execution-engine

Changes

This cleans up a unnecessary code that changes zero size allocation to avoid the following error message

'cuMemAlloc(&ptr, sizeBytes)' failed with 'CUDA_ERROR_INVALID_VALUE'

Full diff: https://github.com/llvm/llvm-project/pull/66127.diff

1 Files Affected:

  • (modified) mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp (+10-17)
diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
index 1dba677ebe66365..7bf6804902479a8 100644
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -212,8 +212,9 @@ extern MLIR_CUDA_WRAPPERS_EXPORT "C" void mgpuEventRecord(CUevent event,
 
 extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream /*stream*/) {
   ScopedContext scopedContext;
-  CUdeviceptr ptr;
-  CUDA_REPORT_IF_ERROR(cuMemAlloc(&ptr, sizeBytes));
+  CUdeviceptr ptr = 0;
+  if (sizeBytes != 0)
+    CUDA_REPORT_IF_ERROR(cuMemAlloc(&ptr, sizeBytes));
   return reinterpret_cast(ptr);
 }
 
@@ -521,7 +522,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMVBufferSize(
   CUSPARSE_REPORT_IF_ERROR(cusparseSpMV_bufferSize(
       cusparse_env, modeA, alphap, matA, vecX, betap, vecY, cTp,
       CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize))
-  return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
+  return bufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMV(int32_t ma, void *a, void *x,
@@ -555,7 +556,7 @@ mgpuSpMMBufferSize(int32_t ma, int32_t mb, void *a, void *b, void *c,
   CUSPARSE_REPORT_IF_ERROR(cusparseSpMM_bufferSize(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SPMM_ALG_DEFAULT, &bufferSize))
-  return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
+  return bufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMM(int32_t ma, int32_t mb,
@@ -590,7 +591,7 @@ mgpuSDDMMBufferSize(int32_t ma, int32_t mb, void *a, void *b, void *c,
   CUSPARSE_REPORT_IF_ERROR(cusparseSDDMM_bufferSize(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SDDMM_ALG_DEFAULT, &bufferSize))
-  return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
+  return bufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSDDMM(int32_t ma, int32_t mb,
@@ -638,7 +639,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpGEMMWorkEstimation(
   CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_workEstimation(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SPGEMM_DEFAULT, spgemmDesc, &newBufferSize, buf))
-  return newBufferSize == 0 ? 1 : newBufferSize; // avoid zero-alloc
+  return newBufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t
@@ -656,7 +657,7 @@ mgpuSpGEMMCompute(void *s, int32_t ma, int32_t mb, void *a, void *b, void *c,
   CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_compute(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SPGEMM_DEFAULT, spgemmDesc, &newBufferSize2, buf2))
-  return newBufferSize2 == 0 ? 1 : newBufferSize2; // avoid zero-alloc
+  return newBufferSize2;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void
@@ -794,7 +795,6 @@ mgpuCuSparseLtSpMMBufferSize(void *bs, int32_t ma, int32_t mb, void *a, void *b,
   auto workspace_size = reinterpret_cast(bs);
   auto compressed_size = &(reinterpret_cast(bs)[1]);
   auto compressed_buffer_size = &(reinterpret_cast(bs)[2]);
-  size_t workspace_size_, compressed_size_, compressed_buffer_size_;
   auto cTp = static_cast(ctp);
 
   cusparseOperation_t modeA = static_cast(ma);
@@ -836,16 +836,9 @@ mgpuCuSparseLtSpMMBufferSize(void *bs, int32_t ma, int32_t mb, void *a, void *b,
   }
 
   CUSPARSE_REPORT_IF_ERROR(cusparseLtMatmulGetWorkspace(
-      &cusparseLt_env, &(matA->plan), &workspace_size_))
+      &cusparseLt_env, &(matA->plan), workspace_size))
   CUSPARSE_REPORT_IF_ERROR(cusparseLtSpMMACompressedSize(
-      &cusparseLt_env, &(matA->plan), &compressed_size_,
-      &compressed_buffer_size_))
-
-  // Avoid zero-allocation.
-  *workspace_size = (workspace_size_ == 0 ? 1 : workspace_size_);
-  *compressed_size = (compressed_size_ == 0 ? 1 : compressed_size_);
-  *compressed_buffer_size =
-      (compressed_buffer_size_ == 0 ? 1 : compressed_buffer_size_);
+      &cusparseLt_env, &(matA->plan), compressed_size, compressed_buffer_size))
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void

@llvmbot
Copy link
Member

llvmbot commented Sep 12, 2023

@llvm/pr-subscribers-mlir

Changes

This cleans up a unnecessary code that changes zero size allocation to avoid the following error message

'cuMemAlloc(&ptr, sizeBytes)' failed with 'CUDA_ERROR_INVALID_VALUE'

Full diff: https://github.com/llvm/llvm-project/pull/66127.diff

1 Files Affected:

  • (modified) mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp (+10-17)
diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
index 1dba677ebe66365..7bf6804902479a8 100644
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -212,8 +212,9 @@ extern MLIR_CUDA_WRAPPERS_EXPORT "C" void mgpuEventRecord(CUevent event,
 
 extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream /*stream*/) {
   ScopedContext scopedContext;
-  CUdeviceptr ptr;
-  CUDA_REPORT_IF_ERROR(cuMemAlloc(&ptr, sizeBytes));
+  CUdeviceptr ptr = 0;
+  if (sizeBytes != 0)
+    CUDA_REPORT_IF_ERROR(cuMemAlloc(&ptr, sizeBytes));
   return reinterpret_cast(ptr);
 }
 
@@ -521,7 +522,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMVBufferSize(
   CUSPARSE_REPORT_IF_ERROR(cusparseSpMV_bufferSize(
       cusparse_env, modeA, alphap, matA, vecX, betap, vecY, cTp,
       CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize))
-  return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
+  return bufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMV(int32_t ma, void *a, void *x,
@@ -555,7 +556,7 @@ mgpuSpMMBufferSize(int32_t ma, int32_t mb, void *a, void *b, void *c,
   CUSPARSE_REPORT_IF_ERROR(cusparseSpMM_bufferSize(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SPMM_ALG_DEFAULT, &bufferSize))
-  return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
+  return bufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMM(int32_t ma, int32_t mb,
@@ -590,7 +591,7 @@ mgpuSDDMMBufferSize(int32_t ma, int32_t mb, void *a, void *b, void *c,
   CUSPARSE_REPORT_IF_ERROR(cusparseSDDMM_bufferSize(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SDDMM_ALG_DEFAULT, &bufferSize))
-  return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
+  return bufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSDDMM(int32_t ma, int32_t mb,
@@ -638,7 +639,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpGEMMWorkEstimation(
   CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_workEstimation(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SPGEMM_DEFAULT, spgemmDesc, &newBufferSize, buf))
-  return newBufferSize == 0 ? 1 : newBufferSize; // avoid zero-alloc
+  return newBufferSize;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t
@@ -656,7 +657,7 @@ mgpuSpGEMMCompute(void *s, int32_t ma, int32_t mb, void *a, void *b, void *c,
   CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_compute(
       cusparse_env, modeA, modeB, alphap, matA, matB, betap, matC, cTp,
       CUSPARSE_SPGEMM_DEFAULT, spgemmDesc, &newBufferSize2, buf2))
-  return newBufferSize2 == 0 ? 1 : newBufferSize2; // avoid zero-alloc
+  return newBufferSize2;
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void
@@ -794,7 +795,6 @@ mgpuCuSparseLtSpMMBufferSize(void *bs, int32_t ma, int32_t mb, void *a, void *b,
   auto workspace_size = reinterpret_cast(bs);
   auto compressed_size = &(reinterpret_cast(bs)[1]);
   auto compressed_buffer_size = &(reinterpret_cast(bs)[2]);
-  size_t workspace_size_, compressed_size_, compressed_buffer_size_;
   auto cTp = static_cast(ctp);
 
   cusparseOperation_t modeA = static_cast(ma);
@@ -836,16 +836,9 @@ mgpuCuSparseLtSpMMBufferSize(void *bs, int32_t ma, int32_t mb, void *a, void *b,
   }
 
   CUSPARSE_REPORT_IF_ERROR(cusparseLtMatmulGetWorkspace(
-      &cusparseLt_env, &(matA->plan), &workspace_size_))
+      &cusparseLt_env, &(matA->plan), workspace_size))
   CUSPARSE_REPORT_IF_ERROR(cusparseLtSpMMACompressedSize(
-      &cusparseLt_env, &(matA->plan), &compressed_size_,
-      &compressed_buffer_size_))
-
-  // Avoid zero-allocation.
-  *workspace_size = (workspace_size_ == 0 ? 1 : workspace_size_);
-  *compressed_size = (compressed_size_ == 0 ? 1 : compressed_size_);
-  *compressed_buffer_size =
-      (compressed_buffer_size_ == 0 ? 1 : compressed_buffer_size_);
+      &cusparseLt_env, &(matA->plan), compressed_size, compressed_buffer_size))
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void

Copy link
Member

@grypp grypp left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the right thing to do. Thanks for doing that

@aartbik aartbik merged commit 3635c74 into llvm:main Sep 12, 2023
@aartbik aartbik deleted the bik branch September 12, 2023 21:41
ZijunZhaoCCK pushed a commit to ZijunZhaoCCK/llvm-project that referenced this pull request Sep 19, 2023
This cleans up a unnecessary code that changes zero size allocation to
avoid the following error message

'cuMemAlloc(&ptr, sizeBytes)' failed with 'CUDA_ERROR_INVALID_VALUE'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

7 participants