Skip to content

Commit 097ef1d

Browse files
SongXiaoXi authored and arthw committed
ggml: fix zero division in ‘dne’ calculation in CUDA COUNT_EQUAL operator when ‘ne’ is small (ggml-org#10213)
1 parent e101aca commit 097ef1d

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/count-equal.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
44 44 |
45 45 |   const int64_t ne = ggml_nelements(src0);
46 46 |   GGML_ASSERT(ne < (1 << 30) && "atomicAdd implementation only supports int");
47    | - const int64_t dne = GGML_PAD(ne / (4*nsm), CUDA_COUNT_EQUAL_CHUNK_SIZE);
   47 | + const int64_t dne = GGML_PAD((ne + 4*nsm - 1) / (4*nsm), CUDA_COUNT_EQUAL_CHUNK_SIZE);
48 48 |
49 49 |   CUDA_CHECK(cudaMemsetAsync(dst_d, 0, ggml_nbytes(dst), stream));
50 50 |
0 commit comments

Comments
 (0)