Skip to content

Commit 4958708

Browse files
JohannesGaesslerpull[bot]
authored andcommitted
CUDA: fix RoPE asserts, block sizes (#2833)
1 parent d159e5f commit 4958708

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

ggml-cuda.cu

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4908,16 +4908,17 @@ static void scale_f32_cuda(const float * x, float * dst, const float scale, cons
49084908

49094909
static void rope_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, const float p0,
49104910
const float p_delta, const int p_delta_rows, const float theta_scale, cudaStream_t stream) {
4911-
GGML_ASSERT(nrows % 2 == 0); // GG: is this assert really needed? I don't see why
4912-
const dim3 block_dims(1, 2*CUDA_ROPE_BLOCK_SIZE, 1);
4911+
GGML_ASSERT(ncols % 2 == 0);
4912+
const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
49134913
const int num_blocks_x = (ncols + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
49144914
const dim3 block_nums(nrows, num_blocks_x, 1);
49154915
rope_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols, p0, p_delta, p_delta_rows, theta_scale);
49164916
}
49174917

49184918
static void rope_neox_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, const float p0,
49194919
const float p_delta, const int p_delta_rows, const float theta_scale, cudaStream_t stream) {
4920-
const dim3 block_dims(1, 2*CUDA_ROPE_BLOCK_SIZE, 1);
4920+
GGML_ASSERT(ncols % 2 == 0);
4921+
const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
49214922
const int num_blocks_x = (ncols + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
49224923
const dim3 block_nums(nrows, num_blocks_x, 1);
49234924
rope_neox_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols, p0, p_delta, p_delta_rows, theta_scale);

0 commit comments

Comments
 (0)