Skip to content

Commit 38a021f

Browse files
committed
fix rebase
1 parent 3cf2247 commit 38a021f

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

ggml.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8252,7 +8252,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
82528252
for (int64_t i02 = 0; i02 < ne02; i02++) {
82538253
#if defined(GGML_USE_CUBLAS)
82548254
// copy src0 while converting src1
8255-
CUDA_CHECK(ggml_cuda_h2d_tensor_2d(d_X, src0, i02, i03, g_cudaStream));
8255+
CUDA_CHECK(ggml_cuda_h2d_tensor_2d(d_X, src0, i03, i02, g_cudaStream));
82568256

82578257
// with cuBLAS, instead of converting src0 to fp32, we convert src1 to fp16
82588258
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + (ne11 * ne10) * (i03 * ne02 + i02);
@@ -8523,10 +8523,11 @@ static void ggml_compute_forward_mul_mat_q_f32(
85238523

85248524
#if defined(GGML_USE_CUBLAS)
85258525
// copy and dequantize on device
8526-
CUDA_CHECK(ggml_cuda_h2d_tensor_2d(d_Q, src0, i03, i02, g_cudaStream));
8526+
CUDA_CHECK(ggml_cuda_h2d_tensor_2d(d_Q, src0, i03, i02, g_cudaStream2));
85278527

85288528
dequantize_row_q_cuda(d_Q, d_X, x_ne, g_cudaStream2);
85298529
CUDA_CHECK(cudaGetLastError());
8530+
CUDA_CHECK(cudaEventRecord(g_cudaEvent, g_cudaStream2));
85308531
#elif defined(GGML_USE_CLBLAST)
85318532
const void* x = (char *) src0->data + i03*nb03 + i02*nb02;
85328533
#else

0 commit comments

Comments
 (0)