@@ -6309,14 +6309,14 @@ static void dequantize_row_q3_K_cuda(const void * vx, dst_t * y, const int k, cu
6309
6309
}
6310
6310
6311
6311
// Host-side launcher for the dequantize_block_q4_0 kernel.
//
// Enqueues the kernel on `stream` with ceil(k / 256) blocks of 32 threads each and
// forwards vx, y and the count of complete 32-element groups (k / 32) to it.
// The launch is asynchronous: this function neither synchronizes the stream nor
// checks the launch status; callers are expected to do so at their usual boundaries.
//
//   vx     - input buffer, passed through unchanged to the kernel
//            (presumably Q4_0-quantized blocks -- confirm against the kernel)
//   y      - output buffer of dst_t elements, passed through to the kernel
//   k      - element count used to derive the launch geometry
//   stream - CUDA stream the kernel is enqueued on
template <typename dst_t>
static void dequantize_row_q4_0_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) {
    // A zero-sized grid is an invalid launch configuration and would leave a pending
    // CUDA error behind; with nothing to convert, simply skip the launch.
    if (k <= 0) {
        return;
    }

    const int nb32 = k / 32;           // complete 32-element groups, handed to the kernel
    const int nb   = (k + 255) / 256;  // grid size: one block per 256 elements, rounded up

    dequantize_block_q4_0<<<nb, 32, 0, stream>>>(vx, y, nb32);
}
6317
6317
6318
6318
template <typename dst_t >
6319
- static void dequantize_q4_1_cuda (const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6319
+ static void dequantize_row_q4_1_cuda (const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6320
6320
const int nb32 = k / 32 ;
6321
6321
const int nb = (k + 255 ) / 256 ;
6322
6322
dequantize_block_q4_1<<<nb, 32 , 0 , stream>>> (vx, y, nb32);
@@ -6370,9 +6370,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
6370
6370
int id;
6371
6371
switch (type) {
6372
6372
case GGML_TYPE_Q4_0:
6373
- return dequantize_q4_0_cuda ;
6373
+ return dequantize_row_q4_0_cuda ;
6374
6374
case GGML_TYPE_Q4_1:
6375
- return dequantize_q4_1_cuda ;
6375
+ return dequantize_row_q4_1_cuda ;
6376
6376
case GGML_TYPE_Q5_0:
6377
6377
return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
6378
6378
case GGML_TYPE_Q5_1:
@@ -6407,9 +6407,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
6407
6407
static to_fp32_cuda_t ggml_get_to_fp32_cuda (ggml_type type) {
6408
6408
switch (type) {
6409
6409
case GGML_TYPE_Q4_0:
6410
- return dequantize_q4_0_cuda ;
6410
+ return dequantize_row_q4_0_cuda ;
6411
6411
case GGML_TYPE_Q4_1:
6412
- return dequantize_q4_1_cuda ;
6412
+ return dequantize_row_q4_1_cuda ;
6413
6413
case GGML_TYPE_Q5_0:
6414
6414
return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
6415
6415
case GGML_TYPE_Q5_1:
0 commit comments