Skip to content

Commit 44acb8b

Browse files
refacotr
1 parent 3c4a83e commit 44acb8b

File tree

1 file changed

+70
-100
lines changed

1 file changed

+70
-100
lines changed

ggml-cuda.cu

Lines changed: 70 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -3418,23 +3418,20 @@ template <bool need_check> static __global__ void mul_mat_q4_0(
34183418
const int mmq_x = MMQ_X_Q4_0_AMPERE;
34193419
const int mmq_y = MMQ_Y_Q4_0_AMPERE;
34203420
const int nwarps = NWARPS_Q4_0_AMPERE;
3421-
3422-
mul_mat_q<QK4_0, QR4_0, QI4_0, true, block_q4_0, mmq_x, mmq_y, nwarps, allocate_tiles_q4_0<mmq_y>,
3423-
load_tiles_q4_0<mmq_y, nwarps, need_check>, VDR_Q4_0_Q8_1_MMQ, vec_dot_q4_0_q8_1_mul_mat>
3424-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3425-
34263421
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
34273422
const int mmq_x = MMQ_X_Q4_0_PASCAL;
34283423
const int mmq_y = MMQ_Y_Q4_0_PASCAL;
34293424
const int nwarps = NWARPS_Q4_0_PASCAL;
3430-
3431-
mul_mat_q<QK4_0, QR4_0, QI4_0, true, block_q4_0, mmq_x, mmq_y, nwarps, allocate_tiles_q4_0<mmq_y>,
3432-
load_tiles_q4_0<mmq_y, nwarps, need_check>, VDR_Q4_0_Q8_1_MMQ, vec_dot_q4_0_q8_1_mul_mat>
3433-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
34343425
#else
3435-
(void) vec_dot_q4_0_q8_1_mul_mat;
3426+
const int mmq_x = -1;
3427+
const int mmq_y = -1;
3428+
const int nwarps = -1;
34363429
assert(false);
34373430
#endif // __CUDA_ARCH__ >= CC_TURING
3431+
3432+
mul_mat_q<QK4_0, QR4_0, QI4_0, true, block_q4_0, mmq_x, mmq_y, nwarps, allocate_tiles_q4_0<mmq_y>,
3433+
load_tiles_q4_0<mmq_y, nwarps, need_check>, VDR_Q4_0_Q8_1_MMQ, vec_dot_q4_0_q8_1_mul_mat>
3434+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
34383435
}
34393436

34403437
#define MMQ_X_Q4_1_AMPERE 64
@@ -3457,23 +3454,20 @@ template <bool need_check> static __global__ void
34573454
const int mmq_x = MMQ_X_Q4_1_AMPERE;
34583455
const int mmq_y = MMQ_Y_Q4_1_AMPERE;
34593456
const int nwarps = NWARPS_Q4_1_AMPERE;
3460-
3461-
mul_mat_q<QK4_1, QR4_1, QI4_1, true, block_q4_1, mmq_x, mmq_y, nwarps, allocate_tiles_q4_1<mmq_y>,
3462-
load_tiles_q4_1<mmq_y, nwarps, need_check>, VDR_Q4_1_Q8_1_MMQ, vec_dot_q4_1_q8_1_mul_mat>
3463-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3464-
34653457
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
34663458
const int mmq_x = MMQ_X_Q4_1_PASCAL;
34673459
const int mmq_y = MMQ_Y_Q4_1_PASCAL;
34683460
const int nwarps = NWARPS_Q4_1_PASCAL;
3469-
3470-
mul_mat_q<QK4_1, QR4_1, QI4_1, true, block_q4_1, mmq_x, mmq_y, nwarps, allocate_tiles_q4_1<mmq_y>,
3471-
load_tiles_q4_1<mmq_y, nwarps, need_check>, VDR_Q4_1_Q8_1_MMQ, vec_dot_q4_1_q8_1_mul_mat>
3472-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
34733461
#else
3474-
(void) vec_dot_q4_1_q8_1_mul_mat;
3462+
const int mmq_x = -1;
3463+
const int mmq_y = -1;
3464+
const int nwarps = -1;
34753465
assert(false);
34763466
#endif // __CUDA_ARCH__ >= CC_TURING
3467+
3468+
mul_mat_q<QK4_1, QR4_1, QI4_1, true, block_q4_1, mmq_x, mmq_y, nwarps, allocate_tiles_q4_1<mmq_y>,
3469+
load_tiles_q4_1<mmq_y, nwarps, need_check>, VDR_Q4_1_Q8_1_MMQ, vec_dot_q4_1_q8_1_mul_mat>
3470+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
34773471
}
34783472

34793473
#define MMQ_X_Q5_0_AMPERE 128
@@ -3492,23 +3486,20 @@ template <bool need_check> static __global__ void mul_mat_q5_0(
34923486
const int mmq_x = MMQ_X_Q5_0_AMPERE;
34933487
const int mmq_y = MMQ_Y_Q5_0_AMPERE;
34943488
const int nwarps = NWARPS_Q5_0_AMPERE;
3495-
3496-
mul_mat_q<QK5_0, QR5_0, QI5_0, false, block_q5_0, mmq_x, mmq_y, nwarps, allocate_tiles_q5_0<mmq_y>,
3497-
load_tiles_q5_0<mmq_y, nwarps, need_check>, VDR_Q5_0_Q8_1_MMQ, vec_dot_q5_0_q8_1_mul_mat>
3498-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3499-
35003489
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
35013490
const int mmq_x = MMQ_X_Q5_0_PASCAL;
35023491
const int mmq_y = MMQ_Y_Q5_0_PASCAL;
35033492
const int nwarps = NWARPS_Q5_0_PASCAL;
3504-
3505-
mul_mat_q<QK5_0, QR5_0, QI5_0, false, block_q5_0, mmq_x, mmq_y, nwarps, allocate_tiles_q5_0<mmq_y>,
3506-
load_tiles_q5_0<mmq_y, nwarps, need_check>, VDR_Q5_0_Q8_1_MMQ, vec_dot_q5_0_q8_1_mul_mat>
3507-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
35083493
#else
3509-
(void) vec_dot_q5_0_q8_1_mul_mat;
3494+
const int mmq_x = -1;
3495+
const int mmq_y = -1;
3496+
const int nwarps = -1;
35103497
assert(false);
35113498
#endif // __CUDA_ARCH__ >= CC_TURING
3499+
3500+
mul_mat_q<QK5_0, QR5_0, QI5_0, false, block_q5_0, mmq_x, mmq_y, nwarps, allocate_tiles_q5_0<mmq_y>,
3501+
load_tiles_q5_0<mmq_y, nwarps, need_check>, VDR_Q5_0_Q8_1_MMQ, vec_dot_q5_0_q8_1_mul_mat>
3502+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
35123503
}
35133504

35143505
#define MMQ_X_Q5_1_AMPERE 128
@@ -3527,23 +3518,20 @@ template <bool need_check> static __global__ void mul_mat_q5_1(
35273518
const int mmq_x = MMQ_X_Q5_1_AMPERE;
35283519
const int mmq_y = MMQ_Y_Q5_1_AMPERE;
35293520
const int nwarps = NWARPS_Q5_1_AMPERE;
3530-
3531-
mul_mat_q<QK5_1, QR5_1, QI5_1, true, block_q5_1, mmq_x, mmq_y, nwarps, allocate_tiles_q5_1<mmq_y>,
3532-
load_tiles_q5_1<mmq_y, nwarps, need_check>, VDR_Q5_1_Q8_1_MMQ, vec_dot_q5_1_q8_1_mul_mat>
3533-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3534-
35353521
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
35363522
const int mmq_x = MMQ_X_Q5_1_PASCAL;
35373523
const int mmq_y = MMQ_Y_Q5_1_PASCAL;
35383524
const int nwarps = NWARPS_Q5_1_PASCAL;
3539-
3540-
mul_mat_q<QK5_1, QR5_1, QI5_1, true, block_q5_1, mmq_x, mmq_y, nwarps, allocate_tiles_q5_1<mmq_y>,
3541-
load_tiles_q5_1<mmq_y, nwarps, need_check>, VDR_Q5_1_Q8_1_MMQ, vec_dot_q5_1_q8_1_mul_mat>
3542-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
35433525
#else
3544-
(void) vec_dot_q5_1_q8_1_mul_mat;
3526+
const int mmq_x = -1;
3527+
const int mmq_y = -1;
3528+
const int nwarps = -1;
35453529
assert(false);
35463530
#endif // __CUDA_ARCH__ >= CC_TURING
3531+
3532+
mul_mat_q<QK5_1, QR5_1, QI5_1, true, block_q5_1, mmq_x, mmq_y, nwarps, allocate_tiles_q5_1<mmq_y>,
3533+
load_tiles_q5_1<mmq_y, nwarps, need_check>, VDR_Q5_1_Q8_1_MMQ, vec_dot_q5_1_q8_1_mul_mat>
3534+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
35473535
}
35483536

35493537
#define MMQ_X_Q8_0_AMPERE 128
@@ -3562,23 +3550,20 @@ template <bool need_check> static __global__ void mul_mat_q8_0(
35623550
const int mmq_x = MMQ_X_Q8_0_AMPERE;
35633551
const int mmq_y = MMQ_Y_Q8_0_AMPERE;
35643552
const int nwarps = NWARPS_Q8_0_AMPERE;
3565-
3566-
mul_mat_q<QK8_0, QR8_0, QI8_0, false, block_q8_0, mmq_x, mmq_y, nwarps, allocate_tiles_q8_0<mmq_y>,
3567-
load_tiles_q8_0<mmq_y, nwarps, need_check>, VDR_Q8_0_Q8_1_MMQ, vec_dot_q8_0_q8_1_mul_mat>
3568-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3569-
35703553
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
35713554
const int mmq_x = MMQ_X_Q8_0_PASCAL;
35723555
const int mmq_y = MMQ_Y_Q8_0_PASCAL;
35733556
const int nwarps = NWARPS_Q8_0_PASCAL;
3574-
3575-
mul_mat_q<QK8_0, QR8_0, QI8_0, false, block_q8_0, mmq_x, mmq_y, nwarps, allocate_tiles_q8_0<mmq_y>,
3576-
load_tiles_q8_0<mmq_y, nwarps, need_check>, VDR_Q8_0_Q8_1_MMQ, vec_dot_q8_0_q8_1_mul_mat>
3577-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
35783557
#else
3579-
(void) vec_dot_q8_0_q8_1_mul_mat;
3558+
const int mmq_x = -1;
3559+
const int mmq_y = -1;
3560+
const int nwarps = -1;
35803561
assert(false);
35813562
#endif // __CUDA_ARCH__ >= CC_TURING
3563+
3564+
mul_mat_q<QK8_0, QR8_0, QI8_0, false, block_q8_0, mmq_x, mmq_y, nwarps, allocate_tiles_q8_0<mmq_y>,
3565+
load_tiles_q8_0<mmq_y, nwarps, need_check>, VDR_Q8_0_Q8_1_MMQ, vec_dot_q8_0_q8_1_mul_mat>
3566+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
35823567
}
35833568

35843569
#define MMQ_X_Q2_K_AMPERE 64
@@ -3597,23 +3582,20 @@ template <bool need_check> static __global__ void mul_mat_q2_K(
35973582
const int mmq_x = MMQ_X_Q2_K_AMPERE;
35983583
const int mmq_y = MMQ_Y_Q2_K_AMPERE;
35993584
const int nwarps = NWARPS_Q2_K_AMPERE;
3600-
3601-
mul_mat_q<QK_K, QR2_K, QI2_K, false, block_q2_K, mmq_x, mmq_y, nwarps, allocate_tiles_q2_K<mmq_y>,
3602-
load_tiles_q2_K<mmq_y, nwarps, need_check>, VDR_Q2_K_Q8_1_MMQ, vec_dot_q2_K_q8_1_mul_mat>
3603-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3604-
36053585
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
36063586
const int mmq_x = MMQ_X_Q2_K_PASCAL;
36073587
const int mmq_y = MMQ_Y_Q2_K_PASCAL;
36083588
const int nwarps = NWARPS_Q2_K_PASCAL;
3609-
3610-
mul_mat_q<QK_K, QR2_K, QI2_K, false, block_q2_K, mmq_x, mmq_y, nwarps, allocate_tiles_q2_K<mmq_y>,
3611-
load_tiles_q2_K<mmq_y, nwarps, need_check>, VDR_Q2_K_Q8_1_MMQ, vec_dot_q2_K_q8_1_mul_mat>
3612-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
36133589
#else
3614-
(void) vec_dot_q2_K_q8_1_mul_mat;
3590+
const int mmq_x = -1;
3591+
const int mmq_y = -1;
3592+
const int nwarps = -1;
36153593
assert(false);
36163594
#endif // __CUDA_ARCH__ >= CC_TURING
3595+
3596+
mul_mat_q<QK_K, QR2_K, QI2_K, false, block_q2_K, mmq_x, mmq_y, nwarps, allocate_tiles_q2_K<mmq_y>,
3597+
load_tiles_q2_K<mmq_y, nwarps, need_check>, VDR_Q2_K_Q8_1_MMQ, vec_dot_q2_K_q8_1_mul_mat>
3598+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
36173599
}
36183600

36193601
#define MMQ_X_Q3_K_AMPERE 128
@@ -3636,23 +3618,20 @@ template <bool need_check> static __global__ void
36363618
const int mmq_x = MMQ_X_Q3_K_AMPERE;
36373619
const int mmq_y = MMQ_Y_Q3_K_AMPERE;
36383620
const int nwarps = NWARPS_Q3_K_AMPERE;
3639-
3640-
mul_mat_q<QK_K, QR3_K, QI3_K, false, block_q3_K, mmq_x, mmq_y, nwarps, allocate_tiles_q3_K<mmq_y>,
3641-
load_tiles_q3_K<mmq_y, nwarps, need_check>, VDR_Q3_K_Q8_1_MMQ, vec_dot_q3_K_q8_1_mul_mat>
3642-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3643-
36443621
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
36453622
const int mmq_x = MMQ_X_Q3_K_PASCAL;
36463623
const int mmq_y = MMQ_Y_Q3_K_PASCAL;
36473624
const int nwarps = NWARPS_Q3_K_PASCAL;
3648-
3649-
mul_mat_q<QK_K, QR3_K, QI3_K, false, block_q3_K, mmq_x, mmq_y, nwarps, allocate_tiles_q3_K<mmq_y>,
3650-
load_tiles_q3_K<mmq_y, nwarps, need_check>, VDR_Q3_K_Q8_1_MMQ, vec_dot_q3_K_q8_1_mul_mat>
3651-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
36523625
#else
3653-
(void) vec_dot_q3_K_q8_1_mul_mat;
3626+
const int mmq_x = -1;
3627+
const int mmq_y = -1;
3628+
const int nwarps = -1;
36543629
assert(false);
36553630
#endif // __CUDA_ARCH__ >= CC_TURING
3631+
3632+
mul_mat_q<QK_K, QR3_K, QI3_K, false, block_q3_K, mmq_x, mmq_y, nwarps, allocate_tiles_q3_K<mmq_y>,
3633+
load_tiles_q3_K<mmq_y, nwarps, need_check>, VDR_Q3_K_Q8_1_MMQ, vec_dot_q3_K_q8_1_mul_mat>
3634+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
36563635
}
36573636

36583637
#define MMQ_X_Q4_K_AMPERE 64
@@ -3675,23 +3654,20 @@ template <bool need_check> static __global__ void
36753654
const int mmq_x = MMQ_X_Q4_K_AMPERE;
36763655
const int mmq_y = MMQ_Y_Q4_K_AMPERE;
36773656
const int nwarps = NWARPS_Q4_K_AMPERE;
3678-
3679-
mul_mat_q<QK_K, QR4_K, QI4_K, true, block_q4_K, mmq_x, mmq_y, nwarps, allocate_tiles_q4_K<mmq_y>,
3680-
load_tiles_q4_K<mmq_y, nwarps, need_check>, VDR_Q4_K_Q8_1_MMQ, vec_dot_q4_K_q8_1_mul_mat>
3681-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3682-
36833657
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
36843658
const int mmq_x = MMQ_X_Q4_K_PASCAL;
36853659
const int mmq_y = MMQ_Y_Q4_K_PASCAL;
36863660
const int nwarps = NWARPS_Q4_K_PASCAL;
3687-
3688-
mul_mat_q<QK_K, QR4_K, QI4_K, true, block_q4_K, mmq_x, mmq_y, nwarps, allocate_tiles_q4_K<mmq_y>,
3689-
load_tiles_q4_K<mmq_y, nwarps, need_check>, VDR_Q4_K_Q8_1_MMQ, vec_dot_q4_K_q8_1_mul_mat>
3690-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
36913661
#else
3692-
(void) vec_dot_q4_K_q8_1_mul_mat;
3662+
const int mmq_x = -1;
3663+
const int mmq_y = -1;
3664+
const int nwarps = -1;
36933665
assert(false);
36943666
#endif // __CUDA_ARCH__ >= CC_TURING
3667+
3668+
mul_mat_q<QK_K, QR4_K, QI4_K, true, block_q4_K, mmq_x, mmq_y, nwarps, allocate_tiles_q4_K<mmq_y>,
3669+
load_tiles_q4_K<mmq_y, nwarps, need_check>, VDR_Q4_K_Q8_1_MMQ, vec_dot_q4_K_q8_1_mul_mat>
3670+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
36953671
}
36963672

36973673
#define MMQ_X_Q5_K_AMPERE 64
@@ -3710,23 +3686,20 @@ template <bool need_check> static __global__ void mul_mat_q5_K(
37103686
const int mmq_x = MMQ_X_Q5_K_AMPERE;
37113687
const int mmq_y = MMQ_Y_Q5_K_AMPERE;
37123688
const int nwarps = NWARPS_Q5_K_AMPERE;
3713-
3714-
mul_mat_q<QK_K, QR5_K, QI5_K, true, block_q5_K, mmq_x, mmq_y, nwarps, allocate_tiles_q5_K<mmq_y>,
3715-
load_tiles_q5_K<mmq_y, nwarps, need_check>, VDR_Q5_K_Q8_1_MMQ, vec_dot_q5_K_q8_1_mul_mat>
3716-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3717-
37183689
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
37193690
const int mmq_x = MMQ_X_Q5_K_PASCAL;
37203691
const int mmq_y = MMQ_Y_Q5_K_PASCAL;
37213692
const int nwarps = NWARPS_Q5_K_PASCAL;
3722-
3723-
mul_mat_q<QK_K, QR5_K, QI5_K, true, block_q5_K, mmq_x, mmq_y, nwarps, allocate_tiles_q5_K<mmq_y>,
3724-
load_tiles_q5_K<mmq_y, nwarps, need_check>, VDR_Q5_K_Q8_1_MMQ, vec_dot_q5_K_q8_1_mul_mat>
3725-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
37263693
#else
3727-
(void) vec_dot_q5_K_q8_1_mul_mat;
3694+
const int mmq_x = -1;
3695+
const int mmq_y = -1;
3696+
const int nwarps = -1;
37283697
assert(false);
37293698
#endif // __CUDA_ARCH__ >= CC_TURING
3699+
3700+
mul_mat_q<QK_K, QR5_K, QI5_K, true, block_q5_K, mmq_x, mmq_y, nwarps, allocate_tiles_q5_K<mmq_y>,
3701+
load_tiles_q5_K<mmq_y, nwarps, need_check>, VDR_Q5_K_Q8_1_MMQ, vec_dot_q5_K_q8_1_mul_mat>
3702+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
37303703
}
37313704

37323705
#define MMQ_X_Q6_K_AMPERE 64
@@ -3749,23 +3722,20 @@ template <bool need_check> static __global__ void
37493722
const int mmq_x = MMQ_X_Q6_K_AMPERE;
37503723
const int mmq_y = MMQ_Y_Q6_K_AMPERE;
37513724
const int nwarps = NWARPS_Q6_K_AMPERE;
3752-
3753-
mul_mat_q<QK_K, QR6_K, QI6_K, false, block_q6_K, mmq_x, mmq_y, nwarps, allocate_tiles_q6_K<mmq_y>,
3754-
load_tiles_q6_K<mmq_y, nwarps, need_check>, VDR_Q6_K_Q8_1_MMQ, vec_dot_q6_K_q8_1_mul_mat>
3755-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
3756-
37573725
#elif __CUDA_ARCH__ >= MIN_CC_DP4A
37583726
const int mmq_x = MMQ_X_Q6_K_PASCAL;
37593727
const int mmq_y = MMQ_Y_Q6_K_PASCAL;
37603728
const int nwarps = NWARPS_Q6_K_PASCAL;
3761-
3762-
mul_mat_q<QK_K, QR6_K, QI6_K, false, block_q6_K, mmq_x, mmq_y, nwarps, allocate_tiles_q6_K<mmq_y>,
3763-
load_tiles_q6_K<mmq_y, nwarps, need_check>, VDR_Q6_K_Q8_1_MMQ, vec_dot_q6_K_q8_1_mul_mat>
3764-
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst);
37653729
#else
3766-
(void) vec_dot_q6_K_q8_1_mul_mat;
3730+
const int mmq_x = -1;
3731+
const int mmq_y = -1;
3732+
const int nwarps = -1;
37673733
assert(false);
37683734
#endif // __CUDA_ARCH__ >= CC_TURING
3735+
3736+
mul_mat_q<QK_K, QR6_K, QI6_K, false, block_q6_K, mmq_x, mmq_y, nwarps, allocate_tiles_q6_K<mmq_y>,
3737+
load_tiles_q6_K<mmq_y, nwarps, need_check>, VDR_Q6_K_Q8_1_MMQ, vec_dot_q6_K_q8_1_mul_mat>
3738+
(vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, row_stride_x, channel_stride_x, channel_stride_y);
37693739
}
37703740

37713741
template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_cuda_t vec_dot_q_cuda>

0 commit comments

Comments
 (0)