@@ -68,7 +68,7 @@ static __device__ __forceinline__ T vec_dot_fattn_vec_KQ_q4_0(
68
68
const int iqs4 = k_KQ % QI4_0;
69
69
const int shift = k_KQ & (QI8_1/2 );
70
70
71
- const int v = (get_int_from_uint8 (K_q4_0[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
71
+ const int v = (get_int_b2 (K_q4_0[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
72
72
const int u = Q_q8[k_KQ_0/WARP_SIZE];
73
73
74
74
const int sumi = ggml_cuda_dp4a (v, u, 0 );
@@ -108,7 +108,7 @@ static __device__ __forceinline__ T vec_dot_fattn_vec_KQ_q4_1(
108
108
const int iqs4 = k_KQ % QI4_1;
109
109
const int shift = k_KQ & (QI8_1/2 );
110
110
111
- const int v = (get_int_from_uint8_aligned (K_q4_1[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
111
+ const int v = (get_int_b4 (K_q4_1[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
112
112
const int u = Q_q8[k_KQ_0/WARP_SIZE];
113
113
114
114
const int sumi = ggml_cuda_dp4a (v, u, 0 );
@@ -153,8 +153,8 @@ static __device__ __forceinline__ T vec_dot_fattn_vec_KQ_q5_0(
153
153
const int iqs8 = k_KQ % QI8_1;
154
154
const int shift = k_KQ & (QI8_1/2 );
155
155
156
- int v = (get_int_from_uint8 (K_q5_0[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
157
- const int vh = get_int_from_uint8 (K_q5_0[ib].qh , 0 ) >> (iqs8 * QI5_0);
156
+ int v = (get_int_b2 (K_q5_0[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
157
+ const int vh = get_int_b2 (K_q5_0[ib].qh , 0 ) >> (iqs8 * QI5_0);
158
158
v |= (vh << 4 ) & 0x00000010 ; // 0 -> 4
159
159
v |= (vh << 11 ) & 0x00001000 ; // 1 -> 12
160
160
v |= (vh << 18 ) & 0x00100000 ; // 2 -> 20
@@ -200,8 +200,8 @@ static __device__ __forceinline__ T vec_dot_fattn_vec_KQ_q5_1(
200
200
const int iqs8 = k_KQ % QI8_1;
201
201
const int shift = k_KQ & (QI8_1/2 );
202
202
203
- int v = (get_int_from_uint8 (K_q5_1[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
204
- const int vh = get_int_from_uint8 (K_q5_1[ib].qh , 0 ) >> (iqs8 * QI5_1);
203
+ int v = (get_int_b2 (K_q5_1[ib].qs , iqs4) >> shift) & 0x0F0F0F0F ;
204
+ const int vh = get_int_b2 (K_q5_1[ib].qh , 0 ) >> (iqs8 * QI5_1);
205
205
v |= (vh << 4 ) & 0x00000010 ; // 0 -> 4
206
206
v |= (vh << 11 ) & 0x00001000 ; // 1 -> 12
207
207
v |= (vh << 18 ) & 0x00100000 ; // 2 -> 20
@@ -249,7 +249,7 @@ static __device__ __forceinline__ T vec_dot_fattn_vec_KQ_q8_0(
249
249
const int ib = k_KQ / QI8_0;
250
250
const int iqs = k_KQ % QI8_0;
251
251
252
- const int v = get_int_from_int8 (K_q8_0[ib].qs , iqs);
252
+ const int v = get_int_b2 (K_q8_0[ib].qs , iqs);
253
253
254
254
T Q_d;
255
255
if (std::is_same<T, half>::value) {
@@ -408,7 +408,7 @@ static __device__ __forceinline__ T dequantize_1_q5_0(const void * __restrict__
408
408
409
409
const T d = x[ib].d ;
410
410
const int ql0 = x[ib].qs [iqs];
411
- const int qh0 = get_int_from_uint8 (x[ib].qh , 0 );
411
+ const int qh0 = get_int_b2 (x[ib].qh , 0 );
412
412
const int ql = ((ql0 >> (4 *shift)) & 0x0F );
413
413
const int qh = ((qh0 >> idq) << 4 ) & 0x10 ;
414
414
const int q = (ql | qh) - 16 ;
@@ -433,7 +433,7 @@ static __device__ __forceinline__ T dequantize_1_q5_1(const void * __restrict__
433
433
434
434
const half2 dm = x[ib].dm ;
435
435
const int ql0 = x[ib].qs [iqs];
436
- const int qh0 = get_int_from_uint8_aligned (x[ib].qh , 0 );
436
+ const int qh0 = get_int_b4 (x[ib].qh , 0 );
437
437
const int ql = ((ql0 >> (4 *shift)) & 0x0F );
438
438
const int qh = ((qh0 >> idq) << 4 ) & 0x10 ;
439
439
const int q = (ql | qh);
0 commit comments