1
1
#include "common.cuh"
2
2
3
- static __device__ __forceinline__ void dequantize_q4_0 (const void * vx, const int ib, const int iqs, dfloat2 & v){
3
+ static __device__ __forceinline__ void dequantize_q4_0 (const void * vx, const int64_t ib, const int iqs, dfloat2 & v){
4
4
const block_q4_0 * x = (const block_q4_0 *) vx;
5
5
6
6
const dfloat d = x[ib].d ;
@@ -19,7 +19,7 @@ static __device__ __forceinline__ void dequantize_q4_0(const void * vx, const in
19
19
#endif // GGML_CUDA_F16
20
20
}
21
21
22
- static __device__ __forceinline__ void dequantize_q4_1 (const void * vx, const int ib, const int iqs, dfloat2 & v){
22
+ static __device__ __forceinline__ void dequantize_q4_1 (const void * vx, const int64_t ib, const int iqs, dfloat2 & v){
23
23
const block_q4_1 * x = (const block_q4_1 *) vx;
24
24
25
25
const dfloat d = __low2half (x[ib].dm );
@@ -39,7 +39,7 @@ static __device__ __forceinline__ void dequantize_q4_1(const void * vx, const in
39
39
#endif // GGML_CUDA_F16
40
40
}
41
41
42
- static __device__ __forceinline__ void dequantize_q5_0 (const void * vx, const int ib, const int iqs, dfloat2 & v){
42
+ static __device__ __forceinline__ void dequantize_q5_0 (const void * vx, const int64_t ib, const int iqs, dfloat2 & v){
43
43
const block_q5_0 * x = (const block_q5_0 *) vx;
44
44
45
45
const dfloat d = x[ib].d ;
@@ -62,7 +62,7 @@ static __device__ __forceinline__ void dequantize_q5_0(const void * vx, const in
62
62
#endif // GGML_CUDA_F16
63
63
}
64
64
65
- static __device__ __forceinline__ void dequantize_q5_1 (const void * vx, const int ib, const int iqs, dfloat2 & v){
65
+ static __device__ __forceinline__ void dequantize_q5_1 (const void * vx, const int64_t ib, const int iqs, dfloat2 & v){
66
66
const block_q5_1 * x = (const block_q5_1 *) vx;
67
67
68
68
const dfloat d = __low2half (x[ib].dm );
@@ -86,7 +86,7 @@ static __device__ __forceinline__ void dequantize_q5_1(const void * vx, const in
86
86
#endif // GGML_CUDA_F16
87
87
}
88
88
89
- static __device__ __forceinline__ void dequantize_q8_0 (const void * vx, const int ib, const int iqs, dfloat2 & v){
89
+ static __device__ __forceinline__ void dequantize_q8_0 (const void * vx, const int64_t ib, const int iqs, dfloat2 & v){
90
90
const block_q8_0 * x = (const block_q8_0 *) vx;
91
91
92
92
const dfloat d = x[ib].d ;
0 commit comments