Skip to content

Commit 7b2a075

Browse files
q8_0 impl
1 parent d598888 commit 7b2a075

File tree

1 file changed

+25
-12
lines changed

1 file changed

+25
-12
lines changed

ggml-cuda.cu

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,16 @@ typedef float dfloat; // dequantize float
6060
typedef float2 dfloat2;
6161
#endif //GGML_CUDA_DMMV_F16
6262

63+
static __device__ __forceinline__ int get_int_from_int8(const int8_t * x8, const int & i32) {
64+
const uint16_t * x16 = (uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
65+
66+
int x32 = 0;
67+
x32 |= x16[0] << 0;
68+
x32 |= x16[1] << 16;
69+
70+
return x32;
71+
}
72+
6373
static __device__ __forceinline__ int get_int_from_uint8(const uint8_t * x8, const int & i32) {
6474
const uint16_t * x16 = (uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
6575

@@ -1602,27 +1612,30 @@ static __device__ __forceinline__ float vec_dot_q5_1_q8_1_mul_mat(
16021612
y_qs[j * (2*WARP_SIZE) + kyqs + (QI8_1/2)], x_dm[index_bx], y_ds[j * (2*WARP_SIZE/QI8_1) + 2*k/QI8_1]);
16031613
}
16041614

1605-
static __device__ __forceinline__ float vec_dot_q8_0_q8_1(
1606-
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
1615+
static __device__ __forceinline__ float vec_dot_q8_0_q8_1_impl(
1616+
const int & vi, const int & ui, const half & d8_0, const half2 & ds8_1) {
16071617

16081618
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
1609-
const block_q8_0 * bq8_0 = (const block_q8_0 *) vbq;
1610-
1611-
int vi;
1612-
memcpy(&vi, &bq8_0->qs[sizeof(int) * (iqs + 0)], sizeof(int));
1613-
const int ui = *((int *) &bq8_1->qs[sizeof(int) * (iqs + 0)]);
1614-
1615-
const float d = __half2float(bq8_0->d) * __half2float(bq8_1->ds.x);
1616-
16171619
// SIMD dot product of quantized values
1618-
int sumi = __dp4a(vi, ui, 0);
1620+
const int sumi = __dp4a(vi, ui, 0);
16191621

1620-
return sumi*d;
1622+
return sumi * __half2float(d8_0) * __half2float(ds8_1.x);
16211623
#else
16221624
return 0.0f; // only to satisfy the compiler
16231625
#endif // __CUDA_ARCH__ >= MIN_CC_DP4A
16241626
}
16251627

1628+
static __device__ __forceinline__ float vec_dot_q8_0_q8_1(
1629+
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
1630+
1631+
const block_q8_0 * bq8_0 = (const block_q8_0 *) vbq;
1632+
1633+
const int vi = get_int_from_int8(bq8_0->qs, iqs);
1634+
const int ui = get_int_from_int8_aligned(bq8_1->qs, iqs);
1635+
1636+
return vec_dot_q8_0_q8_1_impl(vi, ui, bq8_0->d, bq8_1->ds);
1637+
}
1638+
16261639
static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
16271640
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
16281641

0 commit comments

Comments
 (0)