Skip to content

Commit 0e48eb6

Browse files
committed
ggml : uniform 5th bit extraction
1 parent 948d124 commit 0e48eb6

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

ggml-cuda.cu

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -123,8 +123,8 @@ static __global__ void dequantize_block_q5_0(const void * vx, float * y) {
123123
memcpy(&qh, x[i].qh, sizeof(qh));
124124

125125
for (int j = 0; j < qk/2; ++j) {
126-
const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
127-
const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
126+
const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
127+
const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;
128128

129129
const int32_t x0 = ((x[i].qs[j] & 0xf) | xh_0) - 16;
130130
const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16;
@@ -148,8 +148,8 @@ static __global__ void dequantize_block_q5_1(const void * vx, float * y) {
148148
memcpy(&qh, x[i].qh, sizeof(qh));
149149

150150
for (int j = 0; j < qk/2; ++j) {
151-
const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
152-
const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
151+
const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
152+
const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;
153153

154154
const int x0 = (x[i].qs[j] & 0xf) | xh_0;
155155
const int x1 = (x[i].qs[j] >> 4) | xh_1;

ggml.c

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1311,8 +1311,8 @@ static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict
13111311
memcpy(&qh, x[i].qh, sizeof(qh));
13121312

13131313
for (int j = 0; j < qk/2; ++j) {
1314-
const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
1315-
const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
1314+
const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
1315+
const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;
13161316

13171317
const int32_t x0 = ((x[i].qs[j] & 0x0F) | xh_0) - 16;
13181318
const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16;
@@ -1338,8 +1338,8 @@ static void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict
13381338
memcpy(&qh, x[i].qh, sizeof(qh));
13391339

13401340
for (int j = 0; j < qk/2; ++j) {
1341-
const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
1342-
const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
1341+
const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
1342+
const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;
13431343

13441344
const int x0 = (x[i].qs[j] & 0x0F) | xh_0;
13451345
const int x1 = (x[i].qs[j] >> 4) | xh_1;
@@ -12086,8 +12086,8 @@ size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t *
1208612086
memcpy(&qh, &y[i].qh, sizeof(qh));
1208712087

1208812088
for (int j = 0; j < QK5_0; j += 2) {
12089-
const uint8_t vh0 = ((qh & (1u << (j + 0))) >> (j + 0)) << 4;
12090-
const uint8_t vh1 = ((qh & (1u << (j + 1))) >> (j + 1)) << 4;
12089+
const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
12090+
const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));
1209112091

1209212092
// cast to 16 bins
1209312093
const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
@@ -12116,8 +12116,8 @@ size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t *
1211612116
memcpy(&qh, &y[i].qh, sizeof(qh));
1211712117

1211812118
for (int j = 0; j < QK5_1; j += 2) {
12119-
const uint8_t vh0 = ((qh & (1u << (j + 0))) >> (j + 0)) << 4;
12120-
const uint8_t vh1 = ((qh & (1u << (j + 1))) >> (j + 1)) << 4;
12119+
const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
12120+
const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));
1212112121

1212212122
// cast to 16 bins
1212312123
const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;

0 commit comments

Comments
 (0)