@@ -1311,8 +1311,8 @@ static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict
1311
1311
memcpy (& qh , x [i ].qh , sizeof (qh ));
1312
1312
1313
1313
for (int j = 0 ; j < qk /2 ; ++ j ) {
1314
- const uint8_t xh_0 = ((qh & ( 1u << ( j + 0 ))) >> (j + 0 )) << 4 ;
1315
- const uint8_t xh_1 = ((qh & ( 1u << ( j + 16 ))) >> ( j + 12 )) ;
1314
+ const uint8_t xh_0 = ((qh >> (j + 0 )) << 4 ) & 0x10 ;
1315
+ const uint8_t xh_1 = ((qh >> ( j + 12 )) ) & 0x10 ;
1316
1316
1317
1317
const int32_t x0 = ((x [i ].qs [j ] & 0x0F ) | xh_0 ) - 16 ;
1318
1318
const int32_t x1 = ((x [i ].qs [j ] >> 4 ) | xh_1 ) - 16 ;
@@ -1338,8 +1338,8 @@ static void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict
1338
1338
memcpy (& qh , x [i ].qh , sizeof (qh ));
1339
1339
1340
1340
for (int j = 0 ; j < qk /2 ; ++ j ) {
1341
- const uint8_t xh_0 = ((qh & ( 1u << ( j + 0 ))) >> (j + 0 )) << 4 ;
1342
- const uint8_t xh_1 = ((qh & ( 1u << ( j + 16 ))) >> ( j + 12 )) ;
1341
+ const uint8_t xh_0 = ((qh >> (j + 0 )) << 4 ) & 0x10 ;
1342
+ const uint8_t xh_1 = ((qh >> ( j + 12 )) ) & 0x10 ;
1343
1343
1344
1344
const int x0 = (x [i ].qs [j ] & 0x0F ) | xh_0 ;
1345
1345
const int x1 = (x [i ].qs [j ] >> 4 ) | xh_1 ;
@@ -12086,8 +12086,8 @@ size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t *
12086
12086
memcpy (& qh , & y [i ].qh , sizeof (qh ));
12087
12087
12088
12088
for (int j = 0 ; j < QK5_0 ; j += 2 ) {
12089
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12090
- const uint8_t vh1 = ((qh & (1u << (j + 1 ))) >> (j + 1 )) << 4 ;
12089
+ const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12090
+ const uint8_t vh1 = ((qh & (1u << (j + 16 ))) >> (j + 12 )) ;
12091
12091
12092
12092
// cast to 16 bins
12093
12093
const uint8_t vi0 = ((y [i ].qs [j /2 ] & 0x0F ) | vh0 ) / 2 ;
@@ -12116,8 +12116,8 @@ size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t *
12116
12116
memcpy (& qh , & y [i ].qh , sizeof (qh ));
12117
12117
12118
12118
for (int j = 0 ; j < QK5_1 ; j += 2 ) {
12119
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12120
- const uint8_t vh1 = ((qh & (1u << (j + 1 ))) >> (j + 1 )) << 4 ;
12119
+ const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12120
+ const uint8_t vh1 = ((qh & (1u << (j + 16 ))) >> (j + 12 )) ;
12121
12121
12122
12122
// cast to 16 bins
12123
12123
const uint8_t vi0 = ((y [i ].qs [j /2 ] & 0x0F ) | vh0 ) / 2 ;
0 commit comments