@@ -1311,8 +1311,8 @@ static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict
1311
1311
memcpy (& qh , x [i ].qh , sizeof (qh ));
1312
1312
1313
1313
for (int j = 0 ; j < qk /2 ; ++ j ) {
1314
- const uint8_t xh_0 = ((qh & ( 1u << ( j + 0 ))) >> (j + 0 )) << 4 ;
1315
- const uint8_t xh_1 = ((qh & ( 1u << ( j + 16 ))) >> ( j + 12 )) ;
1314
+ const uint8_t xh_0 = ((qh >> (j + 0 )) << 4 ) & 0x10 ;
1315
+ const uint8_t xh_1 = ((qh >> ( j + 12 )) ) & 0x10 ;
1316
1316
1317
1317
const int32_t x0 = ((x [i ].qs [j ] & 0x0F ) | xh_0 ) - 16 ;
1318
1318
const int32_t x1 = ((x [i ].qs [j ] >> 4 ) | xh_1 ) - 16 ;
@@ -1338,8 +1338,8 @@ static void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict
1338
1338
memcpy (& qh , x [i ].qh , sizeof (qh ));
1339
1339
1340
1340
for (int j = 0 ; j < qk /2 ; ++ j ) {
1341
- const uint8_t xh_0 = ((qh & ( 1u << ( j + 0 ))) >> (j + 0 )) << 4 ;
1342
- const uint8_t xh_1 = ((qh & ( 1u << ( j + 16 ))) >> ( j + 12 )) ;
1341
+ const uint8_t xh_0 = ((qh >> (j + 0 )) << 4 ) & 0x10 ;
1342
+ const uint8_t xh_1 = ((qh >> ( j + 12 )) ) & 0x10 ;
1343
1343
1344
1344
const int x0 = (x [i ].qs [j ] & 0x0F ) | xh_0 ;
1345
1345
const int x1 = (x [i ].qs [j ] >> 4 ) | xh_1 ;
@@ -12090,8 +12090,8 @@ size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t *
12090
12090
memcpy (& qh , & y [i ].qh , sizeof (qh ));
12091
12091
12092
12092
for (int j = 0 ; j < QK5_0 ; j += 2 ) {
12093
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12094
- const uint8_t vh1 = ((qh & (1u << (j + 1 ))) >> (j + 1 )) << 4 ;
12093
+ const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12094
+ const uint8_t vh1 = ((qh & (1u << (j + 16 ))) >> (j + 12 )) ;
12095
12095
12096
12096
// cast to 16 bins
12097
12097
const uint8_t vi0 = ((y [i ].qs [j /2 ] & 0x0F ) | vh0 ) / 2 ;
@@ -12120,8 +12120,8 @@ size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t *
12120
12120
memcpy (& qh , & y [i ].qh , sizeof (qh ));
12121
12121
12122
12122
for (int j = 0 ; j < QK5_1 ; j += 2 ) {
12123
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12124
- const uint8_t vh1 = ((qh & (1u << (j + 1 ))) >> (j + 1 )) << 4 ;
12123
+ const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
12124
+ const uint8_t vh1 = ((qh & (1u << (j + 16 ))) >> (j + 12 )) ;
12125
12125
12126
12126
// cast to 16 bins
12127
12127
const uint8_t vi0 = ((y [i ].qs [j /2 ] & 0x0F ) | vh0 ) / 2 ;
0 commit comments