20
20
21
21
// Functions to create the interleaved data layout formats
22
22
23
- // interleave 4 block_q4_0s in blocks of interleave_blcksize
23
+ // interleave 4 block_q4_0s in blocks of blck_size_interleave
24
24
// returns an interleaved block_q4_0x4
25
25
// in the interleaved block_q4_0x4, place deltas for 4 block_q4_0 blocks
26
- // first, then interleave quants from 4 block_q4_0s in blocks of interleave_blcksize
26
+ // first, then interleave quants from 4 block_q4_0s in blocks of blck_size_interleave
27
27
//
28
28
// - in : an array of block_q4_0 pointers
29
- // - interleave_blcksize : the block_q4_0 quants bytes are interleaved in blocks of
30
- // interleave_blcksize bytes
29
+ // - blck_size_interleave : the block_q4_0 quants bytes are interleaved in blocks of
30
+ // blck_size_interleave bytes
31
31
// - xor_mask : the mask to convert the nibbles in block_q4_0 quants bytes
32
32
// from bias offset form to pure sign form (this saves subtract
33
33
// operations during unpacking)
34
34
//
35
- static block_q4_0x4 make_block_q4_0x4 (block_q4_0 * in , unsigned int interleave_blcksize , unsigned int xor_mask ) {
35
+ static block_q4_0x4 make_block_q4_0x4 (block_q4_0 * in , unsigned int blck_size_interleave , unsigned int xor_mask ) {
36
36
block_q4_0x4 out ;
37
37
38
38
for (int i = 0 ; i < 4 ; i ++ ) {
39
39
out .d [i ] = in [i ].d ;
40
40
}
41
41
42
42
for (int i = 0 ; i < QK4_0 * 2 ; i ++ ) {
43
- int src_offset = (i / (4 * interleave_blcksize )) * interleave_blcksize ;
44
- int src_id = (i % (4 * interleave_blcksize )) / interleave_blcksize ;
45
- src_offset += (i % interleave_blcksize );
43
+ int src_offset = (i / (4 * blck_size_interleave )) * blck_size_interleave ;
44
+ int src_id = (i % (4 * blck_size_interleave )) / blck_size_interleave ;
45
+ src_offset += (i % blck_size_interleave );
46
46
47
47
out .qs [i ] = in [src_id ].qs [src_offset ] ^ xor_mask ;
48
48
}
49
49
50
50
return out ;
51
51
}
52
52
53
- // interleave 8 block_q4_0s in blocks of interleave_blcksize
53
+ // interleave 8 block_q4_0s in blocks of blck_size_interleave
54
54
// returns an interleaved block_q4_0x8
55
55
// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
56
- // first, then interleave quants from 8 block_q4_0s in blocks of interleave_blcksize
57
- static block_q4_0x8 make_block_q4_0x8 (block_q4_0 * in , unsigned int interleave_blcksize , unsigned int xor_mask ) {
56
+ // first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
57
+ static block_q4_0x8 make_block_q4_0x8 (block_q4_0 * in , unsigned int blck_size_interleave , unsigned int xor_mask ) {
58
58
block_q4_0x8 out ;
59
59
60
60
for (int i = 0 ; i < 8 ; i ++ ) {
61
61
out .d [i ] = in [i ].d ;
62
62
}
63
63
64
64
for (int i = 0 ; i < QK4_0 * 4 ; i ++ ) {
65
- int src_offset = (i / (8 * interleave_blcksize )) * interleave_blcksize ;
66
- int src_id = (i % (8 * interleave_blcksize )) / interleave_blcksize ;
67
- src_offset += (i % interleave_blcksize );
65
+ int src_offset = (i / (8 * blck_size_interleave )) * blck_size_interleave ;
66
+ int src_id = (i % (8 * blck_size_interleave )) / blck_size_interleave ;
67
+ src_offset += (i % blck_size_interleave );
68
68
69
69
out .qs [i ] = in [src_id ].qs [src_offset ] ^ xor_mask ;
70
70
}
@@ -135,7 +135,7 @@ void quantize_q8_0_4x4(const float * restrict x, void * restrict vy, int64_t k)
135
135
}
136
136
#else
137
137
// scalar
138
- const int interleave_blcksize = 4 ;
138
+ const int blck_size_interleave = 4 ;
139
139
float srcv [4 ][QK8_0 ];
140
140
float id [4 ];
141
141
@@ -155,12 +155,12 @@ void quantize_q8_0_4x4(const float * restrict x, void * restrict vy, int64_t k)
155
155
}
156
156
157
157
for (int j = 0 ; j < QK8_0 * 4 ; j ++ ) {
158
- int src_offset = (j / (4 * interleave_blcksize )) * interleave_blcksize ;
159
- int src_id = (j % (4 * interleave_blcksize )) / interleave_blcksize ;
160
- src_offset += (j % interleave_blcksize );
158
+ int src_offset = (j / (4 * blck_size_interleave )) * blck_size_interleave ;
159
+ int src_id = (j % (4 * blck_size_interleave )) / blck_size_interleave ;
160
+ src_offset += (j % blck_size_interleave );
161
161
162
162
float x0 = srcv [src_id ][src_offset ] * id [src_id ];
163
- y [i ].qs [j ] = roundf (x0 );;
163
+ y [i ].qs [j ] = roundf (x0 );
164
164
}
165
165
}
166
166
#endif
@@ -253,7 +253,7 @@ void quantize_q8_0_4x8(const float * restrict x, void * restrict vy, int64_t k)
253
253
}
254
254
#else
255
255
// scalar
256
- const int interleave_blcksize = 8 ;
256
+ const int blck_size_interleave = 8 ;
257
257
float srcv [4 ][QK8_0 ];
258
258
float id [4 ];
259
259
@@ -273,26 +273,30 @@ void quantize_q8_0_4x8(const float * restrict x, void * restrict vy, int64_t k)
273
273
}
274
274
275
275
for (int j = 0 ; j < QK8_0 * 4 ; j ++ ) {
276
- int src_offset = (j / (4 * interleave_blcksize )) * interleave_blcksize ;
277
- int src_id = (j % (4 * interleave_blcksize )) / interleave_blcksize ;
278
- src_offset += (j % interleave_blcksize );
276
+ int src_offset = (j / (4 * blck_size_interleave )) * blck_size_interleave ;
277
+ int src_id = (j % (4 * blck_size_interleave )) / blck_size_interleave ;
278
+ src_offset += (j % blck_size_interleave );
279
279
280
280
float x0 = srcv [src_id ][src_offset ] * id [src_id ];
281
- y [i ].qs [j ] = roundf (x0 );;
281
+ y [i ].qs [j ] = roundf (x0 );
282
282
}
283
283
}
284
284
#endif
285
285
}
286
286
287
- void quantize_mat_q8_0 (const float * restrict x , void * restrict vy , int64_t nrow , int64_t n_per_row , int64_t interleave_blcksize ) {
287
+ void quantize_mat_q8_0 (const float * restrict x , void * restrict vy , int64_t nrow , int64_t n_per_row , int64_t blck_size_interleave ) {
288
288
assert (nrow == 4 );
289
289
UNUSED (nrow );
290
- if (interleave_blcksize == 4 ) quantize_q8_0_4x4 (x , vy , n_per_row );
291
- else if (interleave_blcksize == 8 ) quantize_q8_0_4x8 (x , vy , n_per_row );
292
- else assert (false);
290
+ if (blck_size_interleave == 4 ) {
291
+ quantize_q8_0_4x4 (x , vy , n_per_row );
292
+ } else if (blck_size_interleave == 8 ) {
293
+ quantize_q8_0_4x8 (x , vy , n_per_row );
294
+ } else {
295
+ assert (false);
296
+ }
293
297
}
294
298
295
- static size_t quantize_q4_0_nr_bl (const float * restrict src , void * restrict dst , int64_t nrow , int64_t n_per_row , int nrows_interleaved , int interleave_blcksize ) {
299
+ static size_t quantize_q4_0_nr_bl (const float * restrict src , void * restrict dst , int64_t nrow , int64_t n_per_row , int nrows_interleaved , int blck_size_interleave ) {
296
300
assert (n_per_row % QK4_0 == 0 );
297
301
const int nb = n_per_row / QK4_0 ;
298
302
@@ -311,15 +315,15 @@ static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict ds
311
315
for (int64_t x = 0 ; x < nb ; x ++ ) {
312
316
313
317
for (int i = 0 ; i < nrows_interleaved ; i ++ ) {
314
- quantize_row_q4_0_reference (src + b + i * n_per_row + x * QK4_0 , (block_q4_0 * ) dst_tmp + i , QK4_0 );
318
+ quantize_row_q4_0_ref (src + b + i * n_per_row + x * QK4_0 , (block_q4_0 * ) dst_tmp + i , QK4_0 );
315
319
}
316
320
317
321
if (nrows_interleaved == 8 ) {
318
- * (block_q4_0x8 * ) out_ptr = make_block_q4_0x8 (dst_tmp , interleave_blcksize , 0x88 );
322
+ * (block_q4_0x8 * ) out_ptr = make_block_q4_0x8 (dst_tmp , blck_size_interleave , 0x88 );
319
323
out_ptr = (block_q4_0x8 * ) out_ptr + 1 ;
320
324
}
321
325
else if (nrows_interleaved == 4 ) {
322
- * (block_q4_0x4 * ) out_ptr = make_block_q4_0x4 (dst_tmp , interleave_blcksize , 0x88 );
326
+ * (block_q4_0x4 * ) out_ptr = make_block_q4_0x4 (dst_tmp , blck_size_interleave , 0x88 );
323
327
out_ptr = (block_q4_0x4 * ) out_ptr + 1 ;
324
328
}
325
329
}
0 commit comments