Skip to content

Commit f678640

Browse files
ggerganov authored and Neo Zhang committed
ggml : minor naming changes (ggml-org#8433)
* ggml : minor naming changes
ggml-ci
* ggml : use PRId64 [no ci]
* ggml : revert FA K/Q names
1 parent fa700d1 commit f678640

File tree

10 files changed

+194
-191
lines changed

10 files changed

+194
-191
lines changed

examples/quantize-stats/quantize-stats.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,7 @@ static void test_roundtrip_on_chunk(
154154
}
155155

156156
if (use_reference) {
157-
qfns.from_float_reference(input_scratch, quantized_scratch, chunk_size);
157+
qfns.from_float_ref(input_scratch, quantized_scratch, chunk_size);
158158
} else {
159159
qfns.from_float(input_scratch, quantized_scratch, chunk_size);
160160
}

ggml/include/ggml.h

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -714,9 +714,9 @@ extern "C" {
714714
GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor);
715715
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
716716

717-
GGML_API GGML_CALL int ggml_blck_size(enum ggml_type type);
718-
GGML_API GGML_CALL size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
719-
GGML_API GGML_CALL size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
717+
GGML_API GGML_CALL int64_t ggml_blck_size(enum ggml_type type);
718+
GGML_API GGML_CALL size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
719+
GGML_API GGML_CALL size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
720720

721721
GGML_DEPRECATED(
722722
GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
@@ -2410,31 +2410,31 @@ extern "C" {
24102410
#endif
24112411
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
24122412
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2413-
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
2414-
const void * GGML_RESTRICT y, size_t by, int nrc);
2415-
typedef void (*ggml_from_float_to_mat_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr,
2416-
int64_t k, int64_t bx);
2417-
typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
2418-
const void * GGML_RESTRICT y, int nr, int nc);
2419-
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
2420-
const void * GGML_RESTRICT y, int nr, int nc);
2413+
typedef void (*ggml_from_float_to_mat_t)
2414+
(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
2415+
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
2416+
const void * GGML_RESTRICT y, size_t by, int nrc);
2417+
typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
2418+
const void * GGML_RESTRICT y, int nr, int nc);
2419+
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
2420+
const void * GGML_RESTRICT y, int nr, int nc);
24212421

24222422
typedef struct {
2423-
const char * type_name;
2424-
int blck_size;
2425-
size_t type_size;
2426-
bool is_quantized;
2427-
ggml_to_float_t to_float;
2428-
ggml_from_float_t from_float;
2429-
ggml_from_float_t from_float_reference;
2430-
ggml_vec_dot_t vec_dot;
2431-
enum ggml_type vec_dot_type;
2432-
int64_t nrows; // number of rows to process simultaneously;
2433-
int64_t ncols; // number of columns to process simultaneously;
2434-
int64_t interleave_blcksize; // interleave elements in blocks of interleave_blcksize;
2423+
const char * type_name;
2424+
int64_t blck_size;
2425+
int64_t blck_size_interleave; // interleave elements in blocks
2426+
size_t type_size;
2427+
bool is_quantized;
2428+
ggml_to_float_t to_float;
2429+
ggml_from_float_t from_float;
2430+
ggml_from_float_t from_float_ref;
24352431
ggml_from_float_to_mat_t from_float_to_mat;
2436-
ggml_gemv_t gemv;
2437-
ggml_gemm_t gemm;
2432+
ggml_vec_dot_t vec_dot;
2433+
enum ggml_type vec_dot_type;
2434+
int64_t nrows; // number of rows to process simultaneously
2435+
int64_t ncols; // number of columns to process simultaneously
2436+
ggml_gemv_t gemv;
2437+
ggml_gemm_t gemm;
24382438
} ggml_type_traits_t;
24392439

24402440
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);

ggml/src/ggml-aarch64.c

Lines changed: 36 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -20,51 +20,51 @@
2020

2121
// Functions to create the interleaved data layout formats
2222

23-
// interleave 4 block_q4_0s in blocks of interleave_blcksize
23+
// interleave 4 block_q4_0s in blocks of blck_size_interleave
2424
// returns an interleaved block_q4_0x4
2525
// in the interleaved block_q4_0x4, place deltas for 4 block_q4_0 blocks
26-
// first, then interleave quants from 4 block_q4_0s in blocks of interleave_blcksize
26+
// first, then interleave quants from 4 block_q4_0s in blocks of blck_size_interleave
2727
//
2828
// - in : an array of block_q4_0 pointers
29-
// - interleave_blcksize : the block_q4_0 quants bytes are interleaved in blocks of
30-
// interleave_blcksize bytes
29+
// - blck_size_interleave : the block_q4_0 quants bytes are interleaved in blocks of
30+
// blck_size_interleave bytes
3131
// - xor_mask : the mask to convert the nibbles in block_q4_0 quants bytes
3232
// from bias offset form to pure sign form (this saves subtract
3333
// operations during unpacking)
3434
//
35-
static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int interleave_blcksize, unsigned int xor_mask) {
35+
static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave, unsigned int xor_mask) {
3636
block_q4_0x4 out;
3737

3838
for (int i = 0; i < 4; i++) {
3939
out.d[i] = in[i].d;
4040
}
4141

4242
for (int i = 0; i < QK4_0 * 2; i++) {
43-
int src_offset = (i / (4 * interleave_blcksize)) * interleave_blcksize;
44-
int src_id = (i % (4 * interleave_blcksize)) / interleave_blcksize;
45-
src_offset += (i % interleave_blcksize);
43+
int src_offset = (i / (4 * blck_size_interleave)) * blck_size_interleave;
44+
int src_id = (i % (4 * blck_size_interleave)) / blck_size_interleave;
45+
src_offset += (i % blck_size_interleave);
4646

4747
out.qs[i] = in[src_id].qs[src_offset] ^ xor_mask;
4848
}
4949

5050
return out;
5151
}
5252

53-
// interleave 8 block_q4_0s in blocks of interleave_blcksize
53+
// interleave 8 block_q4_0s in blocks of blck_size_interleave
5454
// returns an interleaved block_q4_0x8
5555
// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
56-
// first, then interleave quants from 8 block_q4_0s in blocks of interleave_blcksize
57-
static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int interleave_blcksize, unsigned int xor_mask) {
56+
// first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
57+
static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave, unsigned int xor_mask) {
5858
block_q4_0x8 out;
5959

6060
for (int i = 0; i < 8; i++) {
6161
out.d[i] = in[i].d;
6262
}
6363

6464
for (int i = 0; i < QK4_0 * 4; i++) {
65-
int src_offset = (i / (8 * interleave_blcksize)) * interleave_blcksize;
66-
int src_id = (i % (8 * interleave_blcksize)) / interleave_blcksize;
67-
src_offset += (i % interleave_blcksize);
65+
int src_offset = (i / (8 * blck_size_interleave)) * blck_size_interleave;
66+
int src_id = (i % (8 * blck_size_interleave)) / blck_size_interleave;
67+
src_offset += (i % blck_size_interleave);
6868

6969
out.qs[i] = in[src_id].qs[src_offset] ^ xor_mask;
7070
}
@@ -135,7 +135,7 @@ void quantize_q8_0_4x4(const float * restrict x, void * restrict vy, int64_t k)
135135
}
136136
#else
137137
// scalar
138-
const int interleave_blcksize = 4;
138+
const int blck_size_interleave = 4;
139139
float srcv[4][QK8_0];
140140
float id[4];
141141

@@ -155,12 +155,12 @@ void quantize_q8_0_4x4(const float * restrict x, void * restrict vy, int64_t k)
155155
}
156156

157157
for (int j = 0; j < QK8_0 * 4; j++) {
158-
int src_offset = (j / (4 * interleave_blcksize)) * interleave_blcksize;
159-
int src_id = (j % (4 * interleave_blcksize)) / interleave_blcksize;
160-
src_offset += (j % interleave_blcksize);
158+
int src_offset = (j / (4 * blck_size_interleave)) * blck_size_interleave;
159+
int src_id = (j % (4 * blck_size_interleave)) / blck_size_interleave;
160+
src_offset += (j % blck_size_interleave);
161161

162162
float x0 = srcv[src_id][src_offset] * id[src_id];
163-
y[i].qs[j] = roundf(x0);;
163+
y[i].qs[j] = roundf(x0);
164164
}
165165
}
166166
#endif
@@ -253,7 +253,7 @@ void quantize_q8_0_4x8(const float * restrict x, void * restrict vy, int64_t k)
253253
}
254254
#else
255255
// scalar
256-
const int interleave_blcksize = 8;
256+
const int blck_size_interleave = 8;
257257
float srcv[4][QK8_0];
258258
float id[4];
259259

@@ -273,26 +273,30 @@ void quantize_q8_0_4x8(const float * restrict x, void * restrict vy, int64_t k)
273273
}
274274

275275
for (int j = 0; j < QK8_0 * 4; j++) {
276-
int src_offset = (j / (4 * interleave_blcksize)) * interleave_blcksize;
277-
int src_id = (j % (4 * interleave_blcksize)) / interleave_blcksize;
278-
src_offset += (j % interleave_blcksize);
276+
int src_offset = (j / (4 * blck_size_interleave)) * blck_size_interleave;
277+
int src_id = (j % (4 * blck_size_interleave)) / blck_size_interleave;
278+
src_offset += (j % blck_size_interleave);
279279

280280
float x0 = srcv[src_id][src_offset] * id[src_id];
281-
y[i].qs[j] = roundf(x0);;
281+
y[i].qs[j] = roundf(x0);
282282
}
283283
}
284284
#endif
285285
}
286286

287-
void quantize_mat_q8_0(const float * restrict x, void * restrict vy, int64_t nrow, int64_t n_per_row, int64_t interleave_blcksize) {
287+
void quantize_mat_q8_0(const float * restrict x, void * restrict vy, int64_t nrow, int64_t n_per_row, int64_t blck_size_interleave) {
288288
assert(nrow == 4);
289289
UNUSED(nrow);
290-
if (interleave_blcksize == 4) quantize_q8_0_4x4(x, vy, n_per_row);
291-
else if (interleave_blcksize == 8) quantize_q8_0_4x8(x, vy, n_per_row);
292-
else assert(false);
290+
if (blck_size_interleave == 4) {
291+
quantize_q8_0_4x4(x, vy, n_per_row);
292+
} else if (blck_size_interleave == 8) {
293+
quantize_q8_0_4x8(x, vy, n_per_row);
294+
} else {
295+
assert(false);
296+
}
293297
}
294298

295-
static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int interleave_blcksize) {
299+
static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int blck_size_interleave) {
296300
assert(n_per_row % QK4_0 == 0);
297301
const int nb = n_per_row / QK4_0;
298302

@@ -311,15 +315,15 @@ static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict ds
311315
for (int64_t x = 0; x < nb; x++) {
312316

313317
for (int i = 0; i < nrows_interleaved; i++ ) {
314-
quantize_row_q4_0_reference(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
318+
quantize_row_q4_0_ref(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
315319
}
316320

317321
if (nrows_interleaved == 8) {
318-
*(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, interleave_blcksize, 0x88);
322+
*(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave, 0x88);
319323
out_ptr = (block_q4_0x8 *) out_ptr + 1;
320324
}
321325
else if (nrows_interleaved == 4) {
322-
*(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, interleave_blcksize, 0x88);
326+
*(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave, 0x88);
323327
out_ptr = (block_q4_0x4 *) out_ptr + 1;
324328
}
325329
}

ggml/src/ggml-aarch64.h

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,22 +16,22 @@ extern "C" {
1616
void quantize_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
1717
void quantize_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
1818

19-
void quantize_mat_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nrows, int64_t n_per_row, int64_t interleave_blcksize);
19+
void quantize_mat_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nrows, int64_t n_per_row, int64_t blck_size_interleave);
2020

2121
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
2222
size_t quantize_q4_0_4x4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
2323
size_t quantize_q4_0_4x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
2424
size_t quantize_q4_0_8x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
2525

2626
// GEMV
27-
void ggml_gemv_q4_0_4x4_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
28-
void ggml_gemv_q4_0_4x8_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
29-
void ggml_gemv_q4_0_8x8_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
27+
void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
28+
void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
29+
void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
3030

3131
// GEMM
32-
void ggml_gemm_q4_0_4x4_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
33-
void ggml_gemm_q4_0_4x8_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
34-
void ggml_gemm_q4_0_8x8_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
32+
void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
33+
void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
34+
void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
3535

3636
#ifdef __cplusplus
3737
}

0 commit comments

Comments
 (0)