Skip to content

Commit 02f8cdf

Browse files
committed
retain the tensor type as Q4_0
1 parent e44a529 commit 02f8cdf

File tree

4 files changed

+28
-9
lines changed

4 files changed

+28
-9
lines changed

ggml/src/ggml-aarch64.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3540,17 +3540,14 @@ int ggml_prepare_optimal_kernel(struct ggml_tensor * cur, const void * data, siz
35403540
#if defined(__ARM_ARCH)
35413541
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
35423542
repack_q4_0_to_q4_0_8_bl(cur, 8, data, data_size);
3543-
cur->type = GGML_TYPE_Q4_0_8_8;
35443543
ret = 0;
35453544
}
35463545
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
35473546
repack_q4_0_to_q4_0_4_bl(cur, 8, data, data_size);
3548-
cur->type = GGML_TYPE_Q4_0_4_8;
35493547
ret = 0;
35503548
}
35513549
else if (ggml_cpu_has_neon()) {
35523550
repack_q4_0_to_q4_0_4_bl(cur, 4, data, data_size);
3553-
cur->type = GGML_TYPE_Q4_0_4_4;
35543551
ret = 0;
35553552
}
35563553
#endif
@@ -3560,4 +3557,23 @@ int ggml_prepare_optimal_kernel(struct ggml_tensor * cur, const void * data, siz
35603557
GGML_UNUSED(data);
35613558
GGML_UNUSED(data_size);
35623559
}
3560+
3561+
enum ggml_type ggml_get_optimal_type(const struct ggml_tensor * cur) {
3562+
#if defined(__ARM_ARCH)
3563+
if (cur->type == GGML_TYPE_Q4_0) {
3564+
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
3565+
return GGML_TYPE_Q4_0_8_8;
3566+
}
3567+
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
3568+
return GGML_TYPE_Q4_0_4_8;
3569+
}
3570+
else if (ggml_cpu_has_neon()) {
3571+
return GGML_TYPE_Q4_0_4_4;
3572+
}
3573+
}
3574+
#endif
3575+
return cur->type;
3576+
3577+
GGML_UNUSED(cur);
3578+
}
35633579
#endif

ggml/src/ggml-aarch64.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
3535

3636
#ifdef GGML_USE_CPU_AARCH64
3737
int ggml_prepare_optimal_kernel(struct ggml_tensor * cur, const void * data, size_t data_size);
38+
enum ggml_type ggml_get_optimal_type(const struct ggml_tensor * cur);
3839
#endif
3940

4041
#ifdef __cplusplus

ggml/src/ggml-backend.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2638,11 +2638,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
26382638
#ifdef GGML_USE_CPU_AARCH64
26392639
const struct ggml_tensor *tensor = op->src[0];
26402640
if (tensor && tensor->buffer && (strcmp(tensor->buffer->buft->iface.get_name(tensor->buffer->buft),"CPU_AARCH64") == 0)) {
2641-
if ((op->op == GGML_OP_MUL_MAT) &&
2642-
(tensor->type == GGML_TYPE_Q4_0 ||
2643-
tensor->type == GGML_TYPE_Q4_0_4_4 ||
2644-
tensor->type == GGML_TYPE_Q4_0_4_8 ||
2645-
tensor->type == GGML_TYPE_Q4_0_8_8)) {
2641+
if (op->op == GGML_OP_MUL_MAT && tensor->type == GGML_TYPE_Q4_0) {
26462642
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits_cpu(tensor->type)->vec_dot_type;
26472643
}
26482644
return false;

ggml/src/ggml-cpu.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7423,7 +7423,13 @@ static void ggml_compute_forward_mul_mat(
74237423
const int ith = params->ith;
74247424
const int nth = params->nth;
74257425

7426-
const enum ggml_type type = src0->type;
7426+
enum ggml_type type = src0->type;
7427+
7428+
#ifdef GGML_USE_CPU_AARCH64
7429+
if (strcmp(src0->buffer->buft->iface.get_name(src0->buffer->buft),"CPU_AARCH64") == 0) {
7430+
type = ggml_get_optimal_type(src0);
7431+
}
7432+
#endif
74277433

74287434
enum ggml_type const vec_dot_type = type_traits_cpu[type].vec_dot_type;
74297435
ggml_from_float_t const from_float = ggml_get_type_traits(vec_dot_type)->from_float;

0 commit comments

Comments
 (0)