@@ -653,7 +653,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
653
653
#else
654
654
.nrows = 1,
655
655
#endif
656
- .from_float_to_mat = quantize_row_q8_0_aarch64 ,
656
+ .from_float_to_mat = quantize_q8_0_aarch64 ,
657
657
},
658
658
[GGML_TYPE_Q8_1] = {
659
659
.type_name = "q8_1",
@@ -853,16 +853,12 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
853
853
.blck_size = QK4_0,
854
854
.type_size = sizeof(block_q4_0),
855
855
.is_quantized = true,
856
- .to_float = (ggml_to_float_t) dequantize_row_q4_0 ,
857
- .from_float = quantize_row_q4_0 ,
858
- .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference ,
859
- .vec_dot = ggml_vec_dot_q4_0_q8_0 ,
856
+ .to_float = NULL ,
857
+ .from_float = NULL ,
858
+ .from_float_reference = NULL ,
859
+ .vec_dot = NULL ,
860
860
.vec_dot_type = GGML_TYPE_Q8_0,
861
- #if defined (__ARM_FEATURE_MATMUL_INT8)
862
- .nrows = 2,
863
- #else
864
861
.nrows = 1,
865
- #endif
866
862
#if defined(__ARM_FEATURE_SVE)
867
863
.gemv = ggml_gemv_q4_0_q8_0_aarch64_sve256,
868
864
.gemm = ggml_gemm_q4_0_q8_0_aarch64_sve256,
@@ -11111,8 +11107,7 @@ UseGgmlGemm2:;
11111
11107
if ((ggml_n_dims(src0) == 2) && (ne11 == 1) && (type == GGML_TYPE_Q4_0_AARCH64)) {
11112
11108
gemv(ne00, (float *)((char *) dst->data), (const char *) src0->data, (const char *) wdata, 1, ne01, ith, nth);
11113
11109
}
11114
- else if ((ggml_n_dims(src0) == 2) && (ne11 >= 16) && (type == GGML_TYPE_Q4_0_AARCH64)) {
11115
- // use nrows-sized 16, 8, and 4 GEMM kernels
11110
+ else if ((ggml_n_dims(src0) == 2) && (ne11 >= 2) && (type == GGML_TYPE_Q4_0_AARCH64)) {
11116
11111
for (int row_iter = 0; row_iter < ne11 / 16; row_iter++) {
11117
11112
gemm(ne00, (float *)((char *) dst->data + (row_iter * 16 * nb1)), (const char *) src0->data, (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (row_iter * 16) * row_size : (row_iter * 16 * nb11)), 16, ne01, ith, nth);
11118
11113
}
@@ -11129,28 +11124,6 @@ UseGgmlGemm2:;
11129
11124
gemv(ne00, (float *)((char *) dst->data + (row_iter * nb1)), (const char *) src0->data, (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (row_iter)*row_size : (row_iter * nb11)), 1, ne01, ith, nth);
11130
11125
}
11131
11126
}
11132
- else if ((ggml_n_dims(src0) == 2) && (ne11 >= 8) && (type == GGML_TYPE_Q4_0_AARCH64)) {
11133
- // use nrows-sized 8, and 4 GEMM kernels
11134
- for (int row_iter = 0; row_iter < ne11 / 8; row_iter++) {
11135
- gemm(ne00, (float *)((char *) dst->data + (row_iter * 8 * nb1)), (const char *) src0->data, (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (row_iter * 8) * row_size : (row_iter * 8 * nb11)), 8, ne01, ith, nth);
11136
- }
11137
- int rows_processed = (ne11 / 8) * 8;
11138
- for (int row_iter = 0; row_iter < (ne11 - rows_processed) / 4; row_iter++) {
11139
- gemm(ne00, (float *)((char *) dst->data + ((rows_processed + row_iter * 4) * nb1)), (const char *) src0->data, (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (rows_processed + row_iter * 4) * row_size : ((rows_processed + row_iter * 4) * nb11)), 4, ne01, ith, nth);
11140
- }
11141
- for (int row_iter = ((ne11 / 8) * 8) + ((ne11 - rows_processed) / 4 * 4); row_iter < ne11; row_iter++) {
11142
- gemv(ne00, (float *)((char *) dst->data + (row_iter * nb1)), (const char *) src0->data, (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (row_iter)*row_size : (row_iter * nb11)), 1, ne01, ith, nth);
11143
- }
11144
- }
11145
- else if ((ggml_n_dims(src0) == 2) && (ne11 >= 4) && (type == GGML_TYPE_Q4_0_AARCH64)) {
11146
- // use nrows-sized 4 GEMM kernel
11147
- for (int row_iter = 0; row_iter < ne11 / 4; row_iter++) {
11148
- gemm(ne00, (float *)((char *) dst->data + (row_iter * 4 * nb1)), (const char *) src0->data, (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (row_iter * 4) * row_size : (row_iter * 4 * nb11)), 4, ne01, ith, nth);
11149
- }
11150
- for (int row_iter = (ne11 / 4) * 4; row_iter < ne11; row_iter++) {
11151
- gemv(ne00, (float *)((char *) dst->data + (row_iter * nb1)), (const char *) src0->data, (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (row_iter)*row_size : (row_iter * nb11)), 1, ne01, ith, nth);
11152
- }
11153
- }
11154
11127
else {
11155
11128
for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
11156
11129
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
0 commit comments