|
83 | 83 | GGML_METAL_DECL_KERNEL(mul_mm_f16_f32);
|
84 | 84 | GGML_METAL_DECL_KERNEL(mul_mm_q4_0_f32);
|
85 | 85 | GGML_METAL_DECL_KERNEL(mul_mm_q4_1_f32);
|
| 86 | + GGML_METAL_DECL_KERNEL(mul_mm_q8_0_f32); |
86 | 87 | GGML_METAL_DECL_KERNEL(mul_mm_q2_K_f32);
|
87 | 88 | GGML_METAL_DECL_KERNEL(mul_mm_q3_K_f32);
|
88 | 89 | GGML_METAL_DECL_KERNEL(mul_mm_q4_K_f32);
|
@@ -209,6 +210,7 @@ @implementation GGMLMetalClass
|
209 | 210 | GGML_METAL_ADD_KERNEL(mul_mat_q6_K_f32);
|
210 | 211 | GGML_METAL_ADD_KERNEL(mul_mm_f16_f32);
|
211 | 212 | GGML_METAL_ADD_KERNEL(mul_mm_q4_0_f32);
|
| 213 | + GGML_METAL_ADD_KERNEL(mul_mm_q8_0_f32); |
212 | 214 | GGML_METAL_ADD_KERNEL(mul_mm_q4_1_f32);
|
213 | 215 | GGML_METAL_ADD_KERNEL(mul_mm_q2_K_f32);
|
214 | 216 | GGML_METAL_ADD_KERNEL(mul_mm_q3_K_f32);
|
@@ -751,9 +753,10 @@ void ggml_metal_graph_compute(
|
751 | 753 | ne00%32 == 0 &&
|
752 | 754 | ne11 > 1) {
|
753 | 755 | switch (src0->type) {
|
754 |
| - case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32]; break; |
| 756 | + case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32]; break; |
755 | 757 | case GGML_TYPE_Q4_0: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_0_f32]; break;
|
756 | 758 | case GGML_TYPE_Q4_1: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_1_f32]; break;
|
| 759 | + case GGML_TYPE_Q8_0: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q8_0_f32]; break; |
757 | 760 | case GGML_TYPE_Q2_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q2_K_f32]; break;
|
758 | 761 | case GGML_TYPE_Q3_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q3_K_f32]; break;
|
759 | 762 | case GGML_TYPE_Q4_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_K_f32]; break;
|
|
0 commit comments