Skip to content

Commit 7833796

Browse files
committed
metal : restore original F16 mat-vec multiplication
It works after the norm fixes
1 parent ed92c3d commit 7833796

File tree

1 file changed

+2
-6
lines changed

1 file changed

+2
-6
lines changed

ggml-metal.m

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -995,12 +995,8 @@ void ggml_metal_graph_compute(
 995  995    else if (src0t == GGML_TYPE_Q6_K) {
 996  996    [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
 997  997    } else {
 998       - [encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
 999       - [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
1000       -
1001       - // TODO: this breaks for Q4_0 - understand why and fix it
1002       - //int64_t ny = (ne11 + 3)/4;
1003       - //[encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
      998  + int64_t ny = (ne11 + 3)/4;
      999  + [encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
1004 1000    }
1005 1001    }
1006 1002    } break;

0 commit comments

Comments
 (0)