Skip to content

Commit 92e3006

Browse files
authored
Vulkan: Fix mmq int dot float cache size (#12722)
1 parent 833e2b7 commit 92e3006

File tree

2 files changed: 4 additions and 6 deletions.

ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,9 @@ void main() {
234234
#endif
235235

236236
#if QUANT_AUXF == 1
237-
FLOAT_TYPE cache_a_dm[TM];
237+
FLOAT_TYPE cache_a_dm[WMITER * TM];
238238
#else
239-
FLOAT_TYPE_VEC2 cache_a_dm[TM];
239+
FLOAT_TYPE_VEC2 cache_a_dm[WMITER * TM];
240240
#endif
241241

242242
FLOAT_TYPE_VEC2 cache_b_ds[TN];
@@ -247,7 +247,6 @@ void main() {
247247
const uint iqs = loadr_a;
248248
const uint buf_ib = loadc_a + l;
249249

250-
// Should ds be gated to a single thread?
251250
if (iqs == 0) {
252251
#if QUANT_AUXF == 1
253252
buf_a_dm[buf_ib] = get_d(ib);
@@ -276,7 +275,6 @@ void main() {
276275

277276
const uint buf_ib = loadc_b + l;
278277

279-
// Should ds be gated to a single thread?
280278
if (iqs == 0) {
281279
buf_b_ds[buf_ib] = FLOAT_TYPE_VEC2(data_b[ib].ds);
282280
}

ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ i32vec2 repack(uint ib, uint iqs) {
1717
}
1818

1919
ACC_TYPE mul_q8_1(int32_t q_sum, float da, vec2 dsb) {
20-
return ACC_TYPE(da * (float(q_sum) * dsb.x - 8.0 * dsb.y));
20+
return ACC_TYPE(da * (float(q_sum) * dsb.x - 8.0f * dsb.y));
2121
}
2222
#endif
2323

@@ -51,7 +51,7 @@ i32vec2 repack(uint ib, uint iqs) {
5151
}
5252

5353
ACC_TYPE mul_q8_1(int32_t q_sum, float da, vec2 dsb) {
54-
return ACC_TYPE(da * (float(q_sum) * dsb.x - 16.0 * dsb.y));
54+
return ACC_TYPE(da * (float(q_sum) * dsb.x - 16.0f * dsb.y));
5555
}
5656
#endif
5757

0 commit comments

Comments (0)