File tree Expand file tree Collapse file tree 1 file changed +5
-10
lines changed
ggml/src/ggml-vulkan/vulkan-shaders Expand file tree Collapse file tree 1 file changed +5
-10
lines changed Original file line number Diff line number Diff line change @@ -276,27 +276,22 @@ void main() {
276
276
Lf[r] = eMf[r]*Lf[r] + rowsumf[r];
277
277
}
278
278
279
- uint32_t v_offset = (iv2*p.nb22 + iv3*p.nb23) / 2;
280
-
281
- vec4 PVf[Br][D_per_thread / 4];
282
279
[[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) {
283
280
[[unroll]] for (uint32_t r = 0; r < Br; ++r) {
284
- PVf [r][d] = vec4(0.0) ;
281
+ Of [r][d] = eMf[r] * Of[r][d] ;
285
282
}
286
283
}
284
+
285
+ uint32_t v_offset = (iv2*p.nb22 + iv3*p.nb23) / 2;
286
+
287
287
[[unroll]] for (uint32_t c = 0; c < cols_per_thread; ++c) {
288
288
[[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) {
289
289
vec4 Vf = vec4(data_vv4[v_offset / 4 + (j * Bc + c * cols_per_iter + col_tid) * v_stride / 4 + d * D_split + d_tid]);
290
290
[[unroll]] for (uint32_t r = 0; r < Br; ++r) {
291
- PVf [r][d] += Pf[r][c] * Vf;
291
+ Of [r][d] += Pf[r][c] * Vf;
292
292
}
293
293
}
294
294
}
295
- [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) {
296
- [[unroll]] for (uint32_t r = 0; r < Br; ++r) {
297
- Of[r][d] = eMf[r] * Of[r][d] + PVf[r][d];
298
- }
299
- }
300
295
301
296
barrier();
302
297
}
You can’t perform that action at this time.
0 commit comments