We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4c32832 commit 259469c. Copy full SHA for 259469c
src/llama-graph.cpp
@@ -1287,6 +1287,10 @@ ggml_tensor * llm_graph_context::build_attn(
1287
1288
if (wo) {
1289
cur = build_lora_mm(wo, cur);
1290
+ if (arch == LLM_ARCH_GLM4) {
1291
+ // GLM4 seems to have numerical issues with half-precision accumulators
1292
+ ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
1293
+ }
1294
}
1295
1296
if (wo_b) {
@@ -1367,10 +1371,6 @@ ggml_tensor * llm_graph_context::build_attn(
1367
1371
1368
1372
1369
1373
1370
- if (arch == LLM_ARCH_GLM4) {
- // GLM4 seems to have numerical issues with half-precision accumulators
- ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
- }
1374
1375
1376
0 commit comments