We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 278d0e1 · commit 7a221b6 — Copy full SHA for 7a221b6
src/llama.cpp
@@ -8134,7 +8134,7 @@ static struct ggml_tensor * llm_build_kqv(
8134
struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
8135
cb(kq, "kq", il);
8136
8137
- if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX) {
+ if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX || model.arch == LLM_ARCH_QWEN2) {
8138
// for this arch, we need to perform the KQ multiplication with F32 precision, otherwise we get NaNs
8139
// ref: https://github.com/ggerganov/llama.cpp/pull/4490#issuecomment-1859055847
8140
ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
0 commit comments