We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 945ce4f commit 2186990 — Copy full SHA for 2186990
src/llama.cpp
@@ -8135,7 +8135,7 @@ static struct ggml_tensor * llm_build_kqv(
8135
struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
8136
cb(kq, "kq", il);
8137
8138
- if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX) {
+ if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX || model.arch == LLM_ARCH_QWEN2) {
8139
// for this arch, we need to perform the KQ multiplication with F32 precision, otherwise we get NaNs
8140
// ref: https://github.com/ggerganov/llama.cpp/pull/4490#issuecomment-1859055847
8141
ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
0 commit comments