Skip to content

Commit 10532f8

Browse files
committed
support phi4
1 parent b7f9242 commit 10532f8

File tree

1 file changed

+8 -2 lines changed

1 file changed

+8 -2 lines changed

src/llama.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12842,7 +12842,13 @@ struct llm_build_context {
12842 12842
struct ggml_tensor * inp_pos = build_inp_pos();
12843 12843

12844 12844
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
12845-
struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa();
12845+
struct ggml_tensor * KQ_mask = nullptr;
12846+
if (hparams.n_swa == 0) {
12847+
// Phi-4 doesn't use sliding window attention
12848+
KQ_mask = build_inp_KQ_mask();
12849+
} else {
12850+
KQ_mask = build_inp_KQ_mask_swa();
12851+
}
12846 12852

12847 12853
for (int il = 0; il < n_layer; ++il) {
12848 12854
auto residual = inpL;
@@ -12900,7 +12906,7 @@ struct llm_build_context {
12900 12906

12901 12907
cur = llm_build_kv(ctx0, lctx, kv_self, gf,
12902 12908
model.layers[il].wo, model.layers[il].bo,
12903-
Kcur, Vcur, Qcur, KQ_mask_swa, n_tokens, kv_head, n_kv, 1.0f, cb, il);
12909+
Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f, cb, il);
12904 12910
}
12905 12911

12906 12912
if (il == n_layer - 1) {

0 commit comments

Comments (0)