Commit a18b5ff

ggerganov authored and arthw committed
llama : minor style
1 parent 71f11f8 commit a18b5ff

File tree

1 file changed: +3, -6 lines


src/llama.cpp

Lines changed: 3 additions & 6 deletions
@@ -8715,8 +8715,7 @@ static void llm_build_kv_store(
 
     GGML_ASSERT(kv.size == n_ctx);
 
-    struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_k_gqa,
-            (ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa))*kv_head);
+    struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_k_gqa, ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa)*kv_head);
     cb(k_cache_view, "k_cache_view", il);
 
     // note: storing RoPE-ed version of K in the KV cache
@@ -8727,8 +8726,7 @@ static void llm_build_kv_store(
     struct ggml_tensor * v_cache_view = nullptr;
 
     if (cparams.flash_attn) {
-        v_cache_view = ggml_view_1d(ctx, kv.v_l[il], n_tokens*n_embd_v_gqa,
-                (kv_head)*ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa));
+        v_cache_view = ggml_view_1d(ctx, kv.v_l[il], n_tokens*n_embd_v_gqa, ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa)*kv_head);
     } else {
         // note: the V cache is transposed when not using flash attention
         v_cache_view = ggml_view_2d(ctx, kv.v_l[il], n_tokens, n_embd_v_gqa,
@@ -9215,8 +9213,7 @@ static struct ggml_tensor * llm_build_kv(
 
     struct ggml_tensor * cur;
 
-    cur = llm_build_kqv(ctx, lctx, kv, graph, wo, wo_b,
-            q_cur, kq_mask, n_tokens, n_kv, kq_scale, cb, il);
+    cur = llm_build_kqv(ctx, lctx, kv, graph, wo, wo_b, q_cur, kq_mask, n_tokens, n_kv, kq_scale, cb, il);
     cb(cur, "kqv_out", il);
 
     return cur;
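
For context, here is a minimal standalone sketch (not part of the commit) of how the K-cache view above is addressed: ggml_view_1d takes an element count and a byte offset, and the offset used in llm_build_kv_store is kv_head rows of n_embd_k_gqa elements, computed with ggml_row_size. The concrete sizes and the single-tensor stand-in for kv.k_l[il] below are illustrative assumptions, not values from the commit.

// sketch.c -- build a 1D view into a K-cache-like tensor at row kv_head
#include "ggml.h"
#include <stdio.h>

int main(void) {
    const int64_t n_ctx        = 512;  // assumed cache length (tokens)
    const int64_t n_embd_k_gqa = 128;  // assumed K width per token
    const int64_t kv_head      = 32;   // assumed write position in the cache
    const int64_t n_tokens     = 8;    // assumed batch size

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // stand-in for kv.k_l[il]: one contiguous K cache tensor for a layer
    struct ggml_tensor * k_cache = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_ctx*n_embd_k_gqa);

    // same addressing as the commit: n_tokens*n_embd_k_gqa elements,
    // starting kv_head rows (of n_embd_k_gqa elements each) into the cache
    struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, k_cache, n_tokens*n_embd_k_gqa,
            ggml_row_size(k_cache->type, n_embd_k_gqa)*kv_head);

    printf("view elements: %lld, byte offset: %zu\n",
           (long long) ggml_nelements(k_cache_view),
           ggml_row_size(k_cache->type, n_embd_k_gqa)*(size_t) kv_head);

    ggml_free(ctx);
    return 0;
}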

0 commit comments
