
Commit 5d9fed7

remove shape annotations in llama_eval_internal
1 parent d20ba6f commit 5d9fed7

File tree

1 file changed: +0 additions, −24 deletions


llama.cpp

Lines changed: 0 additions & 24 deletions
@@ -1090,7 +1090,6 @@ static bool llama_eval_internal(
     ggml_set_name(embd, "embd");
     memcpy(embd->data, tokens, N*ggml_element_size(embd));
 
-    // inpL shape [n_embd,N,1,1]
     struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
 
     for (int il = 0; il < n_layer; ++il) {
@@ -1102,7 +1101,6 @@ static bool llama_eval_internal(
 
         // norm
         {
-            // cur shape [n_embd,N,1,1]
             cur = ggml_rms_norm(ctx0, inpL);
 
             // cur = attention_norm*cur
@@ -1114,10 +1112,6 @@ static bool llama_eval_internal(
         // self-attention
         {
             // compute Q and K and RoPE them
-            // wq shape [n_embd, n_embd, 1, 1]
-            // wk shape [n_embd, n_embd, 1, 1]
-            // Qcur shape [n_embd/n_head, n_head, N, 1]
-            // Kcur shape [n_embd/n_head, n_head, N, 1]
             struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
             struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
             ggml_set_name(Qcur, "Qcur");
@@ -1126,14 +1120,8 @@ static bool llama_eval_internal(
             // store key and value to memory
             {
                 // compute the transposed [N, n_embd] V matrix
-                // wv shape [n_embd, n_embd, 1, 1]
-                // Vcur shape [n_embd, N, 1, 1]
                 struct ggml_tensor * Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wv, cur), n_embd, N));
 
-                // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
-                // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
-                // k shape [n_embd * N, 1] == kv_self.k[:,n_past:n_past+N,il,0]
-                // v shape [N, n_embd, 1, 1] == kv_self.v[:,n_past:n_past+N,il,0]
                 struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past));
                 struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd,
                         (   n_ctx)*ggml_element_size(kv_self.v),
@@ -1144,16 +1132,12 @@ static bool llama_eval_internal(
                 ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
             }
 
-            // Qcur shape [n_embd/n_head, n_head, N, 1]
-            // Q shape [n_embd/n_head, N, n_head, 1]
             struct ggml_tensor * Q =
                 ggml_permute(ctx0,
                         Qcur,
                         0, 2, 1, 3);
             ggml_set_name(Q, "Q");
 
-            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
-            // K shape [n_embd/n_head, n_past + N, n_head, 1]
             struct ggml_tensor * K =
                 ggml_permute(ctx0,
                         ggml_reshape_3d(ctx0,
@@ -1163,7 +1147,6 @@ static bool llama_eval_internal(
             ggml_set_name(K, "K");
 
             // K * Q
-            // KQ shape [n_past + N, N, n_head, 1]
             struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);
             ggml_set_name(KQ, "KQ");
 
@@ -1176,19 +1159,15 @@ static bool llama_eval_internal(
             ggml_set_name(KQ_scaled, "KQ_scaled");
 
             // KQ_masked = mask_past(KQ_scaled)
-            // KQ_masked shape [n_past + N, N, n_head, 1]
             struct ggml_tensor * KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
             ggml_set_name(KQ_masked, "KQ_masked");
 
             // KQ = soft_max(KQ_masked)
-            // KQ_soft_max shape [n_past + N, N, n_head, 1]
             struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked);
             ggml_set_name(KQ_soft_max, "KQ_soft_max");
 
 
             // split cached V into n_head heads
-            //// V shape [n_past + N, n_embd/n_head, n_head, 1]
-            // V shape [n_past + N, n_embd/n_head, n_head, 1] == kv_self.v[:,:(n_past+N),il,1]
             struct ggml_tensor * V =
                 ggml_view_3d(ctx0, kv_self.v,
                         n_past + N, n_embd/n_head, n_head,
@@ -1198,7 +1177,6 @@ static bool llama_eval_internal(
             ggml_set_name(V, "V");
 
 #if 1
-            // KQV shape [n_embd/n_head, N, n_head, 1]
             struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max);
             ggml_set_name(KQV, "KQV");
 #else
@@ -1210,12 +1188,10 @@ static bool llama_eval_internal(
 #endif
 
             // KQV_merged = KQV.permute(0, 2, 1, 3)
-            // KQV_merged shape [n_embd/n_head, n_head, N, 1]
             struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
             ggml_set_name(KQV_merged, "KQV_merged");
 
             // cur = KQV_merged.contiguous().view(n_embd, N)
-            // cur shape [n_embd,N,1,1]
             cur = ggml_cpy(ctx0,
                     KQV_merged,
                     ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
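
Note on the removed annotations: they duplicated shape information that ggml already tracks at runtime, since every ggml_tensor stores its dimension sizes in ne[0..3], with ne[0] the innermost dimension. As a minimal sketch, assuming only ggml's public tensor struct and using a hypothetical helper name (debug_print_shape is not part of llama.cpp or this commit), the same shapes can be printed for any node while the graph is being built:

#include <cstdio>
#include "ggml.h"

// Hypothetical debugging helper (illustration only, not in this commit):
// prints a tensor's four ne[] dimension sizes, which is the information the
// removed "// ... shape [...]" comments documented by hand.
static void debug_print_shape(const struct ggml_tensor * t, const char * label) {
    printf("%s shape [%lld, %lld, %lld, %lld]\n", label,
            (long long) t->ne[0], (long long) t->ne[1],
            (long long) t->ne[2], (long long) t->ne[3]);
}

For example, calling debug_print_shape(Qcur, "Qcur") right after Qcur is built should report [n_embd/n_head, n_head, N, 1], matching the comment removed above, without a hand-maintained annotation that can drift out of sync with the code.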

0 commit comments
