@@ -74,8 +74,8 @@ llama_kv_cache_unified::llama_kv_cache_unified(
             continue;
         }
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
         const char * dev_name = "CPU";
 
@@ -1255,7 +1255,7 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std::
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Write key type
         const int32_t k_type_i = (int32_t)layer.k->type;
@@ -1277,7 +1277,7 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std::
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
         // Write value type
         const int32_t v_type_i = (int32_t)layer.v->type;
@@ -1301,7 +1301,7 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std::
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
         // Write value type
         const int32_t v_type_i = (int32_t)layer.v->type;
@@ -1438,7 +1438,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Read type of key
         int32_t k_type_i_ref;
@@ -1468,7 +1468,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
         // Read type of value
         int32_t v_type_i_ref;
@@ -1498,7 +1498,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
         // Read type of value
         int32_t v_type_i_ref;
@@ -1793,8 +1793,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
             continue;
         }
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(i) + hparams.n_embd_k_s();
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(i) + hparams.n_embd_v_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(i) + hparams.n_embd_k_s(i);
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(i) + hparams.n_embd_v_s(i);
 
         const char * dev_name = "CPU";
 
@@ -2498,7 +2498,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
     // Iterate and write all the keys first, each row is a cell
     // Get whole range at a time
     for (uint32_t il = 0; il < n_layer; ++il) {
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Write key type
         const int32_t k_type_i = (int32_t)k_l[il]->type;
@@ -2518,7 +2518,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
 
     if (!v_trans) {
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Write value type
             const int32_t v_type_i = (int32_t)v_l[il]->type;
@@ -2539,7 +2539,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
         // When v is transposed, we also need the element size and get the element ranges from each row
         const uint32_t kv_size = size;
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Write value type
             const int32_t v_type_i = (int32_t)v_l[il]->type;
@@ -2686,7 +2686,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
 
     // For each layer, read the keys for each cell, one row is one cell, read as one contiguous block
     for (uint32_t il = 0; il < n_layer; ++il) {
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Read type of key
         int32_t k_type_i_ref;
@@ -2714,7 +2714,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
 
     if (!v_trans) {
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Read type of value
             int32_t v_type_i_ref;
@@ -2742,7 +2742,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
     } else {
         // For each layer, read the values for each cell (transposed)
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Read type of value
             int32_t v_type_i_ref;
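Every hunk above makes the same mechanical change: the recurrent-state size accessors n_embd_k_s and n_embd_v_s now receive the layer index. The point of passing il is presumably to let hybrid models report a non-zero conv/SSM state size only on their recurrent layers, so attention-only layers do not reserve extra K/V room. The following is a minimal standalone sketch of what such per-layer accessors might look like; the recurrent_layer_arr flag and the exact return formulas are assumptions modeled on the existing Mamba-style fields of llama_hparams, not the code from this PR.

#include <cstdint>
#include <vector>

// Illustrative stand-in for the relevant llama_hparams fields (names assumed).
struct hparams_sketch {
    uint32_t ssm_d_conv  = 4;    // convolution kernel width
    uint32_t ssm_d_inner = 1536; // SSM inner dimension
    uint32_t ssm_d_state = 16;   // SSM state dimension

    std::vector<bool> recurrent_layer_arr; // true where a layer carries recurrent state

    bool recurrent_layer(uint32_t il) const {
        return recurrent_layer_arr[il];
    }

    // per-layer K-state size: non-zero only for recurrent layers
    uint32_t n_embd_k_s(uint32_t il) const {
        if (!recurrent_layer(il)) {
            return 0;
        }
        // rolling convolution state; the first column is shifted out, hence d_conv - 1
        return (ssm_d_conv > 0 ? ssm_d_conv - 1 : 0) * ssm_d_inner;
    }

    // per-layer V-state size: non-zero only for recurrent layers
    uint32_t n_embd_v_s(uint32_t il) const {
        return recurrent_layer(il) ? ssm_d_state * ssm_d_inner : 0;
    }
};

With that shape, the cache constructors and the state_write_data/state_read_data loops above can keep adding hparams.n_embd_k_s(il) / hparams.n_embd_v_s(il) unconditionally: the term simply collapses to zero on non-recurrent layers.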