@@ -69,8 +69,8 @@ llama_kv_cache_unified::llama_kv_cache_unified(
             continue;
         }
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
         const char * dev_name = "CPU";
 
@@ -1326,7 +1326,7 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std::
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Write key type
         const int32_t k_type_i = (int32_t)layer.k->type;
@@ -1348,7 +1348,7 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std::
         for (const auto & layer : layers) {
             const uint32_t il = layer.il;
 
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Write value type
             const int32_t v_type_i = (int32_t)layer.v->type;
@@ -1372,7 +1372,7 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std::
         for (const auto & layer : layers) {
             const uint32_t il = layer.il;
 
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Write value type
             const int32_t v_type_i = (int32_t)layer.v->type;
@@ -1515,7 +1515,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell
     for (const auto & layer : layers) {
         const uint32_t il = layer.il;
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Read type of key
         int32_t k_type_i_ref;
@@ -1545,7 +1545,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell
         for (const auto & layer : layers) {
             const uint32_t il = layer.il;
 
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Read type of value
             int32_t v_type_i_ref;
@@ -1575,7 +1575,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell
         for (const auto & layer : layers) {
             const uint32_t il = layer.il;
 
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Read type of value
             int32_t v_type_i_ref;
@@ -2014,8 +2014,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
             continue;
         }
 
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(i) + hparams.n_embd_k_s();
-        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(i) + hparams.n_embd_v_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(i) + hparams.n_embd_k_s(i);
+        const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(i) + hparams.n_embd_v_s(i);
 
         const char * dev_name = "CPU";
 
@@ -2717,7 +2717,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
     // Iterate and write all the keys first, each row is a cell
     // Get whole range at a time
     for (uint32_t il = 0; il < n_layer; ++il) {
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Write key type
         const int32_t k_type_i = (int32_t)k_l[il]->type;
@@ -2737,7 +2737,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
 
     if (!v_trans) {
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Write value type
             const int32_t v_type_i = (int32_t)v_l[il]->type;
@@ -2758,7 +2758,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
         // When v is transposed, we also need the element size and get the element ranges from each row
         const uint32_t kv_size = size;
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Write value type
             const int32_t v_type_i = (int32_t)v_l[il]->type;
@@ -2905,7 +2905,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
 
     // For each layer, read the keys for each cell, one row is one cell, read as one contiguous block
    for (uint32_t il = 0; il < n_layer; ++il) {
-        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s();
+        const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(il);
 
         // Read type of key
         int32_t k_type_i_ref;
@@ -2933,7 +2933,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
 
     if (!v_trans) {
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Read type of value
             int32_t v_type_i_ref;
@@ -2961,7 +2961,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
     } else {
         // For each layer, read the values for each cell (transposed)
         for (uint32_t il = 0; il < n_layer; ++il) {
-            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s();
+            const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(il);
 
             // Read type of value
             int32_t v_type_i_ref;
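Note: every hunk above makes the same change: the recurrent-state width accessors n_embd_k_s / n_embd_v_s now receive the layer index, so the extra width added to n_embd_k_gqa(il) / n_embd_v_gqa(il) can differ per layer. The snippet below is a minimal, self-contained sketch of that size computation, not llama.cpp source; the struct, the is_recurrent flag, and all numeric values are illustrative assumptions.

// Standalone sketch (assumptions only): shows how a per-layer recurrent-state
// width changes the K row width that the cache would serialize per layer.
#include <cstdint>
#include <cstdio>
#include <vector>

struct hparams_sketch {
    uint32_t n_embd_k_gqa_base = 1024;  // assumed attention K width, constant here
    std::vector<bool> is_recurrent;     // hypothetical per-layer flag

    uint32_t n_embd_k_gqa(uint32_t /*il*/) const { return n_embd_k_gqa_base; }

    // With the new per-layer signature, the recurrent-state contribution can be
    // zero for pure-attention layers and non-zero for recurrent ones.
    uint32_t n_embd_k_s(uint32_t il) const { return is_recurrent[il] ? 128 : 0; }
};

int main() {
    hparams_sketch hp;
    hp.is_recurrent = {false, true, false, true};

    for (uint32_t il = 0; il < hp.is_recurrent.size(); ++il) {
        // Mirrors the pattern used at every call site in the diff.
        const uint32_t n_embd_k = hp.n_embd_k_gqa(il) + hp.n_embd_k_s(il);
        std::printf("layer %u: K row width = %u\n", il, n_embd_k);
    }
    return 0;
}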