@@ -4335,11 +4335,11 @@ struct llama_beam {
4335
4335
float p; // Cumulative beam probability (renormalized relative to all beams)
4336
4336
bool eos; // Initialize end-of-sentence to false. Callback sets this to true.
4337
4337
// Sort beams by probability. In case of ties, prefer beams at eos.
4338
- bool operator <(llama_beam const & rhs) const {
4338
+ bool operator <(const llama_beam & rhs) const {
4339
4339
return std::make_tuple (p, eos) < std::make_tuple (rhs.p , rhs.eos );
4340
4340
}
4341
4341
// Shift off first n tokens and discard them.
4342
- void shift_tokens (size_t const n) {
4342
+ void shift_tokens (const size_t n) {
4343
4343
if (n) {
4344
4344
std::copy (tokens.begin () + n, tokens.end (), tokens.begin ());
4345
4345
tokens.resize (tokens.size () - n);
@@ -4350,10 +4350,10 @@ struct llama_beam {
4350
4350
4351
4351
// A struct for calculating logit-related info.
4352
4352
struct logit_info {
4353
- float const * const logits;
4354
- int const n_vocab;
4355
- float const max_l;
4356
- float const normalizer;
4353
+ const float * const logits;
4354
+ const int n_vocab;
4355
+ const float max_l;
4356
+ const float normalizer;
4357
4357
struct sum_exp {
4358
4358
float max_l;
4359
4359
float operator ()(float sum, float l) const { return sum + std::exp (l - max_l); }
@@ -4364,19 +4364,19 @@ struct logit_info {
4364
4364
, max_l(*std::max_element (logits, logits + n_vocab))
4365
4365
, normalizer(1 .0f / std::accumulate(logits, logits + n_vocab, 0 .0f , sum_exp{max_l}))
4366
4366
{ }
4367
- llama_token_data get_token_data (llama_token const token_id) const {
4367
+ llama_token_data get_token_data (const llama_token token_id) const {
4368
4368
constexpr auto p = std::numeric_limits<float >::quiet_NaN (); // never used
4369
4369
return {token_id, logits[token_id], p};
4370
4370
}
4371
4371
// Return top k token_data by logit.
4372
4372
std::vector<llama_token_data> top_k (size_t k) {
4373
4373
std::vector<llama_token_data> min_heap; // min-heap by logit
4374
- llama_token const k_min = std::min (static_cast <llama_token>(k), n_vocab);
4374
+ const llama_token k_min = std::min (static_cast <llama_token>(k), n_vocab);
4375
4375
min_heap.reserve (k_min);
4376
4376
for (llama_token token_id = 0 ; token_id < k_min ; ++token_id) {
4377
4377
min_heap.push_back (get_token_data (token_id));
4378
4378
}
4379
- auto comp = [](llama_token_data const & a, llama_token_data const & b) { return a.logit > b.logit ; };
4379
+ auto comp = [](const llama_token_data & a, const llama_token_data & b) { return a.logit > b.logit ; };
4380
4380
std::make_heap (min_heap.begin (), min_heap.end (), comp);
4381
4381
for (llama_token token_id = k_min ; token_id < n_vocab ; ++token_id) {
4382
4382
if (min_heap.front ().logit < logits[token_id]) {
@@ -4420,7 +4420,7 @@ struct beam_search {
4420
4420
}
4421
4421
4422
4422
// Collapse beams to a single beam given by index.
4423
- void collapse_beams (size_t const beam_idx) {
4423
+ void collapse_beams (const size_t beam_idx) {
4424
4424
if (0u < beam_idx) {
4425
4425
std::swap (beams[0 ], beams[beam_idx]);
4426
4426
}
@@ -4434,7 +4434,7 @@ struct beam_search {
4434
4434
// least element to the back(), replace it with the new, then push it into the heap.
4435
4435
void fill_next_beams_by_top_probabilities (llama_beam & beam) {
4436
4436
// Min-heaps use a greater-than comparator.
4437
- auto const comp = [](llama_beam const & a, llama_beam const & b) { return a.p > b.p ; };
4437
+ const auto comp = [](const llama_beam & a, const llama_beam & b) { return a.p > b.p ; };
4438
4438
if (beam.eos ) {
4439
4439
// beam is at end-of-sentence, so just copy it to next_beams if its probability is high enough.
4440
4440
if (next_beams.size () < n_beams) {
@@ -4473,7 +4473,7 @@ struct beam_search {
4473
4473
}
4474
4474
}
4475
4475
for (; i < n_beams ; ++i) {
4476
- float const next_p = beam.p * logit_info.probability_from_logit (next_tokens[i].logit );
4476
+ const float next_p = beam.p * logit_info.probability_from_logit (next_tokens[i].logit );
4477
4477
if (next_beams.front ().p < next_p) {
4478
4478
std::pop_heap (next_beams.begin (), next_beams.end (), comp);
4479
4479
next_beams.back () = beam;
@@ -4503,7 +4503,7 @@ struct beam_search {
4503
4503
4504
4504
// Construct beams_state to send back to caller via the callback function.
4505
4505
// Side effect: set common_prefix_length = find_common_prefix_length();
4506
- llama_beams_state get_beams_state (bool const last_call) {
4506
+ llama_beams_state get_beams_state (const bool last_call) {
4507
4507
for (size_t i = 0 ; i < beams.size () ; ++i) {
4508
4508
beam_views[i] = beams[i].view ();
4509
4509
}
@@ -4516,9 +4516,9 @@ struct beam_search {
4516
4516
// * any of the beams have not yet reached end-of-sentence, AND
4517
4517
// * the highest probability beam(s) (plural in case of ties) are not at end-of-sentence
4518
4518
// (since all other beam probabilities can only decrease)
4519
- void loop (llama_beam_search_callback_fn_t const callback, void * const callback_data) {
4519
+ void loop (const llama_beam_search_callback_fn_t callback, void * const callback_data) {
4520
4520
beams.push_back ({{}, 1 .0f , false }); // Start with one empty beam w/ probability = 1.0 and !eos.
4521
- auto const not_eos = [](llama_beam const & beam) { return !beam.eos ; };
4521
+ const auto not_eos = [](const llama_beam & beam) { return !beam.eos ; };
4522
4522
for (int i = 0 ; i < n_predict && std::any_of (beams.begin (),beams.end (),not_eos) &&
4523
4523
!beams[top_beam_index ()].eos ; ++i) {
4524
4524
callback (callback_data, get_beams_state (false )); // Sets common_prefix_length
@@ -4544,8 +4544,8 @@ struct beam_search {
4544
4544
// As beams grow, the cumulative probabilities decrease.
4545
4545
// Renormalize them to avoid floating point underflow.
4546
4546
static void renormalize_beam_probabilities (std::vector<llama_beam> & beams) {
4547
- auto const sum_p = [](float sum, llama_beam & beam) { return sum + beam.p ; };
4548
- float const inv_sum = 1 .0f / std::accumulate (beams.begin (), beams.end (), 0 .0f , sum_p);
4547
+ const auto sum_p = [](float sum, llama_beam & beam) { return sum + beam.p ; };
4548
+ const float inv_sum = 1 .0f / std::accumulate (beams.begin (), beams.end (), 0 .0f , sum_p);
4549
4549
std::for_each (beams.begin (), beams.end (), [=](llama_beam & beam) { beam.p *= inv_sum; });
4550
4550
}
4551
4551
0 commit comments