@@ -6420,10 +6420,13 @@ struct llama_grammar_candidate {
6420
6420
// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
6421
6421
static std::pair<std::vector<uint32_t >, llama_partial_utf8> decode_utf8 (
6422
6422
const char * src,
6423
+ size_t n_src,
6423
6424
llama_partial_utf8 partial_start) {
6424
6425
static const int lookup[] = { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 2 , 2 , 3 , 4 };
6425
6426
const char * pos = src;
6426
6427
std::vector<uint32_t > code_points;
6428
+ // Common English (ASCII) strings have the same number of code points as bytes; `+ 1` is for the terminating 0.
6429
+ code_points.reserve (n_src + 1 );
6427
6430
uint32_t value = partial_start.value ;
6428
6431
int n_remain = partial_start.n_remain ;
6429
6432
@@ -6474,6 +6477,13 @@ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
6474
6477
return std::make_pair (std::move (code_points), llama_partial_utf8{ value, n_remain });
6475
6478
}
6476
6479
6480
+ static std::pair<std::vector<uint32_t >, llama_partial_utf8> decode_utf8 (
6481
+ std::string src,
6482
+ llama_partial_utf8 partial_start
6483
+ ) {
6484
+ return decode_utf8 (src.c_str (), src.size (), partial_start);
6485
+ }
6486
+
6477
6487
// returns true iff pos points to the end of one of the definitions of a rule
6478
6488
static bool llama_grammar_is_end_of_sequence (const llama_grammar_element * pos) {
6479
6489
switch (pos->type ) {
@@ -7123,7 +7133,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
7123
7133
} else if (piece.empty () || piece[0 ] == 0 ) {
7124
7134
candidates->data [i].logit = -INFINITY;
7125
7135
} else {
7126
- candidates_decoded.push_back (decode_utf8 (piece. c_str () , grammar->partial_utf8 ));
7136
+ candidates_decoded.push_back (decode_utf8 (piece, grammar->partial_utf8 ));
7127
7137
candidates_grammar.push_back ({ i, candidates_decoded.back ().first .data (), candidates_decoded.back ().second });
7128
7138
}
7129
7139
}
@@ -7330,7 +7340,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
7330
7340
const std::string piece = llama_token_to_piece (ctx, token);
7331
7341
7332
7342
// Note terminating 0 in decoded string
7333
- const auto decoded = decode_utf8 (piece. c_str () , grammar->partial_utf8 );
7343
+ const auto decoded = decode_utf8 (piece, grammar->partial_utf8 );
7334
7344
const auto & code_points = decoded.first ;
7335
7345
for (auto it = code_points.begin (), end = code_points.end () - 1 ; it != end; ++it) {
7336
7346
grammar->stacks = llama_grammar_accept (grammar->rules , grammar->stacks , *it);
0 commit comments