@@ -120,7 +120,6 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
120
120
static bool llama_grammar_match_partial_char (
121
121
const llama_grammar_element * pos,
122
122
const llama_partial_utf8 partial_utf8) {
123
-
124
123
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
125
124
GGML_ASSERT (is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
126
125
@@ -166,14 +165,12 @@ static bool llama_grammar_match_partial_char(
166
165
return !is_positive_char;
167
166
}
168
167
169
-
170
168
// transforms a grammar pushdown stack into N possible stacks, all ending
171
169
// at a character range (terminal element)
172
170
static void llama_grammar_advance_stack (
173
- const std::vector<std::vector<llama_grammar_element>> & rules,
174
- const std::vector<const llama_grammar_element *> & stack,
175
- std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
176
-
171
+ const llama_grammar_rules & rules,
172
+ const llama_grammar_stack & stack,
173
+ llama_grammar_stacks & new_stacks) {
177
174
if (stack.empty ()) {
178
175
if (std::find (new_stacks.begin (), new_stacks.end (), stack) == new_stacks.end ()) {
179
176
new_stacks.emplace_back (stack);
@@ -189,7 +186,7 @@ static void llama_grammar_advance_stack(
189
186
const llama_grammar_element * subpos = rules[rule_id].data ();
190
187
do {
191
188
// init new stack without the top (pos)
192
- std::vector< const llama_grammar_element *> new_stack (stack.begin (), stack.end () - 1 );
189
+ llama_grammar_stack new_stack (stack.begin (), stack.end () - 1 );
193
190
if (!llama_grammar_is_end_of_sequence (pos + 1 )) {
194
191
// if this rule ref is followed by another element, add that to stack
195
192
new_stack.push_back (pos + 1 );
@@ -233,11 +230,10 @@ static void llama_grammar_advance_stack(
233
230
// produces the N possible stacks if the given char is accepted at those
234
231
// positions
235
232
void llama_grammar_accept (
236
- const std::vector<std::vector<llama_grammar_element>> & rules,
237
- const std::vector<std::vector<const llama_grammar_element *>> & stacks,
238
- const uint32_t chr,
239
- std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
240
-
233
+ const llama_grammar_rules & rules,
234
+ const llama_grammar_stacks & stacks,
235
+ const uint32_t chr,
236
+ llama_grammar_stacks & new_stacks) {
241
237
new_stacks.clear ();
242
238
243
239
for (const auto & stack : stacks) {
@@ -250,7 +246,7 @@ void llama_grammar_accept(
250
246
const llama_grammar_element * pos = match.second ;
251
247
252
248
// update top of stack to next element, if any
253
- std::vector< const llama_grammar_element *> new_stack (stack.begin (), stack.end () - 1 );
249
+ llama_grammar_stack new_stack (stack.begin (), stack.end () - 1 );
254
250
if (!llama_grammar_is_end_of_sequence (pos)) {
255
251
new_stack.push_back (pos);
256
252
}
@@ -259,17 +255,30 @@ void llama_grammar_accept(
259
255
}
260
256
}
261
257
262
- static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates (
263
- const std::vector<std::vector<llama_grammar_element>> & rules,
264
- const std::vector<std::vector<const llama_grammar_element *>> & stacks,
265
- const std::vector<llama_grammar_candidate> & candidates);
258
+ static llama_grammar_candidates llama_grammar_reject_candidates (
259
+ const llama_grammar_rules & rules,
260
+ const llama_grammar_stacks & stacks,
261
+ const llama_grammar_candidates & candidates) {
262
+ GGML_ASSERT (!stacks.empty ()); // REVIEW
263
+
264
+ if (candidates.empty ()) {
265
+ return {};
266
+ }
267
+
268
+ auto rejects = llama_grammar_reject_candidates_for_stack (rules, stacks.front (), candidates);
269
+
270
+ for (size_t i = 1 , size = stacks.size (); i < size; ++i) {
271
+ rejects = llama_grammar_reject_candidates_for_stack (rules, stacks[i], rejects);
272
+ }
273
+ return rejects;
274
+ }
266
275
267
- std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack (
268
- const std::vector<std::vector<llama_grammar_element>> & rules,
269
- const std::vector< const llama_grammar_element *> & stack,
270
- const std::vector<llama_grammar_candidate> & candidates) {
276
+ llama_grammar_candidates llama_grammar_reject_candidates_for_stack (
277
+ const llama_grammar_rules & rules,
278
+ const llama_grammar_stack & stack,
279
+ const llama_grammar_candidates & candidates) {
271
280
272
- std::vector<llama_grammar_candidate> rejects;
281
+ llama_grammar_candidates rejects;
273
282
rejects.reserve (candidates.size ());
274
283
275
284
if (stack.empty ()) {
@@ -283,7 +292,7 @@ std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
283
292
284
293
const llama_grammar_element * stack_pos = stack.back ();
285
294
286
- std::vector<llama_grammar_candidate> next_candidates;
295
+ llama_grammar_candidates next_candidates;
287
296
next_candidates.reserve (candidates.size ());
288
297
289
298
for (const auto & tok : candidates) {
@@ -304,11 +313,11 @@ std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
304
313
const auto * stack_pos_after = llama_grammar_match_char (stack_pos, 0 ).second ;
305
314
306
315
// update top of stack to next element, if any
307
- std::vector< const llama_grammar_element *> stack_after (stack.begin (), stack.end () - 1 );
316
+ llama_grammar_stack stack_after (stack.begin (), stack.end () - 1 );
308
317
if (!llama_grammar_is_end_of_sequence (stack_pos_after)) {
309
318
stack_after.push_back (stack_pos_after);
310
319
}
311
- std::vector<std::vector< const llama_grammar_element *>> next_stacks;
320
+ llama_grammar_stacks next_stacks;
312
321
llama_grammar_advance_stack (rules, stack_after, next_stacks);
313
322
314
323
auto next_rejects = llama_grammar_reject_candidates (rules, next_stacks, next_candidates);
@@ -319,37 +328,19 @@ std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
319
328
return rejects;
320
329
}
321
330
322
- static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates (
323
- const std::vector<std::vector<llama_grammar_element>> & rules,
324
- const std::vector<std::vector<const llama_grammar_element *>> & stacks,
325
- const std::vector<llama_grammar_candidate> & candidates) {
326
- GGML_ASSERT (!stacks.empty ()); // REVIEW
327
-
328
- if (candidates.empty ()) {
329
- return std::vector<llama_grammar_candidate>();
330
- }
331
-
332
- auto rejects = llama_grammar_reject_candidates_for_stack (rules, stacks.front (), candidates);
333
-
334
- for (size_t i = 1 , size = stacks.size (); i < size; ++i) {
335
- rejects = llama_grammar_reject_candidates_for_stack (rules, stacks[i], rejects);
336
- }
337
- return rejects;
338
- }
339
-
340
331
static bool llama_grammar_detect_left_recursion (
341
- const std::vector<std::vector<llama_grammar_element>> & rules,
342
- size_t rule_index,
343
- std::vector<bool > * rules_visited,
344
- std::vector<bool > * rules_in_progress,
345
- std::vector<bool > * rules_may_be_empty) {
332
+ const llama_grammar_rules & rules,
333
+ size_t rule_index,
334
+ std::vector<bool > * rules_visited,
335
+ std::vector<bool > * rules_in_progress,
336
+ std::vector<bool > * rules_may_be_empty) {
346
337
if ((*rules_in_progress)[rule_index]) {
347
338
return true ;
348
339
}
349
340
350
341
(*rules_in_progress)[rule_index] = true ;
351
342
352
- const std::vector<llama_grammar_element> & rule = rules[rule_index];
343
+ const llama_grammar_rule & rule = rules[rule_index];
353
344
354
345
// First check if the rule might produce the empty string. This could be done combined with the second
355
346
// step but it's more readable as two steps.
@@ -400,7 +391,7 @@ struct llama_grammar * llama_grammar_init(
400
391
const llama_grammar_element * pos;
401
392
402
393
// copy rule definitions into vectors
403
- std::vector<std::vector<llama_grammar_element>> vec_rules (n_rules);
394
+ llama_grammar_rules vec_rules (n_rules);
404
395
for (size_t i = 0 ; i < n_rules; i++) {
405
396
for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
406
397
vec_rules[i].push_back (*pos);
@@ -423,10 +414,10 @@ struct llama_grammar * llama_grammar_init(
423
414
}
424
415
425
416
// loop over alternates of start rule to build initial stacks
426
- std::vector<std::vector< const llama_grammar_element *>> stacks;
417
+ llama_grammar_stacks stacks;
427
418
pos = vec_rules[start_rule_index].data ();
428
419
do {
429
- std::vector< const llama_grammar_element *> stack;
420
+ llama_grammar_stack stack;
430
421
if (!llama_grammar_is_end_of_sequence (pos)) {
431
422
// if alternate is nonempty, add to stack
432
423
stack.push_back (pos);
@@ -488,7 +479,7 @@ void llama_grammar_sample(struct llama_context * ctx, llama_token_data_array * c
488
479
std::vector<std::pair<std::vector<uint32_t >, llama_partial_utf8>> candidates_decoded;
489
480
candidates_decoded.reserve (candidates->size );
490
481
491
- std::vector<llama_grammar_candidate> candidates_grammar;
482
+ llama_grammar_candidates candidates_grammar;
492
483
candidates_grammar.reserve (candidates->size );
493
484
494
485
for (size_t i = 0 ; i < candidates->size ; ++i) {
@@ -532,11 +523,13 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
532
523
// Note terminating 0 in decoded string
533
524
const auto decoded = decode_utf8 (piece, grammar->partial_utf8 );
534
525
const auto & code_points = decoded.first ;
535
- std::vector<std::vector<const llama_grammar_element *>> tmp_new_stacks;
526
+
527
+ llama_grammar_stacks tmp_new_stacks;
536
528
for (auto it = code_points.begin (), end = code_points.end () - 1 ; it != end; ++it) {
537
529
llama_grammar_accept (grammar->rules , grammar->stacks , *it, tmp_new_stacks);
538
530
grammar->stacks = tmp_new_stacks;
539
531
}
532
+
540
533
grammar->partial_utf8 = decoded.second ;
541
534
GGML_ASSERT (!grammar->stacks .empty ());
542
535
0 commit comments