@@ -754,10 +754,10 @@ static bool llama_grammar_detect_left_recursion(
754
754
}
755
755
756
756
struct llama_grammar * llama_grammar_init_impl (
757
- const struct llama_vocab & vocab,
757
+ const struct llama_vocab * vocab,
758
758
const llama_grammar_element ** rules,
759
- size_t n_rules,
760
- size_t start_rule_index) {
759
+ size_t n_rules,
760
+ size_t start_rule_index) {
761
761
const llama_grammar_element * pos;
762
762
763
763
// copy rule definitions into vectors
@@ -808,10 +808,10 @@ struct llama_grammar * llama_grammar_init_impl(
808
808
// Important: vec_rules has to be moved here, not copied, because stacks contains
809
809
// pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
810
810
// then the pointers would be invalidated when the local vec_rules goes out of scope.
811
- return new llama_grammar{ vocab, std::move (vec_rules), std::move (stacks), {}, 0 , 0 , 0 };
811
+ return new llama_grammar { vocab, std::move (vec_rules), std::move (stacks), {}, 0 , 0 , 0 };
812
812
}
813
813
814
- struct llama_grammar * llama_grammar_init_impl (const struct llama_vocab & vocab, const char * grammar_str, const char * grammar_root) {
814
+ struct llama_grammar * llama_grammar_init_impl (const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root) {
815
815
llama_grammar_parser parser;
816
816
817
817
// if there is a grammar, parse it
@@ -886,15 +886,15 @@ struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab & vocab,
886
886
// Important: vec_rules has to be moved here, not copied, because stacks contains
887
887
// pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
888
888
// then the pointers would be invalidated when the local vec_rules goes out of scope.
889
- return new llama_grammar{ vocab, std::move (vec_rules), std::move (stacks), {}, 0 , 0 , 0 };
889
+ return new llama_grammar { vocab, std::move (vec_rules), std::move (stacks), {}, 0 , 0 , 0 };
890
890
}
891
891
892
892
// Releases a grammar object by invoking `delete` on it.
//
// The init and copy functions in this file return grammars allocated with
// `new llama_grammar { ... }`; this function is the matching release for
// those pointers. Because it uses a plain `delete` expression, passing a
// null pointer is a no-op.
//
// NOTE(review): presumably the caller must not use `grammar` after this
// call and must not free the same pointer twice - confirm against callers.
void llama_grammar_free_impl (struct llama_grammar * grammar) {
893
893
delete grammar;
894
894
}
895
895
896
896
struct llama_grammar * llama_grammar_copy_impl (const struct llama_grammar & grammar) {
897
- llama_grammar * result = new llama_grammar{ grammar.vocab , grammar.rules , grammar.stacks , grammar.partial_utf8 , 0 , 0 , 0 };
897
+ llama_grammar * result = new llama_grammar { grammar.vocab , grammar.rules , grammar.stacks , grammar.partial_utf8 , 0 , 0 , 0 };
898
898
899
899
// redirect elements in stacks to point to new rules
900
900
for (size_t is = 0 ; is < result->stacks .size (); is++) {
@@ -913,6 +913,8 @@ struct llama_grammar * llama_grammar_copy_impl(const struct llama_grammar & gram
913
913
}
914
914
915
915
void llama_grammar_apply_impl (const struct llama_grammar & grammar, llama_token_data_array * candidates) {
916
+ GGML_ASSERT (grammar.vocab != nullptr );
917
+
916
918
bool allow_eog = false ;
917
919
for (const auto & stack : grammar.stacks ) {
918
920
if (stack.empty ()) {
@@ -929,9 +931,9 @@ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_
929
931
930
932
for (size_t i = 0 ; i < candidates->size ; ++i) {
931
933
const llama_token id = candidates->data [i].id ;
932
- const std::string & piece = grammar.vocab . cache_token_to_piece .at (id);
934
+ const std::string & piece = grammar.vocab -> cache_token_to_piece .at (id);
933
935
934
- if (llama_token_is_eog_impl (grammar.vocab , id)) {
936
+ if (llama_token_is_eog_impl (* grammar.vocab , id)) {
935
937
if (!allow_eog) {
936
938
candidates->data [i].logit = -INFINITY;
937
939
}
@@ -950,7 +952,9 @@ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_
950
952
}
951
953
952
954
void llama_grammar_accept_impl (struct llama_grammar & grammar, llama_token token) {
953
- if (llama_token_is_eog_impl (grammar.vocab , token)) {
955
+ GGML_ASSERT (grammar.vocab != nullptr );
956
+
957
+ if (llama_token_is_eog_impl (*grammar.vocab , token)) {
954
958
for (const auto & stack : grammar.stacks ) {
955
959
if (stack.empty ()) {
956
960
return ;
@@ -959,16 +963,15 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
959
963
GGML_ABORT (" fatal error" );
960
964
}
961
965
962
- const std::string & piece = grammar.vocab . cache_token_to_piece .at (token);
966
+ const std::string & piece = grammar.vocab -> cache_token_to_piece .at (token);
963
967
964
968
// Note terminating 0 in decoded string
965
969
const auto decoded = decode_utf8 (piece, grammar.partial_utf8 );
966
970
const auto & code_points = decoded.first ;
967
971
968
- llama_grammar_stacks tmp_new_stacks;
969
972
for (auto it = code_points.begin (), end = code_points.end () - 1 ; it != end; ++it) {
970
- llama_grammar_accept (grammar.rules , grammar.stacks , *it, tmp_new_stacks );
971
- grammar.stacks = tmp_new_stacks ;
973
+ llama_grammar_stacks new_stacks = llama_grammar_accept (grammar.rules , grammar.stacks , *it);
974
+ grammar.stacks = std::move (new_stacks) ;
972
975
}
973
976
974
977
grammar.partial_utf8 = decoded.second ;
@@ -1045,12 +1048,12 @@ std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
1045
1048
return std::make_pair (std::move (code_points), llama_partial_utf8{ value, n_remain });
1046
1049
}
1047
1050
1048
- void llama_grammar_accept (
1051
+ llama_grammar_stacks llama_grammar_accept (
1049
1052
const llama_grammar_rules & rules,
1050
1053
const llama_grammar_stacks & stacks,
1051
- const uint32_t chr,
1052
- llama_grammar_stacks & new_stacks) {
1053
- new_stacks. clear ( );
1054
+ const uint32_t chr) {
1055
+ llama_grammar_stacks result;
1056
+ result. reserve (stacks. size () );
1054
1057
1055
1058
for (const auto & stack : stacks) {
1056
1059
if (stack.empty ()) {
@@ -1066,9 +1069,11 @@ void llama_grammar_accept(
1066
1069
if (!llama_grammar_is_end_of_sequence (pos)) {
1067
1070
new_stack.push_back (pos);
1068
1071
}
1069
- llama_grammar_advance_stack (rules, new_stack, new_stacks );
1072
+ llama_grammar_advance_stack (rules, new_stack, result );
1070
1073
}
1071
1074
}
1075
+
1076
+ return result;
1072
1077
}
1073
1078
1074
1079
llama_grammar_candidates llama_grammar_reject_candidates_for_stack (
0 commit comments