Skip to content

Commit 7c6e172

Browse files
committed
cont
ggml-ci
1 parent 815e16d commit 7c6e172

File tree

3 files changed

+59
-59
lines changed

3 files changed

+59
-59
lines changed

Package.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import PackageDescription
44

55
var sources = [
66
"src/llama.cpp",
7+
"src/llama-vocab.cpp",
8+
"src/llama-grammar.cpp",
9+
"src/llama-sampling.cpp",
710
"src/unicode.cpp",
811
"src/unicode-data.cpp",
912
"ggml/src/ggml.c",

include/llama.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,8 +1188,12 @@ struct llama_grammar_candidate {
11881188
llama_partial_utf8 partial_utf8;
11891189
};
11901190

1191-
using llama_grammar_rules = std::vector<std::vector<llama_grammar_element>>;
1192-
using llama_grammar_stacks = std::vector<std::vector<const llama_grammar_element *>>;
1191+
// Shorthand aliases for the container types used by the grammar functions.
// A rule is a sequence of grammar elements.
using llama_grammar_rule = std::vector< llama_grammar_element>;
1192+
// A stack is a sequence of non-owning pointers to const grammar elements.
using llama_grammar_stack = std::vector<const llama_grammar_element *>;
1193+
1194+
// Plural forms: a list of rules, a list of stacks, and a list of candidates.
using llama_grammar_rules = std::vector<llama_grammar_rule>;
1195+
using llama_grammar_stacks = std::vector<llama_grammar_stack>;
1196+
using llama_grammar_candidates = std::vector<llama_grammar_candidate>;
11931197

11941198
const llama_grammar_rules & llama_grammar_get_rules (const struct llama_grammar * grammar);
11951199
llama_grammar_stacks & llama_grammar_get_stacks( struct llama_grammar * grammar);
@@ -1201,9 +1205,9 @@ void llama_grammar_accept(
12011205
llama_grammar_stacks & new_stacks);
12021206

12031207
std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
1204-
const std::vector<std::vector<llama_grammar_element>> & rules,
1205-
const std::vector<const llama_grammar_element *> & stack,
1206-
const std::vector<llama_grammar_candidate> & candidates);
1208+
const llama_grammar_rules & rules,
1209+
const llama_grammar_stack & stack,
1210+
const llama_grammar_candidates & candidates);
12071211

12081212
std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
12091213
const std::string & src,

src/llama-grammar.cpp

Lines changed: 47 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
120120
static bool llama_grammar_match_partial_char(
121121
const llama_grammar_element * pos,
122122
const llama_partial_utf8 partial_utf8) {
123-
124123
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
125124
GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
126125

@@ -166,14 +165,12 @@ static bool llama_grammar_match_partial_char(
166165
return !is_positive_char;
167166
}
168167

169-
170168
// transforms a grammar pushdown stack into N possible stacks, all ending
171169
// at a character range (terminal element)
172170
static void llama_grammar_advance_stack(
173-
const std::vector<std::vector<llama_grammar_element>> & rules,
174-
const std::vector<const llama_grammar_element *> & stack,
175-
std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
176-
171+
const llama_grammar_rules & rules,
172+
const llama_grammar_stack & stack,
173+
llama_grammar_stacks & new_stacks) {
177174
if (stack.empty()) {
178175
if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
179176
new_stacks.emplace_back(stack);
@@ -189,7 +186,7 @@ static void llama_grammar_advance_stack(
189186
const llama_grammar_element * subpos = rules[rule_id].data();
190187
do {
191188
// init new stack without the top (pos)
192-
std::vector<const llama_grammar_element *> new_stack(stack.begin(), stack.end() - 1);
189+
llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
193190
if (!llama_grammar_is_end_of_sequence(pos + 1)) {
194191
// if this rule ref is followed by another element, add that to stack
195192
new_stack.push_back(pos + 1);
@@ -233,11 +230,10 @@ static void llama_grammar_advance_stack(
233230
// produces the N possible stacks if the given char is accepted at those
234231
// positions
235232
void llama_grammar_accept(
236-
const std::vector<std::vector<llama_grammar_element>> & rules,
237-
const std::vector<std::vector<const llama_grammar_element *>> & stacks,
238-
const uint32_t chr,
239-
std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
240-
233+
const llama_grammar_rules & rules,
234+
const llama_grammar_stacks & stacks,
235+
const uint32_t chr,
236+
llama_grammar_stacks & new_stacks) {
241237
new_stacks.clear();
242238

243239
for (const auto & stack : stacks) {
@@ -250,7 +246,7 @@ void llama_grammar_accept(
250246
const llama_grammar_element * pos = match.second;
251247

252248
// update top of stack to next element, if any
253-
std::vector<const llama_grammar_element *> new_stack(stack.begin(), stack.end() - 1);
249+
llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
254250
if (!llama_grammar_is_end_of_sequence(pos)) {
255251
new_stack.push_back(pos);
256252
}
@@ -259,17 +255,30 @@ void llama_grammar_accept(
259255
}
260256
}
261257

262-
static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates(
263-
const std::vector<std::vector<llama_grammar_element>> & rules,
264-
const std::vector<std::vector<const llama_grammar_element *>> & stacks,
265-
const std::vector<llama_grammar_candidate> & candidates);
258+
// Runs `candidates` through llama_grammar_reject_candidates_for_stack once per
// stack, feeding each call's result into the next, and returns the final list.
// `stacks` must be non-empty (asserted below); an empty `candidates` list
// yields an empty result without touching the stacks.
// NOTE(review): presumably the per-stack call returns the subset of its input
// that the given stack rejects, so the result is the candidates rejected by
// every stack - confirm against llama_grammar_reject_candidates_for_stack.
static llama_grammar_candidates llama_grammar_reject_candidates(
259+
const llama_grammar_rules & rules,
260+
const llama_grammar_stacks & stacks,
261+
const llama_grammar_candidates & candidates) {
262+
GGML_ASSERT(!stacks.empty()); // REVIEW
263+
264+
// nothing to filter
if (candidates.empty()) {
265+
return {};
266+
}
267+
268+
// seed the result with the rejects of the first stack
auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
269+
270+
// each remaining stack only sees what the previous stacks already rejected
for (size_t i = 1, size = stacks.size(); i < size; ++i) {
271+
rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
272+
}
273+
return rejects;
274+
}
266275

267-
std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
268-
const std::vector<std::vector<llama_grammar_element>> & rules,
269-
const std::vector<const llama_grammar_element *> & stack,
270-
const std::vector<llama_grammar_candidate> & candidates) {
276+
llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
277+
const llama_grammar_rules & rules,
278+
const llama_grammar_stack & stack,
279+
const llama_grammar_candidates & candidates) {
271280

272-
std::vector<llama_grammar_candidate> rejects;
281+
llama_grammar_candidates rejects;
273282
rejects.reserve(candidates.size());
274283

275284
if (stack.empty()) {
@@ -283,7 +292,7 @@ std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
283292

284293
const llama_grammar_element * stack_pos = stack.back();
285294

286-
std::vector<llama_grammar_candidate> next_candidates;
295+
llama_grammar_candidates next_candidates;
287296
next_candidates.reserve(candidates.size());
288297

289298
for (const auto & tok : candidates) {
@@ -304,11 +313,11 @@ std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
304313
const auto * stack_pos_after = llama_grammar_match_char(stack_pos, 0).second;
305314

306315
// update top of stack to next element, if any
307-
std::vector<const llama_grammar_element *> stack_after(stack.begin(), stack.end() - 1);
316+
llama_grammar_stack stack_after(stack.begin(), stack.end() - 1);
308317
if (!llama_grammar_is_end_of_sequence(stack_pos_after)) {
309318
stack_after.push_back(stack_pos_after);
310319
}
311-
std::vector<std::vector<const llama_grammar_element *>> next_stacks;
320+
llama_grammar_stacks next_stacks;
312321
llama_grammar_advance_stack(rules, stack_after, next_stacks);
313322

314323
auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
@@ -319,37 +328,19 @@ std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
319328
return rejects;
320329
}
321330

322-
static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates(
323-
const std::vector<std::vector<llama_grammar_element>> & rules,
324-
const std::vector<std::vector<const llama_grammar_element *>> & stacks,
325-
const std::vector<llama_grammar_candidate> & candidates) {
326-
GGML_ASSERT(!stacks.empty()); // REVIEW
327-
328-
if (candidates.empty()) {
329-
return std::vector<llama_grammar_candidate>();
330-
}
331-
332-
auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
333-
334-
for (size_t i = 1, size = stacks.size(); i < size; ++i) {
335-
rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
336-
}
337-
return rejects;
338-
}
339-
340331
static bool llama_grammar_detect_left_recursion(
341-
const std::vector<std::vector<llama_grammar_element>> & rules,
342-
size_t rule_index,
343-
std::vector<bool> * rules_visited,
344-
std::vector<bool> * rules_in_progress,
345-
std::vector<bool> * rules_may_be_empty) {
332+
const llama_grammar_rules & rules,
333+
size_t rule_index,
334+
std::vector<bool> * rules_visited,
335+
std::vector<bool> * rules_in_progress,
336+
std::vector<bool> * rules_may_be_empty) {
346337
if ((*rules_in_progress)[rule_index]) {
347338
return true;
348339
}
349340

350341
(*rules_in_progress)[rule_index] = true;
351342

352-
const std::vector<llama_grammar_element> & rule = rules[rule_index];
343+
const llama_grammar_rule & rule = rules[rule_index];
353344

354345
// First check if the rule might produce the empty string. This could be done combined with the second
355346
// step but it's more readable as two steps.
@@ -400,7 +391,7 @@ struct llama_grammar * llama_grammar_init(
400391
const llama_grammar_element * pos;
401392

402393
// copy rule definitions into vectors
403-
std::vector<std::vector<llama_grammar_element>> vec_rules(n_rules);
394+
llama_grammar_rules vec_rules(n_rules);
404395
for (size_t i = 0; i < n_rules; i++) {
405396
for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
406397
vec_rules[i].push_back(*pos);
@@ -423,10 +414,10 @@ struct llama_grammar * llama_grammar_init(
423414
}
424415

425416
// loop over alternates of start rule to build initial stacks
426-
std::vector<std::vector<const llama_grammar_element *>> stacks;
417+
llama_grammar_stacks stacks;
427418
pos = vec_rules[start_rule_index].data();
428419
do {
429-
std::vector<const llama_grammar_element *> stack;
420+
llama_grammar_stack stack;
430421
if (!llama_grammar_is_end_of_sequence(pos)) {
431422
// if alternate is nonempty, add to stack
432423
stack.push_back(pos);
@@ -488,7 +479,7 @@ void llama_grammar_sample(struct llama_context * ctx, llama_token_data_array * c
488479
std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
489480
candidates_decoded.reserve(candidates->size);
490481

491-
std::vector<llama_grammar_candidate> candidates_grammar;
482+
llama_grammar_candidates candidates_grammar;
492483
candidates_grammar.reserve(candidates->size);
493484

494485
for (size_t i = 0; i < candidates->size; ++i) {
@@ -532,11 +523,13 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
532523
// Note terminating 0 in decoded string
533524
const auto decoded = decode_utf8(piece, grammar->partial_utf8);
534525
const auto & code_points = decoded.first;
535-
std::vector<std::vector<const llama_grammar_element *>> tmp_new_stacks;
526+
527+
llama_grammar_stacks tmp_new_stacks;
536528
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
537529
llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks);
538530
grammar->stacks = tmp_new_stacks;
539531
}
532+
540533
grammar->partial_utf8 = decoded.second;
541534
GGML_ASSERT(!grammar->stacks.empty());
542535

0 commit comments

Comments
 (0)