|
| 1 | +#define LLAMA_API_INTERNAL |
| 2 | + |
| 3 | +#include "grammar-parser.h" |
| 4 | +#include "ggml.h" |
| 5 | +#include "llama.h" |
| 6 | +#include "unicode.h" |
| 7 | + |
| 8 | +#include <cstdio> |
| 9 | +#include <cstdlib> |
| 10 | +#include <string> |
| 11 | +#include <vector> |
| 12 | + |
| 13 | +static bool llama_sample_grammar_string(struct llama_grammar * grammar, const std::string & input_str, size_t & error_pos, std::string & error_msg) { |
| 14 | + auto decoded = decode_utf8(input_str, {}); |
| 15 | + const auto & code_points = decoded.first; |
| 16 | + |
| 17 | + size_t pos = 0; |
| 18 | + for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { |
| 19 | + auto prev_stacks = grammar->stacks; |
| 20 | + grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it); |
| 21 | + if (grammar->stacks.empty()) { |
| 22 | + error_pos = pos; |
| 23 | + error_msg = "Unexpected character '" + unicode_cpt_to_utf8(*it) + "'"; |
| 24 | + grammar->stacks = prev_stacks; |
| 25 | + return false; |
| 26 | + } |
| 27 | + ++pos; |
| 28 | + } |
| 29 | + |
| 30 | + for (const auto & stack : grammar->stacks) { |
| 31 | + if (stack.empty()) { |
| 32 | + return true; |
| 33 | + } |
| 34 | + } |
| 35 | + |
| 36 | + error_pos = pos; |
| 37 | + error_msg = "Unexpected end of input"; |
| 38 | + return false; |
| 39 | +} |
| 40 | + |
// Report a validation failure on stdout and echo the input with the offending
// character highlighted using ANSI colour escapes.
//
//   input_str - the raw (UTF-8) input that was validated
//   error_pos - index of the offending code point as counted by
//               llama_sample_grammar_string (one step per decoded code point);
//               for an "end of input" error it equals the code point count
//   error_msg - human readable description of the failure
//
// error_pos counts code points while input_str is indexed in bytes, so the
// index is first translated to a byte offset. Using it directly as a byte
// offset would, for non-ASCII input, highlight the wrong place and could cut a
// multi-byte character in half. For pure ASCII input the output is unchanged.
//
// NOTE(review): the translation assumes one decoded code point per UTF-8
// sequence; confirm how decode_utf8 treats malformed input.
static void print_error_message(const std::string & input_str, size_t error_pos, const std::string & error_msg) {
    const size_t n_bytes = input_str.size();

    // Byte offset just past the character starting at `offset`:
    // one lead byte plus any UTF-8 continuation bytes (bit pattern 10xxxxxx).
    const auto next_char = [&input_str, n_bytes](size_t offset) {
        ++offset;
        while (offset < n_bytes && (static_cast<unsigned char>(input_str[offset]) & 0xC0) == 0x80) {
            ++offset;
        }
        return offset;
    };

    // Walk error_pos characters forward to find where the bad one starts.
    size_t err_begin = 0;
    for (size_t n_chars = 0; n_chars < error_pos && err_begin < n_bytes; ++n_chars) {
        err_begin = next_char(err_begin);
    }

    fprintf(stdout, "Input string is invalid according to the grammar.\n");
    fprintf(stdout, "Error: %s at position %zu\n", error_msg.c_str(), error_pos);
    fprintf(stdout, "\n");
    fprintf(stdout, "Input string:\n");

    // Everything before the error is printed as-is.
    fprintf(stdout, "%s", input_str.substr(0, err_begin).c_str());

    if (err_begin < n_bytes) {
        const size_t err_end = next_char(err_begin);

        // Offending character in bold red; fwrite keeps all of its bytes together.
        fprintf(stdout, "\033[1;31m");
        fwrite(input_str.data() + err_begin, 1, err_end - err_begin, stdout);

        // Remainder of the input in plain red.
        if (err_end < n_bytes) {
            fprintf(stdout, "\033[0;31m%s", input_str.substr(err_end).c_str());
        }
        fprintf(stdout, "\033[0m\n");
    }
}
| 55 | + |
| 56 | +int main(int argc, char** argv) { |
| 57 | + if (argc != 3) { |
| 58 | + fprintf(stdout, "Usage: %s <grammar_filename> <input_filename>\n", argv[0]); |
| 59 | + return 1; |
| 60 | + } |
| 61 | + |
| 62 | + const std::string grammar_filename = argv[1]; |
| 63 | + const std::string input_filename = argv[2]; |
| 64 | + |
| 65 | + // Read the GBNF grammar file |
| 66 | + FILE* grammar_file = fopen(grammar_filename.c_str(), "r"); |
| 67 | + if (!grammar_file) { |
| 68 | + fprintf(stdout, "Failed to open grammar file: %s\n", grammar_filename.c_str()); |
| 69 | + return 1; |
| 70 | + } |
| 71 | + |
| 72 | + fseek(grammar_file, 0, SEEK_END); |
| 73 | + size_t grammar_size = ftell(grammar_file); |
| 74 | + fseek(grammar_file, 0, SEEK_SET); |
| 75 | + |
| 76 | + std::string grammar_str(grammar_size, ' '); |
| 77 | + fread(&grammar_str[0], 1, grammar_size, grammar_file); |
| 78 | + fclose(grammar_file); |
| 79 | + |
| 80 | + // Parse the GBNF grammar |
| 81 | + auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); |
| 82 | + |
| 83 | + // will be empty (default) if there are parse errors |
| 84 | + if (parsed_grammar.rules.empty()) { |
| 85 | + fprintf(stdout, "%s: failed to parse grammar\n", __func__); |
| 86 | + return 1; |
| 87 | + } |
| 88 | + |
| 89 | + // Ensure that there is a "root" node. |
| 90 | + if (parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()) { |
| 91 | + fprintf(stdout, "%s: grammar does not contain a 'root' symbol\n", __func__); |
| 92 | + return 1; |
| 93 | + } |
| 94 | + |
| 95 | + std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules()); |
| 96 | + |
| 97 | + // Create the LLAMA grammar |
| 98 | + auto grammar = llama_grammar_init( |
| 99 | + grammar_rules.data(), |
| 100 | + grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); |
| 101 | + |
| 102 | + // Read the input file |
| 103 | + FILE* input_file = fopen(input_filename.c_str(), "r"); |
| 104 | + if (!input_file) { |
| 105 | + fprintf(stdout, "Failed to open input file: %s\n", input_filename.c_str()); |
| 106 | + return 1; |
| 107 | + } |
| 108 | + |
| 109 | + fseek(input_file, 0, SEEK_END); |
| 110 | + size_t input_size = ftell(input_file); |
| 111 | + fseek(input_file, 0, SEEK_SET); |
| 112 | + |
| 113 | + std::string input_str(input_size, ' '); |
| 114 | + fread(&input_str[0], 1, input_size, input_file); |
| 115 | + fclose(input_file); |
| 116 | + |
| 117 | + // Validate the input string against the grammar |
| 118 | + size_t error_pos; |
| 119 | + std::string error_msg; |
| 120 | + bool is_valid = llama_sample_grammar_string(grammar, input_str, error_pos, error_msg); |
| 121 | + |
| 122 | + if (is_valid) { |
| 123 | + fprintf(stdout, "Input string is valid according to the grammar.\n"); |
| 124 | + } else { |
| 125 | + print_error_message(input_str, error_pos, error_msg); |
| 126 | + } |
| 127 | + |
| 128 | + // Clean up |
| 129 | + llama_grammar_free(grammar); |
| 130 | + |
| 131 | + return 0; |
| 132 | +} |
0 commit comments