@@ -113,6 +113,7 @@ class Opt {
     llama_context_params ctx_params;
     llama_model_params model_params;
     std::string model_;
+    std::string chat_template_file;
     std::string user;
     bool use_jinja = false;
     int context_size = -1, ngl = -1;
@@ -148,6 +149,16 @@ class Opt {
         return 0;
     }
 
+    int handle_option_with_value(int argc, const char ** argv, int & i, std::string & option_value) {
+        if (i + 1 >= argc) {
+            return 1;
+        }
+
+        option_value = argv[++i];
+
+        return 0;
+    }
+
     int parse(int argc, const char ** argv) {
         bool options_parsing = true;
         for (int i = 1, positional_args_i = 0; i < argc; ++i) {
@@ -169,6 +180,11 @@ class Opt {
                 verbose = true;
             } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
                 use_jinja = true;
+            } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
+                if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
+                    return 1;
+                }
+                use_jinja = true;
             } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
                 help = true;
                 return 0;
@@ -207,6 +223,11 @@ class Opt {
             "Options:\n"
             "  -c, --context-size <value>\n"
             "      Context size (default: %d)\n"
+            "  --chat-template-file <path>\n"
+            "      Path to the file containing the chat template to use with the model.\n"
+            "      Only supports jinja templates and implicitly sets the --jinja flag.\n"
+            "  --jinja\n"
+            "      Use jinja templating for the chat template of the model\n"
             "  -n, -ngl, --ngl <value>\n"
             "      Number of GPU layers (default: %d)\n"
             "  --temp <value>\n"
@@ -1074,12 +1095,44 @@ static int get_user_input(std::string & user_input, const std::string & user) {
     return 0;
 }
 
+// Reads a chat template file to be used
+static std::string read_chat_template_file(const std::string & chat_template_file) {
+    if (chat_template_file.empty()) {
+        return "";
+    }
+
+    FILE * file = ggml_fopen(chat_template_file.c_str(), "r");
+    if (!file) {
+        std::cerr << "Error opening chat template file '" << chat_template_file << "': " << strerror(errno) << "\n";
+        return "";
+    }
+
+    fseek(file, 0, SEEK_END);
+    size_t size = ftell(file);
+    fseek(file, 0, SEEK_SET);
+
+    std::vector<unsigned char> data(size);
+    size_t read_size = fread(data.data(), 1, size, file);
+    fclose(file);
+    if (read_size != size) {
+        std::cerr << "Error reading chat template file '" << chat_template_file << "': " << strerror(errno) << "\n";
+        return "";
+    }
+    return std::string(data.begin(), data.end());
+}
+
 // Main chat loop function
-static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_jinja) {
+static int chat_loop(LlamaData & llama_data, const std::string & user, const std::string & chat_template_file, bool use_jinja) {
     int prev_len = 0;
     llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
-    auto chat_templates = common_chat_templates_from_model(llama_data.model.get(), "");
+
+    std::string chat_template = "";
+    if (!chat_template_file.empty()) {
+        chat_template = read_chat_template_file(chat_template_file);
+    }
+    auto chat_templates = common_chat_templates_from_model(llama_data.model.get(), chat_template);
     GGML_ASSERT(chat_templates.template_default);
+
     static const bool stdout_a_terminal = is_stdout_a_terminal();
     while (true) {
         // Get user input
@@ -1165,7 +1218,7 @@ int main(int argc, const char ** argv) {
         return 1;
     }
 
-    if (chat_loop(llama_data, opt.user, opt.use_jinja)) {
+    if (chat_loop(llama_data, opt.user, opt.chat_template_file, opt.use_jinja)) {
         return 1;
     }
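
With these changes applied, the new option can be exercised from the command line. A minimal sketch, assuming the llama-run binary built from this source; the template and model paths are placeholders, not taken from the diff:

    # hypothetical invocation; chat_template.jinja is any Jinja chat template on disk
    llama-run --chat-template-file chat_template.jinja model.gguf

Since --chat-template-file implicitly sets use_jinja, no separate --jinja flag is needed. If the file is missing or unreadable, read_chat_template_file logs the error and returns an empty string, so common_chat_templates_from_model falls back to the model's built-in template, exactly as before this change.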