@@ -1039,16 +1039,19 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
1039
1039
}
1040
1040
if (arg == " --in-prefix-bos" ) {
1041
1041
params.input_prefix_bos = true ;
1042
+ params.enable_chat_template = false ;
1042
1043
return true ;
1043
1044
}
1044
1045
if (arg == " --in-prefix" ) {
1045
1046
CHECK_ARG
1046
1047
params.input_prefix = argv[i];
1048
+ params.enable_chat_template = false ;
1047
1049
return true ;
1048
1050
}
1049
1051
if (arg == " --in-suffix" ) {
1050
1052
CHECK_ARG
1051
1053
params.input_suffix = argv[i];
1054
+ params.enable_chat_template = false ;
1052
1055
return true ;
1053
1056
}
1054
1057
if (arg == " --spm-infill" ) {
@@ -1431,7 +1434,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
1431
1434
" halt generation at PROMPT, return control in interactive mode\n "
1432
1435
" can be specified more than once for multiple prompts" });
1433
1436
options.push_back ({ " main" , " -sp, --special" , " special tokens output enabled (default: %s)" , params.special ? " true" : " false" });
1434
- options.push_back ({ " main" , " -cnv, --conversation" , " run in conversation mode (does not print special tokens and suffix/prefix) (default: %s)" , params.conversation ? " true" : " false" });
1437
+ options.push_back ({ " main" , " -cnv, --conversation" , " run in conversation mode (does not print special tokens and suffix/prefix, use default chat template ) (default: %s)" , params.conversation ? " true" : " false" });
1435
1438
options.push_back ({ " main infill" , " -i, --interactive" , " run in interactive mode (default: %s)" , params.interactive ? " true" : " false" });
1436
1439
options.push_back ({ " main infill" , " -if, --interactive-first" , " run in interactive mode and wait for input right away (default: %s)" , params.interactive_first ? " true" : " false" });
1437
1440
options.push_back ({ " main infill" , " -mli, --multiline-input" , " allows you to write or paste multiple lines without ending each in '\\ '" });
@@ -2693,12 +2696,19 @@ std::string llama_chat_format_single(const struct llama_model * model,
2693
2696
const std::vector<llama_chat_msg> & past_msg,
2694
2697
const llama_chat_msg & new_msg,
2695
2698
bool add_ass) {
2699
+ std::ostringstream ss;
2696
2700
auto fmt_past_msg = llama_chat_apply_template (model, tmpl, past_msg, false );
2697
2701
std::vector<llama_chat_msg> chat_new (past_msg);
2702
+ // if the past_msg ends with a newline, we must preserve it in the formatted version
2703
+ if (add_ass && !fmt_past_msg.empty () && fmt_past_msg.back () == ' \n ' ) {
2704
+ ss << " \n " ;
2705
+ };
2706
+ // format chat with new_msg
2698
2707
chat_new.push_back (new_msg);
2699
2708
auto fmt_new_msg = llama_chat_apply_template (model, tmpl, chat_new, add_ass);
2700
- auto formatted = fmt_new_msg.substr (fmt_past_msg.size (), fmt_new_msg.size () - fmt_past_msg.size ());
2701
- return formatted;
2709
+ // get the diff part
2710
+ ss << fmt_new_msg.substr (fmt_past_msg.size (), fmt_new_msg.size () - fmt_past_msg.size ());
2711
+ return ss.str ();
2702
2712
}
2703
2713
2704
2714
std::string llama_chat_format_example (const struct llama_model * model,
0 commit comments