@@ -824,7 +824,8 @@ int main(int argc, char ** argv) {
824
824
params.n_predict = std::min (params.n_predict , model.hparams .n_ctx - (int ) embd_inp.size ());
825
825
826
826
// tokenize the reverse prompt
827
- std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize (vocab, params.antiprompt , false );
827
+ std::vector<gpt_vocab::id> antiprompt_inp;
828
+ processor.Encode (params.antiprompt , &antiprompt_inp);
828
829
829
830
printf (" \n " );
830
831
printf (" %s: prompt: '%s'\n " , __func__, params.prompt .c_str ());
@@ -951,29 +952,28 @@ int main(int argc, char ** argv) {
951
952
952
953
// display text
953
954
if (!input_noecho) {
954
-
955
- // std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
956
- // printf("[%s]", check.c_str());
957
- // if(check != "�") { // ensure a multi-byte token is finished generating before outputting the text
958
- // std::string text;
959
- // processor.Decode(all_tokens, &text);
960
- // std::string chunk = text.substr(full_text.length());
961
- // printf("%s", chunk.c_str());
962
- // full_text.reserve (text.size());
963
- // full_text += chunk;
964
-
965
- // // reset color to default if we there is no pending user input
966
- // if (params.use_color && embd_inp.size() <= input_consumed) {
967
- // printf(ANSI_COLOR_RESET);
968
- // }
969
- // fflush(stdout);
970
- // }
971
-
972
- // The code above crashes and is WIP any help appreciated
973
- std::string text;
974
- processor.Decode (all_tokens, &text);
975
- printf (" %s\n " , text.c_str ());
976
- fflush (stdout);
955
+ // check whether the last token is an unprintable token
956
+ std::string check;
957
+ std::vector<gpt_vocab::id> check_token;
958
+ check_token.push_back (all_tokens.at (all_tokens.size ()-1 ));
959
+ processor.Decode (check_token, &check);
960
+ if (check != " �" ) {
961
+ // If the last token is printable, print the new text; we won't attempt to print unprintable tokens
962
+ std::string text;
963
+ processor.Decode (all_tokens, &text);
964
+ if (full_text.length () < text.length ()) {
965
+ std::string chunk = text.substr (full_text.length ());
966
+ printf (" %s" , chunk.c_str ());
967
+ full_text.empty ();
968
+ processor.Decode (all_tokens, &full_text);
969
+ // reset color to default if there is no pending user input
970
+ if (params.use_color && embd_inp.size () <= input_consumed) {
971
+ printf (ANSI_COLOR_RESET);
972
+ }
973
+ fflush (stdout);
974
+ }
975
+
976
+ }
977
977
}
978
978
979
979
// in interactive mode, and not currently processing queued inputs;
0 commit comments