@@ -368,10 +368,10 @@ def generate(self):
368
368
id = llama_cpp .llama_sample_token_mirostat_v2 (self .ctx , candidates_p , llama_cpp .c_float (self .params .mirostat_tau ), llama_cpp .c_float (self .params .mirostat_eta ), llama_cpp .c_float (mirostat_mu ))
369
369
else :
370
370
# Temperature sampling
371
- llama_cpp .llama_sample_top_k (self .ctx , candidates_p , top_k )
372
- llama_cpp .llama_sample_tail_free (self .ctx , candidates_p , llama_cpp .c_float (self .params .tfs_z ))
373
- llama_cpp .llama_sample_typical (self .ctx , candidates_p , llama_cpp .c_float (self .params .typical_p ))
374
- llama_cpp .llama_sample_top_p (self .ctx , candidates_p , llama_cpp .c_float (self .params .top_p ))
371
+ llama_cpp .llama_sample_top_k (self .ctx , candidates_p , top_k , min_keep = llama_cpp . c_size_t ( 1 ) )
372
+ llama_cpp .llama_sample_tail_free (self .ctx , candidates_p , llama_cpp .c_float (self .params .tfs_z ), min_keep = llama_cpp . c_size_t ( 1 ) )
373
+ llama_cpp .llama_sample_typical (self .ctx , candidates_p , llama_cpp .c_float (self .params .typical_p ), min_keep = llama_cpp . c_size_t ( 1 ) )
374
+ llama_cpp .llama_sample_top_p (self .ctx , candidates_p , llama_cpp .c_float (self .params .top_p ), min_keep = llama_cpp . c_size_t ( 1 ) )
375
375
llama_cpp .llama_sample_temperature (self .ctx , candidates_p , llama_cpp .c_float (self .params .temp ))
376
376
id = llama_cpp .llama_sample_token (self .ctx , candidates_p )
377
377
# print("`{}`".format(candidates_p.size))
@@ -382,12 +382,15 @@ def generate(self):
382
382
# replace end of text token with newline token when in interactive mode
383
383
if (id == llama_cpp .llama_token_eos () and self .params .interactive and not self .params .instruct ):
384
384
id = self .llama_token_newline [0 ]
385
+ self .embd .append (id )
385
386
if (self .use_antiprompt ()):
386
387
# tokenize and inject first reverse prompt
387
388
self .embd_inp += self .first_antiprompt [0 ]
388
-
389
- # add it to the context
390
- self .embd .append (id )
389
+ for id in self .first_antiprompt [0 ]:
390
+ self .embd .append (id )
391
+ else :
392
+ # add it to the context
393
+ self .embd .append (id )
391
394
392
395
# echo this to console
393
396
self .output_echo = True
@@ -493,7 +496,7 @@ def output(self):
493
496
# Contains multi-byte UTF8
494
497
for num , pattern in [(2 , 192 ), (3 , 224 ), (4 , 240 )]:
495
498
# Bitwise AND check
496
- if pattern & int .from_bytes (cur_char ) == pattern :
499
+ if pattern & int .from_bytes (cur_char , 'little' ) == pattern :
497
500
self .multibyte_fix = [cur_char ] + ([None ] * (num - 1 ))
498
501
499
502
# Stop incomplete bytes from passing
0 commit comments