 import llama_cpp.llama_types as llama_types
 import llama_cpp.llama_grammar as llama_grammar
 
+from ._utils import suppress_stdout_stderr
+
 
 class LlamaChatCompletionHandler(Protocol):
     def __call__(
@@ -775,20 +777,26 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
 
 
 class Llava15ChatHandler:
-    def __init__(self, clip_model_path: str):
+    _clip_free = None
+
+    def __init__(self, clip_model_path: str, verbose: bool = False):
         import llama_cpp.llava_cpp as llava_cpp
 
         self._llava_cpp = llava_cpp
         self.clip_model_path = clip_model_path
+        self.verbose = verbose
+        self._clip_free = self._llava_cpp._libllava.clip_free  # type: ignore
 
-        self.clip_ctx = self._llava_cpp.clip_model_load(
-            self.clip_model_path.encode(), 0
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            self.clip_ctx = self._llava_cpp.clip_model_load(
+                self.clip_model_path.encode(), 0
+            )
 
     def __del__(self):
-        if self.clip_ctx is not None:
-            self._llava_cpp.clip_free(self.clip_ctx)
-            self.clip_ctx = None
+        with suppress_stdout_stderr(disable=self.verbose):
+            if self.clip_ctx is not None and self._clip_free is not None:
+                self._clip_free(self.clip_ctx)
+                self.clip_ctx = None
 
     def load_image(self, image_url: str) -> bytes:
         if image_url.startswith("data:"):
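
The `suppress_stdout_stderr` helper imported from `._utils` is not shown in this diff. The sketch below is a minimal, hypothetical version of such a context manager, assuming it redirects the process-level stdout/stderr file descriptors (which also silences output from the native clip/llava code) and that `disable=True` turns the suppression off, matching the `disable=self.verbose` calls above; the real helper in `_utils.py` may be implemented differently.

# Hypothetical sketch only -- not the actual llama_cpp/_utils.py implementation.
import os
import sys


class suppress_stdout_stderr:
    """Context manager that silences fd-level stdout/stderr unless disabled."""

    def __init__(self, disable: bool = True):
        self.disable = disable

    def __enter__(self):
        if self.disable:
            return self
        # Save the original descriptors so they can be restored on exit.
        self._old_stdout = os.dup(sys.stdout.fileno())
        self._old_stderr = os.dup(sys.stderr.fileno())
        self._devnull = os.open(os.devnull, os.O_WRONLY)
        # Point fd 1 and fd 2 at the null device; native C/C++ output goes there too.
        os.dup2(self._devnull, sys.stdout.fileno())
        os.dup2(self._devnull, sys.stderr.fileno())
        return self

    def __exit__(self, *exc):
        if self.disable:
            return False
        # Restore the original descriptors and close the temporaries.
        os.dup2(self._old_stdout, sys.stdout.fileno())
        os.dup2(self._old_stderr, sys.stderr.fileno())
        os.close(self._old_stdout)
        os.close(self._old_stderr)
        os.close(self._devnull)
        return False
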
@@ -881,27 +889,28 @@ def __call__(
                             c_ubyte_ptr = (
                                 ctypes.c_ubyte * len(data_array)
                             ).from_buffer(data_array)
-                            embed = self._llava_cpp.llava_image_embed_make_with_bytes(
-                                ctx_clip=self.clip_ctx,
-                                n_threads=llama.context_params.n_threads,
-                                image_bytes=c_ubyte_ptr,
-                                image_bytes_length=len(image_bytes),
-                            )
-                            # image_bytes_p = (ctypes.c_uint8 * len(image_bytes)).from_buffer_copy(image_bytes)
-                            # embed = self._llava_cpp.llava_image_embed_make_with_bytes(ctx_clip=self.clip_ctx, n_threads=1, image_bytes=image_bytes_p, image_bytes_length=len(image_bytes))
+                            with suppress_stdout_stderr(disable=self.verbose):
+                                embed = self._llava_cpp.llava_image_embed_make_with_bytes(
+                                    ctx_clip=self.clip_ctx,
+                                    n_threads=llama.context_params.n_threads,
+                                    image_bytes=c_ubyte_ptr,
+                                    image_bytes_length=len(image_bytes),
+                                )
                             try:
                                 n_past = ctypes.c_int(llama.n_tokens)
                                 n_past_p = ctypes.pointer(n_past)
-                                self._llava_cpp.llava_eval_image_embed(
-                                    ctx_llama=llama.ctx,
-                                    embed=embed,
-                                    n_batch=llama.n_batch,
-                                    n_past=n_past_p,
-                                )
+                                with suppress_stdout_stderr(disable=self.verbose):
+                                    self._llava_cpp.llava_eval_image_embed(
+                                        ctx_llama=llama.ctx,
+                                        embed=embed,
+                                        n_batch=llama.n_batch,
+                                        n_past=n_past_p,
+                                    )
                                 assert llama.n_ctx() >= n_past.value
                                 llama.n_tokens = n_past.value
                             finally:
-                                self._llava_cpp.llava_image_embed_free(embed)
+                                with suppress_stdout_stderr(disable=self.verbose):
+                                    self._llava_cpp.llava_image_embed_free(embed)
             if message["role"] == "assistant" and message["content"] is not None:
                 llama.eval(
                     llama.tokenize(
@@ -910,7 +919,7 @@ def __call__(
                 )
         llama.eval(llama.tokenize(f"{assistant_role}".encode("utf8"), add_bos=False))
 
-        prompt = llama._input_ids.tolist()
+        prompt = llama.input_ids[: llama.n_tokens].tolist()
 
         return _convert_completion_to_chat(
             llama.create_completion(
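
A rough usage sketch under the API shown in this diff: the handler is constructed with `verbose=False` so the clip model load and image-embedding calls stay quiet, then passed to `Llama` as its chat handler. The model paths and image URL are placeholders, and the surrounding `Llama(...)` arguments are assumptions based on llama-cpp-python's multimodal support rather than part of this change.

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# Placeholder paths; point these at a real LLaVA GGUF model and its CLIP projector.
chat_handler = Llava15ChatHandler(clip_model_path="path/to/mmproj.gguf", verbose=False)

llm = Llama(
    model_path="path/to/llava-v1.5.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,
    logits_all=True,  # the llava handler asserts this is enabled
    verbose=False,
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is shown in this image?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.png"},
                },
            ],
        }
    ],
)
print(response["choices"][0]["message"]["content"])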