Commit ca4cb88

Fix destructor NoneType is not callable error
1 parent 01cb3a0 commit ca4cb88
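
What the fix does: each wrapper's __del__ used to reach the native free function through a reference that can already have been cleared to None by the time the destructor runs (typically during interpreter shutdown, or when construction failed partway), which raised "TypeError: 'NoneType' object is not callable". The commit defaults the class attribute to None, binds the ctypes free function to the instance inside __init__, and makes __del__ check both the resource and the bound function before calling either. Below is a minimal, self-contained sketch of that pattern; _NativeHandle and _fake_native_free are illustrative stand-ins, not names from the library.

# Hypothetical sketch of the guarded-destructor pattern this commit applies to
# _LlamaModel, _LlamaContext, _LlamaBatch and Llava15ChatHandler.

def _fake_native_free(handle):
    # Stand-in for a ctypes function such as llama_cpp._lib.llama_free_model.
    print(f"freed handle {handle!r}")


class _NativeHandle:
    _free = None  # class-level default; stays None if __init__ never ran

    def __init__(self, handle):
        # Bind the free function to the instance while the module is fully loaded.
        self._free = _fake_native_free
        self.handle = handle

    def __del__(self):
        # Only call the native free if both the resource and the function survived.
        if self.handle is not None and self._free is not None:
            self._free(self.handle)
            self.handle = None


if __name__ == "__main__":
    obj = _NativeHandle(handle=42)
    del obj  # triggers __del__ with the guard in place

The same guard appears in each file below: _llama_free_model, _llama_free, _llama_batch_free and _clip_free are now bound per instance and checked for None before use.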

File tree

3 files changed: 45 additions & 30 deletions

llama_cpp/llama.py
llama_cpp/llama_chat_format.py
llama_cpp/server/app.py

llama_cpp/llama.py

Lines changed: 12 additions & 6 deletions
@@ -213,7 +213,7 @@ class _LlamaModel:
 
     NOTE: For stability it's recommended you use the Llama class instead."""
 
-    _llama_free_model = llama_cpp._lib.llama_free_model  # type: ignore
+    _llama_free_model = None
 
     def __init__(
         self,
@@ -226,6 +226,8 @@ def __init__(
         self.params = params
         self.verbose = verbose
 
+        self._llama_free_model = llama_cpp._lib.llama_free_model  # type: ignore
+
         if not os.path.exists(path_model):
             raise ValueError(f"Model path does not exist: {path_model}")
 
@@ -236,7 +238,7 @@ def __init__(
 
     def __del__(self):
         with suppress_stdout_stderr(disable=self.verbose):
-            if self.model is not None:
+            if self.model is not None and self._llama_free_model is not None:
                 self._llama_free_model(self.model)
             self.model = None
 
@@ -396,7 +398,7 @@ class _LlamaContext:
 
     NOTE: For stability it's recommended you use the Llama class instead."""
 
-    _llama_free = llama_cpp._lib.llama_free  # type: ignore
+    _llama_free = None
 
     def __init__(
         self,
@@ -409,14 +411,16 @@ def __init__(
         self.params = params
         self.verbose = verbose
 
+        self._llama_free = llama_cpp._lib.llama_free  # type: ignore
+
         with suppress_stdout_stderr(disable=self.verbose):
             self.ctx = llama_cpp.llama_new_context_with_model(
                 self.model.model, self.params
             )
 
     def __del__(self):
         with suppress_stdout_stderr(disable=self.verbose):
-            if self.ctx is not None:
+            if self.ctx is not None and self._llama_free is not None:
                 self._llama_free(self.ctx)
             self.ctx = None
 
@@ -645,7 +649,7 @@ def default_params():
 
 
 class _LlamaBatch:
-    _llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
+    _llama_batch_free = None
 
     def __init__(
         self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
@@ -655,14 +659,16 @@ def __init__(
         self.n_seq_max = n_seq_max
         self.verbose = verbose
 
+        self._llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
+
         with suppress_stdout_stderr(disable=self.verbose):
             self.batch = llama_cpp.llama_batch_init(
                 self.n_tokens, self.embd, self.n_seq_max
             )
 
     def __del__(self):
         with suppress_stdout_stderr(disable=self.verbose):
-            if self.batch is not None:
+            if self.batch is not None and self._llama_batch_free is not None:
                 self._llama_batch_free(self.batch)
             self.batch = None
 
llama_cpp/llama_chat_format.py

Lines changed: 32 additions & 23 deletions
@@ -9,6 +9,8 @@
 import llama_cpp.llama_types as llama_types
 import llama_cpp.llama_grammar as llama_grammar
 
+from ._utils import suppress_stdout_stderr
+
 
 class LlamaChatCompletionHandler(Protocol):
     def __call__(
@@ -775,20 +777,26 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
 
 
 class Llava15ChatHandler:
-    def __init__(self, clip_model_path: str):
+    _clip_free = None
+
+    def __init__(self, clip_model_path: str, verbose: bool = False):
         import llama_cpp.llava_cpp as llava_cpp
 
         self._llava_cpp = llava_cpp
         self.clip_model_path = clip_model_path
+        self.verbose = verbose
+        self._clip_free = self._llava_cpp._libllava.clip_free  # type: ignore
 
-        self.clip_ctx = self._llava_cpp.clip_model_load(
-            self.clip_model_path.encode(), 0
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            self.clip_ctx = self._llava_cpp.clip_model_load(
+                self.clip_model_path.encode(), 0
+            )
 
     def __del__(self):
-        if self.clip_ctx is not None:
-            self._llava_cpp.clip_free(self.clip_ctx)
-            self.clip_ctx = None
+        with suppress_stdout_stderr(disable=self.verbose):
+            if self.clip_ctx is not None and self._clip_free is not None:
+                self._clip_free(self.clip_ctx)
+                self.clip_ctx = None
 
     def load_image(self, image_url: str) -> bytes:
         if image_url.startswith("data:"):
@@ -881,27 +889,28 @@ def __call__(
                     c_ubyte_ptr = (
                         ctypes.c_ubyte * len(data_array)
                     ).from_buffer(data_array)
-                    embed = self._llava_cpp.llava_image_embed_make_with_bytes(
-                        ctx_clip=self.clip_ctx,
-                        n_threads=llama.context_params.n_threads,
-                        image_bytes=c_ubyte_ptr,
-                        image_bytes_length=len(image_bytes),
-                    )
-                    # image_bytes_p = (ctypes.c_uint8 * len(image_bytes)).from_buffer_copy(image_bytes)
-                    # embed = self._llava_cpp.llava_image_embed_make_with_bytes(ctx_clip=self.clip_ctx, n_threads=1, image_bytes=image_bytes_p, image_bytes_length=len(image_bytes))
+                    with suppress_stdout_stderr(disable=self.verbose):
+                        embed = self._llava_cpp.llava_image_embed_make_with_bytes(
+                            ctx_clip=self.clip_ctx,
+                            n_threads=llama.context_params.n_threads,
+                            image_bytes=c_ubyte_ptr,
+                            image_bytes_length=len(image_bytes),
+                        )
                     try:
                         n_past = ctypes.c_int(llama.n_tokens)
                         n_past_p = ctypes.pointer(n_past)
-                        self._llava_cpp.llava_eval_image_embed(
-                            ctx_llama=llama.ctx,
-                            embed=embed,
-                            n_batch=llama.n_batch,
-                            n_past=n_past_p,
-                        )
+                        with suppress_stdout_stderr(disable=self.verbose):
+                            self._llava_cpp.llava_eval_image_embed(
+                                ctx_llama=llama.ctx,
+                                embed=embed,
+                                n_batch=llama.n_batch,
+                                n_past=n_past_p,
+                            )
                         assert llama.n_ctx() >= n_past.value
                         llama.n_tokens = n_past.value
                     finally:
-                        self._llava_cpp.llava_image_embed_free(embed)
+                        with suppress_stdout_stderr(disable=self.verbose):
+                            self._llava_cpp.llava_image_embed_free(embed)
             if message["role"] == "assistant" and message["content"] is not None:
                 llama.eval(
                     llama.tokenize(
@@ -910,7 +919,7 @@ def __call__(
                 )
         llama.eval(llama.tokenize(f"{assistant_role}".encode("utf8"), add_bos=False))
 
-        prompt = llama._input_ids.tolist()
+        prompt = llama.input_ids[:llama.n_tokens].tolist()
 
         return _convert_completion_to_chat(
             llama.create_completion(
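
The hunks above route every native clip/llava call through suppress_stdout_stderr(disable=self.verbose), newly imported from ._utils, so C-side logging is silenced unless the handler was created with verbose=True. The real helper lives in llama_cpp/_utils.py; the sketch below is only a hypothetical stand-in (suppress_native_output) with the same calling convention, showing one way such a context manager can be built from file-descriptor redirection.

import os
import sys
from contextlib import contextmanager


# Hypothetical stand-in for a suppress_stdout_stderr-style helper: silence
# C-level stdout/stderr (file descriptors 1 and 2) unless disable=True.
@contextmanager
def suppress_native_output(disable: bool = False):
    if disable:
        yield
        return
    sys.stdout.flush()
    sys.stderr.flush()
    saved_out, saved_err = os.dup(1), os.dup(2)  # keep the original fds
    devnull = os.open(os.devnull, os.O_WRONLY)
    try:
        os.dup2(devnull, 1)  # anything the native library prints goes to /dev/null
        os.dup2(devnull, 2)
        yield
    finally:
        os.dup2(saved_out, 1)  # restore the original stdout/stderr
        os.dup2(saved_err, 2)
        os.close(devnull)
        os.close(saved_out)
        os.close(saved_err)

Used as "with suppress_native_output(disable=verbose): ...", it mirrors how the diff wraps clip_model_load, llava_image_embed_make_with_bytes, llava_eval_image_embed and llava_image_embed_free.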

llama_cpp/server/app.py

Lines changed: 1 addition & 1 deletion
@@ -384,7 +384,7 @@ def create_app(settings: Optional[Settings] = None):
     chat_handler = None
     if settings.chat_format == "llava-1-5":
         assert settings.clip_model_path is not None
-        chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(clip_model_path=settings.clip_model_path)
+        chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(clip_model_path=settings.clip_model_path, verbose=settings.verbose)
     ##
 
     llama = llama_cpp.Llama(
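
This one-line change forwards the server's verbose setting to the handler, so settings.verbose now also controls the clip/llava console output. For comparison, the same handler can be constructed directly through the Python API; the sketch below follows the library's documented multimodal usage, with the model paths and image URL as placeholders.

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# Placeholder paths: point these at a local llava GGUF model and its CLIP projector.
chat_handler = Llava15ChatHandler(
    clip_model_path="./mmproj-model-f16.gguf",  # placeholder
    verbose=False,  # clip/llava console output stays suppressed unless True
)

llm = Llama(
    model_path="./llava-v1.5-7b.Q4_K_M.gguf",  # placeholder
    chat_handler=chat_handler,
    n_ctx=2048,       # leave room for the image embedding in the context window
    logits_all=True,  # the upstream llava examples enable this
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
)
print(response["choices"][0]["message"]["content"])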
