@@ -396,7 +396,7 @@ def generate(
             and tuple(self.eval_tokens) == tuple(tokens[: len(self.eval_tokens)])
         ):
             if self.verbose:
-                print("generate cache hit", file=sys.stderr)
+                print("Llama.generate: cache hit", file=sys.stderr)
             reset = False
             tokens = tokens[len(self.eval_tokens) :]

@@ -518,7 +518,7 @@ def _create_completion(

         if self.cache and prompt_tokens in self.cache:
             if self.verbose:
-                print("cache hit", file=sys.stderr)
+                print("Llama._create_completion: cache hit", file=sys.stderr)
             self.load_state(self.cache[prompt_tokens])

         finish_reason = "length"
@@ -538,7 +538,7 @@ def _create_completion(
             if self.cache and len(completion_tokens) == 0:
                 if prompt_tokens not in self.cache:
                     if self.verbose:
-                        print("cache miss", file=sys.stderr)
+                        print("Llama._create_completion: cache miss", file=sys.stderr)
                     self.cache[prompt_tokens] = self.save_state()

             completion_tokens.append(token)
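
The cache touched in these hunks is consulted with `in`, indexed by `prompt_tokens`, and written with the result of `save_state()`. A minimal sketch of that mapping contract, using an illustrative class name (not the library's actual implementation):

from typing import Any, Dict, Sequence, Tuple

class TokenStateCache:
    # Illustrative stand-in: maps a prompt's token sequence to a saved
    # model state, mirroring how the diff uses `prompt_tokens in self.cache`,
    # `self.cache[prompt_tokens]`, and `self.cache[prompt_tokens] = state`.
    def __init__(self) -> None:
        self._store: Dict[Tuple[int, ...], Any] = {}

    def __contains__(self, tokens: Sequence[int]) -> bool:
        return tuple(tokens) in self._store

    def __getitem__(self, tokens: Sequence[int]) -> Any:
        return self._store[tuple(tokens)]

    def __setitem__(self, tokens: Sequence[int], state: Any) -> None:
        self._store[tuple(tokens)] = state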
@@ -957,6 +957,8 @@ def save_state(self) -> LlamaState:
             raise RuntimeError("Failed to copy llama state data")
         llama_state_compact = (llama_cpp.c_uint8 * int(n_bytes))()
         llama_cpp.ctypes.memmove(llama_state_compact, llama_state, int(n_bytes))
+        if self.verbose:
+            print(f"Llama.save_state: saving {n_bytes} bytes of llama state", file=sys.stderr)
         return LlamaState(
             eval_tokens=self.eval_tokens.copy(),
             eval_logits=self.eval_logits.copy(),
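
Taken together, the qualified message names make it possible to tell from stderr which code path fired. A minimal usage sketch, assuming the library's `Llama` and `LlamaCache` classes, a `set_cache()` helper, and a local model path (all assumptions here, not shown in the diff):

import llama_cpp

# Hypothetical model path; verbose=True routes the messages above to stderr.
llm = llama_cpp.Llama(model_path="./models/7B/ggml-model.bin", verbose=True)
llm.set_cache(llama_cpp.LlamaCache())  # assumes a set_cache() helper exists

prompt = "Q: What is the capital of France? A:"

# First run: expect "Llama._create_completion: cache miss" followed by
# "Llama.save_state: saving <n> bytes of llama state" on stderr.
llm(prompt, max_tokens=16)

# Second run with the identical prompt: expect
# "Llama._create_completion: cache hit" instead.
llm(prompt, max_tokens=16)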