Commit a17445b

GregoryComer authored and malfet committed
Clean up CLI output (#473)
1 parent 27f3778 commit a17445b

File tree

build/builder.py
build/model.py
generate.py
tokenizer/tiktoken.py

4 files changed: +11 -13 lines changed

build/builder.py

Lines changed: 1 addition & 1 deletion

@@ -340,7 +340,7 @@ def _initialize_model(
     quantize,
     tokenizer=None,
 ):
-    print("Loading model ...")
+    print("Loading model...")

     if builder_args.gguf_path and (builder_args.dso_path or builder_args.pte_path):
         print("Setting gguf_kwargs for generate.")

build/model.py

Lines changed: 0 additions & 2 deletions

@@ -68,7 +68,6 @@ def from_params(cls, params_path):

     @classmethod
     def from_table(cls, name: str):
-        print(f"name {name}")
         json_path = config_path / f"{name}.json"
         if json_path.is_file():
             return ModelArgs.from_params(json_path)
@@ -82,7 +81,6 @@ def from_table(cls, name: str):

     @classmethod
     def from_name(cls, name: str):
-        print(f"name {name}")
         json_path = config_path / f"{name}.json"
         if Path(json_path).is_file():
             return ModelArgs.from_params(json_path)
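The two deleted print(f"name {name}") calls fired on every config lookup. If that trace is ever needed again, a minimal sketch consistent with the rest of this commit (which demotes chatty output to debug level) would route it through a logger. This is an alternative not taken here, not the commit's approach; the ModelArgs body below is reduced to the lookup only, and config_path is a stand-in value:

    import logging
    from pathlib import Path

    logger = logging.getLogger(__name__)
    config_path = Path("config/data")  # stand-in path, for illustration only

    class ModelArgs:
        @classmethod
        def from_params(cls, params_path):
            return cls()  # the real version parses the JSON config (elided)

        @classmethod
        def from_name(cls, name: str):
            # Keep the lookup trace, but only when debug logging is enabled.
            logger.debug("Looking up model config for %s", name)
            json_path = config_path / f"{name}.json"
            if json_path.is_file():
                return cls.from_params(json_path)
            raise ValueError(f"unknown model name: {name}")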

generate.py

Lines changed: 8 additions & 8 deletions

@@ -539,9 +539,7 @@ def _main(
     if generator_args.chat_mode:
         max_seq_length = 2048
         print(f"Entering Chat Mode. Will continue chatting back and forth with the language model until the models max context length of {max_seq_length} tokens is hit or until the user says /bye")
-        get_system_prompt = input("Do you want to enter a system prompt? Enter y for yes and anything else for no. \n")
-        if (get_system_prompt == "y" or get_system_prompt == "Y"):
-            system_prompt = input("What is your system prompt? \n")
+        system_prompt = input("System Prompt [Optional]: ")
         if is_llama3_model:
             chat_formatter = ChatFormat(tokenizer)
         else:
@@ -567,12 +565,12 @@ def _main(
         i += 1
         device_sync(device=builder_args.device)
         if i >= 0 and generator_args.chat_mode:
-            prompt = input("What is your prompt? \n")
+            prompt = input("User: ")
             if (prompt == "/bye"):
                 print("Exiting Chat.\n")
                 break
             if not is_llama3_model:
-                if system_prompt is not None:
+                if system_prompt:
                     prompt = f"{B_INST} {B_SYS}\n{system_prompt.strip()}\n{E_SYS}\n\n{prompt.strip} {E_INST}"
                     system_prompt = None # can only provide system prompt on first interaction
                 else:
@@ -581,7 +579,7 @@ def _main(
                     tokenizer, prompt, bos=True, device=builder_args.device
                 )
             else:
-                if system_prompt is not None:
+                if system_prompt:
                     encoded = chat_formatter.encode_dialog_prompt([{"role" : "system", "content" : system_prompt}, {"role" : "user", "content" : prompt}])
                     system_prompt = None
                 elif(i == 0):
@@ -595,6 +593,8 @@ def _main(
                 break

         if generator_args.chat_mode and i >= 0:
+            print("Model: ", end="")
+
             buffer = []
             period_id = tokenizer.encode(".")[0]
             done_generating = False
@@ -667,10 +667,10 @@ def callback(x):
         tokens_generated = y.size(0) - prompt_length
         tokens_sec = tokens_generated / t
         aggregate_metrics["tokens_per_sec"].append(tokens_sec)
-        logging.info(
+        logging.debug(
             f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_sec:.02f} tokens/sec"
         )
-        logging.info(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")
+        logging.debug(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")

         if (start_pos >= max_seq_length):
             print("Max Sequence Length Reached. Ending Conversation.")

tokenizer/tiktoken.py

Lines changed: 2 additions & 2 deletions

@@ -85,7 +85,7 @@ def __init__(self, model_path: str):
             mergeable_ranks=mergeable_ranks,
             special_tokens=self.special_tokens,
         )
-        logger.info(f"Reloaded Tiktoken model from {model_path}")
+        logger.debug(f"Reloaded Tiktoken model from {model_path}")

         # BOS / EOS token IDs
         self.n_words: int = self.model.n_vocab
@@ -96,7 +96,7 @@ def __init__(self, model_path: str):
             self.special_tokens["<|end_of_text|>"],
             self.special_tokens["<|eot_id|>"],
         }
-        logger.info(
+        logger.debug(
             f"#words: {self.n_words} - BOS ID: {self._bos_id} - EOS ID: {self._eos_id}"
         )
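With these messages demoted from info to debug, tokenizer loading is silent under the default logging configuration. A minimal sketch of how a caller could surface them again, using only the standard-library logging module (the logger name below is inferred from the file path and may differ in the repo):

    import logging

    # Install a root handler and show everything at DEBUG and above:
    logging.basicConfig(level=logging.DEBUG)

    # Or keep the root at INFO and raise only this module's logger
    # (logger name assumed from the file path):
    # logging.getLogger("tokenizer.tiktoken").setLevel(logging.DEBUG)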
