Skip to content

Commit 1d0824b

Browse files
authored
llama : print help to stdout (#2338)
1 parent bc3ec2c commit 1d0824b

File tree

1 file changed: 79 additions and 79 deletions.

examples/common.cpp

Lines changed: 79 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -458,91 +458,91 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
458458
}
459459

460460
void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
461-
fprintf(stderr, "usage: %s [options]\n", argv[0]);
462-
fprintf(stderr, "\n");
463-
fprintf(stderr, "options:\n");
464-
fprintf(stderr, " -h, --help show this help message and exit\n");
465-
fprintf(stderr, " -i, --interactive run in interactive mode\n");
466-
fprintf(stderr, " --interactive-first run in interactive mode and wait for input right away\n");
467-
fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n");
468-
fprintf(stderr, " --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n");
469-
fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n");
470-
fprintf(stderr, " halt generation at PROMPT, return control in interactive mode\n");
471-
fprintf(stderr, " (can be specified more than once for multiple prompts).\n");
472-
fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n");
473-
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
474-
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
475-
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
476-
fprintf(stderr, " prompt to start generation with (default: empty)\n");
477-
fprintf(stderr, " -e process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
478-
fprintf(stderr, " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n");
479-
fprintf(stderr, " --prompt-cache-all if specified, saves user input and generations to cache as well.\n");
480-
fprintf(stderr, " not supported with --interactive or other interactive options\n");
481-
fprintf(stderr, " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n");
482-
fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
483-
fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
484-
fprintf(stderr, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
485-
fprintf(stderr, " -f FNAME, --file FNAME\n");
486-
fprintf(stderr, " prompt file to start generation.\n");
487-
fprintf(stderr, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
488-
fprintf(stderr, " --top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
489-
fprintf(stderr, " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
490-
fprintf(stderr, " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
491-
fprintf(stderr, " --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
492-
fprintf(stderr, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
493-
fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
494-
fprintf(stderr, " --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
495-
fprintf(stderr, " --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
496-
fprintf(stderr, " --mirostat N use Mirostat sampling.\n");
497-
fprintf(stderr, " Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n");
498-
fprintf(stderr, " (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
499-
fprintf(stderr, " --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
500-
fprintf(stderr, " --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
501-
fprintf(stderr, " -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n");
502-
fprintf(stderr, " modifies the likelihood of token appearing in the completion,\n");
503-
fprintf(stderr, " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n");
504-
fprintf(stderr, " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
505-
fprintf(stderr, " --cfg-negative-prompt PROMPT \n");
506-
fprintf(stderr, " negative prompt to use for guidance. (default: empty)\n");
507-
fprintf(stderr, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
508-
fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
509-
fprintf(stderr, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base);
510-
fprintf(stderr, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale);
511-
fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
512-
fprintf(stderr, " --no-penalize-nl do not penalize newline token\n");
513-
fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
514-
fprintf(stderr, " not recommended: doubles context memory required and no measurable increase in quality\n");
515-
fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp);
516-
fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
517-
fprintf(stderr, " --perplexity compute perplexity over each ctx window of the prompt\n");
518-
fprintf(stderr, " --perplexity-lines compute perplexity over each line of the prompt\n");
519-
fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
520-
fprintf(stderr, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
461+
fprintf(stdout, "usage: %s [options]\n", argv[0]);
462+
fprintf(stdout, "\n");
463+
fprintf(stdout, "options:\n");
464+
fprintf(stdout, " -h, --help show this help message and exit\n");
465+
fprintf(stdout, " -i, --interactive run in interactive mode\n");
466+
fprintf(stdout, " --interactive-first run in interactive mode and wait for input right away\n");
467+
fprintf(stdout, " -ins, --instruct run in instruction mode (use with Alpaca models)\n");
468+
fprintf(stdout, " --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n");
469+
fprintf(stdout, " -r PROMPT, --reverse-prompt PROMPT\n");
470+
fprintf(stdout, " halt generation at PROMPT, return control in interactive mode\n");
471+
fprintf(stdout, " (can be specified more than once for multiple prompts).\n");
472+
fprintf(stdout, " --color colorise output to distinguish prompt and user input from generations\n");
473+
fprintf(stdout, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
474+
fprintf(stdout, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
475+
fprintf(stdout, " -p PROMPT, --prompt PROMPT\n");
476+
fprintf(stdout, " prompt to start generation with (default: empty)\n");
477+
fprintf(stdout, " -e process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
478+
fprintf(stdout, " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n");
479+
fprintf(stdout, " --prompt-cache-all if specified, saves user input and generations to cache as well.\n");
480+
fprintf(stdout, " not supported with --interactive or other interactive options\n");
481+
fprintf(stdout, " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n");
482+
fprintf(stdout, " --random-prompt start with a randomized prompt.\n");
483+
fprintf(stdout, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
484+
fprintf(stdout, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
485+
fprintf(stdout, " -f FNAME, --file FNAME\n");
486+
fprintf(stdout, " prompt file to start generation.\n");
487+
fprintf(stdout, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
488+
fprintf(stdout, " --top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
489+
fprintf(stdout, " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
490+
fprintf(stdout, " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
491+
fprintf(stdout, " --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
492+
fprintf(stdout, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
493+
fprintf(stdout, " --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
494+
fprintf(stdout, " --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
495+
fprintf(stdout, " --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
496+
fprintf(stdout, " --mirostat N use Mirostat sampling.\n");
497+
fprintf(stdout, " Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n");
498+
fprintf(stdout, " (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
499+
fprintf(stdout, " --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
500+
fprintf(stdout, " --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
501+
fprintf(stdout, " -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n");
502+
fprintf(stdout, " modifies the likelihood of token appearing in the completion,\n");
503+
fprintf(stdout, " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n");
504+
fprintf(stdout, " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
505+
fprintf(stdout, " --cfg-negative-prompt PROMPT \n");
506+
fprintf(stdout, " negative prompt to use for guidance. (default: empty)\n");
507+
fprintf(stdout, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
508+
fprintf(stdout, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
509+
fprintf(stdout, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base);
510+
fprintf(stdout, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale);
511+
fprintf(stdout, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
512+
fprintf(stdout, " --no-penalize-nl do not penalize newline token\n");
513+
fprintf(stdout, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
514+
fprintf(stdout, " not recommended: doubles context memory required and no measurable increase in quality\n");
515+
fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp);
516+
fprintf(stdout, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
517+
fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n");
518+
fprintf(stdout, " --perplexity-lines compute perplexity over each line of the prompt\n");
519+
fprintf(stdout, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
520+
fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
521521
if (llama_mlock_supported()) {
522-
fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
522+
fprintf(stdout, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
523523
}
524524
if (llama_mmap_supported()) {
525-
fprintf(stderr, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
525+
fprintf(stdout, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
526526
}
527-
fprintf(stderr, " --numa attempt optimizations that help on some NUMA systems\n");
528-
fprintf(stderr, " if run without this previously, it is recommended to drop the system page cache before using this\n");
529-
fprintf(stderr, " see https://github.com/ggerganov/llama.cpp/issues/1437\n");
527+
fprintf(stdout, " --numa attempt optimizations that help on some NUMA systems\n");
528+
fprintf(stdout, " if run without this previously, it is recommended to drop the system page cache before using this\n");
529+
fprintf(stdout, " see https://github.com/ggerganov/llama.cpp/issues/1437\n");
530530
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
531-
fprintf(stderr, " -ngl N, --n-gpu-layers N\n");
532-
fprintf(stderr, " number of layers to store in VRAM\n");
533-
fprintf(stderr, " -ts SPLIT --tensor-split SPLIT\n");
534-
fprintf(stderr, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
535-
fprintf(stderr, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" );
536-
fprintf(stderr, " -lv, --low-vram don't allocate VRAM scratch buffer\n" );
531+
fprintf(stdout, " -ngl N, --n-gpu-layers N\n");
532+
fprintf(stdout, " number of layers to store in VRAM\n");
533+
fprintf(stdout, " -ts SPLIT --tensor-split SPLIT\n");
534+
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
535+
fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" );
536+
fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n" );
537537
#endif
538-
fprintf(stderr, " --mtest compute maximum memory usage\n");
539-
fprintf(stderr, " --export export the computation graph to 'llama.ggml'\n");
540-
fprintf(stderr, " --verbose-prompt print prompt before generation\n");
541-
fprintf(stderr, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
542-
fprintf(stderr, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
543-
fprintf(stderr, " -m FNAME, --model FNAME\n");
544-
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
545-
fprintf(stderr, "\n");
538+
fprintf(stdout, " --mtest compute maximum memory usage\n");
539+
fprintf(stdout, " --export export the computation graph to 'llama.ggml'\n");
540+
fprintf(stdout, " --verbose-prompt print prompt before generation\n");
541+
fprintf(stdout, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
542+
fprintf(stdout, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
543+
fprintf(stdout, " -m FNAME, --model FNAME\n");
544+
fprintf(stdout, " model path (default: %s)\n", params.model.c_str());
545+
fprintf(stdout, "\n");
546546
}
547547

548548
std::string gpt_random_prompt(std::mt19937 & rng) {

0 commit comments

Comments (0)