@@ -458,91 +458,91 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
458
458
}
459
459
460
460
void gpt_print_usage (int /* argc*/ , char ** argv, const gpt_params & params) {
461
- fprintf (stderr , " usage: %s [options]\n " , argv[0 ]);
462
- fprintf (stderr , " \n " );
463
- fprintf (stderr , " options:\n " );
464
- fprintf (stderr , " -h, --help show this help message and exit\n " );
465
- fprintf (stderr , " -i, --interactive run in interactive mode\n " );
466
- fprintf (stderr , " --interactive-first run in interactive mode and wait for input right away\n " );
467
- fprintf (stderr , " -ins, --instruct run in instruction mode (use with Alpaca models)\n " );
468
- fprintf (stderr , " --multiline-input allows you to write or paste multiple lines without ending each in '\\ '\n " );
469
- fprintf (stderr , " -r PROMPT, --reverse-prompt PROMPT\n " );
470
- fprintf (stderr , " halt generation at PROMPT, return control in interactive mode\n " );
471
- fprintf (stderr , " (can be specified more than once for multiple prompts).\n " );
472
- fprintf (stderr , " --color colorise output to distinguish prompt and user input from generations\n " );
473
- fprintf (stderr , " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n " );
474
- fprintf (stderr , " -t N, --threads N number of threads to use during computation (default: %d)\n " , params.n_threads );
475
- fprintf (stderr , " -p PROMPT, --prompt PROMPT\n " );
476
- fprintf (stderr , " prompt to start generation with (default: empty)\n " );
477
- fprintf (stderr , " -e process prompt escapes sequences (\\ n, \\ r, \\ t, \\ ', \\\" , \\\\ )\n " );
478
- fprintf (stderr , " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n " );
479
- fprintf (stderr , " --prompt-cache-all if specified, saves user input and generations to cache as well.\n " );
480
- fprintf (stderr , " not supported with --interactive or other interactive options\n " );
481
- fprintf (stderr , " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n " );
482
- fprintf (stderr , " --random-prompt start with a randomized prompt.\n " );
483
- fprintf (stderr , " --in-prefix STRING string to prefix user inputs with (default: empty)\n " );
484
- fprintf (stderr , " --in-suffix STRING string to suffix after user inputs with (default: empty)\n " );
485
- fprintf (stderr , " -f FNAME, --file FNAME\n " );
486
- fprintf (stderr , " prompt file to start generation.\n " );
487
- fprintf (stderr , " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n " , params.n_predict );
488
- fprintf (stderr , " --top-k N top-k sampling (default: %d, 0 = disabled)\n " , params.top_k );
489
- fprintf (stderr , " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n " , (double )params.top_p );
490
- fprintf (stderr , " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n " , (double )params.tfs_z );
491
- fprintf (stderr , " --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n " , (double )params.typical_p );
492
- fprintf (stderr , " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , params.repeat_last_n );
493
- fprintf (stderr , " --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )params.repeat_penalty );
494
- fprintf (stderr , " --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )params.presence_penalty );
495
- fprintf (stderr , " --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )params.frequency_penalty );
496
- fprintf (stderr , " --mirostat N use Mirostat sampling.\n " );
497
- fprintf (stderr , " Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n " );
498
- fprintf (stderr , " (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n " , params.mirostat );
499
- fprintf (stderr , " --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n " , (double )params.mirostat_eta );
500
- fprintf (stderr , " --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n " , (double )params.mirostat_tau );
501
- fprintf (stderr , " -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n " );
502
- fprintf (stderr , " modifies the likelihood of token appearing in the completion,\n " );
503
- fprintf (stderr , " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n " );
504
- fprintf (stderr , " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n " );
505
- fprintf (stderr , " --cfg-negative-prompt PROMPT \n " );
506
- fprintf (stderr , " negative prompt to use for guidance. (default: empty)\n " );
507
- fprintf (stderr , " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n " , params.cfg_scale );
508
- fprintf (stderr , " -c N, --ctx-size N size of the prompt context (default: %d)\n " , params.n_ctx );
509
- fprintf (stderr , " --rope-freq-base N RoPE base frequency (default: %.1f)\n " , params.rope_freq_base );
510
- fprintf (stderr , " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n " , params.rope_freq_scale );
511
- fprintf (stderr , " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n " );
512
- fprintf (stderr , " --no-penalize-nl do not penalize newline token\n " );
513
- fprintf (stderr , " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n " );
514
- fprintf (stderr , " not recommended: doubles context memory required and no measurable increase in quality\n " );
515
- fprintf (stderr , " --temp N temperature (default: %.1f)\n " , (double )params.temp );
516
- fprintf (stderr , " -b N, --batch-size N batch size for prompt processing (default: %d)\n " , params.n_batch );
517
- fprintf (stderr , " --perplexity compute perplexity over each ctx window of the prompt\n " );
518
- fprintf (stderr , " --perplexity-lines compute perplexity over each line of the prompt\n " );
519
- fprintf (stderr , " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n " , params.n_keep );
520
- fprintf (stderr , " --chunks N max number of chunks to process (default: %d, -1 = all)\n " , params.n_chunks );
461
+ fprintf (stdout , " usage: %s [options]\n " , argv[0 ]);
462
+ fprintf (stdout , " \n " );
463
+ fprintf (stdout , " options:\n " );
464
+ fprintf (stdout , " -h, --help show this help message and exit\n " );
465
+ fprintf (stdout , " -i, --interactive run in interactive mode\n " );
466
+ fprintf (stdout , " --interactive-first run in interactive mode and wait for input right away\n " );
467
+ fprintf (stdout , " -ins, --instruct run in instruction mode (use with Alpaca models)\n " );
468
+ fprintf (stdout , " --multiline-input allows you to write or paste multiple lines without ending each in '\\ '\n " );
469
+ fprintf (stdout , " -r PROMPT, --reverse-prompt PROMPT\n " );
470
+ fprintf (stdout , " halt generation at PROMPT, return control in interactive mode\n " );
471
+ fprintf (stdout , " (can be specified more than once for multiple prompts).\n " );
472
+ fprintf (stdout , " --color colorise output to distinguish prompt and user input from generations\n " );
473
+ fprintf (stdout , " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n " );
474
+ fprintf (stdout , " -t N, --threads N number of threads to use during computation (default: %d)\n " , params.n_threads );
475
+ fprintf (stdout , " -p PROMPT, --prompt PROMPT\n " );
476
+ fprintf (stdout , " prompt to start generation with (default: empty)\n " );
477
+ fprintf (stdout , " -e process prompt escapes sequences (\\ n, \\ r, \\ t, \\ ', \\\" , \\\\ )\n " );
478
+ fprintf (stdout , " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n " );
479
+ fprintf (stdout , " --prompt-cache-all if specified, saves user input and generations to cache as well.\n " );
480
+ fprintf (stdout , " not supported with --interactive or other interactive options\n " );
481
+ fprintf (stdout , " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n " );
482
+ fprintf (stdout , " --random-prompt start with a randomized prompt.\n " );
483
+ fprintf (stdout , " --in-prefix STRING string to prefix user inputs with (default: empty)\n " );
484
+ fprintf (stdout , " --in-suffix STRING string to suffix after user inputs with (default: empty)\n " );
485
+ fprintf (stdout , " -f FNAME, --file FNAME\n " );
486
+ fprintf (stdout , " prompt file to start generation.\n " );
487
+ fprintf (stdout , " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n " , params.n_predict );
488
+ fprintf (stdout , " --top-k N top-k sampling (default: %d, 0 = disabled)\n " , params.top_k );
489
+ fprintf (stdout , " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n " , (double )params.top_p );
490
+ fprintf (stdout , " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n " , (double )params.tfs_z );
491
+ fprintf (stdout , " --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n " , (double )params.typical_p );
492
+ fprintf (stdout , " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , params.repeat_last_n );
493
+ fprintf (stdout , " --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )params.repeat_penalty );
494
+ fprintf (stdout , " --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )params.presence_penalty );
495
+ fprintf (stdout , " --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )params.frequency_penalty );
496
+ fprintf (stdout , " --mirostat N use Mirostat sampling.\n " );
497
+ fprintf (stdout , " Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n " );
498
+ fprintf (stdout , " (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n " , params.mirostat );
499
+ fprintf (stdout , " --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n " , (double )params.mirostat_eta );
500
+ fprintf (stdout , " --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n " , (double )params.mirostat_tau );
501
+ fprintf (stdout , " -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n " );
502
+ fprintf (stdout , " modifies the likelihood of token appearing in the completion,\n " );
503
+ fprintf (stdout , " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n " );
504
+ fprintf (stdout , " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n " );
505
+ fprintf (stdout , " --cfg-negative-prompt PROMPT \n " );
506
+ fprintf (stdout , " negative prompt to use for guidance. (default: empty)\n " );
507
+ fprintf (stdout , " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n " , params.cfg_scale );
508
+ fprintf (stdout , " -c N, --ctx-size N size of the prompt context (default: %d)\n " , params.n_ctx );
509
+ fprintf (stdout , " --rope-freq-base N RoPE base frequency (default: %.1f)\n " , params.rope_freq_base );
510
+ fprintf (stdout , " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n " , params.rope_freq_scale );
511
+ fprintf (stdout , " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n " );
512
+ fprintf (stdout , " --no-penalize-nl do not penalize newline token\n " );
513
+ fprintf (stdout , " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n " );
514
+ fprintf (stdout , " not recommended: doubles context memory required and no measurable increase in quality\n " );
515
+ fprintf (stdout , " --temp N temperature (default: %.1f)\n " , (double )params.temp );
516
+ fprintf (stdout , " -b N, --batch-size N batch size for prompt processing (default: %d)\n " , params.n_batch );
517
+ fprintf (stdout , " --perplexity compute perplexity over each ctx window of the prompt\n " );
518
+ fprintf (stdout , " --perplexity-lines compute perplexity over each line of the prompt\n " );
519
+ fprintf (stdout , " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n " , params.n_keep );
520
+ fprintf (stdout , " --chunks N max number of chunks to process (default: %d, -1 = all)\n " , params.n_chunks );
521
521
if (llama_mlock_supported ()) {
522
- fprintf (stderr , " --mlock force system to keep model in RAM rather than swapping or compressing\n " );
522
+ fprintf (stdout , " --mlock force system to keep model in RAM rather than swapping or compressing\n " );
523
523
}
524
524
if (llama_mmap_supported ()) {
525
- fprintf (stderr , " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n " );
525
+ fprintf (stdout , " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n " );
526
526
}
527
- fprintf (stderr , " --numa attempt optimizations that help on some NUMA systems\n " );
528
- fprintf (stderr , " if run without this previously, it is recommended to drop the system page cache before using this\n " );
529
- fprintf (stderr , " see https://github.com/ggerganov/llama.cpp/issues/1437\n " );
527
+ fprintf (stdout , " --numa attempt optimizations that help on some NUMA systems\n " );
528
+ fprintf (stdout , " if run without this previously, it is recommended to drop the system page cache before using this\n " );
529
+ fprintf (stdout , " see https://github.com/ggerganov/llama.cpp/issues/1437\n " );
530
530
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
531
- fprintf (stderr , " -ngl N, --n-gpu-layers N\n " );
532
- fprintf (stderr , " number of layers to store in VRAM\n " );
533
- fprintf (stderr , " -ts SPLIT --tensor-split SPLIT\n " );
534
- fprintf (stderr , " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n " );
535
- fprintf (stderr , " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n " );
536
- fprintf (stderr , " -lv, --low-vram don't allocate VRAM scratch buffer\n " );
531
+ fprintf (stdout , " -ngl N, --n-gpu-layers N\n " );
532
+ fprintf (stdout , " number of layers to store in VRAM\n " );
533
+ fprintf (stdout , " -ts SPLIT --tensor-split SPLIT\n " );
534
+ fprintf (stdout , " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n " );
535
+ fprintf (stdout , " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n " );
536
+ fprintf (stdout , " -lv, --low-vram don't allocate VRAM scratch buffer\n " );
537
537
#endif
538
- fprintf (stderr , " --mtest compute maximum memory usage\n " );
539
- fprintf (stderr , " --export export the computation graph to 'llama.ggml'\n " );
540
- fprintf (stderr , " --verbose-prompt print prompt before generation\n " );
541
- fprintf (stderr , " --lora FNAME apply LoRA adapter (implies --no-mmap)\n " );
542
- fprintf (stderr , " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n " );
543
- fprintf (stderr , " -m FNAME, --model FNAME\n " );
544
- fprintf (stderr , " model path (default: %s)\n " , params.model .c_str ());
545
- fprintf (stderr , " \n " );
538
+ fprintf (stdout , " --mtest compute maximum memory usage\n " );
539
+ fprintf (stdout , " --export export the computation graph to 'llama.ggml'\n " );
540
+ fprintf (stdout , " --verbose-prompt print prompt before generation\n " );
541
+ fprintf (stdout , " --lora FNAME apply LoRA adapter (implies --no-mmap)\n " );
542
+ fprintf (stdout , " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n " );
543
+ fprintf (stdout , " -m FNAME, --model FNAME\n " );
544
+ fprintf (stdout , " model path (default: %s)\n " , params.model .c_str ());
545
+ fprintf (stdout , " \n " );
546
546
}
547
547
548
548
std::string gpt_random_prompt (std::mt19937 & rng) {
0 commit comments