
Commit 1f3abe3

ngxsonarthw authored and committed
common : bring back missing args, add env var duplication check (ggml-org#9375)
* common : bring back missing args
* move duplication check to test-arg-parser
* add check for duplicated env var
* correct default values
1 parent acafc3a · commit 1f3abe3

File tree

4 files changed: 99 additions, 41 deletions

common/common.cpp

Lines changed: 75 additions & 34 deletions
@@ -673,17 +673,8 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
      * - if LLAMA_EXAMPLE_* is set (other than COMMON), we only show the option in the corresponding example
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
-    std::unordered_set<std::string> seen_args;
     auto add_opt = [&](llama_arg arg) {
         if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
-            // make sure there is no argument duplications
-            for (const auto & a : arg.args) {
-                if (seen_args.find(a) == seen_args.end()) {
-                    seen_args.insert(a);
-                } else {
-                    throw std::runtime_error(format("found duplicated argument in source code: %s", a));
-                }
-            }
             options.push_back(std::move(arg));
         }
     };
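Note: the duplicate-argument guard is gone from the hot path; add_opt now only filters options by example (the check moves to tests/test-arg-parser.cpp, shown below). A minimal sketch of that filter, assuming llama_arg keeps its examples in a std::set; the real struct layout is not shown in this commit:

    #include <set>
    #include <string>
    #include <vector>

    enum llama_example { LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COUNT };

    struct llama_arg {
        std::vector<std::string> args;                              // e.g. {"-m", "--model"}
        std::set<llama_example>  examples = {LLAMA_EXAMPLE_COMMON}; // default scope

        // an option is visible when it is registered for the given example
        bool in_example(llama_example ex) const { return examples.count(ex) > 0; }
    };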
@@ -790,8 +781,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-C", "--cpu-mask"}, "M",
         "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.cpuparams.mask_valid = true;
             if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -801,8 +791,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cr", "--cpu-range"}, "lo-hi",
         "range of CPUs for affinity. Complements --cpu-mask",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.cpuparams.mask_valid = true;
             if (!parse_cpu_range(range, params.cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid range");
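Both cleanups above bind the lambda parameter directly as mask or range instead of copying value into a new local. For context, parse_cpu_range consumes a "lo-hi" string; a toy sketch of that kind of parser follows (illustrative only, not the actual parse_cpu_range; MAX_CPUS_SKETCH is a placeholder, ggml defines its own limit):

    #include <string>

    static const int MAX_CPUS_SKETCH = 16; // placeholder bound

    // mark CPUs lo..hi from a "lo-hi" string; std::stoi throws on junk,
    // which matches the exception-based error handling used above
    static bool parse_cpu_range_sketch(const std::string & range, bool (&mask)[MAX_CPUS_SKETCH]) {
        size_t dash = range.find('-');
        if (dash == std::string::npos) return false;
        int lo = std::stoi(range.substr(0, dash));
        int hi = std::stoi(range.substr(dash + 1));
        if (lo < 0 || hi < lo || hi >= MAX_CPUS_SKETCH) return false;
        for (int i = lo; i <= hi; ++i) mask[i] = true;
        return true;
    }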
@@ -816,6 +805,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.cpuparams.strict_cpu = std::stoul(value);
         }
     ));
+    add_opt(llama_arg(
+        {"--prio"}, "N",
+        format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ));
     add_opt(llama_arg(
         {"--poll"}, "<0...100>",
         format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
@@ -826,8 +825,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cb", "--cpu-mask-batch"}, "M",
         "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.cpuparams_batch.mask_valid = true;
             if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -837,8 +835,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Crb", "--cpu-range-batch"}, "lo-hi",
         "ranges of CPUs for affinity. Complements --cpu-mask-batch",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.cpuparams_batch.mask_valid = true;
             if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -852,6 +849,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.cpuparams_batch.strict_cpu = value;
         }
     ));
+    add_opt(llama_arg(
+        {"--prio-batch"}, "N",
+        format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams_batch.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.cpuparams_batch.priority = (enum ggml_sched_priority) prio;
+        }
+    ));
     add_opt(llama_arg(
         {"--poll-batch"}, "<0|1>",
         "use polling to wait for work (default: same as --poll)",
@@ -862,8 +869,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cd", "--cpu-mask-draft"}, "M",
         "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.draft_cpuparams.mask_valid = true;
             if (!parse_cpu_mask(mask, params.draft_cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -873,8 +879,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Crd", "--cpu-range-draft"}, "lo-hi",
         "Ranges of CPUs for affinity. Complements --cpu-mask-draft",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.draft_cpuparams.mask_valid = true;
             if (!parse_cpu_range(range, params.draft_cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -888,18 +893,37 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.draft_cpuparams.strict_cpu = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"--prio-draft"}, "N",
+        format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.draft_cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"--poll-draft"}, "<0|1>",
         "Use polling to wait for draft model work (default: same as --poll])",
         [](gpt_params & params, int value) {
             params.draft_cpuparams.poll = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"-Cbd", "--cpu-mask-batch-draft"}, "M",
+        "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
+        [](gpt_params & params, const std::string & mask) {
+            params.draft_cpuparams_batch.mask_valid = true;
+            if (!parse_cpu_mask(mask, params.draft_cpuparams_batch.cpumask)) {
+                throw std::invalid_argument("invalid cpumask");
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"-Crbd", "--cpu-range-batch-draft"}, "lo-hi",
         "Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.draft_cpuparams_batch.mask_valid = true;
             if (!parse_cpu_range(range, params.draft_cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
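The --cpu-mask family accepts an arbitrarily long hex string, one bit per CPU. A toy decoder to illustrate the format (not the actual parse_cpu_mask; the digit-to-CPU ordering here is a simplification):

    #include <cctype>
    #include <string>

    // each hex digit carries 4 CPU bits; scan left to right, high bit first
    static bool parse_cpu_mask_sketch(const std::string & mask, bool * out, int n_cpus) {
        std::string m = (mask.rfind("0x", 0) == 0) ? mask.substr(2) : mask; // optional 0x prefix
        int cpu = 0;
        for (char c : m) {
            if (!std::isxdigit((unsigned char) c)) return false;
            int v = std::stoi(std::string(1, c), nullptr, 16);
            for (int b = 3; b >= 0 && cpu < n_cpus; --b, ++cpu) {
                out[cpu] = ((v >> b) & 1) != 0;
            }
        }
        return true;
    }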
@@ -913,6 +937,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.draft_cpuparams_batch.strict_cpu = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"--prio-batch-draft"}, "N",
+        format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams_batch.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.draft_cpuparams_batch.priority = (enum ggml_sched_priority) prio;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"--poll-batch-draft"}, "<0|1>",
         "Use polling to wait for draft model work (default: --poll-draft)",
@@ -1124,45 +1158,45 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         [](gpt_params & params) {
             params.interactive = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"-if", "--interactive-first"},
         format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false"),
         [](gpt_params & params) {
             params.interactive_first = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"-mli", "--multiline-input"},
         "allows you to write or paste multiple lines without ending each in '\\'",
         [](gpt_params & params) {
             params.multiline_input = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-prefix-bos"},
         "prefix BOS to user inputs, preceding the `--in-prefix` string",
         [](gpt_params & params) {
             params.input_prefix_bos = true;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-prefix"}, "STRING",
         "string to prefix user inputs with (default: empty)",
         [](gpt_params & params, const std::string & value) {
             params.input_prefix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-suffix"}, "STRING",
         "string to suffix after user inputs with (default: empty)",
         [](gpt_params & params, const std::string & value) {
             params.input_suffix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--no-warmup"},
         "skip warming up the model with an empty run",
@@ -1499,7 +1533,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ));
     add_opt(llama_arg(
-        {"--all-logits"},
+        {"--perplexity", "--all-logits"},
         format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"),
         [](gpt_params & params) {
             params.logits_all = true;
@@ -1554,6 +1588,13 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.kl_divergence = true;
         }
     ).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
+    add_opt(llama_arg(
+        {"--save-all-logits", "--kl-divergence-base"}, "FNAME",
+        "set logits file",
+        [](gpt_params & params, const std::string & value) {
+            params.logits_file = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
     add_opt(llama_arg(
         {"--ppl-stride"}, "N",
         format("stride for perplexity calculation (default: %d)", params.ppl_stride),
@@ -1802,7 +1843,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         [](gpt_params & params, const std::string & value) {
             params.model_alias = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL"));
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
         {"-m", "--model"}, "FNAME",
         ex == LLAMA_EXAMPLE_EXPORT_LORA
@@ -1890,7 +1931,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_PASSKEY}));
     add_opt(llama_arg(
-        {"-o", "--output"}, "FNAME",
+        {"-o", "--output", "--output-file"}, "FNAME",
         format("output file (default: '%s')",
             ex == LLAMA_EXAMPLE_EXPORT_LORA
                 ? params.lora_outfile.c_str()
@@ -1932,7 +1973,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(llama_arg(
-        {"--chunk"}, "N",
+        {"--chunk", "--from-chunk"}, "N",
         format("start processing the input from chunk N (default: %d)", params.i_chunk),
         [](gpt_params & params, int value) {
             params.i_chunk = value;
@@ -2057,7 +2098,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
-        {"--timeout"}, "N",
+        {"-to", "--timeout"}, "N",
         format("server read/write timeout in seconds (default: %d)", params.timeout_read),
         [](gpt_params & params, int value) {
             params.timeout_read = value;
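The remaining hunks restore alternate spellings only (--perplexity, --output-file, --from-chunk, -to); the handlers are untouched. The --alias hunk also drops a LLAMA_ARG_MODEL env binding that belongs to -m/--model, exactly the kind of duplicate the new env-var check below would flag. For example, binary name assumed:

    llama-server -m model.gguf -to 600   # 600-second read/write timeout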

common/common.h

Lines changed: 0 additions & 1 deletion
@@ -211,7 +211,6 @@ struct gpt_params {
     bool use_mlock       = false; // use mlock to keep model in memory
     bool verbose_prompt  = false; // print prompt tokens before generation
     bool display_prompt  = true;  // print prompt before generation
-    bool infill          = false; // use infill mode
     bool dump_kv_cache   = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload   = false; // disable KV offloading
     bool warmup          = true;  // warmup run

examples/infill/infill.cpp

Lines changed: 0 additions & 5 deletions
@@ -306,11 +306,6 @@ int main(int argc, char ** argv) {
     LOG_TEE("\n\n");

     LOG_TEE("\n##### Infill mode #####\n\n");
-    if (params.infill) {
-        printf("\n************\n");
-        printf("no need to specify '--infill', always running infill\n");
-        printf("************\n\n");
-    }
     if (params.interactive) {
         const char *control_message;
         if (params.multiline_input) {

tests/test-arg-parser.cpp

Lines changed: 24 additions & 1 deletion
@@ -1,6 +1,7 @@
 #include <string>
 #include <vector>
 #include <sstream>
+#include <unordered_set>

 #undef NDEBUG
 #include <cassert>
@@ -13,7 +14,29 @@ int main(void) {
     printf("test-arg-parser: make sure there is no duplicated arguments in any examples\n\n");
     for (int ex = 0; ex < LLAMA_EXAMPLE_COUNT; ex++) {
         try {
-            gpt_params_parser_init(params, (enum llama_example)ex);
+            auto options = gpt_params_parser_init(params, (enum llama_example)ex);
+            std::unordered_set<std::string> seen_args;
+            std::unordered_set<std::string> seen_env_vars;
+            for (const auto & opt : options) {
+                // check for args duplications
+                for (const auto & arg : opt.args) {
+                    if (seen_args.find(arg) == seen_args.end()) {
+                        seen_args.insert(arg);
+                    } else {
+                        fprintf(stderr, "test-arg-parser: found different handlers for the same argument: %s", arg);
+                        exit(1);
+                    }
+                }
+                // check for env var duplications
+                if (opt.env) {
+                    if (seen_env_vars.find(opt.env) == seen_env_vars.end()) {
+                        seen_env_vars.insert(opt.env);
+                    } else {
+                        fprintf(stderr, "test-arg-parser: found different handlers for the same env var: %s", opt.env);
+                        exit(1);
+                    }
+                }
+            }
         } catch (std::exception & e) {
             printf("%s\n", e.what());
             assert(false);
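One observation on the test as committed: each find-then-insert pair can collapse into a single call using the pair<iterator, bool> that unordered_set::insert returns, and if args or env ever hold std::string, passing them straight through a C varargs %s is undefined behavior. A hedged rewrite of the argument loop, not part of the commit:

    for (const auto & arg : opt.args) {
        if (!seen_args.insert(arg).second) {            // false => already seen
            fprintf(stderr, "test-arg-parser: duplicated argument: %s\n",
                    std::string(arg).c_str());          // safe for char * or std::string
            exit(1);
        }
    }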
