
Commit bd40c57

server : use new log

ggml-ci

1 parent 49884ac

File tree

9 files changed: +277 -426 lines changed

common/arg.cpp

Lines changed: 0 additions & 13 deletions

@@ -1818,19 +1818,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.system_prompt = system_prompt;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
-    add_opt(llama_arg(
-        {"--log-format"}, "{text, json}",
-        "log output format: json or text (default: json)",
-        [](gpt_params & params, const std::string & value) {
-            if (value == "json") {
-                params.log_json = true;
-            } else if (value == "text") {
-                params.log_json = false;
-            } else {
-                throw std::invalid_argument("invalid value");
-            }
-        }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
         {"--metrics"},
         format("enable prometheus compatible metrics endpoint (default: %s)", params.endpoint_metrics ? "enabled" : "disabled"),

examples/main/main.cpp

Lines changed: 2 additions & 2 deletions

@@ -676,7 +676,7 @@ int main(int argc, char ** argv) {

             const llama_token id = gpt_sampler_sample(smpl, ctx, -1);

-            gpt_sampler_accept(smpl, id, /* apply_grammar= */ true);
+            gpt_sampler_accept(smpl, id, /* accept_grammar= */ true);

             // LOG_DBG("last: %s\n", string_from(ctx, smpl->prev.to_vector()).c_str());

@@ -697,7 +697,7 @@ int main(int argc, char ** argv) {

             // push the prompt in the sampling context in order to apply repetition penalties later
             // for the prompt, we don't apply grammar rules
-            gpt_sampler_accept(smpl, embd_inp[n_consumed], /* apply_grammar= */ false);
+            gpt_sampler_accept(smpl, embd_inp[n_consumed], /* accept_grammar= */ false);

             ++n_consumed;
             if ((int) embd.size() >= params.n_batch) {
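Both hunks change only the inline argument-name comments: the boolean parameter of gpt_sampler_accept was evidently renamed from apply_grammar to accept_grammar, and the /* ...= */ annotations at the call sites are updated to match. A short sketch of that call-site idiom, using a hypothetical function rather than the real sampler API:

    // Hypothetical function standing in for gpt_sampler_accept.
    static void accept_token(int token, bool accept_grammar) {
        (void) token; (void) accept_grammar;
    }

    int main() {
        // The comment names the parameter, so the bare `true` documents
        // itself at the call site; if the parameter is renamed, the comment
        // must follow, or it silently describes the wrong argument.
        accept_token(42, /* accept_grammar= */ true);
        return 0;
    }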

examples/server/README.md

Lines changed: 0 additions & 1 deletion

@@ -121,7 +121,6 @@ The project is under active development, and we are [looking for feedback and co
 | `-to, --timeout N` | server read/write timeout in seconds (default: 600) |
 | `--threads-http N` | number of threads used to process HTTP requests (default: -1)<br/>(env: LLAMA_ARG_THREADS_HTTP) |
 | `-spf, --system-prompt-file FNAME` | set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications |
-| `--log-format {text, json}` | log output format: json or text (default: json) |
 | `--metrics` | enable prometheus compatible metrics endpoint (default: disabled)<br/>(env: LLAMA_ARG_ENDPOINT_METRICS) |
 | `--no-slots` | disables slots monitoring endpoint (default: enabled)<br/>(env: LLAMA_ARG_NO_ENDPOINT_SLOTS) |
 | `--slot-save-path PATH` | path to save slot kv cache (default: disabled) |

examples/server/bench/README.md

Lines changed: 0 additions & 1 deletion

@@ -40,7 +40,6 @@ server --host localhost --port 8080 \
   --parallel 8 \
   --batch-size 512 \
   --ctx-size 4096 \
-  --log-format text \
   -ngl 33
 ```


examples/server/bench/bench.py

Lines changed: 0 additions & 1 deletion

@@ -272,7 +272,6 @@ def start_server_background(args):
     server_args.append('--cont-batching')
     server_args.append('--metrics')
     server_args.append('--flash-attn')
-    server_args.extend(['--log-format', "text"])
     args = [str(arg) for arg in [server_path, *server_args]]
     print(f"bench: starting server with: {' '.join(args)}")
     pkwargs = {
