@@ -400,8 +400,10 @@ void server_print_usage(int /*argc*/, char **argv, const gpt_params &params)
     fprintf(stderr, "                        number of layers to store in VRAM\n");
     fprintf(stderr, "  -m FNAME, --model FNAME\n");
     fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
-    fprintf(stderr, "  -host                 ip address to listen (default 127.0.0.1)\n");
-    fprintf(stderr, "  -port PORT            port to listen (default 8080)\n");
+    fprintf(stderr, "  -a ALIAS, --alias ALIAS\n");
+    fprintf(stderr, "                        set an alias for the model, will be added as `model` field in completion response\n");
+    fprintf(stderr, "  --host                ip address to listen (default 127.0.0.1)\n");
+    fprintf(stderr, "  --port PORT           port to listen (default 8080)\n");
     fprintf(stderr, "\n");
 }
@@ -453,6 +455,15 @@ bool server_params_parse(int argc, char **argv, server_params &sparams, gpt_para
             }
             params.model = argv[i];
         }
+        else if (arg == "-a" || arg == "--alias")
+        {
+            if (++i >= argc)
+            {
+                invalid_param = true;
+                break;
+            }
+            params.model_alias = argv[i];
+        }
         else if (arg == "--embedding")
         {
             params.embedding = true;
@@ -645,6 +656,7 @@ int main(int argc, char **argv)
         try
         {
             json data = {
+                {"model", llama.params.model_alias},
                 {"content", llama.generated_text},
                 {"tokens_predicted", llama.num_tokens_predicted}};
             return res.set_content(data.dump(), "application/json");
0 commit comments