Skip to content

Commit ce4a7b8

Browse files
ggerganov and ngxson authored
server : various fixes (ggml-org#10704)
* server : various fixes ggml-ci
* server : show current seed in slot_params ggml-ci
* fix /slots endpoint
* Update examples/server/server.cpp Co-authored-by: Georgi Gerganov <[email protected]>
* server : reflect endpoint response changes in the readme ggml-ci
---------
Co-authored-by: Xuan Son Nguyen <[email protected]>
Co-authored-by: Xuan Son Nguyen <[email protected]>
1 parent 19d8762 commit ce4a7b8

File tree

4 files changed

+178
-97
lines changed

4 files changed

+178
-97
lines changed

examples/server/CMakeLists.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,6 @@ endforeach()
3434
add_executable(${TARGET} ${TARGET_SRCS})
3535
install(TARGETS ${TARGET} RUNTIME)
3636

37-
# clean up generated files in pre-build step
38-
foreach(asset ${PUBLIC_ASSETS})
39-
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
40-
add_custom_command(TARGET ${TARGET} PRE_BUILD
41-
COMMAND "${CMAKE_COMMAND}" -E remove -f "${output}"
42-
)
43-
endforeach()
44-
4537
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
4638

4739
if (LLAMA_SERVER_SSL)

examples/server/README.md

Lines changed: 136 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -618,9 +618,76 @@ This endpoint is public (no API key check). By default, it is read-only. To make
618618

619619
```json
620620
{
621-
"default_generation_settings": { ... },
621+
"default_generation_settings": {
622+
"id": 0,
623+
"id_task": -1,
624+
"n_ctx": 1024,
625+
"speculative": false,
626+
"is_processing": false,
627+
"params": {
628+
"n_predict": -1,
629+
"seed": 4294967295,
630+
"temperature": 0.800000011920929,
631+
"dynatemp_range": 0.0,
632+
"dynatemp_exponent": 1.0,
633+
"top_k": 40,
634+
"top_p": 0.949999988079071,
635+
"min_p": 0.05000000074505806,
636+
"xtc_probability": 0.0,
637+
"xtc_threshold": 0.10000000149011612,
638+
"typical_p": 1.0,
639+
"repeat_last_n": 64,
640+
"repeat_penalty": 1.0,
641+
"presence_penalty": 0.0,
642+
"frequency_penalty": 0.0,
643+
"dry_multiplier": 0.0,
644+
"dry_base": 1.75,
645+
"dry_allowed_length": 2,
646+
"dry_penalty_last_n": -1,
647+
"dry_sequence_breakers": [
648+
"\n",
649+
":",
650+
"\"",
651+
"*"
652+
],
653+
"mirostat": 0,
654+
"mirostat_tau": 5.0,
655+
"mirostat_eta": 0.10000000149011612,
656+
"penalize_nl": false,
657+
"stop": [],
658+
"max_tokens": -1,
659+
"n_keep": 0,
660+
"n_discard": 0,
661+
"ignore_eos": false,
662+
"stream": true,
663+
"n_probs": 0,
664+
"min_keep": 0,
665+
"grammar": "",
666+
"samplers": [
667+
"dry",
668+
"top_k",
669+
"typ_p",
670+
"top_p",
671+
"min_p",
672+
"xtc",
673+
"temperature"
674+
],
675+
"speculative.n_max": 16,
676+
"speculative.n_min": 5,
677+
"speculative.p_min": 0.8999999761581421,
678+
"timings_per_token": false
679+
},
680+
"prompt": "",
681+
"next_token": {
682+
"has_next_token": true,
683+
"has_new_line": false,
684+
"n_remain": -1,
685+
"n_decoded": 0,
686+
"stopping_word": ""
687+
}
688+
},
622689
"total_slots": 1,
623-
"chat_template": ""
690+
"chat_template": "..."
624691
}
625692
```
626693

@@ -739,56 +806,74 @@ Example:
739806

740807
```json
741808
[
742-
{
743-
"dynatemp_exponent": 1.0,
744-
"dynatemp_range": 0.0,
745-
"frequency_penalty": 0.0,
746-
"grammar": "",
747-
"id": 0,
748-
"ignore_eos": false,
749-
"is_processing": false,
750-
"logit_bias": [],
751-
"min_p": 0.05000000074505806,
752-
"mirostat": 0,
753-
"mirostat_eta": 0.10000000149011612,
754-
"mirostat_tau": 5.0,
755-
"model": "llama-2-7b-32k-instruct.Q2_K.gguf",
756-
"n_ctx": 2048,
757-
"n_keep": 0,
758-
"n_predict": 100000,
759-
"n_probs": 0,
760-
"next_token": {
761-
"has_next_token": true,
762-
"n_remain": -1,
763-
"n_decoded": 0,
764-
"stopped_eos": false,
765-
"stopped_limit": false,
766-
"stopped_word": false,
767-
"stopping_word": ""
768-
},
769-
"penalize_nl": true,
770-
"presence_penalty": 0.0,
771-
"prompt": "Say hello to llama.cpp",
772-
"repeat_last_n": 64,
773-
"repeat_penalty": 1.100000023841858,
774-
"samplers": [
775-
"top_k",
776-
"typical_p",
777-
"top_p",
778-
"min_p",
779-
"temperature"
780-
],
781-
"seed": 42,
782-
"stop": [
783-
"\n"
784-
],
785-
"stream": false,
786-
"task_id": 0,
787-
"temperature": 0.0,
788-
"top_k": 40,
789-
"top_p": 0.949999988079071,
790-
"typical_p": 1.0
809+
{
810+
"id": 0,
811+
"id_task": -1,
812+
"n_ctx": 1024,
813+
"speculative": false,
814+
"is_processing": false,
815+
"params": {
816+
"n_predict": -1,
817+
"seed": 4294967295,
818+
"temperature": 0.800000011920929,
819+
"dynatemp_range": 0.0,
820+
"dynatemp_exponent": 1.0,
821+
"top_k": 40,
822+
"top_p": 0.949999988079071,
823+
"min_p": 0.05000000074505806,
824+
"xtc_probability": 0.0,
825+
"xtc_threshold": 0.10000000149011612,
826+
"typical_p": 1.0,
827+
"repeat_last_n": 64,
828+
"repeat_penalty": 1.0,
829+
"presence_penalty": 0.0,
830+
"frequency_penalty": 0.0,
831+
"dry_multiplier": 0.0,
832+
"dry_base": 1.75,
833+
"dry_allowed_length": 2,
834+
"dry_penalty_last_n": -1,
835+
"dry_sequence_breakers": [
836+
"\n",
837+
":",
838+
"\"",
839+
"*"
840+
],
841+
"mirostat": 0,
842+
"mirostat_tau": 5.0,
843+
"mirostat_eta": 0.10000000149011612,
844+
"penalize_nl": false,
845+
"stop": [],
846+
"max_tokens": -1,
847+
"n_keep": 0,
848+
"n_discard": 0,
849+
"ignore_eos": false,
850+
"stream": true,
851+
"n_probs": 0,
852+
"min_keep": 0,
853+
"grammar": "",
854+
"samplers": [
855+
"dry",
856+
"top_k",
857+
"typ_p",
858+
"top_p",
859+
"min_p",
860+
"xtc",
861+
"temperature"
862+
],
863+
"speculative.n_max": 16,
864+
"speculative.n_min": 5,
865+
"speculative.p_min": 0.8999999761581421,
866+
"timings_per_token": false
867+
},
868+
"prompt": "",
869+
"next_token": {
870+
"has_next_token": true,
871+
"has_new_line": false,
872+
"n_remain": -1,
873+
"n_decoded": 0,
874+
"stopping_word": ""
791875
}
876+
}
792877
]
793878
```
794879

0 commit comments

Comments
 (0)