
Commit 4be04c8

garrnizon and ggerganov authored
scripts : add non-interactive server-llm.sh (#5303)
* Update server-llm.sh: add a --non-interactive flag that allows running the script without asking for permission

* Update scripts/server-llm.sh

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 5d55b0c commit 4be04c8
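For reference, a minimal sketch of invoking the script with the new flag (the download step here is illustrative; only the --non-interactive flag itself comes from this commit):

    # Fetch the helper script once, then run it without the confirmation prompt.
    # --port 8888 matches the script's default and is shown only for illustration.
    curl -s https://ggml.ai/server-llm.sh -o server-llm.sh
    bash ./server-llm.sh --non-interactive --port 8888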

File tree

1 file changed: +40 -33 lines changed

scripts/server-llm.sh

Lines changed: 40 additions & 33 deletions
@@ -47,6 +47,7 @@ if ! command -v make &> /dev/null; then
 fi

 # parse arguments
+is_interactive=1
 port=8888
 repo=""
 wtype=""
@@ -66,22 +67,27 @@ verbose=0

 function print_usage {
     printf "Usage:\n"
-    printf "  ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n"
-    printf "  --port:       port number, default is 8888\n"
-    printf "  --repo:       path to a repo containing GGUF model files\n"
-    printf "  --wtype:      weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
-    printf "  --backend:    cpu, cuda, metal, opencl, depends on the OS\n"
-    printf "  --gpu-id:     gpu id, default is 0\n"
-    printf "  --n-parallel: number of parallel requests, default is 8\n"
-    printf "  --n-kv:       KV cache size, default is 4096\n"
-    printf "  --verbose:    verbose output\n\n"
+    printf "  ./server-llm.sh [--non-interactive] [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n"
+    printf "  --non-interactive: run without asking for permission\n"
+    printf "  --port:            port number, default is 8888\n"
+    printf "  --repo:            path to a repo containing GGUF model files\n"
+    printf "  --wtype:           weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
+    printf "  --backend:         cpu, cuda, metal, opencl, depends on the OS\n"
+    printf "  --gpu-id:          gpu id, default is 0\n"
+    printf "  --n-parallel:      number of parallel requests, default is 8\n"
+    printf "  --n-kv:            KV cache size, default is 4096\n"
+    printf "  --verbose:         verbose output\n\n"
     printf "Example:\n\n"
     printf '  bash -c "$(curl -s https://ggml.ai/server-llm.sh)"\n\n'
 }

 while [[ $# -gt 0 ]]; do
     key="$1"
     case $key in
+        --non-interactive)
+            is_interactive=0
+            shift
+            ;;
         --port)
             port="$2"
             shift
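The new option slots into the script's existing shift-based case parser: boolean flags consume one positional argument, options that take a value consume two. A standalone sketch of the same pattern (the option names here are hypothetical, not part of the commit):

    #!/usr/bin/env bash
    # Sketch of the shift-based option loop used by server-llm.sh.
    quiet=0     # boolean flag, analogous to is_interactive
    port=8888   # option that takes a value
    while [[ $# -gt 0 ]]; do
        key="$1"
        case $key in
            --quiet)   # boolean flag: set it, shift once
                quiet=1
                shift
                ;;
            --port)    # value option: read $2, shift twice
                port="$2"
                shift
                shift
                ;;
            *)
                echo "unknown option: $key" >&2
                exit 1
                ;;
        esac
    done
    echo "quiet=$quiet port=$port"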
@@ -176,31 +182,32 @@ repos=(
     "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"
     "https://huggingface.co/TheBloke/CausalLM-7B-GGUF"
 )
+if [ $is_interactive -eq 1 ]; then
+    printf "\n"
+    printf "[I] This is a helper script for deploying llama.cpp's server on this machine.\n\n"
+    printf "    Based on the options that follow, the script might download a model file\n"
+    printf "    from the internet, which can be a few GBs in size. The script will also\n"
+    printf "    build the latest llama.cpp source code from GitHub, which can be unstable.\n"
+    printf "\n"
+    printf "    Upon success, an HTTP server will be started and it will serve the selected\n"
+    printf "    model using llama.cpp for demonstration purposes.\n"
+    printf "\n"
+    printf "    Please note:\n"
+    printf "\n"
+    printf "    - All new data will be stored in the current folder\n"
+    printf "    - The server will be listening on all network interfaces\n"
+    printf "    - The server will run with default settings which are not always optimal\n"
+    printf "    - Do not judge the quality of a model based on the results from this script\n"
+    printf "    - Do not use this script to benchmark llama.cpp\n"
+    printf "    - Do not use this script in production\n"
+    printf "    - This script is only for demonstration purposes\n"
+    printf "\n"
+    printf "    If you don't know what you are doing, please press Ctrl-C to abort now\n"
+    printf "\n"
+    printf "    Press Enter to continue ...\n\n"

-printf "\n"
-printf "[I] This is a helper script for deploying llama.cpp's server on this machine.\n\n"
-printf "    Based on the options that follow, the script might download a model file\n"
-printf "    from the internet, which can be a few GBs in size. The script will also\n"
-printf "    build the latest llama.cpp source code from GitHub, which can be unstable.\n"
-printf "\n"
-printf "    Upon success, an HTTP server will be started and it will serve the selected\n"
-printf "    model using llama.cpp for demonstration purposes.\n"
-printf "\n"
-printf "    Please note:\n"
-printf "\n"
-printf "    - All new data will be stored in the current folder\n"
-printf "    - The server will be listening on all network interfaces\n"
-printf "    - The server will run with default settings which are not always optimal\n"
-printf "    - Do not judge the quality of a model based on the results from this script\n"
-printf "    - Do not use this script to benchmark llama.cpp\n"
-printf "    - Do not use this script in production\n"
-printf "    - This script is only for demonstration purposes\n"
-printf "\n"
-printf "    If you don't know what you are doing, please press Ctrl-C to abort now\n"
-printf "\n"
-printf "    Press Enter to continue ...\n\n"
-
-read
+    read
+fi

 if [[ -z "$repo" ]]; then
     printf "[+] No repo provided from the command line\n"
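The point of the guard: read blocks waiting for a line on stdin, which stalls or fails in unattended environments (CI jobs, stdin not a terminal). Wrapping both the prompt and the read in the is_interactive check lets automation skip them entirely. A minimal standalone sketch of the same behavior (not the script itself):

    #!/usr/bin/env bash
    # Sketch: confirm-before-continuing, skippable via --non-interactive.
    is_interactive=1
    [[ "${1:-}" == "--non-interactive" ]] && is_interactive=0

    if [ $is_interactive -eq 1 ]; then
        printf "Press Enter to continue ...\n"
        read   # blocks until the user presses Enter (or stdin closes)
    fi

    printf "continuing with deployment ...\n"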
