@@ -47,6 +47,7 @@ if ! command -v make &> /dev/null; then
 fi

 # parse arguments
+is_interactive=1
 port=8888
 repo=""
 wtype=""
@@ -66,22 +67,27 @@ verbose=0

 function print_usage {
     printf "  Usage:\n"
-    printf "    ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n"
-    printf "    --port:       port number, default is 8888\n"
-    printf "    --repo:       path to a repo containing GGUF model files\n"
-    printf "    --wtype:      weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
-    printf "    --backend:    cpu, cuda, metal, opencl, depends on the OS\n"
-    printf "    --gpu-id:     gpu id, default is 0\n"
-    printf "    --n-parallel: number of parallel requests, default is 8\n"
-    printf "    --n-kv:       KV cache size, default is 4096\n"
-    printf "    --verbose:    verbose output\n\n"
+    printf "    ./server-llm.sh [--non-interactive] [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n"
+    printf "    --non-interactive:  run without asking for confirmation\n"
+    printf "    --port:             port number, default is 8888\n"
+    printf "    --repo:             path to a repo containing GGUF model files\n"
+    printf "    --wtype:            weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
+    printf "    --backend:          cpu, cuda, metal, opencl, depends on the OS\n"
+    printf "    --gpu-id:           gpu id, default is 0\n"
+    printf "    --n-parallel:       number of parallel requests, default is 8\n"
+    printf "    --n-kv:             KV cache size, default is 4096\n"
+    printf "    --verbose:          verbose output\n\n"
     printf "  Example:\n\n"
     printf '    bash -c "$(curl -s https://ggml.ai/server-llm.sh)"\n\n'
 }

 while [[ $# -gt 0 ]]; do
     key="$1"
     case $key in
+        --non-interactive)
+            is_interactive=0
+            shift
+            ;;
         --port)
             port="$2"
             shift
@@ -176,31 +182,32 @@ repos=(
     "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"
     "https://huggingface.co/TheBloke/CausalLM-7B-GGUF"
 )
+if [ $is_interactive -eq 1 ]; then
+    printf "\n"
+    printf "[I] This is a helper script for deploying llama.cpp's server on this machine.\n\n"
+    printf "    Based on the options that follow, the script might download a model file\n"
+    printf "    from the internet, which can be a few GBs in size. The script will also\n"
+    printf "    build the latest llama.cpp source code from GitHub, which can be unstable.\n"
+    printf "\n"
+    printf "    Upon success, an HTTP server will be started and it will serve the selected\n"
+    printf "    model using llama.cpp for demonstration purposes.\n"
+    printf "\n"
+    printf "    Please note:\n"
+    printf "\n"
+    printf "    - All new data will be stored in the current folder\n"
+    printf "    - The server will be listening on all network interfaces\n"
+    printf "    - The server will run with default settings which are not always optimal\n"
+    printf "    - Do not judge the quality of a model based on the results from this script\n"
+    printf "    - Do not use this script to benchmark llama.cpp\n"
+    printf "    - Do not use this script in production\n"
+    printf "    - This script is only for demonstration purposes\n"
+    printf "\n"
+    printf "    If you don't know what you are doing, please press Ctrl-C to abort now\n"
+    printf "\n"
+    printf "    Press Enter to continue ...\n\n"

-printf "\n"
-printf "[I] This is a helper script for deploying llama.cpp's server on this machine.\n\n"
-printf "    Based on the options that follow, the script might download a model file\n"
-printf "    from the internet, which can be a few GBs in size. The script will also\n"
-printf "    build the latest llama.cpp source code from GitHub, which can be unstable.\n"
-printf "\n"
-printf "    Upon success, an HTTP server will be started and it will serve the selected\n"
-printf "    model using llama.cpp for demonstration purposes.\n"
-printf "\n"
-printf "    Please note:\n"
-printf "\n"
-printf "    - All new data will be stored in the current folder\n"
-printf "    - The server will be listening on all network interfaces\n"
-printf "    - The server will run with default settings which are not always optimal\n"
-printf "    - Do not judge the quality of a model based on the results from this script\n"
-printf "    - Do not use this script to benchmark llama.cpp\n"
-printf "    - Do not use this script in production\n"
-printf "    - This script is only for demonstration purposes\n"
-printf "\n"
-printf "    If you don't know what you are doing, please press Ctrl-C to abort now\n"
-printf "\n"
-printf "    Press Enter to continue ...\n\n"
-
-read
+    read
+fi

 if [[ -z "$repo" ]]; then
     printf "[+] No repo provided from the command line\n"