|
#!/bin/bash
#
# sample usage:
#
# mkdir tmp
#
# # CPU-only build
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with CUDA support
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
2 | 13 |
|
3 | 14 | if [ -z "$2" ]; then
|
4 | 15 | echo "usage: $0 <output-dir> <mnt-dir>"
|
@@ -101,7 +112,7 @@ function gg_run_ctest_release {
|
101 | 112 | (time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
102 | 113 | (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
103 | 114 |
|
104 |
| - if [ -z $GG_BUILD_LOW_PERF ]; then |
| 115 | + if [ -z ${GG_BUILD_LOW_PERF} ]; then |
105 | 116 | (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
106 | 117 | else
|
107 | 118 | (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
@@ -238,9 +249,130 @@ function gg_sum_open_llama_3b_v2 {
|
238 | 249 | gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
|
239 | 250 | }
|
240 | 251 |
|
# open_llama_7b_v2
# requires: GG_BUILD_CUDA

# Download OpenLLaMA 7B-v2, convert it to ggml, quantize it to every
# supported type, then smoke-test short text generation and a 3-chunk
# wikitext perplexity run for each model. Reads ${SRC}, ${OUT}, ${ci}.
function gg_run_open_llama_7b_v2 {
    cd ${SRC}

    # model config + tokenizer + weights (two shards)
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json

    # wikitext-2 test set; only the first 60 lines to keep the ppl run fast
    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
    unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
    head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw

    path_models="../models-mnt/open-llama/7B-v2"
    path_wiki="../models-mnt/wikitext/wikitext-2-raw"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    # CUDA build — this runner is only selected when GG_BUILD_CUDA is set
    (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_CUBLAS=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    python3 ../convert.py ${path_models}

    model_f16="${path_models}/ggml-model-f16.bin"
    wiki_test_60="${path_wiki}/wiki.test-60.raw"

    # quantization types exercised by this run (order preserved from the 3B run)
    qtypes=(q8_0 q4_0 q4_1 q5_0 q5_1 q3_k q4_k q5_k q6_k)

    for q in "${qtypes[@]}"; do
        ./bin/quantize ${model_f16} ${path_models}/ggml-model-${q}.bin ${q}
    done

    # short fixed-seed generation for f16 and every quantized model
    for q in f16 "${qtypes[@]}"; do
        (time ./bin/main --model ${path_models}/ggml-model-${q}.bin -ngl 999 -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-${q}.log
    done

    # perplexity over 3 chunks; appended (-a) to the same per-model tg logs,
    # which check_ppl greps below
    for q in f16 "${qtypes[@]}"; do
        (time ./bin/perplexity --model ${path_models}/ggml-model-${q}.bin -f ${wiki_test_60} -ngl 999 -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-${q}.log
    done

    # check_ppl <label> <log-line>
    # Extracts the last float from the log line and fails when it is missing
    # or above the 20.0 sanity bound.
    function check_ppl {
        qnt="$1"
        ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)

        # guard: previously an empty ppl made the bc test error out, the if
        # was skipped, and the check silently reported OK / returned 0
        if [ -z "$ppl" ]; then
            printf ' - %s @ %s (FAIL: no ppl found)\n' "$qnt" "$ppl"
            return 1
        fi

        if [ "$(echo "$ppl > 20.0" | bc)" -eq 1 ]; then
            printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
            return 20
        fi

        printf ' - %s @ %s OK\n' "$qnt" "$ppl"
        return 0
    }

    for q in f16 "${qtypes[@]}"; do
        check_ppl "$q" "$(grep "^\[1\]" $OUT/${ci}-tg-${q}.log)" | tee -a $OUT/${ci}-ppl.log
    done

    set +e
}
| 354 | + |
# Render the 7B-v2 summary section: exit status, perplexity check table,
# and the raw generation log for f16 plus each quantized model.
function gg_sum_open_llama_7b_v2 {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'OpenLLaMA 7B-v2:\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
    for q in q8_0 q4_0 q4_1 q5_0 q5_1 q3_k q4_k q5_k q6_k; do
        gg_printf '- %s:\n```\n%s\n```\n' "$q" "$(cat $OUT/${ci}-tg-${q}.log)"
    done
}
| 372 | + |
241 | 373 | ## main
|
242 | 374 |
|
243 |
| -if [ -z $GG_BUILD_LOW_PERF ]; then |
| 375 | +if [ -z ${GG_BUILD_LOW_PERF} ]; then |
244 | 376 | rm -rf ${SRC}/models-mnt
|
245 | 377 |
|
246 | 378 | mnt_models=${MNT}/models
|
|
252 | 384 |
|
ret=0

test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release

# heavy model runs are skipped on low-perf CI nodes; with CUDA available
# run the 7B model, otherwise fall back to the CPU-friendly 3B one.
# Quoted expansions: the unquoted form only worked by accident for empty
# values and breaks `[` if the variable ever contains whitespace.
if [ -z "${GG_BUILD_LOW_PERF}" ]; then
    if [ -z "${GG_BUILD_CUDA}" ]; then
        test $ret -eq 0 && gg_run open_llama_3b_v2
    else
        test $ret -eq 0 && gg_run open_llama_7b_v2
    fi
fi

exit $ret
|
0 commit comments