Skip to content

Commit 785a36a

Browse files
committed
ci : add 7B CUDA tests
ggml-ci
1 parent 7d5f184 commit 785a36a

File tree

2 files changed

+147
-6
lines changed

2 files changed

+147
-6
lines changed

ci/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,10 @@ It is a good practice, before publishing changes to execute the full CI locally
1616

1717
```bash
mkdir tmp

# CPU-only build
bash ./ci/run.sh ./tmp/results ./tmp/mnt

# with CUDA support
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```

ci/run.sh

Lines changed: 142 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
11
#!/bin/bash
#
# sample usage:
#
# mkdir tmp
#
# # CPU-only build
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with CUDA support
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#

314
if [ -z "$2" ]; then
415
echo "usage: $0 <output-dir> <mnt-dir>"
@@ -101,7 +112,7 @@ function gg_run_ctest_release {
101112
(time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
102113
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
103114

104-
if [ -z $GG_BUILD_LOW_PERF ]; then
115+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
105116
(time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
106117
else
107118
(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
@@ -238,9 +249,130 @@ function gg_sum_open_llama_3b_v2 {
238249
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
239250
}
240251

252+
# open_llama_7b_v2
# requires: GG_BUILD_CUDA
#
# Downloads OpenLLaMA 7B-v2, builds with cuBLAS, converts and quantizes the
# model, then runs short text-generation and perplexity checks per quant type.

function gg_run_open_llama_7b_v2 {
    cd ${SRC}

    # model weights + tokenizer (HF) and wikitext-2 for the perplexity check
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
    gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json

    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
    unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
    head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw

    path_models="../models-mnt/open-llama/7B-v2"
    path_wiki="../models-mnt/wikitext/wikitext-2-raw"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_CUBLAS=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    python3 ../convert.py ${path_models}

    model_f16="${path_models}/ggml-model-f16.bin"
    wiki_test_60="${path_wiki}/wiki.test-60.raw"

    # quantization types exercised below; "f16" is handled separately because
    # it is the unquantized source model
    qtypes="q8_0 q4_0 q4_1 q5_0 q5_1 q3_k q4_k q5_k q6_k"

    for t in ${qtypes}; do
        ./bin/quantize ${model_f16} ${path_models}/ggml-model-${t}.bin ${t}
    done

    # short text-generation run per model (fully offloaded, fixed seed)
    for t in f16 ${qtypes}; do
        (time ./bin/main --model ${path_models}/ggml-model-${t}.bin -ngl 999 -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-${t}.log
    done

    # perplexity over the first chunks of wikitext-2; appended to the same
    # per-model log that check_ppl parses below
    for t in f16 ${qtypes}; do
        (time ./bin/perplexity --model ${path_models}/ggml-model-${t}.bin -f ${wiki_test_60} -ngl 999 -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-${t}.log
    done

    # print PASS/FAIL for one quant type; fails (return 20) when the last
    # perplexity value found in the log text exceeds 20.0
    function check_ppl {
        qnt="$1"
        ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)

        if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
            printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
            return 20
        fi

        printf ' - %s @ %s OK\n' "$qnt" "$ppl"
        return 0
    }

    # NOTE(review): check_ppl's non-zero return status is swallowed by the
    # pipe into tee, so a ppl failure does not fail this CI step — confirm
    # this is intended
    for t in f16 ${qtypes}; do
        check_ppl "${t}" "$(cat $OUT/${ci}-tg-${t}.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    done

    set +e
}
354+
355+
# Render the 7B-v2 run results as a markdown summary section.
function gg_sum_open_llama_7b_v2 {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'OpenLLaMA 7B-v2:\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
    # the trailing space after "f16:" matches the original output format
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
    # one fenced log section per quantization type
    for t in q8_0 q4_0 q4_1 q5_0 q5_1 q3_k q4_k q5_k q6_k; do
        gg_printf '- %s:\n```\n%s\n```\n' "${t}" "$(cat $OUT/${ci}-tg-${t}.log)"
    done
}
372+
241373
## main
242374

243-
if [ -z $GG_BUILD_LOW_PERF ]; then
375+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
244376
rm -rf ${SRC}/models-mnt
245377

246378
mnt_models=${MNT}/models
@@ -252,11 +384,15 @@ fi
252384

253385
ret=0

test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release

# quote the test operands: with an unquoted value containing whitespace,
# [ -z $VAR ] becomes a malformed test expression
if [ -z "${GG_BUILD_LOW_PERF}" ]; then
    # the 7B run requires a CUDA build; CPU-only CI keeps using the 3B model
    if [ -z "${GG_BUILD_CUDA}" ]; then
        test $ret -eq 0 && gg_run open_llama_3b_v2
    else
        test $ret -eq 0 && gg_run open_llama_7b_v2
    fi
fi

exit $ret

0 commit comments

Comments
 (0)