@@ -568,6 +568,50 @@ function gg_sum_open_llama_7b_v2 {
568
568
# gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
569
569
}
570
570
571
+ # bge-small
572
+
573
# Build a release binary, convert the BGE Small (BERT) embedding model to
# GGUF, quantize it to q8_0 and run the embedding example on both variants.
#
# Globals read:    SRC, OUT, ci, CMAKE_EXTRA
# Globals written: path_models, model_f16, model_q8_0
# Outputs:         appends to $OUT/${ci}-cmake.log, $OUT/${ci}-make.log,
#                  $OUT/${ci}-tg-f16.log and $OUT/${ci}-tg-q8_0.log
function gg_run_embd_bge_small {
    # Bail out early: the next steps run `rm -rf` relative to the cwd.
    cd "${SRC}" || return 1

    # gg_wget is defined elsewhere in this file; presumably it fetches the URL
    # into the given directory - confirm against its definition.
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
    # NOTE(review): BERT-style repos usually ship tokenizer.json rather than
    # tokenizer.model - confirm this URL actually resolves.
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.model
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt

    # Relative to the build directory entered below.
    path_models="../models-mnt/bge-small"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    # From here on any failing step aborts the run.
    set -e

    # CMAKE_EXTRA is intentionally unquoted: it may carry several cmake flags.
    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a "${OUT}/${ci}-cmake.log"
    (time make -j                                              ) 2>&1 | tee -a "${OUT}/${ci}-make.log"

    python3 ../convert-hf-to-gguf.py "${path_models}"

    model_f16="${path_models}/ggml-model-f16.gguf"
    model_q8_0="${path_models}/ggml-model-q8_0.gguf"

    ./bin/quantize "${model_f16}" "${model_q8_0}" q8_0

    (time ./bin/embedding --model "${model_f16}"  -p "I believe the meaning of life is" ) 2>&1 | tee -a "${OUT}/${ci}-tg-f16.log"
    (time ./bin/embedding --model "${model_q8_0}" -p "I believe the meaning of life is" ) 2>&1 | tee -a "${OUT}/${ci}-tg-q8_0.log"

    set +e
}
605
+
606
# Emit the markdown summary for the bge-small embedding run: a heading with
# the job name, the recorded exit status and both embedding logs.
#
# Globals read: OUT, ci
# Outputs:      passes printf-style format/argument pairs to gg_printf
#               (defined elsewhere in this file).
function gg_sum_embd_bge_small {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'BGE Small (BERT):\n'
    gg_printf '- status: %s\n' "$(cat "${OUT}/${ci}.exit")"
    # Logs are wrapped in fenced code blocks so they render verbatim.
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat "${OUT}/${ci}-tg-f16.log")"
    gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat "${OUT}/${ci}-tg-q8_0.log")"
}
614
+
571
615
# # main
572
616
573
617
if [ -z ${GG_BUILD_LOW_PERF} ]; then
@@ -591,6 +635,8 @@ test $ret -eq 0 && gg_run ctest_debug
591
635
test $ret -eq 0 && gg_run ctest_release
592
636
593
637
if [ -z ${GG_BUILD_LOW_PERF} ]; then
638
+ test $ret -eq 0 && gg_run embd_bge_small
639
+
594
640
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
595
641
if [ -z ${GG_BUILD_CUDA} ]; then
596
642
test $ret -eq 0 && gg_run open_llama_3b_v2
0 commit comments