Skip to content

Commit 3d90f9f

Browse files
committed
ci : add K-quants
ggml-ci
1 parent a404142 commit 3d90f9f

File tree

2 files changed

+46
-11
lines changed

2 files changed

+46
-11
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ build/
1616
build-em/
1717
build-debug/
1818
build-release/
19+
build-ci-debug/
20+
build-ci-release/
1921
build-static/
2022
build-cublas/
2123
build-opencl/
@@ -25,9 +27,10 @@ build-no-accel/
2527
build-sanitize-addr/
2628
build-sanitize-thread/
2729
out/
30+
tmp/
2831

2932
models/*
30-
*.bin
33+
models-mnt
3134

3235
/main
3336
/quantize

ci/run.sh

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
#/bin/bash
22

3+
if [ -z "$2" ]; then
4+
echo "usage: $0 <output-dir> <mnt-dir>"
5+
exit 1
6+
fi
7+
8+
mkdir -p "$1"
9+
mkdir -p "$2"
10+
11+
OUT=$(realpath "$1")
12+
MNT=$(realpath "$2")
13+
314
sd=`dirname $0`
415
cd $sd/../
5-
616
SRC=`pwd`
7-
OUT="$1"
8-
MNT="$2"
917

1018
## helpers
1119

@@ -120,30 +128,48 @@ function gg_run_open_llama_3b_v2 {
120128
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
121129
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
122130

123-
cd build-ci-release
131+
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
124132

125133
set -e
126134

135+
(time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
136+
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
137+
127138
path_models="../models-mnt/open-llama/3B-v2"
128139

140+
python3 ../convert.py ${path_models}
141+
129142
model_f16="${path_models}/ggml-model-f16.bin"
143+
model_q8_0="${path_models}/ggml-model-q8_0.bin"
130144
model_q4_0="${path_models}/ggml-model-q4_0.bin"
131145
model_q4_1="${path_models}/ggml-model-q4_1.bin"
132146
model_q5_0="${path_models}/ggml-model-q5_0.bin"
133147
model_q5_1="${path_models}/ggml-model-q5_1.bin"
148+
model_q3_k="${path_models}/ggml-model-q3_k.bin"
149+
model_q4_k="${path_models}/ggml-model-q4_k.bin"
150+
model_q5_k="${path_models}/ggml-model-q5_k.bin"
151+
model_q6_k="${path_models}/ggml-model-q6_k.bin"
134152

135-
python3 ../convert.py ${path_models}
136-
153+
./bin/quantize ${model_f16} ${model_q8_0} q8_0
137154
./bin/quantize ${model_f16} ${model_q4_0} q4_0
138155
./bin/quantize ${model_f16} ${model_q4_1} q4_1
139156
./bin/quantize ${model_f16} ${model_q5_0} q5_0
140157
./bin/quantize ${model_f16} ${model_q5_1} q5_1
158+
./bin/quantize ${model_f16} ${model_q3_k} q3_k
159+
./bin/quantize ${model_f16} ${model_q4_k} q4_k
160+
./bin/quantize ${model_f16} ${model_q5_k} q5_k
161+
./bin/quantize ${model_f16} ${model_q6_k} q6_k
141162

142163
(time ./bin/main --model ${model_f16} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-f16.log
164+
(time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
143165
(time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
144166
(time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
145167
(time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
146168
(time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
169+
(time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
170+
(time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
171+
(time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
172+
(time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
147173

148174
set +e
149175
}
@@ -154,27 +180,33 @@ function gg_sum_open_llama_3b_v2 {
154180
gg_printf 'OpenLLaMA 3B-v2: text generation\n'
155181
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
156182
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
183+
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
157184
gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
158185
gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
159186
gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
160187
gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
188+
gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
189+
gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
190+
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
191+
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
161192
}
162193

163194
## main
164195

165196
if [ -z $GG_BUILD_LOW_PERF ]; then
166197
rm -rf ${SRC}/models-mnt
167198

168-
mkdir -p $(realpath ${MNT}/models)
169-
ln -sfn ${MNT}/models ${SRC}/models-mnt
199+
mnt_models=$(realpath ${MNT}/models)
200+
mkdir -p ${mnt_models}
201+
ln -sfn ${mnt_models} ${SRC}/models-mnt
170202

171203
python3 -m pip install -r ${SRC}/requirements.txt
172204
fi
173205

174206
ret=0
175207

176-
test $ret -eq 0 && gg_run ctest_debug
177-
test $ret -eq 0 && gg_run ctest_release
208+
#test $ret -eq 0 && gg_run ctest_debug
209+
#test $ret -eq 0 && gg_run ctest_release
178210

179211
if [ -z $GG_BUILD_LOW_PERF ]; then
180212
test $ret -eq 0 && gg_run open_llama_3b_v2

0 commit comments

Comments (0)