1
1
# /bin/bash
2
2
3
+ if [ -z " $2 " ]; then
4
+ echo " usage: $0 <output-dir> <mnt-dir>"
5
+ exit 1
6
+ fi
7
+
8
+ mkdir -p " $1 "
9
+ mkdir -p " $2 "
10
+
11
+ OUT=$( realpath " $1 " )
12
+ MNT=$( realpath " $2 " )
13
+
3
14
sd=` dirname $0 `
4
15
cd $sd /../
5
-
6
16
SRC=` pwd`
7
- OUT=" $1 "
8
- MNT=" $2 "
9
17
10
18
# # helpers
11
19
@@ -120,30 +128,48 @@ function gg_run_open_llama_3b_v2 {
120
128
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
121
129
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
122
130
123
- cd build-ci-release
131
+ rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
124
132
125
133
set -e
126
134
135
+ (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $OUT /${ci} -cmake.log
136
+ (time make -j ) 2>&1 | tee -a $OUT /${ci} -make.log
137
+
127
138
path_models=" ../models-mnt/open-llama/3B-v2"
128
139
140
+ python3 ../convert.py ${path_models}
141
+
129
142
model_f16=" ${path_models} /ggml-model-f16.bin"
143
+ model_q8_0=" ${path_models} /ggml-model-q8_0.bin"
130
144
model_q4_0=" ${path_models} /ggml-model-q4_0.bin"
131
145
model_q4_1=" ${path_models} /ggml-model-q4_1.bin"
132
146
model_q5_0=" ${path_models} /ggml-model-q5_0.bin"
133
147
model_q5_1=" ${path_models} /ggml-model-q5_1.bin"
148
+ model_q3_k=" ${path_models} /ggml-model-q3_k.bin"
149
+ model_q4_k=" ${path_models} /ggml-model-q4_k.bin"
150
+ model_q5_k=" ${path_models} /ggml-model-q5_k.bin"
151
+ model_q6_k=" ${path_models} /ggml-model-q6_k.bin"
134
152
135
- python3 ../convert.py ${path_models}
136
-
153
+ ./bin/quantize ${model_f16} ${model_q8_0} q8_0
137
154
./bin/quantize ${model_f16} ${model_q4_0} q4_0
138
155
./bin/quantize ${model_f16} ${model_q4_1} q4_1
139
156
./bin/quantize ${model_f16} ${model_q5_0} q5_0
140
157
./bin/quantize ${model_f16} ${model_q5_1} q5_1
158
+ ./bin/quantize ${model_f16} ${model_q3_k} q3_k
159
+ ./bin/quantize ${model_f16} ${model_q4_k} q4_k
160
+ ./bin/quantize ${model_f16} ${model_q5_k} q5_k
161
+ ./bin/quantize ${model_f16} ${model_q6_k} q6_k
141
162
142
163
(time ./bin/main --model ${model_f16} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
164
+ (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
143
165
(time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
144
166
(time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
145
167
(time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
146
168
(time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
169
+ (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
170
+ (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
171
+ (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
172
+ (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
147
173
148
174
set +e
149
175
}
@@ -154,27 +180,33 @@ function gg_sum_open_llama_3b_v2 {
154
180
gg_printf ' OpenLLaMA 3B-v2: text generation\n'
155
181
gg_printf ' - status: %s\n' " $( cat $OUT /${ci} .exit) "
156
182
gg_printf ' - f16: \n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-f16.log) "
183
+ gg_printf ' - q8_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q8_0.log) "
157
184
gg_printf ' - q4_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_0.log) "
158
185
gg_printf ' - q4_1:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_1.log) "
159
186
gg_printf ' - q5_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_0.log) "
160
187
gg_printf ' - q5_1:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_1.log) "
188
+ gg_printf ' - q3_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q3_k.log) "
189
+ gg_printf ' - q4_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_k.log) "
190
+ gg_printf ' - q5_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_k.log) "
191
+ gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
161
192
}
162
193
163
194
# # main
164
195
165
196
if [ -z $GG_BUILD_LOW_PERF ]; then
166
197
rm -rf ${SRC} /models-mnt
167
198
168
- mkdir -p $( realpath ${MNT} /models)
169
- ln -sfn ${MNT} /models ${SRC} /models-mnt
199
+ mnt_models=$( realpath ${MNT} /models)
200
+ mkdir -p ${mnt_models}
201
+ ln -sfn ${mnt_models} ${SRC} /models-mnt
170
202
171
203
python3 -m pip install -r ${SRC} /requirements.txt
172
204
fi
173
205
174
206
ret=0
175
207
176
- test $ret -eq 0 && gg_run ctest_debug
177
- test $ret -eq 0 && gg_run ctest_release
208
+ # test $ret -eq 0 && gg_run ctest_debug
209
+ # test $ret -eq 0 && gg_run ctest_release
178
210
179
211
if [ -z $GG_BUILD_LOW_PERF ]; then
180
212
test $ret -eq 0 && gg_run open_llama_3b_v2
0 commit comments