1
- # /bin/bash
1
+ #! /bin/bash
2
2
#
3
3
# sample usage:
4
4
#
11
11
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
12
12
#
13
13
14
+ set -u # Fail on unset variables
15
+
14
16
if [ -z "$2" ]; then
15
17
echo " usage: $0 <output-dir> <mnt-dir>"
16
18
exit 1
@@ -30,8 +32,20 @@ sd=`dirname $0`
30
32
cd $sd /../
31
33
SRC=` pwd`
32
34
35
# Read-only list of quantization types, ordered so f16 is element 0.
# Iterate "${quants[@]:1}" to visit only the quantized formats (skipping f16).
declare -ra quants=( f16 q8_0 q4_0 q4_1 q5_0 q5_1 q2_k q3_k q4_k q5_k q6_k )
38
+
33
39
# # helpers
34
40
41
# Print a printf-style error message to stderr and exit with status 1.
# usage: die <format-string> [format-args...]
function die {
    local format="$1"
    shift
    # Quote exactly: "$format" (not " $format ") so no stray padding is printed.
    printf "$format" "$@" >&2
    exit 1
}
48
+
35
49
# download a file if it does not exist or if it is outdated
36
50
function gg_wget {
37
51
local out=$1
@@ -77,7 +91,9 @@ function gg_run {
77
91
function gg_run_ctest_debug {
78
92
cd ${SRC}
79
93
80
- rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug
94
+ rm -rf build-ci-debug
95
+ mkdir build-ci-debug
96
+ cd build-ci-debug
81
97
82
98
set -e
83
99
@@ -105,14 +121,16 @@ function gg_sum_ctest_debug {
105
121
function gg_run_ctest_release {
106
122
cd ${SRC}
107
123
108
- rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
124
+ rm -rf build-ci-release
125
+ mkdir build-ci-release
126
+ cd build-ci-release
109
127
110
128
set -e
111
129
112
130
(time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT /${ci} -cmake.log
113
131
(time make -j ) 2>&1 | tee -a $OUT /${ci} -make.log
114
132
115
- if [ -z ${GG_BUILD_LOW_PERF} ]; then
133
+ if [[ -z ${GG_BUILD_LOW_PERF+x} ]]; then
116
134
(time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT /${ci} -ctest.log
117
135
else
118
136
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT /${ci} -ctest.log
@@ -133,6 +151,7 @@ function gg_sum_ctest_release {
133
151
134
152
function gg_run_ctest_with_model {
135
153
cd ${SRC}
154
+ cd build-ci-release
136
155
set -e
137
156
(time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT /${ci} -ctest_with_model.log
138
157
set +e
@@ -151,81 +170,70 @@ function gg_sum_ctest_with_model {
151
170
# open_llama_3b_v2
152
171
153
172
function gg_run_open_llama_3b_v2 {
154
- cd ${SRC}
173
+ # We use absolute paths here to not have to track CWD as much
174
+ local models_mnt=" $( realpath " ${SRC} /models-mnt" ) "
175
+ local path_models=" ${models_mnt} /open-llama/3B-v2"
176
+ local path_wiki=" ${models_mnt} /wikitext"
177
+ local path_wiki_raw=" ${path_wiki} /wikitext-2-raw"
155
178
156
- gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
157
- gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
158
- gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
159
- gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
160
- gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
161
- gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
179
+ mkdir -p " ${path_models} " " ${path_wiki} "
162
180
163
- gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
164
- unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
165
- head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
181
+ gg_wget " ${path_models} " https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
182
+ gg_wget " ${path_models} " https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
183
+ gg_wget " ${path_models} " https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
184
+ gg_wget " ${path_models} " https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
185
+ gg_wget " ${path_models} " https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
186
+ gg_wget " ${path_models} " https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
166
187
167
- path_models=" ../models-mnt/open-llama/3B-v2"
168
- path_wiki=" ../models-mnt/wikitext/wikitext-2-raw"
188
+ gg_wget " ${path_wiki} " https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
189
+ unzip -o " ${path_wiki} /wikitext-2-raw-v1.zip" -d " ${path_wiki} "
190
+ head -n 60 " ${path_wiki_raw} /wiki.test.raw" > " ${path_wiki_raw} /wiki.test-60.raw"
169
191
170
- rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
192
+ rm -rf " ${SRC} /build-ci-release"
193
+ mkdir " ${SRC} /build-ci-release"
194
+ cd " ${SRC} /build-ci-release"
171
195
172
196
set -e
173
197
174
- (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $ OUT /${ci} -cmake.log
175
- (time make -j ) 2>&1 | tee -a $ OUT /${ci} -make.log
198
+ (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a " ${ OUT} /${ci} -cmake.log"
199
+ (time make -j ) 2>&1 | tee -a " ${ OUT} /${ci} -make.log"
176
200
177
- python3 .. /convert.py ${path_models}
201
+ python3 " ${SRC} /convert.py" " ${path_models} "
178
202
179
- model_f16=" ${path_models} /ggml-model-f16.gguf"
180
- model_q8_0=" ${path_models} /ggml-model-q8_0.gguf"
181
- model_q4_0=" ${path_models} /ggml-model-q4_0.gguf"
182
- model_q4_1=" ${path_models} /ggml-model-q4_1.gguf"
183
- model_q5_0=" ${path_models} /ggml-model-q5_0.gguf"
184
- model_q5_1=" ${path_models} /ggml-model-q5_1.gguf"
185
- model_q2_k=" ${path_models} /ggml-model-q2_k.gguf"
186
- model_q3_k=" ${path_models} /ggml-model-q3_k.gguf"
187
- model_q4_k=" ${path_models} /ggml-model-q4_k.gguf"
188
- model_q5_k=" ${path_models} /ggml-model-q5_k.gguf"
189
- model_q6_k=" ${path_models} /ggml-model-q6_k.gguf"
203
# Print (no trailing newline) the .gguf model path for one quantization type.
# Relies on ${path_models} being set by the caller.
# usage: model_for <quant>
function model_for {
    if (( $# != 1 )); then
        die 'model_for takes a single quantization, such as q8_0'
    fi
    # printf '%s' instead of 'echo -n': portable and safe for arbitrary values.
    printf '%s' "${path_models}/ggml-model-$1.gguf"
}
190
211
191
- wiki_test_60=" ${path_wiki } /wiki.test-60.raw"
212
+ wiki_test_60=" ${path_wiki_raw } /wiki.test-60.raw"
192
213
193
- ./bin/quantize ${model_f16} ${model_q8_0} q8_0
194
- ./bin/quantize ${model_f16} ${model_q4_0} q4_0
195
- ./bin/quantize ${model_f16} ${model_q4_1} q4_1
196
- ./bin/quantize ${model_f16} ${model_q5_0} q5_0
197
- ./bin/quantize ${model_f16} ${model_q5_1} q5_1
198
- ./bin/quantize ${model_f16} ${model_q2_k} q2_k
199
- ./bin/quantize ${model_f16} ${model_q3_k} q3_k
200
- ./bin/quantize ${model_f16} ${model_q4_k} q4_k
201
- ./bin/quantize ${model_f16} ${model_q5_k} q5_k
202
- ./bin/quantize ${model_f16} ${model_q6_k} q6_k
214
+ # Quantize q8_0 through q6_k
215
+ for q in " ${quants[@]: 1} " ; do
216
+ ./bin/quantize " $( model_for f16) " " $( model_for " ${q} " ) " " ${q} "
217
+ done
203
218
204
- (time ./bin/main --model ${model_f16} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
205
- (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
206
- (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
207
- (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
208
- (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
209
- (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
210
- (time ./bin/main --model ${model_q2_k} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q2_k.log
211
- (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
212
- (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
213
- (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
214
- (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
215
-
216
- (time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
217
- (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
218
- (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
219
- (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
220
- (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
221
- (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
222
- (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q2_k.log
223
- (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
224
- (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
225
- (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
226
- (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
219
+ # Run basic inference for all quants
220
+ for q in " ${quants[@]} " ; do
221
+ ( time \
222
+ ./bin/main --model " $( model_for " ${q} " ) " -s 1234 -n 64 --ignore-eos -p " I believe the meaning of life is"
223
+ ) 2>&1 | tee -a " ${OUT} /${ci} -tg-${q} .log"
224
+ done
227
225
228
- (time ./bin/save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT /${ci} -save-load-state.log
226
# Run perplexity with wiki_test_60 for every quant, appending each run's
# output to its OWN per-quant log — the same file the check_ppl pass greps.
# (Previously every quant was teed into the f16 log, clobbering the
# per-quant results.)
for q in "${quants[@]}"; do
    ( time \
        ./bin/perplexity --model "$(model_for "${q}")" -f "${wiki_test_60}" -c 128 -b 128 --chunks 2
    ) 2>&1 | tee -a "${OUT}/${ci}-tg-${q}.log"
done
232
+
233
+ # Run examples/save-load-state with q4_0
234
+ ( time \
235
+ ./bin/save-load-state --model " $( model_for q4_0) "
236
+ ) 2>&1 | tee -a " ${OUT} /${ci} -save-load-state.log"
229
237
230
238
function check_ppl {
231
239
qnt=" $1 "
@@ -240,17 +248,11 @@ function gg_run_open_llama_3b_v2 {
240
248
return 0
241
249
}
242
250
243
- check_ppl " f16" " $( cat $OUT /${ci} -tg-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
244
- check_ppl " q8_0" " $( cat $OUT /${ci} -tg-q8_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
245
- check_ppl " q4_0" " $( cat $OUT /${ci} -tg-q4_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
246
- check_ppl " q4_1" " $( cat $OUT /${ci} -tg-q4_1.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
247
- check_ppl " q5_0" " $( cat $OUT /${ci} -tg-q5_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
248
- check_ppl " q5_1" " $( cat $OUT /${ci} -tg-q5_1.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
249
- check_ppl " q2_k" " $( cat $OUT /${ci} -tg-q2_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
250
- check_ppl " q3_k" " $( cat $OUT /${ci} -tg-q3_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
251
- check_ppl " q4_k" " $( cat $OUT /${ci} -tg-q4_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
252
- check_ppl " q5_k" " $( cat $OUT /${ci} -tg-q5_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
253
- check_ppl " q6_k" " $( cat $OUT /${ci} -tg-q6_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
251
# Check perplexity results for all quants. Each quant's "[1] ..." perplexity
# line lives in its own -tg-<quant>.log, so grep the matching per-quant file
# (previously every iteration re-read the f16 log, checking f16 eleven times).
for q in "${quants[@]}"; do
    check_ppl "${q}" "$(grep "^\[1\]" "${OUT}/${ci}-tg-${q}.log")" \
        | tee -a "${OUT}/${ci}-ppl.log"
done
254
256
255
257
# lora
256
258
function compare_ppl {
@@ -267,32 +269,42 @@ function gg_run_open_llama_3b_v2 {
267
269
return 0
268
270
}
269
271
270
- path_lora=" ../models-mnt/open-llama/3B-v2 /lora"
271
- path_shakespeare=" ../models-mnt /shakespeare"
272
+ local path_lora=" ${path_models} /lora"
273
+ local path_shakespeare=" ${models_mnt} /shakespeare"
272
274
273
- shakespeare=" ${path_shakespeare} /shakespeare.txt"
274
- lora_shakespeare=" ${path_lora} /ggml-adapter-model.bin"
275
+ local shakespeare=" ${path_shakespeare} /shakespeare.txt"
276
+ local lora_shakespeare=" ${path_lora} /ggml-adapter-model.bin"
275
277
276
- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
277
- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
278
- gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt
278
+ gg_wget " ${path_lora} " https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
279
+ gg_wget " ${path_lora} " https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
280
+ gg_wget " ${path_shakespeare} " https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt
279
281
280
- python3 .. /convert-lora-to-ggml.py ${path_lora}
282
+ python3 " ${SRC} /convert-lora-to-ggml.py" " ${path_lora} "
281
283
282
284
# f16
283
- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-f16.log
284
- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-f16.log
285
- compare_ppl " f16 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
285
+ (time ./bin/perplexity --model " $( model_for f16) " -f " ${shakespeare} " -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a " ${OUT} /${ci} -ppl-shakespeare-f16.log"
286
+ (time ./bin/perplexity --model " $( model_for f16) " -f " ${shakespeare} " --lora " ${lora_shakespeare} " -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a " ${OUT} /${ci} -ppl-shakespeare-lora-f16.log"
287
+ compare_ppl " f16 shakespeare" \
288
+ " $( cat " ${OUT} /${ci} -ppl-shakespeare-f16.log" | grep " ^\[1\]" ) " \
289
+ " $( cat " ${OUT} /${ci} -ppl-shakespeare-lora-f16.log" | grep " ^\[1\]" ) " \
290
+ | tee -a " ${OUT} /${ci} -lora-ppl.log"
286
291
287
292
# q8_0
288
- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-q8_0.log
289
- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0.log
290
- compare_ppl " q8_0 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
293
+ (time ./bin/perplexity --model " $( model_for q8_0) " -f " ${shakespeare} " -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a " $OUT /${ci} -ppl-shakespeare-q8_0.log"
294
+ (time ./bin/perplexity --model " $( model_for q8_0) " -f " ${shakespeare} " --lora " ${lora_shakespeare} " -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a " $OUT /${ci} -ppl-shakespeare-lora-q8_0.log"
295
+ compare_ppl " q8_0 shakespeare" \
296
+ " $( cat " ${OUT} /${ci} -ppl-shakespeare-q8_0.log" | grep " ^\[1\]" ) " \
297
+ " $( cat " ${OUT} /${ci} -ppl-shakespeare-lora-q8_0.log" | grep " ^\[1\]" ) " \
298
+ | tee -a " ${OUT} /${ci} -lora-ppl.log"
291
299
292
300
# q8_0 + f16 lora-base
293
- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log
294
- compare_ppl " q8_0 / f16 base shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
295
-
301
+ ( time \
302
+ ./bin/perplexity --model " $( model_for q8_0) " -f " ${shakespeare} " --lora " ${lora_shakespeare} " --lora-base " $( model_for f16) " -c 128 -b 128 --chunks 2
303
+ ) 2>&1 | tee -a " ${OUT} /${ci} -ppl-shakespeare-lora-q8_0-f16.log"
304
+ compare_ppl " q8_0 / f16 base shakespeare" \
305
+ " $( cat " ${OUT} /${ci} -ppl-shakespeare-q8_0.log" | grep " ^\[1\]" ) " \
306
+ " $( cat " ${OUT} /${ci} -ppl-shakespeare-lora-q8_0-f16.log" | grep " ^\[1\]" ) " \
307
+ | tee -a " ${OUT} /${ci} -lora-ppl.log"
296
308
297
309
set +e
298
310
}
@@ -502,31 +514,42 @@ function gg_sum_open_llama_7b_v2 {
502
514
503
515
# # main
504
516
505
- if [ -z ${GG_BUILD_LOW_PERF} ]; then
506
- rm -rf ${SRC} /models-mnt
517
+ ret=0
507
518
508
- mnt_models=${MNT} /models
509
- mkdir -p ${mnt_models}
510
- ln -sfn ${mnt_models} ${SRC} /models-mnt
519
+ # This is necessary to test if a variable is set while `set -u` is enabled.
520
+ # see: https://stackoverflow.com/a/13864829
521
+ # [[ -z ${var+x} ]] evaluates to false if var is set
522
+ # [[ ! -z ${var+x} ]] evaluates to true if var is set
523
+ if [[ ! -z ${GG_BUILD_LOW_PERF+x} ]]; then
524
+ test " ${ret} " -eq 0 && gg_run ctest_debug
525
+ test " ${ret} " -eq 0 && gg_run ctest_release
526
+ exit " ${ret} "
527
+ fi # Otherwise, do extended testing
511
528
512
- python3 -m pip install -r ${SRC} /requirements.txt
513
- python3 -m pip install --editable gguf-py
514
- fi
529
+ rm -rf ${SRC} /models-mnt
515
530
516
- ret=0
531
+ mnt_models=${MNT} /models
532
+ mkdir -p ${mnt_models}
533
+ ln -sfn ${mnt_models} ${SRC} /models-mnt
534
+
535
+ # Create a fresh python3 venv and enter it
536
+ python3 -m venv " ${MNT} /venv"
537
+ source " ${MNT} /venv/bin/activate"
538
+
539
+ pip install --disable-pip-version-check -r ${SRC} /requirements.txt
540
+ pip install --disable-pip-version-check --editable gguf-py
517
541
518
542
test $ret -eq 0 && gg_run ctest_debug
519
543
test $ret -eq 0 && gg_run ctest_release
520
544
521
- if [ -z ${GG_BUILD_LOW_PERF} ]; then
522
- if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
523
- if [ -z ${GG_BUILD_CUDA} ]; then
524
- test $ret -eq 0 && gg_run open_llama_3b_v2
525
- else
526
- test $ret -eq 0 && gg_run open_llama_7b_v2
527
- fi
528
- test $ret -eq 0 && gg_run ctest_with_model
545
+ # Run tests with open_llama
546
+ if [[ -z ${GG_BUILD_VRAM_GB+x} ]] || (( GG_BUILD_VRAM_GB >= 8 )) ; then
547
+ if [[ ! -z ${GG_BUILD_CUDA+x} ]]; then
548
+ test $ret -eq 0 && gg_run open_llama_7b_v2
549
+ else
550
+ test $ret -eq 0 && gg_run open_llama_3b_v2
529
551
fi
552
+ test $ret -eq 0 && gg_run ctest_with_model
530
553
fi
531
554
532
555
exit $ret
0 commit comments