@@ -260,10 +260,20 @@ function gg_run_open_llama_3b_v2 {
260
260
261
261
python3 ../convert-lora-to-ggml.py ${path_lora}
262
262
263
+ # f16
263
264
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-f16.log
264
265
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-f16.log
266
+ compare_ppl " f16 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
267
+
268
+ # q8_0
269
+ (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-q8_0.log
270
+ (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0.log
271
+ compare_ppl " q8_0 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
272
+
273
+ # q8_0 + f16 lora-base
274
+ (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log
275
+ compare_ppl " q8_0 / f16 base shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
265
276
266
- compare_ppl " shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
267
277
268
278
set +e
269
279
}
@@ -288,6 +298,9 @@ function gg_sum_open_llama_3b_v2 {
288
298
gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
289
299
gg_printf ' - shakespeare (f16):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-f16.log) "
290
300
gg_printf ' - shakespeare (f16 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log) "
301
+ gg_printf ' - shakespeare (q8_0):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log) "
302
+ gg_printf ' - shakespeare (q8_0 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0.log) "
303
+ gg_printf ' - shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log) "
291
304
}
292
305
293
306
# open_llama_7b_v2
@@ -421,10 +434,20 @@ function gg_run_open_llama_7b_v2 {
421
434
422
435
python3 ../convert-lora-to-ggml.py ${path_lora}
423
436
437
+ # f16
424
438
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-f16.log
425
439
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-f16.log
440
+ compare_ppl " f16 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
441
+
442
+ # the q8_0 runs below are commented out: currently not supported by the CUDA backend
443
+ # q8_0
444
+ # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
445
+ # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
446
+ # compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
426
447
427
- compare_ppl " shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
448
+ # q8_0 + f16 lora-base
449
+ # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
450
+ # compare_ppl "q8_0 / f16 base shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
428
451
429
452
set +e
430
453
}
@@ -449,6 +472,9 @@ function gg_sum_open_llama_7b_v2 {
449
472
gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
450
473
gg_printf ' - shakespeare (f16):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-f16.log) "
451
474
gg_printf ' - shakespeare (f16 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log) "
475
+ # gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)"
476
+ # gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)"
477
+ # gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
452
478
}
453
479
454
480
# # main
0 commit comments