Commit 413e7b0

ci : add model tests + script wrapper (#4586)
* scripts : add lib.sh and lib_test.sh
* scripts : stub out new ci-run.sh script
* scripts : switch to PascalCase for functions
  This looks a little odd at first, but I find it very useful as a convention to know if a command is part of our code vs a builtin.
* scripts : add some fancy conversion from snake_case to PascalCase
* Add venv to ci/run.sh
* Revert scripts work
* scripts : add wrapper script for local use of ci/run.sh
* Simplify .gitignore for tests, clang-tidy fixes
* Label all ctest tests
* ci : ctest uses -L main
* Attempt at writing ctest_with_model
* Update test-model-load-cancel
* ci : add ctest_with_model for debug and release ggml-ci
* Fix gg_get_model function ggml-ci
* got stuck on CMake
* Add get_model.cpp to tests/CMakeLists.txt ggml-ci
* Fix README.md output for ctest_with_model ggml-ci
* workflows : use `-L main` for all ctest ggml-ci
* Fixes
* GG_RUN_CTEST_MODELFILE => LLAMACPP_TESTMODELFILE
* Always show warning rather than failing if model file variable is not set
* scripts : update usage text for ci-run.sh
1 parent 6dd3c28 commit 413e7b0
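
In short, the commit splits the test suite into two ctest labels: "main" for the fast tests that every CI job runs, and "model" for tests that need a GGUF model file. A minimal local sketch of the two invocations, assuming an existing CMake build directory; the model path is illustrative, not part of the commit:

    # Fast tests only -- what the workflows now run via `ctest -L main`.
    cd build
    ctest -L main --output-on-failure --timeout 900

    # Model-gated tests; if LLAMACPP_TEST_MODELFILE is unset, each test prints a
    # warning and exits successfully instead of failing (see tests/get-model.cpp).
    LLAMACPP_TEST_MODELFILE=$HOME/models/ggml-model-f16.gguf \
        ctest -L model --output-on-failure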

11 files changed: +199 / -48 lines


.github/workflows/build.yml

Lines changed: 6 additions & 6 deletions

@@ -72,7 +72,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --verbose --timeout 900
+          ctest -L main --verbose --timeout 900

   ubuntu-latest-cmake-sanitizer:
     runs-on: ubuntu-latest
@@ -107,7 +107,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --verbose --timeout 900
+          ctest -L main --verbose --timeout 900

   ubuntu-latest-cmake-mpi:
     runs-on: ubuntu-latest
@@ -141,7 +141,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --verbose
+          ctest -L main --verbose

   # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   # how to debug it.
@@ -202,7 +202,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --verbose --timeout 900
+          ctest -L main --verbose --timeout 900

   macOS-latest-cmake-ios:
     runs-on: macos-latest
@@ -394,7 +394,7 @@ jobs:
         if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
         run: |
           cd build
-          ctest -C Release --verbose --timeout 900
+          ctest -L main -C Release --verbose --timeout 900

       - name: Test (Intel SDE)
         id: cmake_test_sde
@@ -406,7 +406,7 @@ jobs:
           7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
           $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
           cd build
-          & $sde -future -- ctest -C Release --verbose --timeout 900
+          & $sde -future -- ctest -L main -C Release --verbose --timeout 900

       - name: Determine tag name
         id: tag
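
The workflow change is uniform: every ctest invocation gains the `-L main` label filter, so CI runners without a model file skip the model-gated tests. To preview which tests a label selects without running them, ctest's list-only mode can be combined with the filter (a sketch, assuming a configured build directory):

    cd build
    ctest -L main -N    # list tests labeled "main" without running them
    ctest -L model -N   # list the model-gated tests added by this commit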

.gitignore

Lines changed: 1 addition & 18 deletions

@@ -27,7 +27,7 @@
 lcov-report/
 gcovr-report/

-build*/
+build*
 out/
 tmp/

@@ -89,20 +89,3 @@ examples/jeopardy/results.txt

 poetry.lock
 poetry.toml
-
-# Test binaries
-/tests/test-grammar-parser
-/tests/test-llama-grammar
-/tests/test-double-float
-/tests/test-grad0
-/tests/test-opt
-/tests/test-quantize-fns
-/tests/test-quantize-perf
-/tests/test-sampling
-/tests/test-tokenizer-0-llama
-/tests/test-tokenizer-0-falcon
-/tests/test-tokenizer-1-llama
-/tests/test-tokenizer-1-bpe
-/tests/test-rope
-/tests/test-backend-ops
-/tests/test-autorelease

Makefile

Lines changed: 5 additions & 2 deletions

@@ -9,7 +9,7 @@ TEST_TARGETS = \
     tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
     tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
     tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
-    tests/test-backend-ops tests/test-autorelease
+    tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease

 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -748,5 +748,8 @@ tests/test-c.o: tests/test-c.c llama.h
 tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
     $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
+    $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
     $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
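
For the Makefile build, the new test-model-load-cancel target is compiled together with tests/get-model.cpp, so it presumably resolves its model the same way as the other model-gated tests: from argv[1] or from LLAMACPP_TEST_MODELFILE. A sketch with an illustrative model path:

    make tests/test-model-load-cancel
    ./tests/test-model-load-cancel /path/to/ggml-model-f16.gguf
    # equivalently, via the environment variable read by get_model_or_exit():
    LLAMACPP_TEST_MODELFILE=/path/to/ggml-model-f16.gguf ./tests/test-model-load-cancel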

ci/run.sh

Lines changed: 70 additions & 11 deletions

@@ -22,9 +22,9 @@ mkdir -p "$2"
 OUT=$(realpath "$1")
 MNT=$(realpath "$2")

-rm -v $OUT/*.log
-rm -v $OUT/*.exit
-rm -v $OUT/*.md
+rm -f "$OUT/*.log"
+rm -f "$OUT/*.exit"
+rm -f "$OUT/*.md"

 sd=`dirname $0`
 cd $sd/../
@@ -94,7 +94,7 @@ function gg_run_ctest_debug {
     (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

-    (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+    (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log

     set +e
 }
@@ -123,9 +123,9 @@ function gg_run_ctest_release {
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

     if [ -z ${GG_BUILD_LOW_PERF} ]; then
-        (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+        (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
     else
-        (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+        (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
     fi

     set +e
@@ -141,6 +141,61 @@ function gg_sum_ctest_release {
     gg_printf '```\n'
 }

+function gg_get_model {
+    local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf"
+    local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
+    if [[ -s $gguf_3b ]]; then
+        echo -n "$gguf_3b"
+    elif [[ -s $gguf_7b ]]; then
+        echo -n "$gguf_7b"
+    else
+        echo >&2 "No model found. Can't run gg_run_ctest_with_model."
+        exit 1
+    fi
+}
+
+function gg_run_ctest_with_model_debug {
+    cd ${SRC}
+
+    local model; model=$(gg_get_model)
+    cd build-ci-debug
+    set -e
+    (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
+    set +e
+    cd ..
+}
+
+function gg_run_ctest_with_model_release {
+    cd ${SRC}
+
+    local model; model=$(gg_get_model)
+    cd build-ci-release
+    set -e
+    (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
+    set +e
+    cd ..
+}
+
+function gg_sum_ctest_with_model_debug {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs ctest with model files in debug mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
+    gg_printf '```\n'
+}
+
+function gg_sum_ctest_with_model_release {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs ctest with model files in release mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
+    gg_printf '```\n'
+}
+
 # open_llama_3b_v2

 function gg_run_open_llama_3b_v2 {
@@ -183,8 +238,6 @@ function gg_run_open_llama_3b_v2 {

     wiki_test_60="${path_wiki}/wiki.test-60.raw"

-    ./bin/test-autorelease ${model_f16}
-
     ./bin/quantize ${model_f16} ${model_q8_0} q8_0
     ./bin/quantize ${model_f16} ${model_q4_0} q4_0
     ./bin/quantize ${model_f16} ${model_q4_1} q4_1
@@ -507,14 +560,18 @@ function gg_sum_open_llama_7b_v2 {
 ## main

 if [ -z ${GG_BUILD_LOW_PERF} ]; then
+    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
     rm -rf ${SRC}/models-mnt
-
     mnt_models=${MNT}/models
     mkdir -p ${mnt_models}
     ln -sfn ${mnt_models} ${SRC}/models-mnt

-    python3 -m pip install -r ${SRC}/requirements.txt
-    python3 -m pip install --editable gguf-py
+    # Create a fresh python3 venv and enter it
+    python3 -m venv "$MNT/venv"
+    source "$MNT/venv/bin/activate"
+
+    pip install -r ${SRC}/requirements.txt --disable-pip-version-check
+    pip install --editable gguf-py --disable-pip-version-check
 fi

 ret=0
@@ -529,6 +586,8 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
     else
         test $ret -eq 0 && gg_run open_llama_7b_v2
     fi
+    test $ret -eq 0 && gg_run ctest_with_model_debug
+    test $ret -eq 0 && gg_run ctest_with_model_release
 fi
534593

scripts/ci-run.sh

Lines changed: 50 additions & 0 deletions

@@ -0,0 +1,50 @@
+#!/bin/bash
+set -euo pipefail
+this=$(realpath "$0"); readonly this
+cd "$(dirname "$this")"
+shellcheck "$this"
+
+if (( $# != 1 && $# != 2 )); then
+    cat >&2 <<'EOF'
+usage:
+    ci-run.sh <tmp_dir> [<cache_dir>]
+
+This script wraps ci/run.sh:
+* If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir> is not a ramdisk, keep in mind that total writes will increase by the size of <cache_dir>.
+  (openllama_3b_v2: quantized models are about 30GB)
+* Persistent model and data files are synced to and from <cache_dir>,
+  excluding generated .gguf files.
+  (openllama_3b_v2: persistent files are about 6.6GB)
+* <cache_dir> defaults to ~/.cache/llama.cpp
+EOF
+    exit 1
+fi
+
+cd .. # => llama.cpp repo root
+
+tmp="$1"
+mkdir -p "$tmp"
+tmp=$(realpath "$tmp")
+echo >&2 "Using tmp=$tmp"
+
+cache="${2-$HOME/.cache/llama.cpp}"
+mkdir -p "$cache"
+cache=$(realpath "$cache")
+echo >&2 "Using cache=$cache"
+
+_sync() {
+    local from="$1"; shift
+    local to="$1"; shift
+
+    echo >&2 "Syncing from $from to $to"
+    mkdir -p "$from" "$to"
+    rsync -a "$from" "$to" --delete-during "$@"
+}
+
+_sync "$(realpath .)/" "$tmp/llama.cpp"
+_sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/"
+
+cd "$tmp/llama.cpp"
+bash ci/run.sh ci-out ci-mnt
+
+_sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P
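
Usage follows the help text above: the first argument is a scratch directory (ideally a ramdisk) and the optional second argument is a persistent cache, defaulting to ~/.cache/llama.cpp. A sketch; the tmpfs mount is optional and its mount point and size are illustrative:

    # optional: back the scratch directory with a ramdisk to spare the SSD
    sudo mkdir -p /mnt/ramdisk
    sudo mount -t tmpfs -o size=48G tmpfs /mnt/ramdisk

    bash scripts/ci-run.sh /mnt/ramdisk ~/.cache/llama.cpp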

tests/.gitignore

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+*
+!*.*

tests/CMakeLists.txt

Lines changed: 11 additions & 3 deletions

@@ -1,21 +1,27 @@
 function(llama_build_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_executable(${TEST_TARGET} ${source})
+    add_executable(${TEST_TARGET} ${source} get-model.cpp)
     install(TARGETS ${TEST_TARGET} RUNTIME)
     target_link_libraries(${TEST_TARGET} PRIVATE common)
 endfunction()

 function(llama_test_executable name source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
+    set_property(TEST ${name} PROPERTY LABELS "main")
 endfunction()

 function(llama_build_and_test_executable source)
+    llama_build_and_test_executable_with_label(${source} "main")
+endfunction()
+
+function(llama_build_and_test_executable_with_label source label)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_executable(${TEST_TARGET} ${source})
+    add_executable(${TEST_TARGET} ${source} get-model.cpp)
     install(TARGETS ${TEST_TARGET} RUNTIME)
     target_link_libraries(${TEST_TARGET} PRIVATE common)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
+    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
 endfunction()

 # llama_build_and_test_executable(test-double-float.cpp) # SLOW
@@ -49,10 +55,12 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp)
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
 llama_build_and_test_executable(test-backend-ops.cpp)
-llama_build_and_test_executable(test-autorelease.cpp)

 llama_build_and_test_executable(test-rope.cpp)

+llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
+llama_build_and_test_executable_with_label(test-autorelease.cpp "model")
+
 # dummy executable - not installed
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
 add_executable(${TEST_TARGET} test-c.c)
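
The LABELS property set here is what `ctest -L` matches against once the project is re-configured. A quick way to confirm the labels took effect (a sketch from the build directory; the expected names follow from the diff rather than verified output):

    cd build
    cmake ..                # re-configure so the new LABELS properties are applied
    ctest --print-labels    # expected to list: main, model
    ctest -L model -N       # should show test-model-load-cancel and test-autorelease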

tests/get-model.cpp

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "get-model.h"
+
+char * get_model_or_exit(int argc, char *argv[]) {
+    char * model_path;
+    if (argc > 1) {
+        model_path = argv[1];
+
+    } else {
+        model_path = getenv("LLAMACPP_TEST_MODELFILE");
+        if (!model_path || strlen(model_path) == 0) {
+            fprintf(stderr, "\033[33mWARNING: No model file provided. Skipping this test. Set LLAMACPP_TEST_MODELFILE=<gguf_model_path> to silence this warning and run this test.\n\033[0m");
+            exit(EXIT_SUCCESS);
+        }
+    }
+
+    return model_path;
+}
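
Because get_model_or_exit() exits with EXIT_SUCCESS when neither argv[1] nor the environment variable provides a model, a "model"-labeled test passes with a warning instead of failing on machines without a model file. A small sketch of that behavior, assuming a CMake build where test binaries land in bin/:

    cd build
    unset LLAMACPP_TEST_MODELFILE
    ./bin/test-model-load-cancel; echo "exit code: $?"   # warning is printed, exit code is 0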

tests/get-model.h

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+#pragma once
+char * get_model_or_exit(int, char*[]);

tests/test-autorelease.cpp

Lines changed: 4 additions & 8 deletions

@@ -5,19 +5,15 @@
 #include <thread>

 #include "llama.h"
+#include "get-model.h"

 // This creates a new context inside a pthread and then tries to exit cleanly.
 int main(int argc, char ** argv) {
-    if (argc < 2) {
-        printf("Usage: %s model.gguf\n", argv[0]);
-        return 0; // intentionally return success
-    }
+    auto * model_path = get_model_or_exit(argc, argv);

-    const std::string fname = argv[1];
-
-    std::thread([&fname]() {
+    std::thread([&model_path]() {
         llama_backend_init(false);
-        auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+        auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
         auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
         llama_free(ctx);
         llama_free_model(model);
