Commit bb0cadf

New Python-based entry point for containers
Signed-off-by: Jiri Podivin <[email protected]>
1 parent: dc07dc4

4 files changed: 59 additions, 3 deletions
.devops/full-cuda.Dockerfile (1 addition, 1 deletion)

@@ -30,4 +30,4 @@ ENV LLAMA_CUBLAS=1
 
 RUN make
 
-ENTRYPOINT ["/app/.devops/tools.sh"]
+ENTRYPOINT ["/app/.devops/tools.py"]

.devops/full-rocm.Dockerfile (1 addition, 1 deletion)

@@ -41,4 +41,4 @@ ENV CXX=/opt/rocm/llvm/bin/clang++
 
 RUN make
 
-ENTRYPOINT ["/app/.devops/tools.sh"]
+ENTRYPOINT ["/app/.devops/tools.py"]

.devops/full.Dockerfile (1 addition, 1 deletion)

@@ -18,4 +18,4 @@ RUN make
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT ["/app/.devops/tools.sh"]
+ENTRYPOINT ["/app/.devops/tools.py"]

.devops/tools.py (56 additions, 0 deletions)

@@ -0,0 +1,56 @@
+#!/bin/env python3
+
+import argparse
+import os
+import subprocess as sp
+from glob import glob
+
+parser = argparse.ArgumentParser()
+
+subparsers = parser.add_subparsers(dest='command')
+
+parser.add_argument(
+    '-m', "--model", type=str, required=True,
+    help="Directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
+
+run = subparsers.add_parser("run", help="Run a model previously converted into ggml")
+convert = subparsers.add_parser("convert", help="Convert a llama model into ggml")
+quantize = subparsers.add_parser("quantize", help="Optimize with quantization process ggml")
+allinone = subparsers.add_parser("all-in-one", help="Execute --convert & --quantize")
+server = subparsers.add_parser("server", help="Execute in server mode ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080")
+
+known_args, unknown_args = parser.parse_known_args()
+model_path = known_args.model
+converted_models = glob(os.path.join(model_path, 'ggml-model-*.gguf'))
+
+if known_args.command == 'convert':
+    sp.run(['python3', './convert.py', model_path] + unknown_args, check=True)
+
+if known_args.command == 'run':
+    sp.run(['./main', '-m', model_path] + unknown_args, check=True)
+
+if known_args.command == 'quantize':
+    if not converted_models:
+        print(f"No models ready for quantization found in {model_path}")
+        exit(1)
+    sp.run(['./quantize', converted_models[0]] + unknown_args, check=True)
+
+if known_args.command == 'all-in-one':
+    if not converted_models:
+        sp.run(['python3', './convert.py', model_path], check=True)
+        converted_models = glob(os.path.join(model_path, 'ggml-model-*.gguf'))
+    else:
+        print(
+            f"Converted models found {converted_models}! No need to convert.")
+
+    quantized_models = glob(os.path.join(model_path, f'ggml-model-q*_*.bin'))
+
+    if not quantized_models:
+        sp.run(['./quantize', converted_models[0]] + unknown_args, check=True)
+    else:
+        print(
+            f"Quantized models found {quantized_models}! No need to quantize.")
+
+if known_args.command == "server":
+    sp.run(['./server', '-m', model_path] + unknown_args, check=True)
+
+exit()
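
For context (not part of the commit): the entry point hinges on argparse's parse_known_args(). The parser keeps the options it defines (the subcommand and -m/--model) and returns every other argument untouched, and the script appends that remainder to the subprocess command line. A minimal, self-contained sketch of that pattern, using echo as a stand-in for a wrapped binary such as ./main:

#!/usr/bin/env python3
# Illustrative sketch only, not part of this commit. Shows how parse_known_args()
# splits recognized options from pass-through flags, which are then forwarded
# verbatim to the wrapped command. 'echo' stands in for a binary such as ./main.
import argparse
import subprocess as sp

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest='command')
subparsers.add_parser("run")
parser.add_argument('-m', '--model', required=True)

# Hypothetical invocation; -c and --port are not defined above, so they land in `unknown`.
known, unknown = parser.parse_known_args(
    ['-m', '/models/7B/ggml-model-q4_0.bin', 'run', '-c', '2048', '--port', '8080'])

print(known.command, known.model)   # run /models/7B/ggml-model-q4_0.bin
print(unknown)                      # ['-c', '2048', '--port', '8080']

if known.command == 'run':
    # Forward the leftover flags unchanged, as tools.py does with ./main.
    sp.run(['echo', '-m', known.model] + unknown, check=True)

Because the Dockerfiles set this script as the image ENTRYPOINT, arguments given to the container after the image name are handled the same way: the subcommand and -m are consumed by the wrapper, and everything else reaches the underlying binary unchanged.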
