|
#!/usr/bin/env python3
"""Thin CLI wrapper around llama.cpp tools.

Sub-commands dispatch to the sibling binaries/scripts in the working
directory (./convert.py, ./main, ./quantize, ./server).  Any arguments
this wrapper does not recognize are forwarded verbatim to the tool.
"""

import argparse
import os
import subprocess as sp
import sys
from glob import glob

# Glob pattern for models already converted to GGUF, relative to --model dir.
CONVERTED_PATTERN = 'ggml-model-*.gguf'


def build_parser():
    """Build the argument parser: one global --model option plus sub-commands."""
    parser = argparse.ArgumentParser()

    subparsers = parser.add_subparsers(dest='command')

    parser.add_argument(
        '-m', "--model", type=str, required=True,
        help="Directory containing model file, or model file itself (*.pth, *.pt, *.bin)")

    # The sub-commands take no options of their own; unknown args are
    # collected by parse_known_args() and forwarded to the underlying tool.
    subparsers.add_parser("run", help="Run a model previously converted into ggml")
    subparsers.add_parser("convert", help="Convert a llama model into ggml")
    subparsers.add_parser("quantize", help="Optimize with quantization process ggml")
    subparsers.add_parser("all-in-one", help="Execute --convert & --quantize")
    subparsers.add_parser("server", help="Execute in server mode ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080")
    return parser


def convert_model(model_path, extra_args):
    """Run ./convert.py on *model_path*, forwarding *extra_args*; raises on failure."""
    sp.run(['python3', './convert.py', model_path] + extra_args, check=True)


def main():
    """Parse argv and dispatch to the selected llama.cpp tool."""
    known_args, unknown_args = build_parser().parse_known_args()
    model_path = known_args.model
    converted_models = glob(os.path.join(model_path, CONVERTED_PATTERN))

    # Commands are mutually exclusive, so a single elif chain suffices.
    if known_args.command == 'convert':
        convert_model(model_path, unknown_args)

    elif known_args.command == 'run':
        sp.run(['./main', '-m', model_path] + unknown_args, check=True)

    elif known_args.command == 'quantize':
        if not converted_models:
            print(f"No models ready for quantization found in {model_path}")
            sys.exit(1)
        sp.run(['./quantize', converted_models[0]] + unknown_args, check=True)

    elif known_args.command == 'all-in-one':
        if not converted_models:
            convert_model(model_path, [])
            # Re-scan: conversion should have produced a .gguf file.
            converted_models = glob(os.path.join(model_path, CONVERTED_PATTERN))
        else:
            print(
                f"Converted models found {converted_models}! No need to convert.")

        # Guard against conversion having produced nothing (was a bare
        # IndexError on converted_models[0] below).
        if not converted_models:
            print(f"No models ready for quantization found in {model_path}")
            sys.exit(1)

        quantized_models = glob(os.path.join(model_path, 'ggml-model-q*_*.bin'))

        if not quantized_models:
            sp.run(['./quantize', converted_models[0]] + unknown_args, check=True)
        else:
            print(
                f"Quantized models found {quantized_models}! No need to quantize.")

    elif known_args.command == "server":
        sp.run(['./server', '-m', model_path] + unknown_args, check=True)


if __name__ == '__main__':
    main()