name: Compile main

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  run-tinystories:
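    # '16-core-ubuntu' is assumed to be a larger-runner label available to this repo;
    # extra labels can be added to the matrix below to fan the job out across machines.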
    strategy:
      matrix:
        runner: [16-core-ubuntu]
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Print machine info
        run: |
          uname -a
          if [ "$(uname -s)" = "Darwin" ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip packages"
          pip install wheel
          pip install cmake
          pip install ninja
          pip install zstd
          pip install -r requirements.txt

          echo "Executorch: cloning"
          mkdir etorch
          cd etorch
          git clone https://github.com/pytorch/executorch.git
          cd executorch
          echo "Inside: ${PWD}"

          echo "Executorch: submodule update"
          git submodule sync
          git submodule update --init
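          # The submodules pull in ExecuTorch's third-party dependencies, which the
          # pybind build below expects to be present.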

          echo "Executorch: installing python interface"
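          # Builds and installs the ExecuTorch Python bindings; '--pybind xnnpack'
          # compiles them with the XNNPACK CPU backend enabled.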
          ./install_requirements.sh --pybind xnnpack
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'

          cd ../..
          echo "Inside: ${PWD}"
      - name: Install GGUF requirements
        run: |
          echo "Installing pip packages"
          pip install gguf

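          # Build llama.cpp from source; the default make target also produces the
          # 'quantize' tool invoked in the next step.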
          git clone https://github.com/ggerganov/llama.cpp.git
          pushd llama.cpp
          make
          popd
      - name: Download GGUF files
        run: |
          mkdir gguf_files
          wget -O gguf_files/llama-2-7b.Q4_0.gguf "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf?download=true"
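          # Requantize the Q4_0 checkpoint up to F32 (i.e. dequantize); --allow-requantize
          # permits converting a file that is already quantized.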
          ./llama.cpp/quantize --allow-requantize gguf_files/llama-2-7b.Q4_0.gguf gguf_files/llama-2-7b.Q4_0.requant_F32.gguf F32
          wget -O gguf_files/tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
      - name: Run inference
        run: |
          export GGUF_PATH=${PWD}/gguf_files/llama-2-7b.Q4_0.gguf
          export TOKENIZER_PATH=${PWD}/gguf_files/tokenizer.model
          export MODEL_NAME=llama-2-7b_Q4_0_gguf

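          # Eager-mode generation (no --pte-path) produces the reference output; the
          # exported .pte run below should match it at temperature 0.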
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 > ${PWD}/output_eager
          cat ${PWD}/output_eager

          python export.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte > ${PWD}/output_et
          cat ${PWD}/output_et

          echo "Tests complete."

      - name: Run quantized inference
        run: |
          export MODEL_DIR=/tmp
          export GGUF_PATH=${PWD}/gguf_files/llama-2-7b.Q4_0.gguf
          export TOKENIZER_PATH=${PWD}/gguf_files/tokenizer.model
          export MODEL_NAME=llama-2-7b_Q4_0_gguf

          python export.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******* Emb: channel-wise quantized ******"
          echo "******************************************"
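          # groupsize 0 means one quantization group per channel (channel-wise)
          # for the 8-bit embedding table.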
          python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******** Emb: group-wise quantized *******"
          echo "******************************************"
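          # groupsize 8 switches to group-wise quantization, with groups of 8 elements.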
          python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******* INT8 channel-wise quantized ******"
          echo "******************************************"
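          # The same channel-/group-wise schemes as above, applied to the linear
          # layers via the 'linear:int8' quantization key.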
          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******** INT8 group-wise quantized *******"
          echo "******************************************"
          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******** INT4 group-wise quantized *******"
          echo "******************************************"
          # python export.py --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          # cat ./output_et

          echo "Tests complete."
          echo "******************************************"