Creating an initial Quantization Directory #863


Merged 7 commits on Jun 21, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/pull.yml
@@ -725,7 +725,7 @@ jobs:
       run: |
         mkdir gguf_files
         wget -O gguf_files/llama-2-7b.Q4_0.gguf "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf?download=true"
-        ./llama.cpp/quantize --allow-requantize gguf_files/llama-2-7b.Q4_0.gguf gguf_files/llama-2-7b.Q4_0.requant_F32.gguf F32
+        ./llama.cpp/llama-quantize --allow-requantize gguf_files/llama-2-7b.Q4_0.gguf gguf_files/llama-2-7b.Q4_0.requant_F32.gguf F32
     - name: Load files
       run: |
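This hunk tracks llama.cpp's rename of its `quantize` binary to `llama-quantize`. A hedged sketch of how a script could stay compatible with either checkout; the candidate paths mirror the workflow above, but the fallback logic is illustrative and not part of this PR:

# Hedged sketch: probe for the renamed llama.cpp binary, falling back to the
# old name for older checkouts. Paths mirror the workflow step above.
import os

candidates = ["./llama.cpp/llama-quantize", "./llama.cpp/quantize"]
quantize_bin = next((p for p in candidates if os.access(p, os.X_OK)), None)
if quantize_bin is None:
    raise FileNotFoundError("no llama.cpp quantize binary found")
print(f"using {quantize_bin}")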
2 changes: 1 addition & 1 deletion build/builder.py
@@ -17,7 +17,7 @@
 import torch._inductor.config

 from config.model_config import resolve_model_config
-from quantize import quantize_model
+from quantization.quantize import quantize_model

 from build.model import Transformer
 from build.utils import device_sync, is_cpu_device, is_cuda_or_cpu_device, name_to_dtype
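The same one-line import move recurs in build/gguf_loader.py and build/gguf_util.py below. A hedged sketch of how external code could tolerate either layout during the transition; the try/except fallback is illustrative only, since the PR itself simply moves the module:

# Hedged sketch: resolve quantize_model across the old and new module layouts.
# The fallback is an assumption for illustration; this PR just moves the file.
try:
    from quantization.quantize import quantize_model  # layout after this PR
except ImportError:
    from quantize import quantize_model  # pre-move, top-level layout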
2 changes: 1 addition & 1 deletion build/gguf_loader.py
@@ -14,7 +14,7 @@
 import torch

 from gguf import GGUFValueType
-from quantize import pack_scales_and_zeros, WeightOnlyInt4Linear
+from quantization.quantize import pack_scales_and_zeros, WeightOnlyInt4Linear

 from build.gguf_util import Q4_0, to_float
 from build.model import ModelArgs, Transformer
2 changes: 1 addition & 1 deletion build/gguf_util.py
@@ -6,7 +6,7 @@

 import gguf
 import torch
-from quantize import group_dequantize_tensor_from_qparams
+from quantization.quantize import group_dequantize_tensor_from_qparams


 def to_float(t: gguf.gguf_reader.ReaderTensor):
2 changes: 1 addition & 1 deletion docs/quantization.md
@@ -99,7 +99,7 @@ for valid `bitwidth` and `groupsize` values.
 | linear with HQQ (asymmetric) |`'{"linear:hqq" : {"groupsize" : <groupsize>}}'`|
 | embedding | `'{"embedding": {"bitwidth": <bitwidth>, "groupsize":<groupsize>}}'` |

-See the available quantization schemes [here](https://github.com/pytorch/torchchat/blob/main/quantize.py#L1260-L1266).
+See the available quantization schemes [here](https://github.com/pytorch/torchchat/blob/main/quantization/quantize.py#L1260-L1266).

 ## Examples
 We can mix and match weight quantization with embedding quantization.
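To make the doc's "mix and match" point concrete, a hedged example of composing a weight scheme with the embedding scheme from the table above; the scheme names follow the docs, but the groupsize and bitwidth values are illustrative:

# Hedged example: combine weight and embedding quantization options in the
# JSON shape the table above describes. Values are illustrative only.
import json

config = {
    "linear:int4": {"groupsize": 256},
    "embedding": {"bitwidth": 4, "groupsize": 32},
}
print(json.dumps(config))  # the JSON string passed via the --quantize flag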
4 changes: 2 additions & 2 deletions install_requirements.sh
@@ -46,7 +46,7 @@ $PIP_EXECUTABLE install -r requirements.txt --extra-index-url https://download.p
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-NIGHTLY_VERSION=dev20240604
+NIGHTLY_VERSION=dev20240613

 # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
 $PIP_EXECUTABLE uninstall -y triton
@@ -63,7 +63,7 @@ fi

 # pip packages needed by exir.
 REQUIREMENTS_TO_INSTALL=(
-  torch=="2.4.0.${NIGHTLY_VERSION}"
+  torch=="2.5.0.${NIGHTLY_VERSION}"
 )

 # Install the requirements. `--extra-index-url` tells pip to look for package
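The two edits here are coupled: the nightly date and the base version must name the same published nightly build. A hedged sketch of how the pinned requirement string resolves, mirroring the shell above:

# Hedged sketch: the date suffix and base version together name one nightly,
# so both lines in the script must be bumped in lockstep.
NIGHTLY_VERSION = "dev20240613"
requirement = f"torch==2.5.0.{NIGHTLY_VERSION}"
print(requirement)  # torch==2.5.0.dev20240613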
2 changes: 1 addition & 1 deletion qops.py → quantization/qops.py
@@ -390,7 +390,7 @@ def _check_k(cls, *, k, groupsize=1, inner_k_tiles=1):
     def _prepare_weight_and_scales_and_zeros(
         cls, weight_bf16, groupsize, inner_k_tiles
     ):
-        from quantize import group_quantize_tensor
+        from quantization.quantize import group_quantize_tensor

         weight_int32, scales_and_zeros = group_quantize_tensor(
             weight_bf16, n_bit=4, groupsize=groupsize
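Note that the import stays inside the method body rather than at module top: quantization/quantize.py imports names from quantization/qops.py (see the next file), so a top-level import in the other direction would be circular, and deferring it to call time breaks the cycle. A minimal illustration of the pattern; the wrapper function name is hypothetical:

# Hedged illustration of the deferred-import pattern used above. A top-level
# "from quantization.quantize import ..." in qops.py would be circular,
# because quantization/quantize.py itself imports from quantization.qops.
def prepare(weight, groupsize):
    from quantization.quantize import group_quantize_tensor  # deferred on purpose
    return group_quantize_tensor(weight, n_bit=4, groupsize=groupsize)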
2 changes: 1 addition & 1 deletion quantize.py → quantization/quantize.py
@@ -23,7 +23,7 @@
     state_dict_device,
 )

-from qops import (
+from quantization.qops import (
     LinearAct8Int4DQ,
     LinearInt4 as WeightOnlyInt4Linear,
     LinearInt8 as WeightOnlyInt8Linear,
4 changes: 2 additions & 2 deletions requirements.txt
@@ -15,9 +15,9 @@ tiktoken
 # Miscellaneous
 snakeviz
 sentencepiece
-numpy
+numpy < 2.0
 gguf
-lm-eval==0.4
+lm-eval==0.4.2
 blobfile

 # Build tools
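Both pins tighten previously loose constraints. A hedged runtime check mirroring the `numpy < 2.0` cap; the ABI-compatibility rationale is a common motivation for such pins, not something stated in this PR:

# Hedged check mirroring the requirements pin: extensions compiled against
# NumPy 1.x may fail to load under NumPy 2.0, a common reason for a "< 2.0" cap.
import numpy as np

major = int(np.__version__.split(".")[0])
assert major < 2, f"expected numpy < 2.0, got {np.__version__}"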