pytorch · mikekgfb · Apr 30, 2024 · Apr 27, 2024 · Apr 27, 2024
diff --git a/build/utils.py b/build/utils.py
@@ -9,9 +9,61 @@
 import logging
 import os
 from pathlib import Path
-from typing import Dict, List
+
+##########################################################################
+###                       unpack packed weights                        ###
+
+from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import torch
+import torch.nn.functional as F
+
+
+def unpack_packed_weights(
+    packed_weights: Dict[str, Any],
+    packed_linear: Callable,
+    input_dtype: torch.dtype,
+    unpacked_dims: Tuple,
+) -> torch.Tensor:
+    """Given a packed weight matrix `packed_weights`, a Callable
+    implementing a packed linear function for the packed format, and the
+    unpacked dimensions of the weights, recreate the unpacked weight
+    matrix.  In addition to the packed weights, as a dictionary to specify
+    whatever arguments the packed routine expects, we also need the input
+    data type because packing may depend on input dtype, or only some
+    input dtypes may be supported. We also need the dimensions of the
+    unpacked matrix.  At present, this does not handle padding, but that will
+    be straightforward to add. Similarly, the same approach can be used
+    for both linear and mm operators.
+
+        Args:
+            packed_weights: Dict[str, Any],
+            packed_linear: Callable,
+            input_dtype: torch.dtype,
+            unpacked_dims: Tuple,
+
+        Example usage:
+            packed_weights = {
+                 "weight" : weight_int4pack,
+                 "qGroupSize": groupsize,
+                 "scales_and_zeros": scales_and_zeros
+            }
+            unpacked_weights = unpack_packed_weights(
+                 packed_weights,
+                 _weight_int4pack_linear,
+                 torch.bfloat16,
+                 (256, 1024),
+            )
+
+
+    """
+    assert len(unpacked_dims) == 2, "unpacked_dims must be a tuple of length 2"
+    cols = unpacked_dims[1]
+
+    unpacked_weights = packed_linear(
+        torch.eye(cols, dtype=input_dtype), **packed_weights
+    ).transpose(0, 1)
+    return unpacked_weights
 
 
 ##########################################################################

diff --git a/eval.py b/eval.py
@@ -28,7 +28,6 @@
 torch._inductor.config.epilogue_fusion = False
 torch._inductor.config.triton.cudagraphs = True
 torch._dynamo.config.cache_size_limit = 100000
-import time
 
 try:
     import lm_eval

diff --git a/generate.py b/generate.py
@@ -210,7 +210,7 @@ def decode_n_tokens(
 ):
     new_tokens, new_probs = [], []
     encountered_eos = False
-    for i in range(
+    for _i in range(
         num_new_tokens - 1
     ):  # -1 to save space to run an EoS if dont generate it naturally
         # Actually better for Inductor to codegen attention here

diff --git a/quantize.py b/quantize.py
@@ -17,6 +17,7 @@
 import torch.nn.functional as F
 from build.utils import (
     find_multiple,
+    get_device_str,
     get_precision,
     name_to_dtype,
     state_dict_device,