
Commit a81bfe2

Michael Gschwind authored and facebook-github-bot committed
Remove padding from Linear, supported by quantization

Summary: Remove padding from Linear, supported by quantization

Reviewed By: kimishpatel

Differential Revision: D54350128

fbshipit-source-id: 62ac2989df9990a57c173a8fadc6ddfd25673c0e
1 parent ac833fa commit a81bfe2
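
For context, the deleted code worked around a Linear weight whose input dimension is not a multiple of the quantization group size: it zero-padded the weight on the right before group-wise quantization and sliced the result back afterwards. Per the summary, the quantization path now supports such shapes directly, so the pad-then-slice round trip is no longer needed. Below is a minimal sketch of the removed arithmetic; the shapes and group_size are illustrative assumptions, and the variable names mirror the deleted lines.

import torch
import torch.nn.functional as F

# Illustrative shapes only (not taken from the model): a Linear weight of
# shape (out_features, in_features) whose in_features is not a multiple of
# the quantization group size.
group_size = 256
input_weight = torch.randn(32, 1000).float()

# The padding step this commit deletes from quantize.py: round the last
# dimension up to the next multiple of group_size so that group-wise
# quantization only ever sees whole groups.
input_weight_shape_1 = input_weight.shape[1]
if (group_size is not None) and (input_weight_shape_1 % group_size != 0):
    padding = group_size - (input_weight_shape_1 % group_size)  # 256 - 232 = 24
    input_weight = F.pad(input_weight, (0, padding))  # zero-pad columns on the right

print(input_weight.shape)  # torch.Size([32, 1024])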

File tree: 1 file changed, +3 −12 lines


examples/models/llama2/quantize.py

Lines changed: 3 additions & 12 deletions
@@ -22,13 +22,13 @@


 try:
-    # pyre-ignore
+    # pyre-ignore[21]: Undefined import.
     from fairseq2.nn.embedding import (
         Embedding as fsEmbedding,
         StandardEmbedding as fsStandardEmbedding,
     )

-    # pyre-ignore
+    # pyre-ignore[21]: Undefined import.
     from fairseq2.nn.projection import Linear as fsLinear
 except:
     print("Could not import fairseq2 modules.")
@@ -645,14 +645,6 @@ def create_quantized_state_dict(self) -> Dict:

                 # print(f"initial weight shape {mod.weight.shape}")
                 input_weight = mod.weight.float()
-                input_weight_shape_1 = input_weight.shape[1]
-                if (self.group_size is not None) and (
-                    input_weight_shape_1 % self.group_size != 0
-                ):
-                    padding = self.group_size - (
-                        input_weight_shape_1 % self.group_size
-                    )
-                    input_weight = F.pad(input_weight, (0, padding))

                 # print(f"expanded weight shape {input_weight.shape}")
                 weight, scales, _ = dynamically_quantize_per_channel(
@@ -663,9 +655,8 @@ def create_quantized_state_dict(self) -> Dict:
                     self.group_size,
                     scales_dtype=mod.weight.dtype,
                 )
-                unpadded_weight = weight[:, :input_weight_shape_1]

-                cur_state_dict[f"{fqn}.weight"] = unpadded_weight
+                cur_state_dict[f"{fqn}.weight"] = weight
                 # squeeze makes groupsize=rowsize unidimensional
                 cur_state_dict[f"{fqn}.scales"] = scales.squeeze(dim=-1)

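
The surviving scales.squeeze(dim=-1) line, per its comment, handles the groupsize == rowsize (per-channel) case: the quantizer then presumably returns one scale per output row with a trailing group dimension of 1, and the squeeze stores it as a flat vector. A small sketch with made-up shapes:

import torch

out_features = 8                      # illustrative; one group per row
scales = torch.rand(out_features, 1)  # shape (out_features, n_groups) with n_groups == 1
flat_scales = scales.squeeze(dim=-1)  # drop the trailing group dimension
print(flat_scales.shape)              # torch.Size([8])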
