fix(model): Add support for bias wqkv tensor in Attention

gabe-l-hart · gabe-l-hart · commit 964ae69465d0 · 2024-10-04T09:27:17.000-06:00
Branch: GraniteCodeSupport

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/torchchat/model.py b/torchchat/model.py
@@ -769,14 +769,16 @@ def load_hook(self, state_dict, prefix, *args):
         #     wv = state_dict.pop(prefix + "wv.weight")
         #     state_dict[prefix + "wqkv.weight"] = torch.cat([wq, wk, wv])
 
-        if prefix + "wqkv.weight" in state_dict:
-            wqkv = state_dict.pop(prefix + "wqkv.weight")
-            q_size = self.n_heads * self.head_dim
-            kv_size = self.n_local_heads * self.head_dim
-            wq, wk, wv = torch.split(wqkv, (q_size, kv_size, kv_size), dim=0)
-            state_dict[prefix + "wq.weight"] = wq
-            state_dict[prefix + "wk.weight"] = wk
-            state_dict[prefix + "wv.weight"] = wv
+        for tensor_suffix in ["weight", "bias"]:
+            wqkv_key = f"{prefix}wqkv.{tensor_suffix}"
+            if wqkv_key in state_dict:
+                wqkv = state_dict.pop(wqkv_key)
+                q_size = self.n_heads * self.head_dim
+                kv_size = self.n_local_heads * self.head_dim
+                wq, wk, wv = torch.split(wqkv, (q_size, kv_size, kv_size), dim=0)
+                state_dict[f"{prefix}wq.{tensor_suffix}"] = wq
+                state_dict[f"{prefix}wk.{tensor_suffix}"] = wk
+                state_dict[f"{prefix}wv.{tensor_suffix}"] = wv
 
         return