Commit 99c897c

lucylq authored and facebook-github-bot committed
add export configs (pytorch#2965)
Summary:
Pull Request resolved: pytorch#2965

Differential Revision: D55953027
1 parent 62a4dd3 commit 99c897c

4 files changed: 62 additions & 5 deletions

examples/models/llama2/builder.py

Lines changed: 6 additions & 2 deletions
@@ -62,7 +62,8 @@ def to_torch_dtype(self) -> torch.dtype:
 
 def load_llama_model(
     *,
-    checkpoint: str,
+    checkpoint: Optional[str] = None,
+    checkpoint_dir: Optional[str] = None,
     params_path: str,
     use_kv_cache: bool = False,
     use_sdpa_with_kv_cache: bool = False,
@@ -76,14 +77,17 @@ def load_llama_model(
     Returns:
         An instance of LlamaEdgeManager which contains the eager mode model.
     """
-    assert checkpoint and params_path, "Both checkpoint and params can't be empty"
+    assert (
+        checkpoint or checkpoint_dir
+    ) and params_path, "Both checkpoint/checkpoint_dir and params can't be empty"
     logging.info(
         f"Loading model with checkpoint={checkpoint}, params={params_path}, use_kv_cache={use_kv_cache}, weight_type={weight_type}"
     )
     model, example_inputs, _ = EagerModelFactory.create_model(
         "llama2",
         "Llama2Model",
         checkpoint=checkpoint,
+        checkpoint_dir=checkpoint_dir,
         params=params_path,
         use_kv_cache=use_kv_cache,
         use_sdpa_with_kv_cache=use_sdpa_with_kv_cache,
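
With this change either a single checkpoint file or a checkpoint directory satisfies the assertion. A minimal usage sketch, assuming the example package is importable as examples.models.llama2 (the paths below are placeholders, not part of this change):

    # Hypothetical call site; paths are illustrative placeholders.
    from examples.models.llama2.builder import load_llama_model

    edge_manager = load_llama_model(
        checkpoint_dir="/path/to/sharded_llama/",  # directory holding consolidated.*.pth shards
        params_path="/path/to/params.json",
        use_kv_cache=True,
    )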

examples/models/llama2/export_llama_lib.py

Lines changed: 12 additions & 1 deletion
@@ -242,6 +242,13 @@ def build_args_parser() -> argparse.ArgumentParser:
         default=f"{ckpt_dir}/params/demo_rand_params.pth",
         help="checkpoint path",
     )
+
+    parser.add_argument(
+        "--checkpoint_dir",
+        default=None,
+        help="checkpoint directory. Use with a sharded checkpoint, not for the standard llama2 model. Note, checkpoint_dir takes precedence over checkpoint if both are set.",
+    )
+
     parser.add_argument(
         "--calibration_tasks",
         nargs="+",
@@ -417,7 +424,10 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
     """
 
     # load model from checkpoint and params.json
-    checkpoint_path = canonical_path(args.checkpoint)
+    checkpoint_path = canonical_path(args.checkpoint) if args.checkpoint else None
+    checkpoint_dir = (
+        canonical_path(args.checkpoint_dir) if args.checkpoint_dir else None
+    )
     params_path = canonical_path(args.params)
     output_dir_path = canonical_path(args.output_dir, dir=True)
     modelname = "llama2"
@@ -485,6 +495,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
     return (
         load_llama_model(
             checkpoint=checkpoint_path,
+            checkpoint_dir=checkpoint_dir,
            params_path=params_path,
             use_kv_cache=args.use_kv_cache,
             use_sdpa_with_kv_cache=args.use_sdpa_with_kv_cache,
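
A hedged sketch of driving the new flag programmatically through the functions touched above; the --params flag and the placeholder paths are assumed from the surrounding parser, not introduced by this commit (from the command line the flag is passed the same way, as --checkpoint_dir):

    # Hypothetical programmatic sketch; paths are placeholders.
    from examples.models.llama2.export_llama_lib import (
        _prepare_for_llama_export,
        build_args_parser,
    )

    args = build_args_parser().parse_args(
        ["--checkpoint_dir", "/path/to/sharded_llama/", "--params", "/path/to/params.json"]
    )
    edge_manager = _prepare_for_llama_export("llama2", args)

Per the new help text, --checkpoint_dir takes precedence over --checkpoint when both are supplied.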

examples/models/llama2/llama_transformer.py

Lines changed: 7 additions & 1 deletion
@@ -324,6 +324,8 @@ def __init__(self, args: ModelArgs):
         multiple_of = args.multiple_of
         hidden_dim = 4 * dim
         hidden_dim = int(2 * hidden_dim / 3)
+        if args.ffn_dim_multiplier is not None:
+            hidden_dim = int(args.ffn_dim_multiplier * hidden_dim)
         hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)
 
         self.w1 = nn.Linear(dim, hidden_dim, bias=False)
@@ -425,7 +427,11 @@ def __init__(self, params: ModelArgs):
 
         freqs_cos, freqs_sin = precompute_freqs_cis(
             params.dim // params.n_heads,
-            params.max_seq_len,
+            (
+                params.max_seq_len  # Normal llama2.
+                if params.ffn_dim_multiplier is None
+                else params.max_seq_len * 2  # Sharded checkpoint.
+            ),
             params.rope_freq_base,
         )
         self.register_buffer("freqs_cos", freqs_cos, persistent=False)
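
The first hunk mirrors the reference Llama FFN sizing, and the second uses ffn_dim_multiplier as a proxy for the sharded model when sizing the RoPE tables. A standalone sketch of the hidden-dimension arithmetic; the 7B/70B hyperparameter values below are the published Llama 2 configs and are used only as an illustration:

    # Self-contained sketch of the FFN hidden-dim computation added above.
    def ffn_hidden_dim(dim: int, multiple_of: int, ffn_dim_multiplier=None) -> int:
        hidden_dim = int(2 * (4 * dim) / 3)
        if ffn_dim_multiplier is not None:
            hidden_dim = int(ffn_dim_multiplier * hidden_dim)
        # Round up to the nearest multiple of `multiple_of`.
        return multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)

    print(ffn_hidden_dim(4096, 256))        # 11008 -> Llama 2 7B, no multiplier
    print(ffn_hidden_dim(8192, 4096, 1.3))  # 28672 -> Llama 2 70B, multiplier 1.3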

examples/models/llama2/model.py

Lines changed: 37 additions & 1 deletion
@@ -4,7 +4,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+
 import json
+import os
 from pathlib import Path
 
 import torch
@@ -48,6 +50,12 @@ def __init__(self, **kwargs):
         # The 1st way
         ckpt_dir = Path(__file__).absolute().parent / "params"
 
+        # Check if checkpoint_dir was provided for a sharded checkpoint.
+        checkpoint_dir = (
+            kwargs["checkpoint_dir"] if "checkpoint_dir" in kwargs else None
+        )
+
+        # Use single checkpoint file.
         checkpoint_path = (
             kwargs["checkpoint"]
             if "checkpoint" in kwargs
@@ -72,7 +80,35 @@ def __init__(self, **kwargs):
         # Follow the instruction in https://github.com/facebookresearch/llama to download the model
         device = "cpu"
         # flake8: noqa: TOR102
-        checkpoint = torch.load(checkpoint_path, map_location=device, mmap=True)
+        cps = []
+        if checkpoint_dir is not None:
+            # Load multiple checkpoint; ignore the single path.
+            checkpoint_path = None
+            for i in range(4):
+                cp_name = f"consolidated.{i}.pth"
+                print(f"Loading {cp_name}")
+                cps.append(
+                    torch.load(
+                        os.path.join(checkpoint_dir, cp_name),
+                        map_location=device,
+                        mmap=True,
+                    )
+                )
+            checkpoint = {}
+            for key in cps[0].keys():
+                if not torch.allclose(cps[0][key], cps[1][key]):
+                    values = (cps[0][key], cps[1][key], cps[2][key], cps[3][key])
+                    if "wo" in key or "w2" in key:
+                        # Concat on dim=1 for "wo" and "w2".
+                        checkpoint[key] = torch.cat(values, dim=1)
+                    else:
+                        # Concat on dim=0 for everything else.
+                        checkpoint[key] = torch.cat(values, dim=0)
+                else:
+                    # Do not duplicate layers shared between each checkpoint.
+                    checkpoint[key] = cps[0][key]
+        else:
+            checkpoint = torch.load(checkpoint_path, map_location=device, mmap=True)
 fairseq2_checkpoint = kwargs.get("fairseq2", False)
 if fairseq2_checkpoint:
 print("Using fairseq2 checkpoint")
