Commit d538d43

Updated pass
1 parent 917fb0d commit d538d43

6 files changed: +17 -37 lines changed


examples/models/llama3_2_vision/runner/native.py

Lines changed: 5 additions & 1 deletion

@@ -19,6 +19,7 @@
 )

 from executorch.extension.pybindings.portable_lib import _load_for_executorch
+from executorch.extension.pybindings.portable_lib import _load_for_executorch_from_buffer

 # Load custom ops and quantized ops.
 from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
@@ -43,7 +44,10 @@ def __init__(self, args):
             use_kv_cache=args.kv_cache,
             vocab_size=params["vocab_size"],
         )
-        self.model = _load_for_executorch(args.pte)
+        with open(args.pte, "rb") as f:
+            model_bytes = f.read()
+        self.model = _load_for_executorch_from_buffer(model_bytes)
+        # self.model = _load_for_executorch(args.pte)
         self.use_kv_cache = args.kv_cache

     def forward(
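
For context, _load_for_executorch loads the program from a file path, while _load_for_executorch_from_buffer takes the serialized program as bytes the caller has already read. A minimal sketch of the two paths ("model.pte" is a placeholder path):

    from executorch.extension.pybindings.portable_lib import (
        _load_for_executorch,
        _load_for_executorch_from_buffer,
    )

    # Path-based: the runtime opens the .pte file itself.
    module = _load_for_executorch("model.pte")

    # Buffer-based: the caller reads the bytes and hands them over.
    with open("model.pte", "rb") as f:
        module = _load_for_executorch_from_buffer(f.read())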

exir/emit/_emitter.py

Lines changed: 6 additions & 2 deletions

@@ -1566,7 +1566,6 @@ def _find_fqn_for_placeholder(
             fqn = self.exported_program.graph_signature.inputs_to_parameters[target]

         elif target in self.exported_program.graph_signature.inputs_to_buffers:
-            breakpoint()
             fqn = self.exported_program.graph_signature.inputs_to_buffers[target]

             # if the buffer is mutated then record that
@@ -1603,6 +1602,7 @@ def placeholder(
         """
         spec = self.node.meta["spec"]
         constant_tag = self.node.meta.get("constant_tag", None)
+        initialize_buffer = self.node.meta.get("et_init_buffer", None)
         is_user_input = True

         if isinstance(target, str) and isinstance(spec, TensorSpec):
@@ -1657,7 +1657,11 @@ def placeholder(
             spec.storage = real_tensor.untyped_storage()

         # User inputs and mutable buffers are not constants, other buffers or parameters are.
-        spec.const = not is_user_input
+        if initialize_buffer:
+            assert is_mutable_buffer
+            spec.const = True
+        else:
+            spec.const = not (is_user_input or is_mutable_buffer)

         evalue = (
             self._tensor_spec_to_evalue(spec, constant_tag)
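
Taken on its own, the new constness rule says: a mutable buffer explicitly tagged with et_init_buffer is emitted as constant, so its initial value is serialized into the program; untagged mutable buffers and user inputs stay non-constant. A standalone sketch of that decision (the booleans stand in for values the emitter computes):

    def spec_is_const(is_user_input: bool, is_mutable_buffer: bool,
                      initialize_buffer: bool) -> bool:
        # Tagged mutable buffers keep their initial value in the .pte.
        if initialize_buffer:
            assert is_mutable_buffer
            return True
        # Parameters and non-mutable buffers are constant; user inputs
        # and ordinary mutable buffers are not.
        return not (is_user_input or is_mutable_buffer)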

exir/passes/init_mutable_buffer_pass.py

Lines changed: 4 additions & 30 deletions

@@ -13,35 +13,9 @@ class InitMutableBufferPass(ExportPass):
     def __init__(self) -> None:
         super().__init__()

-    def update_placeholder_tensor_specs(
-        self,
-        exported_program: torch.export.ExportedProgram,
-        graph_module: torch.fx.GraphModule,
-    ) -> None:
-        """
-        Update the tensor specs for all placeholder nodes such that
-        placeholders that are parameters are marked as constant.
-        """
-        for node in graph_module.graph.nodes:
-            if node.op != "placeholder":
-                continue
-            if "spec" not in node.meta:
-                raise RuntimeError(f"Placeholder node {node} missing meta['spec']")
-            # print(node)
-            spec = node.meta["spec"]
-            if (isinstance(node.target, str) and
-                node.target in exported_program.graph_signature.inputs_to_buffers and exported_program.graph_signature.inputs_to_buffers[node.target] in exported_program.state_dict):
-                # print(f"Setting {node.target}.const = True")
-                # breakpoint()
-                # print(exported_program.state_dict[exported_program.graph_signature.inputs_to_buffers[node.target]])
-                spec.const = True
-
-    # pyre-ignore
     def placeholder(self, name: str, arg, meta):
-        # print(name)
-        meta["spec"] = make_spec(arg, const=meta.data['spec'].const)
-        # if name == "b_kv_cache_cache_pos":
-        #     print("breakpoint")
-        #     breakpoint()
-
+        if "cache_pos" in name:
+            meta["et_init_buffer"] = True
+
         return super().placeholder(name, arg, meta)
+
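
The slimmed-down pass no longer rewrites tensor specs itself; it only tags placeholders whose name contains "cache_pos" so the emitter can pick the flag up later. A hedged usage sketch, assuming the standard ExportPass protocol (calling a pass instance on a GraphModule returns a PassResult):

    result = InitMutableBufferPass()(graph_module)
    for node in result.graph_module.graph.nodes:
        if node.op == "placeholder" and "cache_pos" in node.name:
            # The tag that exir/emit/_emitter.py consumes.
            assert node.meta.get("et_init_buffer") is True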

exir/program/_program.py

Lines changed: 0 additions & 2 deletions

@@ -1354,8 +1354,6 @@ def to_executorch(
         gm, new_signature = insert_write_back_for_buffers_pass(program)
         new_gm = program.graph_module
         for p in edge_to_executorch_passes(config, name):
-            if isinstance(p, InitMutableBufferPass):
-                p.update_placeholder_tensor_specs(program, new_gm)
             new_gm_res = p(new_gm)
             assert new_gm_res is not None
             new_gm = new_gm_res.graph_module
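
With the hook gone, the pipeline needs no type checks: InitMutableBufferPass carries its effect in node metadata, so it composes like any other pass. A generic sketch of that contract (run_passes is a hypothetical helper, not part of the diff):

    def run_passes(gm, passes):
        # Uniform contract: each pass maps a GraphModule to a PassResult.
        for p in passes:
            result = p(gm)
            assert result is not None
            gm = result.graph_module
        return gm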

extension/llm/export/builder.py

Lines changed: 1 addition & 1 deletion

@@ -414,7 +414,7 @@ def to_executorch(self) -> "LLMEdgeManager":
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
-        print(self.export_program.to_executorch_program(verbose=True))
+        print(self.export_program.dump_executorch_program(verbose=True))
         logging.info(
             "Required memory for activation in bytes: {}".format(
                 self.export_program._emitter_output.program.execution_plan[
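
For reference, dump_executorch_program is the debug-dump entry point on the program manager; the sketch below mirrors the fixed call in isolation (assuming manager is the ExecutorchProgramManager held in self.export_program):

    # Dump the emitted program (execution plan, operators, values);
    # useful when checking which buffers were serialized as constants.
    print(manager.dump_executorch_program(verbose=True))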

extension/llm/modules/kv_cache.py

Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@ def __init__(
             "v_cache", torch.zeros(cache_shape, dtype=dtype), persistent=False
         )
         self.register_buffer(
-            "cache_pos", torch.arange(0, self.max_seq_len), persistent=True
+            "cache_pos", torch.arange(0, self.max_seq_len), persistent=False
         )
         self.batch_size = batch_size

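persistent=False is standard torch.nn.Module behavior: the buffer still lives on the module and moves with .to(), but it is excluded from state_dict(), so checkpoints neither save nor expect cache_pos. A quick self-contained illustration:

    import torch
    from torch import nn

    class Demo(nn.Module):
        def __init__(self, max_seq_len: int = 8):
            super().__init__()
            self.register_buffer(
                "cache_pos", torch.arange(0, max_seq_len), persistent=False
            )

    m = Demo()
    assert m.cache_pos.shape == (8,)          # buffer exists on the module
    assert "cache_pos" not in m.state_dict()  # but is not serialized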