Skip to content

Commit 7edf990

Browse files
author
Guang Yang
committed
Added soc model param to get_qnn_partitioner
1 parent e4a2322 commit 7edf990

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

examples/models/llama2/export_llama_lib.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,11 +510,18 @@ def _export_llama(modelname, args) -> LLMEdgeManager: # noqa: C901
510510
modelname = f"coreml_{modelname}"
511511

512512
if args.qnn:
513+
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.serialization.qnn_compile_spec_schema`
514+
from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import (
515+
QcomChipset,
516+
)
513517
from executorch.extension.llm.custom_ops import model_sharding
514518

515519
partitioners.append(
516520
get_qnn_partitioner(
517-
args.use_kv_cache, args.pt2e_quantize, args.num_sharding
521+
QcomChipset.SM8650, # Llama 2 works only on SM8650
522+
args.use_kv_cache,
523+
args.pt2e_quantize,
524+
args.num_sharding,
518525
)
519526
)
520527
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`

extension/llm/export/partitioner_lib.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
import logging
78
from typing import Optional
89

10+
FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
11+
logging.basicConfig(level=logging.INFO, format=FORMAT)
12+
913

1014
def get_xnnpack_partitioner():
1115
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
@@ -105,6 +109,7 @@ def get_coreml_partitioner(
105109

106110

107111
def get_qnn_partitioner(
112+
soc_model,
108113
use_kv_cache: bool = False,
109114
pt2e_quantize: Optional[str] = None,
110115
num_sharding: int = 0,
@@ -118,11 +123,6 @@ def get_qnn_partitioner(
118123
QnnPartitioner,
119124
)
120125

121-
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.serialization.qnn_compile_spec_schema`
122-
from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import (
123-
QcomChipset,
124-
)
125-
126126
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`
127127
from executorch.backends.qualcomm.utils.utils import (
128128
generate_htp_compiler_spec,
@@ -133,14 +133,16 @@ def get_qnn_partitioner(
133133
"Please install the Qualcomm backend follwing https://pytorch.org/executorch/main/build-run-qualcomm-ai-engine-direct-backend.html"
134134
)
135135

136+
logging.info(f"Get QNN partitioner for {soc_model.name}.")
137+
136138
use_fp16 = True
137139
skip_node_op_set = {"llama.fallback.default"}
138140
if pt2e_quantize is not None:
139141
use_fp16 = False
140142

141143
return QnnPartitioner( # pyre-fixme[16]
142144
generate_qnn_executorch_compiler_spec( # pyre-fixme[16]
143-
soc_model=QcomChipset.SM8450, # default to SM8450 # pyre-fixme[16]
145+
soc_model=soc_model, # pyre-fixme[16]
144146
# pyre-fixme[16]
145147
backend_options=generate_htp_compiler_spec(
146148
use_fp16=use_fp16,

0 commit comments

Comments (0)