Qualcomm AI Engine Direct - FbNet enablement #2706

Closed · wants to merge 1 commit
1 change: 0 additions & 1 deletion backends/qualcomm/scripts/build.sh
@@ -71,7 +71,6 @@ if [ "$BUILD_AARCH64" = true ]; then
-DCMAKE_INSTALL_PREFIX=$BUILD_ROOT \
-DEXECUTORCH_BUILD_QNN=ON \
-DEXECUTORCH_BUILD_SDK=ON \
-DFLATCC_TEST=OFF \
@cccclai (Contributor) · Mar 31, 2024:

Any specific reason we turn it on? I guess I didn't realize it was OFF before.

@chunit-quic (Contributor, Author) · Apr 1, 2024:

We explicitly turned it OFF here before. Since PR 2466 recently turned it off by default, we no longer need to set it again here.

-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
60 changes: 57 additions & 3 deletions backends/qualcomm/tests/test_qnn_delegate.py
@@ -32,7 +32,8 @@
from executorch.examples.models.edsr import EdsrModel
from executorch.examples.models.inception_v3 import InceptionV3Model
from executorch.examples.models.inception_v4 import InceptionV4Model
from executorch.examples.models.llama2 import Llama2Model

# from executorch.examples.models.llama2 import Llama2Model
from executorch.examples.models.mobilebert import MobileBertModelExample
from executorch.examples.models.mobilenet_v2 import MV2Model
from executorch.examples.models.mobilenet_v3 import MV3Model
@@ -439,7 +440,8 @@ def test_qnn_backend_example_models(self):
            EdsrModel(),
            InceptionV3Model(),
            InceptionV4Model(),
            Llama2Model(),
            # The llama module changes frequently; re-enable once it is stable.
            # Llama2Model(),
            MV2Model(),
            MV3Model(),
            MobileBertModelExample(),
@@ -922,7 +924,8 @@ def test_qnn_backend_example_models(self):
{"module": EdsrModel(), "annotation": ()},
{"module": InceptionV3Model(), "annotation": ()},
{"module": InceptionV4Model(), "annotation": ()},
{"module": Llama2Model(), "annotation": ()},
# The module of llama is changing frequently. Reopen it when it's stable
# {"module": Llama2Model(), "annotation": ()},
{"module": MV2Model(), "annotation": ()},
{"module": MV3Model(), "annotation": ()},
# only works on QNN 2.12 so far
@@ -1221,6 +1224,51 @@ def test_qnn_backend_shared_buffer(self):
)


class TestExampleOssScript(TestQNN):
    def required_envs(self, conditions=None) -> bool:
        conditions = [] if conditions is None else conditions
        return all(
            [
                self.executorch_root,
                self.artifact_dir,
                *conditions,
            ]
        )

    def test_fbnet(self):
        if not self.required_envs([self.image_dataset]):
            self.skipTest("missing required envs")

        cmds = [
            "python",
            f"{self.executorch_root}/examples/qualcomm/oss_scripts/fbnet.py",
            "--dataset",
            self.image_dataset,
            "--artifact",
            self.artifact_dir,
            "--build_folder",
            self.build_folder,
            "--device",
            self.device,
            "--model",
            self.model,
            "--ip",
            self.ip,
            "--port",
            str(self.port),
        ]
        if self.host:
            cmds.extend(["--host", self.host])

        p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
        with Listener((self.ip, self.port)) as listener:
            conn = listener.accept()
            p.communicate()
            msg = json.loads(conn.recv())
            self.assertGreaterEqual(msg["top_1"], 60)
            self.assertGreaterEqual(msg["top_5"], 90)


class TestExampleScript(TestQNN):
    def required_envs(self, conditions=None) -> bool:
        conditions = [] if conditions is None else conditions
@@ -1442,6 +1490,9 @@ def test_deeplab_v3(self):
self.assertGreaterEqual(msg["MIoU"], 0.55)

def test_dummy_llama2(self):
self.skipTest(
"The module of llama is changing frequently. Reopen it when it's stable"
)
if not self.required_envs():
self.skipTest("missing required envs")

@@ -1476,6 +1527,9 @@ def test_dummy_llama2(self):

    @unittest.expectedFailure
    def test_ptq_dummy_llama2(self):
        self.skipTest(
            "The llama module changes frequently; re-enable this test once it is stable."
        )
        if not self.required_envs():
            self.skipTest("missing required envs")

6 changes: 3 additions & 3 deletions build/executorch-config.cmake
@@ -36,13 +36,13 @@ set_target_properties(
target_include_directories(portable_kernels INTERFACE ${_root})

if(CMAKE_BUILD_TYPE MATCHES "Debug")
set(FLATCC_LIB flatcc_d)
set(FLATCCRT_LIB flatccrt_d)
else()
set(FLATCC_LIB flatcc)
set(FLATCCRT_LIB flatccrt)
endif()

set(lib_list
etdump bundled_program extension_data_loader ${FLATCC_LIB} mpsdelegate
etdump bundled_program extension_data_loader ${FLATCCRT_LIB} mpsdelegate
qnn_executorch_backend portable_ops_lib extension_module xnnpack_backend
XNNPACK cpuinfo pthreadpool vulkan_backend optimized_kernels
optimized_ops_lib optimized_native_cpu_ops_lib
128 changes: 128 additions & 0 deletions examples/qualcomm/oss_scripts/fbnet.py
@@ -0,0 +1,128 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import os
import re
import sys
from multiprocessing.connection import Client

import numpy as np
import timm
from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype
from executorch.examples.qualcomm.scripts.inception_v4 import get_dataset
from executorch.examples.qualcomm.scripts.utils import (
    build_executorch_binary,
    make_output_dir,
    setup_common_args_and_variables,
    SimpleADB,
    topk_accuracy,
)


if __name__ == "__main__":
    parser = setup_common_args_and_variables()
    parser.add_argument(
        "-a",
        "--artifact",
        help="path for storing generated artifacts by this example. Default ./fbnet",
        default="./fbnet",
        type=str,
    )

    parser.add_argument(
        "-d",
        "--dataset",
        help=(
            "path to the validation folder of the ImageNet dataset, "
            "e.g. --dataset imagenet-mini/val "
            "(for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)"
        ),
        type=str,
        required=True,
    )

    args = parser.parse_args()

    if not args.compile_only and args.device is None:
        raise RuntimeError(
            "a device serial is required unless --compile_only is set. "
            "Please specify one with the -s/--device argument."
        )

    # ensure the working directory exists
    os.makedirs(args.artifact, exist_ok=True)

    instance = timm.create_model("fbnetc_100", pretrained=True).eval()

    data_num = 100
    inputs, targets, input_list = get_dataset(
        dataset_path=f"{args.dataset}",
        data_size=data_num,
    )

    pte_filename = "fbnet"

    build_executorch_binary(
        instance,
        inputs[0],
        args.model,
        f"{args.artifact}/{pte_filename}",
        inputs,
        quant_dtype=QuantDtype.use_8a8w,
    )

    if args.compile_only:
        sys.exit(0)

    adb = SimpleADB(
        qnn_sdk=os.getenv("QNN_SDK_ROOT"),
        artifact_path=f"{args.build_folder}",
        pte_path=f"{args.artifact}/{pte_filename}.pte",
        workspace=f"/data/local/tmp/executorch/{pte_filename}",
        device_id=args.device,
        host_id=args.host,
        soc_model=args.model,
    )
    adb.push(inputs=inputs, input_list=input_list)
    adb.execute()

    # collect output data
    output_data_folder = f"{args.artifact}/outputs"
    make_output_dir(output_data_folder)

    output_raws = []

    def post_process():
        for f in sorted(
            os.listdir(output_data_folder), key=lambda f: int(f.split("_")[1])
        ):
            filename = os.path.join(output_data_folder, f)
            # outputs are named output_<input index>_<output index>.raw; drop
            # every output tensor except the first, which is scored below
            if re.match(r"^output_[0-9]+_[1-9]\.raw$", f):
                os.remove(filename)
            else:
                output = np.fromfile(filename, dtype=np.float32)
                output_raws.append(output)

    adb.pull(output_path=args.artifact, callback=post_process)

    # top-k analysis
    predictions = []
    for i in range(data_num):
        predictions.append(
            np.fromfile(
                os.path.join(output_data_folder, f"output_{i}_0.raw"), dtype=np.float32
            )
        )

    k_val = [1, 5]
    topk = [topk_accuracy(predictions, targets, k).item() for k in k_val]
    if args.ip and args.port != -1:
        with Client((args.ip, args.port)) as conn:
            conn.send(json.dumps({f"top_{k}": topk[i] for i, k in enumerate(k_val)}))
    else:
        for i, k in enumerate(k_val):
            print(f"top_{k}->{topk[i]}%")
1 change: 1 addition & 0 deletions examples/qualcomm/oss_scripts/install_requirements.sh
@@ -0,0 +1 @@
pip install timm
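
timm is required because fbnet.py instantiates the network through it. A quick sanity check of the installed package, assuming network access to download the pretrained weights (the parameter-count print is illustrative only, not part of the example):

import timm

model = timm.create_model("fbnetc_100", pretrained=True).eval()
print(sum(p.numel() for p in model.parameters()))  # rough sanity check of the download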
3 changes: 3 additions & 0 deletions examples/qualcomm/scripts/dummy_llama2.py
@@ -39,6 +39,9 @@ def create_device_inputs(example_inputs, use_kv_cache):


if __name__ == "__main__":
    print(
Contributor:

Did you run into any issues with the script?

Contributor:

I tested it last week and it seemed OK.

@chunit-quic (Contributor, Author) · Apr 1, 2024:

Hi @cccclai,

We found some non-ideal behavior in our CI, so we think it is better to have this warning, for the following reasons:

1. In the 8a8w case, the output shape seems to differ from what it was before:
   python dummy_llama2.py --ptq 8a8w ...
2. In the 16a4w case, it now even fails to export:
   python dummy_llama2.py --ptq 16a4w ...
3. It prevents too many issues from being filed: users might want to try the script while we are still working on some of its components.

> I tested it last week and it seemed OK.

Would you mind sharing your command, please? We can also try to reproduce it and find the difference. Thanks! :D

Contributor:

Ah, I take my word back - I just tried exporting the model, and I see this error when I try to load it in the runtime:

[INFO] [Qnn ExecuTorch]: create QNN Logger with log_level 2
[WARNING] [Qnn ExecuTorch]:  <W> Initializing HtpProvider
[WARNING] [Qnn ExecuTorch]:  <W> Function not called, PrepareLib isn't loaded!
[INFO] [Qnn ExecuTorch]: Initialize Qnn backend parameters for Qnn executorch backend type 2
[INFO] [Qnn ExecuTorch]: Caching: Caching is in RESTORE MODE.
[WARNING] [Qnn ExecuTorch]:  <W> sg_stubPtr is not null, skip loadRemoteSymbols
[ERROR] [Qnn ExecuTorch]:  <E> DspTransport.openSession qnn_open failed, 0x80000406
[ERROR] [Qnn ExecuTorch]:  <E> IDspTransport: Unable to load lib 0x80000406
[ERROR] [Qnn ExecuTorch]:  <E> DspTransport failed,cannot open session, error 0x00000009
[ERROR] [Qnn ExecuTorch]:  <E> Unable to load Skel Library. transportStatus: 9
[ERROR] [Qnn ExecuTorch]:  <E> Failed to retrieve skel build id: err: 1008
[ERROR] [Qnn ExecuTorch]:  <E> Failed to create transport for device, error: 1008
[ERROR] [Qnn ExecuTorch]:  <E> Failed to load skel, error: 1008
[ERROR] [Qnn ExecuTorch]:  <E> Transport layer setup failed: 1008
[ERROR] [Qnn ExecuTorch]:  <E> Failed to parse default platform info: 1008
[ERROR] [Qnn ExecuTorch]:  <E> Failed to load default platform info: 1008
[ERROR] [Qnn ExecuTorch]:  <E> Failed to parse platform config: 1008
[ERROR] [Qnn ExecuTorch]: Failed to create device_handle for Backend ID 6, error=1008
E 00:00:00.245462 executorch:QnnManager.cpp:154] Fail to configure Qnn device
E 00:00:00.245471 executorch:QnnExecuTorchBackend.cpp:54] Fail to initialize Qnn Manager
E 00:00:00.245478 executorch:method.cpp:106] Init failed for backend QnnBackend: 0x1
F 00:00:00.245497 executorch:qnn_executor_runner.cpp:215] In function main(), assert failed (method.ok()): Loading of method forward failed with status 0x1
Aborted

Any chance you know the reason?

Contributor:

Oh, also, I think the code change in llama_transformer.py might be the culprit behind the issue you saw.

Contributor:

Actually, the error might be specific to me, because I only have an SM8450. I just opened an issue here: #2788

Contributor (Author):

> Oh, also, I think the code change in llama_transformer.py might be the culprit behind the issue you saw.

Thank you for pointing out the possibility. We will investigate it later.

> Actually, the error might be specific to me, because I only have an SM8450. I just opened an issue here: #2788

We will find an SM8450 device and try to reproduce it. Once we have any news, we will reply in issue #2788. Thank you for the report.

Contributor:

May I ask what device you've been using? Is it an SM8450?

Contributor (Author):

No, I usually work on an SM8550. I haven't even personally tested on an SM8450 device.

"[WARNING] The module of llama is changing frequently. This script might not work"
)
parser = setup_common_args_and_variables()
parser.add_argument(
"-a",