add unit test and refine build flow

shewu-quic · shewu-quic · commit f1ce4b213994 · 2024-03-04T15:53:46.000+08:00
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
@@ -69,7 +69,6 @@ if [ "$BUILD_AARCH64" = true ]; then
     # If we build debug type, we need to change flatcc to flatcc_d
     cmake .. \
         -DCMAKE_INSTALL_PREFIX=$BUILD_ROOT \
-        -DCMAKE_BUILD_TYPE=RelWithDebInfo \
         -DEXECUTORCH_BUILD_QNN=ON \
         -DEXECUTORCH_BUILD_SDK=ON \
         -DFLATCC_TEST=OFF \
@@ -81,17 +80,8 @@ if [ "$BUILD_AARCH64" = true ]; then
         -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
         -DBUCK2=$BUCK2 \
         -B$BUILD_ROOT
-    # The following commands are to enable etdump to 
-    # build flatcc on x86 host (from ExternalProject_Add) 
-    # and to allow backend and runner to link to 
-    # the arch64 flatcc library (from install target).
-    rm -f $PRJ_ROOT/third-party/flatcc/lib/*
-    rm -f $PRJ_ROOT/third-party/flatcc/bin/*
-    cmake --build $BUILD_ROOT -j16 --target install --config Release
-    rm -f $PRJ_ROOT/third-party/flatcc/lib/*
-    rm -f $PRJ_ROOT/third-party/flatcc/bin/*
-    rm -f $BUILD_ROOT/lib/libflatcc.a
-    cmake --build $BUILD_ROOT -j16 --target install --config Release
+
+    cmake --build $BUILD_ROOT -j16 --target install
 
     EXAMPLE_ROOT=examples/qualcomm
     CMAKE_PREFIX_PATH="${BUILD_ROOT}/lib/cmake/ExecuTorch;${BUILD_ROOT}/third-party/gflags;"
diff --git a/backends/qualcomm/serialization/schema.fbs b/backends/qualcomm/serialization/schema.fbs
@@ -147,7 +147,7 @@ table QnnExecuTorchOptions {
   /// Check if on-device graph construction. Default is false.
   online_prepare:bool;
 
-  /// Profliing level of the delegate and the backend. Default is off.
+  /// Profiling level of the delegate and the backend. Default is off.
   profile_level:QnnExecuTorchProfileLevel;
 }
 
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -44,7 +44,7 @@ def setUp(self):
             debug=False,
             saver=False,
             online_prepare=TestQNN.online_prepare,
-            profile=False,
+            profile=TestQNN.enable_profile,
         )
 
     def test_qnn_backend_arange(self):
@@ -374,7 +374,7 @@ def setUp(self):
             debug=False,
             saver=False,
             online_prepare=TestQNN.online_prepare,
-            profile=False,
+            profile=TestQNN.enable_profile,
         )
 
     def test_qnn_backend_conv1d_relu_log_softmax(self):
@@ -466,7 +466,7 @@ def setUp(self):
             debug=False,
             saver=False,
             online_prepare=TestQNN.online_prepare,
-            profile=False,
+            profile=TestQNN.enable_profile,
         )
 
     def test_qnn_backend_arange(self):
@@ -843,7 +843,7 @@ def setUp(self):
             debug=False,
             saver=False,
             online_prepare=TestQNN.online_prepare,
-            profile=False,
+            profile=TestQNN.enable_profile,
         )
 
     def test_qnn_backend_conv1d_relu_log_softmax(self):
@@ -974,6 +974,23 @@ def test_qnn_backend_skip_node_op(self):
             skip_node_op_set={"aten.add.Tensor"},
         )
 
+    def test_qnn_backend_profile_op(self):
+        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
+            is_fp16=True,
+            soc_model=self.arch_table[TestQNN.model],
+            debug=False,
+            saver=False,
+            profile=True,
+        )
+        module = SimpleModel()  # noqa: F405
+        sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
+        self.lower_module_and_test_output(
+            module,
+            sample_input,
+            expected_partitions=1,
+            expected_profile_events=25,
+        )
+
 
 class TestQNNQuantizedUtils(TestQNN):
     def setUp(self):
@@ -1008,6 +1025,24 @@ def test_qnn_backend_skip_node_op(self):
             skip_node_op_set={"aten.add.Tensor"},
         )
 
+    def test_qnn_backend_profile_op(self):
+        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
+            is_fp16=False,
+            soc_model=self.arch_table[TestQNN.model],
+            debug=False,
+            saver=False,
+            profile=True,
+        )
+        module = SimpleModel()  # noqa: F405
+        sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(
+            module,
+            sample_input,
+            expected_partitions=1,
+            expected_profile_events=26,
+        )
+
 
 class TestExampleScript(TestQNN):
     def required_envs(self, conditions=None) -> bool:
@@ -1389,6 +1424,12 @@ def setup_environment():
         help="Conduct on-device graph compilation",
         action="store_true",
     )
+    parser.add_argument(
+        "-P",
+        "--enable_profile",
+        help="Profile the performance of each operator with kProfileDetailed profile level",
+        action="store_true",
+    )
     parser.add_argument(
         "-e",
         "--error_only",
@@ -1406,6 +1447,7 @@ def setup_environment():
     TestQNN.image_dataset = args.image_dataset
     TestQNN.pretrained_weight = args.pretrained_weight
     TestQNN.online_prepare = args.online_prepare
+    TestQNN.enable_profile = args.enable_profile
     TestQNN.error_only = args.error_only
     return sys.argv[:1] + ns_args
 
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import collections
+import copy
 import os
 import tempfile
 import unittest
@@ -27,6 +28,8 @@
 
 from executorch.exir.backend.backend_api import to_backend
 from executorch.exir.backend.compile_spec_schema import CompileSpec
+from executorch.sdk import generate_etrecord
+from executorch.sdk.inspector import Inspector
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 
 
@@ -51,6 +54,7 @@ class TestQNN(unittest.TestCase):
     artifact_dir: Literal = ""
     image_dataset: Literal = ""
     pretrained_weight: Literal = ""
+    enable_profile: bool = False
     online_prepare: bool = False
 
     def _assert_outputs_equal(self, model_output, ref_output):
@@ -100,6 +104,7 @@ def lower_module_and_test_output(
         module: torch.nn.Module,
         sample_inputs: Tuple[torch.Tensor],
         expected_partitions: int = 1,
+        expected_profile_events: int = -1,
         assert_output_equal: bool = True,
         skip_node_id_set: set = None,
         skip_node_op_set: set = None,
@@ -108,6 +113,10 @@ def lower_module_and_test_output(
             self.compiler_specs, skip_node_id_set, skip_node_op_set
         )
         delegated_program = capture_program(module, sample_inputs)
+
+        # this is needed for the ETRecord as lowering modifies the graph in-place
+        edge_copy = copy.deepcopy(delegated_program)
+
         delegated_program.exported_program = to_backend(
             delegated_program.exported_program, qnn_partitioner
         )
@@ -123,8 +132,12 @@ def lower_module_and_test_output(
                 QnnBackend.__name__,
             )
 
+        etrecord_path = "etrecord.bin"
+        if self.enable_profile:
+            generate_etrecord(etrecord_path, edge_copy, exec_prog)
+
         # Check numerics
-        if assert_output_equal:
+        if assert_output_equal or expected_profile_events != -1:
             with tempfile.TemporaryDirectory() as tmp_dir:
                 (
                     input_list,
@@ -137,6 +150,7 @@ def lower_module_and_test_output(
                     tmp_dir,
                 )
 
+                etdump_path = f"{tmp_dir}/etdump.etdp"
                 device_output_dir = f"{tmp_dir}/outputs"
                 device_outputs = []
 
@@ -149,6 +163,14 @@ def post_process():
                         output = torch.from_numpy(output).reshape(ref_outputs[i].shape)
                         device_outputs.append(output)
 
+                def validate_profile():
+                    inspector = Inspector(
+                        etdump_path=etdump_path, etrecord=etrecord_path
+                    )
+                    # Compare dataframe row count; assertEqual reports both values.
+                    profile_rows = inspector.to_dataframe().index
+                    self.assertEqual(len(profile_rows), expected_profile_events)
+
                 adb = SimpleADB(
                     qnn_sdk=os.getenv("QNN_SDK_ROOT"),
                     artifact_path=self.build_folder,
@@ -164,6 +186,9 @@ def post_process():
                 adb.pull(output_path=tmp_dir, callback=post_process)
                 self._assert_outputs_equal(device_outputs, ref_outputs)
 
+                if expected_profile_events != -1:
+                    adb.pull_etdump(etdump_path, callback=validate_profile)
+
     def get_qdq_module(
         self,
         module: torch.nn.Module,
diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
@@ -23,9 +23,14 @@ if(NOT TORCH_ROOT)
   set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
 endif()
 
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug)
+endif()
+
 # Find prebuilt libraries. executorch package should contain
 # portable_ops_lib, etdump, bundled_program.
 find_package(executorch CONFIG REQUIRED)
+target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)
 find_package(gflags REQUIRED)
 
 set(_common_compile_options -Wno-deprecated-declarations -fPIC)
@@ -56,6 +61,10 @@ generate_bindings_for_kernels(
   ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml ""
 )
 gen_operators_lib("full_portable_ops_lib" portable_kernels executorch)
+target_compile_options(full_portable_ops_lib
+    INTERFACE
+    -DET_EVENT_TRACER_ENABLED
+)
 target_include_directories(full_portable_ops_lib
     PUBLIC
     ${_common_include_directories}
@@ -89,7 +98,7 @@ target_link_libraries(qnn_executor_runner
     qnn_executorch_backend
     full_portable_ops_lib
     etdump
-    flatcc
+    ${FLATCC_LIB}
     gflags
 )
 target_compile_options(qnn_executor_runner
diff --git a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp
@@ -317,7 +317,11 @@ int main(int argc, char** argv) {
   // file.
   etdump_result result = etdump_gen.get_etdump_data();
   if (result.buf != nullptr && result.size > 0) {
-    ET_LOG(Info, "write etdump");
+    ET_LOG(
+        Info,
+        "Write etdump to %s, Size = %zu",
+        FLAGS_etdump_path.c_str(),
+        result.size);
     FILE* f = fopen(FLAGS_etdump_path.c_str(), "w+");
     fwrite((uint8_t*)result.buf, 1, result.size, f);
     fclose(f);
diff --git a/examples/qualcomm/scripts/utils.py b/examples/qualcomm/scripts/utils.py
@@ -51,6 +51,7 @@ def __init__(
         self.host_id = host_id
         self.working_dir = Path(self.pte_path).parent.absolute()
         self.input_list_filename = "input_list.txt"
+        self.etdump_path = f"{self.workspace}/etdump.etdp"
         self.output_folder = f"{self.workspace}/outputs"
         arch_table = {
             "SM8650": "75",
@@ -117,6 +118,7 @@ def execute(self):
                 f"--model_path {os.path.basename(self.pte_path)}",
                 f"--output_folder_path {self.output_folder}",
                 f"--input_list_path {self.input_list_filename}",
+                f"--etdump_path {self.etdump_path}",
             ]
         )
         qnn_executor_runner_cmds = " ".join(
@@ -134,6 +136,11 @@ def pull(self, output_path, callback=None):
         if callback:
             callback()
 
+    def pull_etdump(self, output_path, callback=None):
+        self._adb(["pull", f"{self.etdump_path}", output_path])
+        if callback:
+            callback()
+
 
 def build_executorch_binary(
     model,  # noqa: B006
diff --git a/sdk/CMakeLists.txt b/sdk/CMakeLists.txt
@@ -96,7 +96,7 @@ add_library(bundled_program_schema INTERFACE
             ${_bundled_program_schema__outputs})
 
 # Ensure the host tool is built before the main project
-add_dependencies(etdump_schema flatcc_cli)
+add_dependencies(flatcc etdump_schema)
 
 file(MAKE_DIRECTORY ${_program_schema__include_dir}/executorch/sdk/etdump)
 file(MAKE_DIRECTORY
@@ -108,6 +108,9 @@ add_custom_command(
     ${CMAKE_SOURCE_DIR}/third-party/flatcc/bin/flatcc -cwr -o
     ${_program_schema__include_dir}/executorch/sdk/etdump
     ${_etdump_schema__srcs}
+  COMMAND
+    rm -f ${CMAKE_SOURCE_DIR}/third-party/flatcc/bin/*
+    ${CMAKE_SOURCE_DIR}/third-party/flatcc/lib/*
   WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/sdk
   DEPENDS flatcc_project
   COMMENT "Generating etdump headers"

Original file line number	Diff line number	Diff line change
`@@ -147,7 +147,7 @@ table QnnExecuTorchOptions {`
`147`	`147`	`/// Check if on-device graph construction. Default is false.`
`148`	`148`	`online_prepare:bool;`
`149`	`149`
`150`		`- /// Profliing level of the delegate and the backend. Default is off.`
	`150`	`+ /// Profiling level of the delegate and the backend. Default is off.`
`151`	`151`	`profile_level:QnnExecuTorchProfileLevel;`
`152`	`152`	`}`
`153`	`153`