pytorch
diff --git a/‎.ci/docker/common/install_base.sh
Lines changed: 5 additions & 0 deletions b/‎.ci/docker/common/install_base.sh
Lines changed: 5 additions & 0 deletions
diff --git a/‎.ci/docker/common/install_conda.sh
Lines changed: 10 additions & 2 deletions b/‎.ci/docker/common/install_conda.sh
Lines changed: 10 additions & 2 deletions
diff --git a/‎.github/workflows/lint.yml
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/lint.yml
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/arm/_passes/decompose_select.py
Lines changed: 1 addition & 2 deletions b/‎backends/arm/_passes/decompose_select.py
Lines changed: 1 addition & 2 deletions
diff --git a/‎backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp
Lines changed: 2 additions & 1 deletion b/‎backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
Lines changed: 35 additions & 0 deletions b/‎backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
Lines changed: 35 additions & 0 deletions
diff --git a/‎backends/qualcomm/tests/test_qnn_delegate.py
Lines changed: 157 additions & 1 deletion b/‎backends/qualcomm/tests/test_qnn_delegate.py
Lines changed: 157 additions & 1 deletion
diff --git a/‎backends/qualcomm/tests/utils.py
Lines changed: 51 additions & 0 deletions b/‎backends/qualcomm/tests/utils.py
Lines changed: 51 additions & 0 deletions
@@ -26,6 +26,11 @@ install_ubuntu() {
     libssl-dev \
     zip
 
+  # These libraries are needed by TorchVision
+  apt-get install -y --no-install-recommends \
+    libjpeg-dev \
+    libpng-dev
+
   # Cleanup package manager
   apt-get autoclean && apt-get clean
   rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
@@ -31,8 +31,16 @@ install_miniconda() {
 
 install_python() {
   pushd /opt/conda
-  # Install the correct Python version
+  # Install the selected Python version for CI jobs
   as_ci_user conda create -n "py_${PYTHON_VERSION}" -y --file /opt/conda/conda-env-ci.txt python="${PYTHON_VERSION}"
+
+  # From https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh
+  if [[ $(uname -m) == "aarch64" ]]; then
+    conda_install "openblas==0.3.28=*openmp*"
+  else
+    conda_install mkl=2022.1.0 mkl-include=2022.1.0
+  fi
+
   popd
 }
 
@@ -53,7 +61,7 @@ fix_conda_ubuntu_libstdcxx() {
   # PyTorch sev: https://github.com/pytorch/pytorch/issues/105248
   # Ref: https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh
   if grep -e "2[02].04." /etc/issue >/dev/null; then
-    rm "/opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so.6"
+    rm /opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so*
   fi
 }
 
 
@@ -31,7 +31,7 @@ jobs:
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
-        
+
         # For mypy linting, we need to install executorch first so that
         # it builds the python package information.
         BUILD_TOOL="cmake"
@@ -74,6 +74,7 @@ jobs:
       docker-image: executorch-ubuntu-22.04-linter
       fetch-depth: 0
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
       script: |
         FILES_NEEDS_FORMAT=$(/opt/google-java-format -n extension/android/src/main/java/org/pytorch/executorch/*.java \
           examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/*.java \
 
@@ -37,14 +37,13 @@ def call(self, graph_module: torch.fx.GraphModule):
             rank = len(input_node.meta["val"].size())
             dim = dim % rank if dim < 0 else dim
             index = index % rank if index < 0 else index
-            dim_list = list(range(rank))
 
             with graph_module.graph.inserting_before(node):
                 slice_node = create_node(
                     graph_module.graph, slice_op, (input_node, dim, index, index + 1)
                 )
                 squeeze_node = create_node(
-                    graph_module.graph, squeeze_op, (slice_node, dim_list)
+                    graph_module.graph, squeeze_op, (slice_node, [dim])
                 )
 
             node.replace_all_uses_with(squeeze_node)
 
@@ -49,7 +49,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
       .def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
       .def(
           "MakeBinaryInfo",
-          py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
+          py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
+      .def("StripProtocol", &PyQnnManager::StripProtocol);
 }
 } // namespace qnn
 } // namespace backends
 
@@ -390,6 +390,41 @@ class PyQnnManager {
     return result;
   }
 
+  // Returns a copy of the payload wrapped inside `preprocessed_binary`.
+  //
+  // The buffer is first decoded as a QNN context custom buffer. If that does
+  // not report Error::Ok, it is decoded as a qcir custom buffer, in which case
+  // only the flatbuffers section is returned (the tensor section is decoded
+  // but not copied). If neither decode succeeds, an array constructed with
+  // size 0 is returned.
+  //
+  // NOTE(review): only `info.ptr` is handed to the deserializers; whether they
+  // bound-check against the buffer length cannot be told from this method.
+  py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
+    py::buffer_info info(py::buffer(preprocessed_binary).request());
+
+    // Location and length of the payload selected by the checks below.
+    void* buf_ptr = nullptr;
+    size_t buf_size = 0;
+    // check if it's a qnn context binary
+    auto [status, signature, ctx_size, ctx_bin] =
+        QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);
+
+    if (status == Error::Ok) {
+      buf_size = ctx_size;
+      buf_ptr = ctx_bin;
+    } else {
+      // check if it's a qcir flatbuffers, return fbs if matched
+      // Note: this `status` is a new binding that shadows the outer one for
+      // the rest of the else block.
+      auto
+          [status,
+           qcir_fbs_size,
+           qcir_tensor_size,
+           qcir_fbs_ptr,
+           qcir_tensor_ptr] =
+              QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr);
+      if (status == Error::Ok) {
+        buf_size = qcir_fbs_size;
+        buf_ptr = qcir_fbs_ptr;
+      } else {
+        // the format should be DLC, return nothing here
+        return py::array_t<char>(0);
+      }
+    }
+    // Copy the selected payload into a newly created array before returning.
+    auto result = py::array_t<char>(buf_size);
+    auto result_buffer = result.request();
+    std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
+    return result;
+  }
+
  private:
   // Store the bytes object instead of a raw pointer so that this module will
   // keep the bytes alive.
 
@@ -20,6 +20,8 @@
     QuantDtype,
     TestQNN,
     to_backend,
+    validate_context_binary,
+    validate_qcir,
 )
 from executorch.backends.qualcomm.utils.constants import (
     QCOM_ANNOTATION,
@@ -30,10 +32,12 @@
 
 from executorch.backends.qualcomm.utils.utils import (
     capture_program,
+    dump_context_from_pte,
     from_context_binary,
     generate_htp_compiler_spec,
     generate_multi_graph_program,
     generate_qnn_executorch_compiler_spec,
+    PyQnnManagerAdaptor,
     skip_annotation,
     update_spill_fill_size,
 )
@@ -2041,6 +2045,81 @@ def test_qnn_backend_context_direct(self):
                 bundle_program["edge_program_manager"].to_executorch(),
             )
 
+    def test_qnn_backend_context_extraction(self):
+        """Check that StripProtocol output is accepted by the matching validator.
+
+        SimpleModel is lowered once per compiler spec: self.compiler_specs,
+        then a spec built with online_prepare=True. zip pairs the first spec
+        with validate_context_binary and the second with validate_qcir.
+        """
+        from executorch.exir import EdgeCompileConfig, EdgeProgramManager
+
+        module = SimpleModel()  # noqa: F405
+        sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
+        backend_options = generate_htp_compiler_spec(use_fp16=True)
+        compiler_specs = [
+            self.compiler_specs,
+            generate_qnn_executorch_compiler_spec(
+                soc_model=self.chipset_table[TestQNN.model],
+                backend_options=backend_options,
+                online_prepare=True,
+            ),
+        ]
+        # Order matters: validators[i] is applied to the result of compiler_specs[i].
+        validators = [validate_context_binary, validate_qcir]
+
+        for compiler_spec, validate in zip(compiler_specs, validators):
+            edge_prog_mgr = EdgeProgramManager(
+                edge_programs={
+                    "forward": capture_program(module, sample_input).exported_program
+                },
+                compile_config=EdgeCompileConfig(_use_edge_ops=False),
+            ).to_backend(QnnPartitioner(compiler_spec))
+            # Only the submodule named "lowered_module_0" is inspected.
+            lowered_module = edge_prog_mgr.exported_program().graph_module._modules[
+                "lowered_module_0"
+            ]
+            qnn_mgr = PyQnnManagerAdaptor.QnnManager(
+                lowered_module.compile_specs[0].value
+            )
+            qnn_mgr.Init()
+            # Strip the wrapper from the processed bytes and validate what remains.
+            binary = qnn_mgr.StripProtocol(lowered_module.processed_bytes)
+            validate(binary)
+
+    def test_qnn_backend_dump_context_from_pte(self):
+        """Check that dump_context_from_pte produces a file the validator accepts.
+
+        For each compiler spec, SimpleModel is lowered and written to
+        model.pte in a temporary directory. After dump_context_from_pte runs
+        on that path, forward_0.bin is expected in the same directory and its
+        bytes are passed to the validator paired with the spec.
+        """
+        from executorch.exir import EdgeCompileConfig, EdgeProgramManager
+
+        module = SimpleModel()  # noqa: F405
+        sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
+        backend_options = generate_htp_compiler_spec(use_fp16=True)
+        compiler_specs = [
+            self.compiler_specs,
+            generate_qnn_executorch_compiler_spec(
+                soc_model=self.chipset_table[TestQNN.model],
+                backend_options=backend_options,
+                online_prepare=True,
+            ),
+        ]
+        # Order matters: validators[i] is applied to the result of compiler_specs[i].
+        validators = [validate_context_binary, validate_qcir]
+
+        for compiler_spec, validate in zip(compiler_specs, validators):
+            edge_prog_mgr = (
+                EdgeProgramManager(
+                    edge_programs={
+                        "forward": capture_program(
+                            module, sample_input
+                        ).exported_program
+                    },
+                    compile_config=EdgeCompileConfig(_use_edge_ops=False),
+                )
+                .to_backend(QnnPartitioner(compiler_spec))
+                .to_executorch()
+            )
+
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                pte_path = f"{tmp_dir}/model.pte"
+                with open(pte_path, "wb") as f:
+                    edge_prog_mgr.write_to_file(f)
+
+                dump_context_from_pte(pte_path)
+                # The dumped binary is expected next to the .pte file.
+                binary_name = f"{tmp_dir}/forward_0.bin"
+                self.assertTrue(os.path.isfile(binary_name))
+                with open(binary_name, "rb") as f:
+                    stripped_binary = f.read()
+                    validate(stripped_binary)
+
     def test_qnn_backend_draw_graph(self):
         golden_data = """digraph test {
             rankdir=TB
@@ -2433,7 +2512,7 @@ def test_qnn_backend_multi_graphs(self):
             for module, sample_input in zip(modules, sample_inputs)
         ]
         backend_options = generate_htp_compiler_spec(
-            use_fp16=True,
+            use_fp16=False,
         )
         compiler_specs = [
             generate_qnn_executorch_compiler_spec(
@@ -2532,6 +2611,83 @@ def test_qnn_backend_context_direct(self):
                 bundle_program["edge_program_manager"].to_executorch(),
             )
 
+    def test_qnn_backend_context_extraction(self):
+        """Check StripProtocol output for a module returned by get_qdq_module.
+
+        SimpleModel is passed through self.get_qdq_module and then lowered
+        once per compiler spec: self.compiler_specs, then a spec built with
+        online_prepare=True. zip pairs the first spec with
+        validate_context_binary and the second with validate_qcir.
+        """
+        from executorch.exir import EdgeCompileConfig, EdgeProgramManager
+
+        module = SimpleModel()  # noqa: F405
+        sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
+        module = self.get_qdq_module(module, sample_input)
+        backend_options = generate_htp_compiler_spec(use_fp16=False)
+        compiler_specs = [
+            self.compiler_specs,
+            generate_qnn_executorch_compiler_spec(
+                soc_model=self.chipset_table[TestQNN.model],
+                backend_options=backend_options,
+                online_prepare=True,
+            ),
+        ]
+        # Order matters: validators[i] is applied to the result of compiler_specs[i].
+        validators = [validate_context_binary, validate_qcir]
+
+        for compiler_spec, validate in zip(compiler_specs, validators):
+            edge_prog_mgr = EdgeProgramManager(
+                edge_programs={
+                    "forward": capture_program(module, sample_input).exported_program
+                },
+                compile_config=EdgeCompileConfig(_use_edge_ops=False),
+            ).to_backend(QnnPartitioner(compiler_spec))
+            # Only the submodule named "lowered_module_0" is inspected.
+            lowered_module = edge_prog_mgr.exported_program().graph_module._modules[
+                "lowered_module_0"
+            ]
+            qnn_mgr = PyQnnManagerAdaptor.QnnManager(
+                lowered_module.compile_specs[0].value
+            )
+            qnn_mgr.Init()
+            # Strip the wrapper from the processed bytes and validate what remains.
+            binary = qnn_mgr.StripProtocol(lowered_module.processed_bytes)
+            validate(binary)
+
+    def test_qnn_backend_dump_context_from_pte(self):
+        """Check dump_context_from_pte for a module returned by get_qdq_module.
+
+        SimpleModel is passed through self.get_qdq_module, lowered once per
+        compiler spec and written to model.pte in a temporary directory.
+        After dump_context_from_pte runs on that path, forward_0.bin is
+        expected in the same directory and its bytes are passed to the
+        validator paired with the spec.
+        """
+        from executorch.exir import EdgeCompileConfig, EdgeProgramManager
+
+        module = SimpleModel()  # noqa: F405
+        sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
+        module = self.get_qdq_module(module, sample_input)
+        # NOTE(review): the quantized test_qnn_backend_context_extraction passes
+        # use_fp16=False for the same kind of module; confirm True is intended.
+        backend_options = generate_htp_compiler_spec(use_fp16=True)
+        compiler_specs = [
+            self.compiler_specs,
+            generate_qnn_executorch_compiler_spec(
+                soc_model=self.chipset_table[TestQNN.model],
+                backend_options=backend_options,
+                online_prepare=True,
+            ),
+        ]
+        # Order matters: validators[i] is applied to the result of compiler_specs[i].
+        validators = [validate_context_binary, validate_qcir]
+
+        for compiler_spec, validate in zip(compiler_specs, validators):
+            edge_prog_mgr = (
+                EdgeProgramManager(
+                    edge_programs={
+                        "forward": capture_program(
+                            module, sample_input
+                        ).exported_program
+                    },
+                    compile_config=EdgeCompileConfig(_use_edge_ops=False),
+                )
+                .to_backend(QnnPartitioner(compiler_spec))
+                .to_executorch()
+            )
+
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                pte_path = f"{tmp_dir}/model.pte"
+                with open(pte_path, "wb") as f:
+                    edge_prog_mgr.write_to_file(f)
+
+                dump_context_from_pte(pte_path)
+                # The dumped binary is expected next to the .pte file.
+                binary_name = f"{tmp_dir}/forward_0.bin"
+                self.assertTrue(os.path.isfile(binary_name))
+                with open(binary_name, "rb") as f:
+                    stripped_binary = f.read()
+                    validate(stripped_binary)
+
     def test_qnn_backend_draw_graph(self):
         golden_data = """digraph test {
             rankdir=TB
 
@@ -108,6 +108,57 @@ def generate_context_binary(
     assert os.path.isfile(f"{artifact_dir}/model_ctx.bin"), print(result.stderr)
 
 
+def validate_context_binary(ctx_bin: bytes):
+    """Assert that qnn-context-binary-utility emits a JSON file for ``ctx_bin``.
+
+    The bytes are written to ``ctx.bin`` in a temporary directory and the
+    utility under ``$QNN_SDK_ROOT/bin/x86_64-linux-clang`` is run through
+    bash with ``--context_binary`` and ``--json_file`` arguments. The check
+    passes when ``ctx.json`` exists afterwards.
+
+    Args:
+        ctx_bin: Bytes to write out and hand to the utility.
+
+    Raises:
+        AssertionError: If QNN_SDK_ROOT is unset or empty, or if ``ctx.json``
+            was not created.
+    """
+    qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
+    assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"
+
+    # flow of qnn tools
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        with open(f"{tmp_dir}/ctx.bin", "wb") as binary_file:
+            binary_file.write(ctx_bin)
+
+        target = "x86_64-linux-clang"
+        cmds = [
+            # qnn-context-binary-utility
+            f"{qnn_sdk}/bin/{target}/qnn-context-binary-utility",
+            "--context_binary",
+            f"{tmp_dir}/ctx.bin",
+            "--json_file",
+            f"{tmp_dir}/ctx.json",
+        ]
+        # The arguments are joined with spaces and run as one bash command line.
+        result = subprocess.run(
+            " ".join(cmds),
+            shell=True,
+            executable="/bin/bash",
+            capture_output=True,
+        )
+        # On failure print() emits the captured stderr; the assertion message
+        # itself is None because print() returns None.
+        assert os.path.isfile(f"{tmp_dir}/ctx.json"), print(result.stderr)
+
+
+def validate_qcir(qcir: bytes):
+    """Assert that ``flatc`` emits ``qcir.json`` for ``qcir`` using qcir.fbs.
+
+    The bytes are written to ``qcir.bin`` in a temporary directory and
+    ``flatc`` is run through bash with ``--raw-binary -t`` against the
+    ``../aot/ir/qcir.fbs`` schema located relative to this file. The check
+    passes when ``qcir.json`` exists in the temporary directory afterwards.
+
+    Args:
+        qcir: Bytes to write out and hand to ``flatc``.
+
+    Raises:
+        AssertionError: If ``qcir.json`` was not created.
+    """
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        with open(f"{tmp_dir}/qcir.bin", "wb") as binary_file:
+            binary_file.write(qcir)
+
+        cmds = [
+            "flatc",
+            "-o",
+            tmp_dir,
+            "--raw-binary",
+            "-t",
+            f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs",
+            "--",
+            f"{tmp_dir}/qcir.bin",
+        ]
+        # The arguments are joined with spaces and run as one bash command line.
+        result = subprocess.run(
+            " ".join(cmds),
+            shell=True,
+            executable="/bin/bash",
+            capture_output=True,
+        )
+        # On failure print() emits the captured stderr; the assertion message
+        # itself is None because print() returns None.
+        assert os.path.isfile(f"{tmp_dir}/qcir.json"), print(result.stderr)
+
+
 class TestQNN(unittest.TestCase):
     rtol: float = 0
     atol: float = 0
Original file line number	Diff line number	Diff line change
`@@ -37,14 +37,13 @@ def call(self, graph_module: torch.fx.GraphModule):`
`37`	`37`	`rank = len(input_node.meta["val"].size())`
`38`	`38`	`dim = dim % rank if dim < 0 else dim`
`39`	`39`	`index = index % rank if index < 0 else index`
`40`		`- dim_list = list(range(rank))`
`41`	`40`
`42`	`41`	`with graph_module.graph.inserting_before(node):`
`43`	`42`	`slice_node = create_node(`
`44`	`43`	`graph_module.graph, slice_op, (input_node, dim, index, index + 1)`
`45`	`44`	`)`
`46`	`45`	`squeeze_node = create_node(`
`47`		`- graph_module.graph, squeeze_op, (slice_node, dim_list)`
	`46`	`+ graph_module.graph, squeeze_op, (slice_node, [dim])`
`48`	`47`	`)`
`49`	`48`
`50`	`49`	`node.replace_all_uses_with(squeeze_node)`