Commit 7bbbde1

support file hash for cache reuse
1 parent 6e2ada6 commit 7bbbde1

26 files changed: +251 -186 lines

backends/qualcomm/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
@@ -74,7 +74,10 @@ include_directories(
   ${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include
 )
 
-set(_qnn_schema__srcs backends/qualcomm/serialization/schema.fbs)
+set(_qnn_schema__srcs
+  backends/qualcomm/serialization/qc_compiler_spec.fbs
+  backends/qualcomm/serialization/qc_processed_binary.fbs
+)
 set(_qnn_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
 # Paths to headers generated from the .fbs files.
 set(_qnn_schema__outputs)

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 22 additions & 4 deletions
@@ -8,10 +8,11 @@
 #pragma once
 #include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
+#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
-#include <executorch/backends/qualcomm/schema_generated.h>
 #include <pybind11/numpy.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
@@ -57,15 +58,23 @@ class PyQnnManager {
     std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
     for (size_t i = 0; i < qcirs.size(); ++i) {
       py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-      flatbuffers::Verifier verifier(
+      flatbuffers::Verifier verifier_processed_info(
           static_cast<const uint8_t* const>(info.ptr),
           info.size * info.itemsize);
+      if (!qnn_delegate::VerifyProcessedBinaryInfoBuffer(
+              verifier_processed_info)) {
+        QNN_EXECUTORCH_LOG_ERROR("Fail to verify processed binary");
+        return;
+      }
+      auto processed_info = qnn_delegate::GetProcessedBinaryInfo(info.ptr);
 
-      if (!qcir::VerifyContextBuffer(verifier)) {
+      flatbuffers::Verifier verifier_qcir(
+          processed_info->data()->data(), processed_info->data()->size());
+      if (!qcir::VerifyContextBuffer(verifier_qcir)) {
         QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format");
         return;
       }
-      auto context = qcir::GetContext(info.ptr);
+      auto context = qcir::GetContext(processed_info->data()->data());
       for (const auto& graph : *context->graphs()) {
         std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
         for (const auto tensor : *graph->tensors()) {
@@ -102,8 +111,17 @@
           builder_, graph->name()->str().c_str(), &nodes, &tensors));
       }
     }
+
     auto context = qcir::CreateContextDirect(builder_, &graphs);
     builder_.Finish(context);
+    std::vector<uint8_t> data(
+        builder_.GetBufferPointer(),
+        builder_.GetBufferPointer() + builder_.GetSize());
+    builder_.Reset();
+
+    auto processed_binary = qnn_delegate::CreateProcessedBinaryInfoDirect(
+        builder_, "qcirs_to_context_binary", &data);
+    builder_.Finish(processed_binary);
     qnn_executorch_context_binary_.buffer = builder_.GetBufferPointer();
     qnn_executorch_context_binary_.nbytes = builder_.GetSize();
     qnn_manager_ = std::make_shared<QnnManager>(
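
Note: the constructor above now finishes the qcir::Context buffer, copies its bytes out, resets the builder, and serializes a ProcessedBinaryInfo wrapper around them. Below is a minimal sketch of that double-build step, assuming the API generated from the new qc_processed_binary.fbs; the schema itself is not shown on this page, so the role of the string literal "qcirs_to_context_binary" is inferred, and the helper name and tag parameter are illustrative only.

// Sketch only: re-wrap an already-finished flatbuffer payload (e.g. a
// qcir::Context) inside a ProcessedBinaryInfo table, reusing one builder.
// Assumes the header generated from qc_processed_binary.fbs in this commit.
#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>

#include <cstdint>
#include <vector>

flatbuffers::DetachedBuffer WrapInProcessedBinaryInfo(
    flatbuffers::FlatBufferBuilder& builder, const char* tag) {
  // Copy the finished payload out before the builder is reused.
  std::vector<uint8_t> payload(
      builder.GetBufferPointer(),
      builder.GetBufferPointer() + builder.GetSize());
  // Reset the builder so it can serialize the wrapper table.
  builder.Reset();
  // The diff above passes "qcirs_to_context_binary" as the string argument;
  // per QnnManager::GetBinaryHash() further down, a hash field in this table
  // is what the runtime later reads as the delegate cache key.
  auto info =
      qnn_delegate::CreateProcessedBinaryInfoDirect(builder, tag, &payload);
  builder.Finish(info);
  // Hand ownership of the wrapped buffer back to the caller.
  return builder.Release();
}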

backends/qualcomm/qnn_preprocess.py

Lines changed: 8 additions & 2 deletions
@@ -20,6 +20,9 @@
 from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors
 from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
 from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option
+from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
+    processed_binary_to_flatbuffer,
+)
 from executorch.exir.backend.backend_details import (
     BackendDetails,
     CompileSpec,
@@ -89,7 +92,9 @@ def preprocess(
                 assert node.target == context_loader_target, err_msg
                 # if graph has context binary loader node, return directly
                 return PreprocessResult(
-                    processed_bytes=node.meta[OpContextLoader.meta_ctx_bin],
+                    processed_bytes=processed_binary_to_flatbuffer(
+                        node.meta[OpContextLoader.meta_ctx_bin]
+                    ),
                     debug_handle_map={},
                 )
             except:
@@ -111,5 +116,6 @@ def preprocess(
         qnn_manager.Destroy()
         # For now, debug_handle_map is not used by QNN ExecuTorch
         return PreprocessResult(
-            processed_bytes=bytes(qnn_context_binary), debug_handle_map={}
+            processed_bytes=processed_binary_to_flatbuffer(bytes(qnn_context_binary)),
+            debug_handle_map={},
         )

backends/qualcomm/runtime/Logging.h

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
  */
 #pragma once
 
-#include <executorch/backends/qualcomm/schema_generated.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/runtime/core/error.h>
 namespace executorch {
 namespace backends {

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 29 additions & 59 deletions
@@ -7,38 +7,14 @@
  */
 
 #include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
-#include <executorch/backends/qualcomm/schema_generated.h>
 
 namespace executorch {
 namespace backends {
 namespace qnn {
 
-// CRC32 hasher
-class CRC32 {
- public:
-  CRC32() {
-    uint32_t ieee_802_3 = 0x04C11DB7;
-    for (uint32_t i = 0, poly = 0; i < 256; i++, poly = i) {
-      for (size_t j = 0; j < 8; j++) {
-        poly = (poly & 1) ? (ieee_802_3 ^ (poly >> 1)) : (poly >> 1);
-      }
-      lookup_table_.push_back(poly);
-    }
-  }
-  uint32_t hash(const uint8_t* buf, uint32_t length) const {
-    uint32_t val = 0xFFFFFFFF;
-    for (size_t i = 0; i < length; ++i) {
-      val = lookup_table_[(val ^ buf[i]) & 0xFF] ^ (val >> 8);
-    }
-    return val ^ 0xFFFFFFFF;
-  }
-
- private:
-  std::vector<uint32_t> lookup_table_;
-};
-
 using namespace qnn_delegate;
 using executorch::runtime::ArrayRef;
 using executorch::runtime::BackendExecutionContext;
@@ -56,24 +32,6 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
     BackendInitContext& context,
     FreeableBuffer* processed,
     ArrayRef<CompileSpec> compile_specs) const {
-  // record the method name to be executed
-  // method_name_ = context.get_method_name();
-
-  // TODO: this is a temporal solution for multi-graph support, will be
-  // removed once framework starts to accept runtime configuration
-  // ---
-  // check if current context binary has already been initialized
-  // return cached one for reducing memory footprint
-  uint32_t hash_val = CRC32().hash(
-      static_cast<const uint8_t*>(processed->data()), processed->size());
-  auto iter = delegate_map_.find(hash_val);
-  if (iter != delegate_map_.end()) {
-    QNN_EXECUTORCH_LOG_INFO(
-        "Use cached delegate handle for current method: %s",
-        method_name_.c_str());
-    return iter->second;
-  }
-
   // covert SizedBuffer to qnn ExecuTorch option
   QnnExecuTorchContextBinary qnn_context_blob;
   const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
@@ -99,6 +57,20 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
   // destructible, we must call the destructor manually in destroy().
   new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);
 
+  // TODO: this is a temporal solution for multi-graph support, will be
+  // removed once framework starts to accept runtime configuration
+  // ---
+  // check if current context binary has already been initialized
+  // return cached one for reducing memory footprint
+  std::string binary_hash = qnn_manager->GetBinaryHash();
+  auto iter = delegate_map_.find(binary_hash);
+  if (iter != delegate_map_.end()) {
+    QNN_EXECUTORCH_LOG_INFO(
+        "Use cached delegate handle for current method: %s",
+        context.get_method_name());
+    return iter->second;
+  }
+
   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->Init() == Error::Ok,
       Internal,
@@ -117,7 +89,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
           "Fail to allocate tensor");
     }
   }
-  add_cached_delegate(hash_val, qnn_manager);
+  add_cached_delegate(binary_hash, qnn_manager);
   return qnn_manager;
 }
 
@@ -131,10 +103,11 @@ Error QnnExecuTorchBackend::execute(
       "DelegateHandle has been deleted");
   QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
 
+  std::string method_name = context.get_method_name();
   std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
-      qnn_manager->GetGraphInputs(method_name_);
+      qnn_manager->GetGraphInputs(method_name);
   std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
-      qnn_manager->GetGraphOutputs(method_name_);
+      qnn_manager->GetGraphOutputs(method_name);
   std::vector<Qnn_Tensor_t> input_tensor_structs;
   std::vector<Qnn_Tensor_t> output_tensor_structs;
 
@@ -167,14 +140,14 @@ Error QnnExecuTorchBackend::execute(
 
   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->Execute(
-          method_name_,
+          method_name,
          input_tensor_structs,
          output_tensor_structs,
          context.event_tracer()) == Error::Ok,
       Internal,
       "Fail to execute graph");
   ET_CHECK_OR_RETURN_ERROR(
-      qnn_manager->ProfileExecuteData(method_name_, context.event_tracer()) ==
+      qnn_manager->ProfileExecuteData(method_name, context.event_tracer()) ==
           Error::Ok,
       Internal,
       "Fail to profile graph");
@@ -195,27 +168,24 @@ bool QnnExecuTorchBackend::is_available() const {
 }
 
 void QnnExecuTorchBackend::add_cached_delegate(
-    uint32_t hash_val,
-    executorch::runtime::DelegateHandle* handle) {
+    const std::string& hash_val,
+    executorch::runtime::DelegateHandle* handle) const {
   std::lock_guard<std::mutex> guard(mutex_);
   delegate_map_[hash_val] = handle;
   delegate_map_rev_[handle] = hash_val;
 }
 
 void QnnExecuTorchBackend::erase_cached_delegate(
-    executorch::runtime::DelegateHandle* handle) {
+    executorch::runtime::DelegateHandle* handle) const {
   std::lock_guard<std::mutex> guard(mutex_);
-  uint32_t hash_val = delegate_map_rev_[handle];
-  delegate_map_.erase(hash_val);
+  auto iter = delegate_map_rev_.find(handle);
+  if (iter == delegate_map_rev_.end()) {
+    return;
+  }
+  delegate_map_.erase(iter->second);
   delegate_map_rev_.erase(handle);
 }
 
-std::mutex QnnExecuTorchBackend::mutex_;
-std::unordered_map<uint32_t, executorch::runtime::DelegateHandle*>
-    QnnExecuTorchBackend::delegate_map_;
-std::unordered_map<executorch::runtime::DelegateHandle*, uint32_t>
-    QnnExecuTorchBackend::delegate_map_rev_;
-
 namespace {
 auto cls = QnnExecuTorchBackend();
 executorch::runtime::Backend backend{"QnnBackend", &cls};
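
Note: init() now derives the cache key from QnnManager::GetBinaryHash(), i.e. the hash recorded in the ProcessedBinaryInfo flatbuffer ahead of time, instead of CRC32-hashing the whole processed buffer on device; the cache also moves from static members to mutable per-instance members so the backend's const methods can update it. A self-contained sketch of that reuse pattern follows; the class and member names are illustrative, not the backend's actual API.

// Illustrative hash-keyed handle cache; DelegateHandle here is a stand-in
// for executorch::runtime::DelegateHandle.
#include <mutex>
#include <string>
#include <unordered_map>

struct DelegateHandle {};

class HandleCache {
 public:
  // Returns the cached handle for this binary hash, or nullptr if none exists.
  DelegateHandle* find(const std::string& binary_hash) const {
    std::lock_guard<std::mutex> guard(mutex_);
    auto it = map_.find(binary_hash);
    return it == map_.end() ? nullptr : it->second;
  }
  // Registers a freshly created handle under its binary hash.
  void add(const std::string& binary_hash, DelegateHandle* handle) const {
    std::lock_guard<std::mutex> guard(mutex_);
    map_[binary_hash] = handle;
    rev_[handle] = binary_hash;
  }
  // Drops a handle on destroy; no-op if it was never registered.
  void erase(DelegateHandle* handle) const {
    std::lock_guard<std::mutex> guard(mutex_);
    auto it = rev_.find(handle);
    if (it == rev_.end()) {
      return;
    }
    map_.erase(it->second);
    rev_.erase(it);
  }

 private:
  // mutable lets the const member functions above update the cache,
  // mirroring the header change in this commit.
  mutable std::mutex mutex_;
  mutable std::unordered_map<std::string, DelegateHandle*> map_;
  mutable std::unordered_map<DelegateHandle*, std::string> rev_;
};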

backends/qualcomm/runtime/QnnExecuTorchBackend.h

Lines changed: 8 additions & 10 deletions
@@ -39,18 +39,16 @@ class QnnExecuTorchBackend final
   bool is_available() const override;
 
  private:
-  static void add_cached_delegate(
-      uint32_t hash_val,
-      executorch::runtime::DelegateHandle* handle);
-  static void erase_cached_delegate(
-      executorch::runtime::DelegateHandle* handle);
-
-  static std::mutex mutex_;
-  static std::unordered_map<uint32_t, executorch::runtime::DelegateHandle*>
+  void add_cached_delegate(
+      const std::string& hash_val,
+      executorch::runtime::DelegateHandle* handle) const;
+  void erase_cached_delegate(executorch::runtime::DelegateHandle* handle) const;
+
+  mutable std::mutex mutex_;
+  mutable std::unordered_map<std::string, executorch::runtime::DelegateHandle*>
       delegate_map_;
-  static std::unordered_map<executorch::runtime::DelegateHandle*, uint32_t>
+  mutable std::unordered_map<executorch::runtime::DelegateHandle*, std::string>
      delegate_map_rev_;
-  mutable std::string method_name_;
 };
 
 } // namespace qnn

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 14 additions & 13 deletions
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
+#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
 #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
 #include <executorch/backends/qualcomm/runtime/Utils.h>
@@ -487,18 +488,8 @@ Error QnnManager::GetContextBinary(
 }
 
 Error QnnManager::CompileQcir() {
-  // check if context binary came from flatbuffer
-  flatbuffers::Verifier verifier(
-      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
-      qnn_context_blob_.nbytes);
-
-  if (!qcir::VerifyContextBuffer(verifier)) {
-    QNN_EXECUTORCH_LOG_ERROR(
-        "Failed to verify qcir. The binary might be broken.");
-    return Error::Internal;
-  }
-
-  auto context = qcir::GetContext(qnn_context_blob_.buffer);
+  auto binary_info = GetProcessedBinaryInfo(qnn_context_blob_.buffer);
+  auto context = qcir::GetContext(binary_info->data()->data());
   for (const auto& graph : *context->graphs()) {
     // qcir tensors to TensorWrapper
     std::vector<std::shared_ptr<TensorWrapper>> graph_inputs, graph_outputs,
@@ -676,7 +667,17 @@ Error QnnManager::Compile(
   }
 
   return Error::Ok;
-};
+}
+
+std::string QnnManager::GetBinaryHash() {
+  flatbuffers::Verifier verifier(
+      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
+      qnn_context_blob_.nbytes);
+  return VerifyProcessedBinaryInfoBuffer(verifier)
+      ? GetProcessedBinaryInfo(qnn_context_blob_.buffer)->hash()->str()
+      : "";
+}
+
 } // namespace qnn
 } // namespace backends
 } // namespace executorch
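
Note: GetBinaryHash() treats the hash as optional. It only dereferences the ProcessedBinaryInfo table after the flatbuffer verifies, and falls back to an empty string otherwise. A small sketch of that read path, assuming the accessors generated from qc_processed_binary.fbs that appear in this commit (VerifyProcessedBinaryInfoBuffer, GetProcessedBinaryInfo, hash(), data()); the function name here is hypothetical.

// Sketch only: pull the hash back out of a wrapped buffer, or return ""
// if the buffer is not a ProcessedBinaryInfo flatbuffer.
#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>

#include <cstddef>
#include <cstdint>
#include <string>

std::string ReadBinaryHash(const void* buffer, size_t nbytes) {
  flatbuffers::Verifier verifier(
      static_cast<const uint8_t*>(buffer), nbytes);
  if (!qnn_delegate::VerifyProcessedBinaryInfoBuffer(verifier)) {
    return "";
  }
  auto info = qnn_delegate::GetProcessedBinaryInfo(buffer);
  // info->data() carries the original payload (a qcir::Context or a QNN
  // context binary); info->hash() is the key used for delegate cache reuse.
  return info->hash() ? info->hash()->str() : "";
}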

backends/qualcomm/runtime/QnnManager.h

Lines changed: 3 additions & 1 deletion
@@ -9,10 +9,10 @@
 
 #include <executorch/backends/qualcomm/aot/wrappers/OpWrapper.h>
 #include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
-#include <executorch/backends/qualcomm/schema_generated.h>
 #include <executorch/runtime/core/error.h>
 
 #include <memory>
@@ -103,6 +103,8 @@ class QnnManager {
     return backend_params_ptr_->qnn_context_ptr_->GetGraphNames();
   }
 
+  std::string GetBinaryHash();
+
  private:
  executorch::runtime::Error LoadQnnLibrary();
 