
[CoreML Backend] Propagate debug handle. #2783

Closed
195 changes: 156 additions & 39 deletions backends/apple/coreml/compiler/coreml_preprocess.py
@@ -3,14 +3,15 @@
 # CoreML backend for delegating an EdgeProgram to CoreML.
 
 import json
+
 import shutil
 import uuid
 from dataclasses import asdict, dataclass
 from enum import Enum
 
 from pathlib import Path
 
-from typing import Dict, final, List
+from typing import Any, Dict, final, List, Optional, Tuple
 
 import coremltools as ct
 import executorchcoreml
@@ -30,6 +31,13 @@ class COMPILE_SPEC_KEYS(Enum):
     MODEL_COMPUTE_PRECISION = "model_compute_precision"
 
 
+class MODEL_PATHS(Enum):
+    MODEL = "model.mlpackage"
+    COMPILED_MODEL = "model.mlmodelc"
+    METADATA = "metadata.json"
+    DEBUG_INFO = "debug_info.json"
+
+
 @dataclass
 class ModelMetadata:
     # The model input names.
@@ -40,6 +48,16 @@ class ModelMetadata:
     identifier: str
 
 
+@dataclass
+class ModelDebugInfo:
+    # Version info.
+    versionInfo: Dict[str, str]
+    # Mapping from debug symbol to operation path.
+    debugSymbolToOperationPath: Dict[str, List[Dict[str, str]]]
+    # Mapping from debug symbol to handle.
+    debugSymbolToHandles: Dict[str, List[int]]
+
+
 @final
 class CoreMLBackend(BackendDetails):
     class MODEL_TYPE(Enum):
@@ -165,53 +183,163 @@ def generate_compile_specs(
         return compile_specs
 
     @staticmethod
-    def model_metadata_from_spec(model_spec: ct.proto.Model_pb2) -> Dict[str, str]:
+    def model_metadata_from_spec(
+        model_spec: ct.proto.Model_pb2, identifier: str
+    ) -> Dict[str, str]:
         input_names: List[str] = [input.name for input in model_spec.description.input]
         output_names = [output.name for output in model_spec.description.output]
-        identifier = uuid.uuid4()
 
         return ModelMetadata(
-            inputNames=input_names, outputNames=output_names, identifier=str(identifier)
+            inputNames=input_names, outputNames=output_names, identifier=identifier
         )

+    @staticmethod
+    def get_debug_symbol(operation_path: List[Dict[str, str]]) -> Optional[str]:
+        if len(operation_path) == 0:
+            return None
+
+        operator_name: Optional[str] = operation_path[-1].get("Operator", None)
+        output_name: Optional[str] = operation_path[-1].get("Output", None)
+        if output_name is None or operator_name is None:
+            return None
+
+        return output_name + ":" + operator_name
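
For illustration, a minimal sketch of what `get_debug_symbol` consumes and produces. The `"Operator"` and `"Output"` keys follow the code above; the shape of the earlier path entries is an assumption, since only the last entry is inspected:

```python
# Hypothetical operation path as read from the delegate mapping file.
# Only the last entry matters: its "Output" and "Operator" values are
# joined into "<output>:<operator>".
operation_path = [
    {"Type": "Program"},  # assumed outer entries, ignored by the helper
    {"Type": "Operation", "Operator": "add", "Output": "var_12"},
]
symbol = CoreMLBackend.get_debug_symbol(operation_path=operation_path)
assert symbol == "var_12:add"
# Returns None for an empty path or when either key is missing.
```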

+    @staticmethod
+    def get_model_debug_info(model_package_dir: Path) -> Optional[ModelDebugInfo]:
+        delegate_info_file = model_package_dir / "executorch_debug_handle_mapping.json"
+
+        if not delegate_info_file.is_file():
+            return None
+
+        delegate_info: Optional[Dict[str, Any]] = None
+
+        try:
+            with open(delegate_info_file) as f:
+                delegate_info = json.load(f)
+        except ValueError:
+            return None
+
+        if delegate_info is None:
+            return None
+
+        debug_handle_to_operation_path_mapping: Optional[Dict[str, Any]] = (
+            delegate_info.get("mapping", None)
+        )
+
+        if debug_handle_to_operation_path_mapping is None:
+            return None
+
+        debug_symbol_to_operation_path: Dict[str, List[Dict[str, str]]] = {}
+        debug_symbol_to_handles: Dict[str, List[int]] = {}
+        for (
+            debug_handle,
+            operation_paths,
+        ) in debug_handle_to_operation_path_mapping.items():
+            debug_handle_value: Optional[int] = None
+            try:
+                debug_handle_value = int(debug_handle)
+            except ValueError:
+                debug_handle_value = None
+
+            if debug_handle_value is None:
+                continue
+
+            for operation_path in operation_paths:
+                debug_symbol: Optional[str] = CoreMLBackend.get_debug_symbol(
+                    operation_path=operation_path
+                )
+
+                if debug_symbol is None:
+                    continue
+
+                debug_handle_values: List[int] = debug_symbol_to_handles.get(
+                    debug_symbol, []
+                )
+                debug_handle_values.append(debug_handle_value)
+                debug_symbol_to_handles[debug_symbol] = debug_handle_values
+
+                debug_symbol_to_operation_path[debug_symbol] = operation_path
+
+        version_info: Dict[str, str] = delegate_info.get("version", {})
+
+        return ModelDebugInfo(
+            versionInfo=version_info,
+            debugSymbolToOperationPath=debug_symbol_to_operation_path,
+            debugSymbolToHandles=debug_symbol_to_handles,
+        )
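
For orientation, a hedged sketch of the `executorch_debug_handle_mapping.json` payload this parser expects. The top-level `"version"` and `"mapping"` keys and the handle-to-operation-path nesting follow the code above; the concrete handles, operators, and outputs are made up:

```python
# Illustrative delegate mapping payload (all values hypothetical).
delegate_info = {
    "version": {"coremltools": "7.1"},
    "mapping": {
        # stringified debug handle -> list of operation paths
        "42": [[{"Operator": "add", "Output": "var_12"}]],
        "43": [[{"Operator": "mul", "Output": "var_13"}]],
    },
}
# Running the loop above over this payload would produce:
#   debugSymbolToHandles       == {"var_12:add": [42], "var_13:mul": [43]}
#   debugSymbolToOperationPath == {
#       "var_12:add": [{"Operator": "add", "Output": "var_12"}],
#       "var_13:mul": [{"Operator": "mul", "Output": "var_13"}],
#   }
```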

     @staticmethod
-    def to_bytes(mlmodel: ct.models.MLModel, model_type: MODEL_TYPE) -> bytes:
-        dir_path: Path = Path("tmp")
+    def save_model_metadata(model_metadata: ModelMetadata, model_dir_path: Path):
+        # Store model metadata.
+        model_metadata_path = Path(model_dir_path) / MODEL_PATHS.METADATA.value
+        model_metadata_json = json.dumps(asdict(model_metadata))
+        with open(model_metadata_path, "w") as outfile:
+            outfile.write(model_metadata_json)
+
+    @staticmethod
+    def save_model_debug_info(model_debug_info: ModelDebugInfo, model_dir_path: Path):
+        # Store model debug info.
+        model_debug_info_path = Path(model_dir_path) / MODEL_PATHS.DEBUG_INFO.value
+        model_debug_info_json = json.dumps(asdict(model_debug_info))
+        with open(model_debug_info_path, "w") as outfile:
+            outfile.write(model_debug_info_json)
+
+    @staticmethod
+    def preprocess_model(
+        mlmodel: ct.models.MLModel, model_type: MODEL_TYPE
+    ) -> PreprocessResult:
+        identifier = str(uuid.uuid4())
(Inline review comment from a contributor on the `identifier` line above: "Just a note, we also provide the helper function to build the identifier, if you find it helpful. It's totally fine to use your own way, and maybe we can learn the reason. cc: @tarun292 @Jack-Khuu")
+        dir_path: Path = Path("tmp") / identifier
         model_dir_path: Path = dir_path / "lowered_module"
         model_spec: ct.proto.Model_pb2 = mlmodel.get_spec()
         model_metadata: ModelMetadata = CoreMLBackend.model_metadata_from_spec(
-            model_spec
+            model_spec=model_spec,
+            identifier=identifier,
         )
-        match model_type:
-            case CoreMLBackend.MODEL_TYPE.MODEL:
-                # Store model.
-                model_path = model_dir_path / "model.mlpackage"
-                mlmodel.save(model_path)
 
+        # Save model.
+        model_path = model_dir_path / MODEL_PATHS.MODEL.value
+        mlmodel.save(model_path)
+        # Extract delegate mapping file.
+        model_debug_info: Optional[ModelDebugInfo] = CoreMLBackend.get_model_debug_info(
+            model_path
+        )
+
+        match model_type:
             case CoreMLBackend.MODEL_TYPE.COMPILED_MODEL:
                 # Store compiled model
-                model_path = model_dir_path / "model.mlmodelc"
+                shutil.rmtree(str(model_path.resolve()))
+                model_path = model_dir_path / MODEL_PATHS.COMPILED_MODEL.value
                 compiled_model_path = mlmodel.get_compiled_model_path()
 
-                shutil.copytree(
+                shutil.move(
                     compiled_model_path,
                     str(model_path.resolve()),
-                    dirs_exist_ok=True,
                 )
 
-        # Store model metadata.
-        model_metadata_path = Path(model_dir_path) / "metadata.json"
-        model_metadata_json = json.dumps(asdict(model_metadata))
-        with open(model_metadata_path, "w") as outfile:
-            outfile.write(model_metadata_json)
+            case _:
+                pass
 
-        # flatten directory contents and convert it to bytes
-        flattened_bytes = executorchcoreml.flatten_directory_contents(
+        CoreMLBackend.save_model_metadata(
+            model_metadata=model_metadata, model_dir_path=model_dir_path
+        )
+        if model_debug_info is not None:
+            CoreMLBackend.save_model_debug_info(
+                model_debug_info=model_debug_info, model_dir_path=model_dir_path
+            )
+
+        processed_bytes: bytes = executorchcoreml.flatten_directory_contents(
             str(model_dir_path.resolve())
         )
 
-        shutil.rmtree(str(model_dir_path.resolve()))
-        return flattened_bytes
+        debug_handle_map: Optional[Dict[str, Tuple[int]]] = None
+        if model_debug_info is not None:
+            debug_handle_map = model_debug_info.debugSymbolToHandles
+
+        shutil.rmtree(str(dir_path.resolve()))
+        return PreprocessResult(
+            processed_bytes=processed_bytes,
+            debug_handle_map=debug_handle_map,
+        )
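
Putting the pieces together, a sketch of consuming what `preprocess_model` now returns. It assumes an `mlmodel` from `ct.convert` is in scope; `processed_bytes` is the flattened `tmp/<uuid>/lowered_module/` directory (`model.mlpackage` or `model.mlmodelc`, plus `metadata.json` and, when the mapping file was found, `debug_info.json`):

```python
# Sketch only: `mlmodel` is assumed to come from ct.convert(), as in
# preprocess() below.
result = CoreMLBackend.preprocess_model(
    mlmodel, model_type=CoreMLBackend.MODEL_TYPE.MODEL
)
payload: bytes = result.processed_bytes  # flattened lowered_module/ directory
if result.debug_handle_map is not None:
    # e.g. {"var_12:add": [42], ...}: Core ML debug symbol -> EXIR debug handles
    for symbol, handles in result.debug_handle_map.items():
        print(symbol, "->", handles)
```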

     @classmethod
     def preprocess(
@@ -235,25 +363,14 @@ def preprocess(
             CoreMLBackend.min_deployment_target_from_compile_specs(module_compile_specs)
         )
 
-        skip_model_load: bool = False
-        match model_type:
-            case CoreMLBackend.MODEL_TYPE.MODEL:
-                skip_model_load = True
-
-            case CoreMLBackend.MODEL_TYPE.COMPILED_MODEL:
-                skip_model_load = False
-
         mlmodel = ct.convert(
             model=edge_program,
             source="pytorch",
             convert_to="mlprogram",
             pass_pipeline=ct.PassPipeline.DEFAULT,
-            skip_model_load=skip_model_load,
+            skip_model_load=False,
             compute_precision=model_compute_precision,
             minimum_deployment_target=minimum_deployment_target,
         )
 
-        processed_bytes = CoreMLBackend.to_bytes(mlmodel, model_type=model_type)
-        return PreprocessResult(
-            processed_bytes=processed_bytes,
-        )
+        return CoreMLBackend.preprocess_model(mlmodel, model_type=model_type)
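
Stepping back from the diff, a hedged end-to-end sketch of how this `preprocess` path is exercised. `CoreMLBackend` and `generate_compile_specs` come from this file (calling it with no arguments assumes its parameters have defaults); the `torch.export`, `to_edge`, and `to_backend` entry points are assumptions based on the ExecuTorch AOT lowering flow of the same era:

```python
import torch
from executorch.backends.apple.coreml.compiler import CoreMLBackend
from executorch.exir import to_edge
from executorch.exir.backend.backend_api import to_backend

class AddModule(torch.nn.Module):
    def forward(self, x, y):
        return x + y

exported = torch.export.export(AddModule(), (torch.randn(1), torch.randn(1)))
edge = to_edge(exported)

# Lowering invokes CoreMLBackend.preprocess(); with this change its
# PreprocessResult also carries debug_handle_map, so events recorded inside
# the Core ML delegate can be attributed back to source-level operators.
lowered = to_backend(
    "CoreMLBackend",
    edge.exported_program(),
    CoreMLBackend.generate_compile_specs(),
)
```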
10 changes: 5 additions & 5 deletions backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
@@ -62,7 +62,7 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
 /// `*errorOut` with NSError.
 #define ETCoreMLLogUnderlyingErrorAndSetNSError(errorOut, errorCode, underlyingNSError, formatString, ...) \
     os_log_error(ETCoreMLErrorUtils.loggingChannel, \
-                 formatString " (Underlying error: %@)", \
+                 formatString ", with underlying error= %@.", \
                  ##__VA_ARGS__, \
                  (underlyingNSError).localizedDescription); \
     if (errorOut) { \
@@ -71,10 +71,10 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
                 format:@formatString, ##__VA_ARGS__]; \
     }
 
-#define ETCoreMLLogError(error, formatString, ...) \
-    os_log_error(ETCoreMLErrorUtils.loggingChannel, \
-                 formatString " (Underlying error: %@)", \
-                 ##__VA_ARGS__, \
+#define ETCoreMLLogError(error, formatString, ...)      \
+    os_log_error(ETCoreMLErrorUtils.loggingChannel,     \
+                 formatString ", with error= %@.",      \
+                 ##__VA_ARGS__,                         \
                  (error).localizedDescription);

