
Commit 1fd5562

Fix dynamic linking issues with prebuilt pip packages (#3049)
* Build pybindings with -D_GLIBCXX_USE_CXX11_ABI=0 to match libtorch.so

  libtorch.so builds with the old (pre-C++11) libstdc++ ABI, so we need to as
  well, for any source files that include torch headers.

* Set the RPATH of _portable_lib.so so it can find libtorch

  pip wheels will need to be able to find the torch libraries. On Linux, the
  .so has non-absolute dependencies on libs like "libtorch.so" without paths;
  as long as we `import torch` first, those dependencies will work. But Apple
  dylibs do not support non-absolute dependencies, so we need to tell the
  loader where to look for its libraries. The LC_LOAD_DYLIB entries for the
  torch libraries will look like "@rpath/libtorch.dylib", so we can add an
  LC_RPATH entry to look in a directory relative to the installed location of
  our _portable_lib.so file. To see these LC_* values, run
  `otool -l _portable_lib*.so`.

* Disable wheel delocation on macOS

  The executorch build system will ensure that .dylib/.so files have
  LC_LOAD_DYLIB and LC_RPATH entries that will work when they're installed.
  Delocating (i.e., making copies of the .dylibs that ET's libs depend on)
  will break any libs that depend on the torch libraries if users ever import
  both `torch` and the executorch library. Both import paths must load exactly
  the same file, not just a copy of it.

* Implement smoke_test.py for pip wheel jobs

  This script is run by CI after building the executorch wheel. Before running
  it, the job installs the matching torch package as well as the newly-built
  executorch package and its dependencies. For now we test the export of a
  simple model and try executing it using the runtime pybindings.

Test Plan:

```
./install_requirements.sh
python build/packaging/smoke_test.py
```
1 parent c79666a commit 1fd5562
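
To sanity-check the first two points against an installed wheel, here is a small sketch (not part of this commit) that prints libtorch's C++ ABI setting and the Mach-O load commands of the pybindings extension. It assumes macOS with `otool` from the Xcode command-line tools on PATH, and the glob pattern for `_portable_lib*.so` is illustrative. For the prebuilt torch wheels this change targets, `torch.compiled_with_cxx11_abi()` is expected to return False, i.e. the old ABI that `-D_GLIBCXX_USE_CXX11_ABI=0` matches.

```python
import glob
import os
import subprocess

import torch

# (1) ABI check: the pybindings must be compiled with the same libstdc++ ABI
#     setting that the prebuilt libtorch was compiled with.
print("libtorch built with C++11 ABI:", torch.compiled_with_cxx11_abi())

# (2) RPATH check (macOS): dump the Mach-O load commands of the installed
#     extension and keep the LC_LOAD_DYLIB entries (e.g. "@rpath/libtorch.dylib")
#     and the LC_RPATH entry that should point at ../../../torch/lib.
site_packages = os.path.dirname(os.path.dirname(torch.__file__))
pattern = os.path.join(
    site_packages, "executorch", "extension", "pybindings", "_portable_lib*.so"
)
for lib in glob.glob(pattern):
    print(lib)
    out = subprocess.run(
        ["otool", "-l", lib], capture_output=True, text=True, check=True
    ).stdout
    for line in out.splitlines():
        if any(key in line for key in ("LC_LOAD_DYLIB", "LC_RPATH", "name @", "path @")):
            print("   ", line.strip())
```

Running `otool -l _portable_lib*.so` directly from the shell, as the commit message suggests, shows the same information.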

File tree

3 files changed (+118, -8 lines):

* .github/workflows/build-wheels-m1.yml
* CMakeLists.txt
* build/packaging/smoke_test.py

.github/workflows/build-wheels-m1.yml

Lines changed: 1 addition & 0 deletions
@@ -54,6 +54,7 @@ jobs:
       # "recursive" default to do less work, and to give the buck daemon fewer
       # files to look at.
       submodules: true
+      delocate-wheel: false
       env-var-script: build/packaging/env_var_script_m1.sh
       pre-script: ${{ matrix.pre-script }}
       post-script: ${{ matrix.post-script }}

CMakeLists.txt

Lines changed: 26 additions & 3 deletions
@@ -532,9 +532,14 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs custom_ops_aot_lib)
   endif()
   # compile options for pybind
-  set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
-      -fexceptions)
+  set(_pybind_compile_options
+      -Wno-deprecated-declarations
+      -fPIC
+      -frtti
+      -fexceptions
+      # libtorch is built with the old ABI, so we need to do the same for any
+      # .cpp files that include torch, c10, or ATen targets.
+      -D_GLIBCXX_USE_CXX11_ABI=0)
   # util lib
   add_library(
     util
@@ -568,6 +573,24 @@ if(EXECUTORCH_BUILD_PYBIND)
       ${PYBIND_LINK_COREML}
       ${PYBIND_LINK_MPS}
       ${PYBIND_LINK_XNNPACK})
+  if(APPLE)
+    # pip wheels will need to be able to find the torch libraries. On Linux, the
+    # .so has non-absolute dependencies on libs like "libtorch.so" without
+    # paths; as long as we `import torch` first, those dependencies will work.
+    # But Apple dylibs do not support non-absolute dependencies, so we need to
+    # tell the loader where to look for its libraries. The LC_LOAD_DYLIB entries
+    # for the torch libraries will look like "@rpath/libtorch.dylib", so we can
+    # add an LC_RPATH entry to look in a directory relative to the installed
+    # location of our _portable_lib.so file. To see these LC_* values, run
+    # `otool -l _portable_lib*.so`.
+    set_target_properties(
+      portable_lib
+      PROPERTIES # Assume that this library will be installed in
+                 # `site-packages/executorch/extension/pybindings`, and that
+                 # the torch libs are in `site-packages/torch/lib`.
+                 BUILD_RPATH "@loader_path/../../../torch/lib"
+                 INSTALL_RPATH "@loader_path/../../../torch/lib")
+  endif()
 
   install(TARGETS portable_lib
       LIBRARY DESTINATION executorch/extension/pybindings)
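
As a side note on the `@loader_path/../../../torch/lib` value above: a minimal sketch (not part of the change; the directory strings are illustrative stand-ins for a wheel's `site-packages` layout) confirming that three `..` components are what it takes to get from the assumed install location of `_portable_lib.so` to the torch libraries:

```python
import os.path

# Assumed wheel layout from the CMake comment (illustrative paths):
#   site-packages/executorch/extension/pybindings/_portable_lib*.so
#   site-packages/torch/lib/libtorch.dylib, ...
loader_dir = "site-packages/executorch/extension/pybindings"
torch_lib_dir = "site-packages/torch/lib"

# @loader_path resolves to the directory containing _portable_lib.so, so the
# RPATH suffix is just the relative path between the two directories.
rel = os.path.relpath(torch_lib_dir, start=loader_dir)
assert rel == os.path.join("..", "..", "..", "torch", "lib"), rel
print(f"@loader_path/{rel}")  # -> @loader_path/../../../torch/lib (on POSIX)
```

Setting both BUILD_RPATH and INSTALL_RPATH keeps the lookup working for the library in the build tree as well as for the installed wheel.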

build/packaging/smoke_test.py

Lines changed: 91 additions & 5 deletions
@@ -5,13 +5,99 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+"""
+This script is run by CI after building the executorch wheel. Before running
+this, the job will install the matching torch package as well as the
+newly-built executorch package and its dependencies.
+"""
+
+# Import this first. If it can't find the torch.so libraries, the dynamic load
+# will fail and the process will exit.
+from executorch.extension.pybindings import portable_lib  # usort: skip
+
+# Import this after importing the ExecuTorch pybindings. If the pybindings
+# links against a different torch.so than this uses, there will be a set of
+# symbol conflicts; the process will either exit now, or there will be issues
+# later in the smoke test.
+import torch  # usort: skip
+
+# Import everything else later to help isolate the critical imports above.
+import os
+import tempfile
+from typing import Tuple
+
+from executorch.exir import to_edge
+from torch.export import export
+
+
+class LinearModel(torch.nn.Module):
+    """Runs Linear on its input, which should have shape [4]."""
+
+    def __init__(self):
+        super().__init__()
+        self.linear = torch.nn.Linear(4, 2)
+
+    def forward(self, x: torch.Tensor):
+        """Expects a single tensor of shape [4]."""
+        return self.linear(x)
+
+
+def linear_model_inputs() -> Tuple[torch.Tensor]:
+    """Returns some example inputs compatible with LinearModel."""
+    # The model takes a single tensor of shape [4] as an input.
+    return (torch.ones(4),)
+
+
+def export_linear_model() -> bytes:
+    """Exports LinearModel and returns the .pte data."""
+
+    # This helps the exporter understand the shapes of tensors used in the
+    # model. Since our model only takes one input, this is a one-tuple.
+    example_inputs = linear_model_inputs()
+
+    # Export the pytorch model and process for ExecuTorch.
+    print("Exporting program...")
+    exported_program = export(LinearModel(), example_inputs)
+    print("Lowering to edge...")
+    edge_program = to_edge(exported_program)
+    print("Creating ExecuTorch program...")
+    et_program = edge_program.to_executorch()
+
+    return et_program.buffer
+
 
 def main():
-    """
-    Run ExecuTorch binary smoke tests. This is a placeholder for future tests. See
-    https://github.com/pytorch/test-infra/wiki/Using-Nova-Reusable-Build-Workflows
-    for more information about Nova binary workflow.
-    """
+    """Tests the export and execution of a simple model."""
+
+    # If the pybindings loaded correctly, we should be able to ask for the set
+    # of operators.
+    ops = portable_lib._get_operator_names()
+    assert len(ops) > 0, "Empty operator list"
+    print(f"Found {len(ops)} operators; first element '{ops[0]}'")
+
+    # Export LinearModel to .pte data.
+    pte_data: bytes = export_linear_model()
+
+    # Try saving to and loading from a file.
+    with tempfile.TemporaryDirectory() as tempdir:
+        pte_file = os.path.join(tempdir, "linear.pte")
+
+        # Save the .pte data to a file.
+        with open(pte_file, "wb") as file:
+            file.write(pte_data)
+        print(f"ExecuTorch program saved to {pte_file} ({len(pte_data)} bytes).")
+
+        # Load the model from disk.
+        m = portable_lib._load_for_executorch(pte_file)
+
+        # Run the model.
+        outputs = m.forward(linear_model_inputs())
+
+    # Should see a single output with shape [2].
+    assert len(outputs) == 1, f"Unexpected output length {len(outputs)}: {outputs}"
+    assert outputs[0].shape == (2,), f"Unexpected output size {outputs[0].shape}"
+
+    print("PASS")
 
 
 if __name__ == "__main__":
