
Commit 0ece196

larryliu0820 authored and facebook-github-bot committed
Register quantized ops into quantization example (#85)
Summary:
Pull Request resolved: #85

Enables `export_to_pte` for quantized models. Previously, quantized models couldn't run `export_to_pte` because some quantized ops were missing out variants. We recently added support for custom ops and for registering them into EXIR by loading a shared library. This diff adds support for registering the out variants of the quantized ops, and adds the flow to CI.

Reviewed By: huydhn

Differential Revision: D48541611

fbshipit-source-id: e8740383abb38704a6450de200572400498ce4d5
1 parent 511a85d · commit 0ece196
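For orientation, the flow this commit enables looks roughly like the sketch below. This is a minimal outline rather than the example's exact code: the `.so` path and the captured graph are placeholders, `export_to_pte` is the helper imported in the example.py diff further down, and the PT2E calls follow the `torch.ao` quantization APIs that example.py already uses.

```python
import copy
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

# Register the out variants of the quantized ops before exporting.
# Placeholder path: use whatever `buck2 build //kernels/quantized:aot_lib
# --show-output` reports (see test_quantize.sh below).
torch.ops.load_library("/path/to/aot_lib.so")

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())

# `captured` stands in for the graph captured from the eager model; the
# capture step is elided because its API varies across PyTorch versions.
m = prepare_pt2e(captured, quantizer)
m(*example_inputs)  # calibrate with representative inputs
m = convert_pt2e(m)

# With the out variants registered, this call no longer fails:
export_to_pte(model_name, m, copy.deepcopy(example_inputs))
```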

File tree: 5 files changed (+50, −11 lines)

  .ci/scripts/test.sh
  examples/quantization/example.py
  examples/quantization/test_quantize.sh (new)
  kernels/quantized/targets.bzl
  shim/xplat/executorch/kernels/portable/op_registration_util.bzl


.ci/scripts/test.sh

Lines changed: 2 additions & 5 deletions

```diff
@@ -35,16 +35,13 @@ test_model() {
     buck2 run //examples/executor_runner:executor_runner -- --model_path "./${MODEL_NAME}.pte"
   elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
     CMAKE_OUTPUT_DIR=cmake-out
-    ./"${CMAKE_OUTPUT_DIR}"/executor_runner --model_path "./${MODEL_NAME}.pte"
+    ./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte"
   else
     echo "Invalid build tool ${BUILD_TOOL}. Only buck2 and cmake are supported atm"
     exit 1
   fi
 }
 
-test_quantized_model() {
-  python -m examples.quantization.example --model_name="${MODEL_NAME}"
-}
 
 which python
 
@@ -53,7 +50,7 @@ echo "Testing ${MODEL_NAME} with ${BUILD_TOOL}..."
 test_model
 
 if [[ "${QUANTIZATION}" == true ]]; then
-  test_quantized_model
+  bash examples/quantization/test_quantize.sh "${MODEL_NAME}"
 else
   echo "The model ${MODEL_NAME} doesn't support quantization yet"
 fi
```

examples/quantization/example.py

Lines changed: 10 additions & 5 deletions

```diff
@@ -25,8 +25,7 @@
     XNNPACKQuantizer,
 )
 
-# TODO: maybe move this to examples/export/utils.py?
-# from ..export.export_example import export_to_ff
+from ..export.export_example import export_to_pte
 
 from ..models import MODEL_NAME_TO_MODEL
 
@@ -51,8 +50,7 @@ def quantize(model_name, model, example_inputs):
     m = convert_pt2e(m)
     print("quantized model:", m)
     # make sure we can export to flat buffer
-    # Note: this is not working yet due to missing out variant ops for quantize_per_tensor/dequantize_per_tensor ops
-    # aten = export_to_ff(model_name, m, copy.deepcopy(example_inputs))
+    export_to_pte(model_name, m, copy.deepcopy(example_inputs))
 
 
 def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_inputs):
@@ -114,9 +112,16 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
         default=False,
         help="flag for verifying XNNPACKQuantizer against fx graph mode quantization",
     )
+    parser.add_argument(
+        "-s",
+        "--so_library",
+        required=False,
+        help="shared library for quantized operators",
+    )
 
     args = parser.parse_args()
-
+    if args.so_library:
+        torch.ops.load_library(args.so_library)
     if not args.verify and args.model_name not in QUANT_MODEL_NAME_TO_MODEL:
         raise RuntimeError(
             f"Model {args.model_name} is not a valid name. or not quantizable right now, "
```
examples/quantization/test_quantize.sh (new file)

Lines changed: 21 additions & 0 deletions

```diff
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Test the end-to-end quantization flow.
+
+set -e
+
+# TODO(larryliu0820): Add CMake build
+test_buck2_quantization() {
+  echo "Building quantized ops shared library"
+  SO_LIB=$(buck2 build //kernels/quantized:aot_lib --show-output | grep "buck-out" | cut -d" " -f2)
+
+  echo "Run example.py"
+  python -m "examples.quantization.example" --so_library="$SO_LIB" --model_name="$1"
+}
+
+test_buck2_quantization "$1"
```
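The `--show-output` flag makes buck2 print a `<target> <path>` line for each built artifact; the `grep "buck-out" | cut -d" " -f2` pipeline reduces that to just the `.so` path. The same two steps can also be driven from Python, e.g. in a test harness (a sketch under the same assumptions, with `mv2` as a stand-in model name):

```python
import subprocess

def build_quantized_aot_lib() -> str:
    """Build //kernels/quantized:aot_lib and return the path to the built .so."""
    result = subprocess.run(
        ["buck2", "build", "//kernels/quantized:aot_lib", "--show-output"],
        check=True, capture_output=True, text=True,
    )
    # --show-output prints "<target> <path>"; keep the path column, mirroring
    # the shell pipeline above.
    for line in result.stdout.splitlines() + result.stderr.splitlines():
        if "buck-out" in line:
            return line.split(" ")[1]
    raise RuntimeError("aot_lib path not found in buck2 output")

if __name__ == "__main__":
    so_lib = build_quantized_aot_lib()
    subprocess.run(
        ["python", "-m", "examples.quantization.example",
         f"--so_library={so_lib}", "--model_name=mv2"],
        check=True,
    )
```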

kernels/quantized/targets.bzl

Lines changed: 12 additions & 1 deletion

```diff
@@ -1,5 +1,5 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
-load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib")
+load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib", "exir_custom_ops_aot_lib")
 
 def define_common_targets():
     runtime.export_file(
@@ -15,6 +15,17 @@ def define_common_targets():
         define_static_targets = True,
     )
 
+    # lib used to register quantized ops into EXIR
+    exir_custom_ops_aot_lib(
+        name = "aot_lib",
+        yaml_target = ":quantized.yaml",
+        visibility = ["//executorch/..."],
+        kernels = [":quantized_operators_aten"],
+        deps = [
+            ":all_quantized_ops",
+        ],
+    )
+
     for aten_mode in (True, False):
         aten_suffix = "_aten" if aten_mode else ""
```

shim/xplat/executorch/kernels/portable/op_registration_util.bzl

Lines changed: 5 additions & 0 deletions

```diff
@@ -123,6 +123,11 @@ def define_op_library(name, deps, android_deps, aten_target, _allow_third_party_
         deps = [
             "//executorch/runtime/kernel:kernel_includes" + aten_suffix,
         ] + deps,
+        # WARNING: using a deprecated API to avoid being built into a shared
+        # library. When the .so is loaded dynamically we don't want it to
+        # depend on other shared libraries, because then we would have to
+        # specify a library directory path.
+        force_static = True,
         # link_whole is necessary because the operators register themselves
         # via static initializers that run at program startup.
         # @lint-ignore BUCKLINT link_whole
```
