
Register quantized ops into quantization example #85

Closed · wants to merge 1 commit
7 changes: 2 additions & 5 deletions .ci/scripts/test.sh
@@ -35,16 +35,13 @@ test_model() {
     buck2 run //examples/executor_runner:executor_runner -- --model_path "./${MODEL_NAME}.pte"
   elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
     CMAKE_OUTPUT_DIR=cmake-out
-    ./"${CMAKE_OUTPUT_DIR}"/executor_runner --model_path "./${MODEL_NAME}.pte"
+    ./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte"
   else
     echo "Invalid build tool ${BUILD_TOOL}. Only buck2 and cmake are supported atm"
     exit 1
   fi
 }
 
-test_quantized_model() {
-  python -m examples.quantization.example --model_name="${MODEL_NAME}"
-}
 
 which python
 
@@ -53,7 +50,7 @@ echo "Testing ${MODEL_NAME} with ${BUILD_TOOL}..."
 test_model
 
 if [[ "${QUANTIZATION}" == true ]]; then
-  test_quantized_model
+  bash examples/quantization/test_quantize.sh "${MODEL_NAME}"
 else
   echo "The model ${MODEL_NAME} doesn't support quantization yet"
 fi
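
Delegating to examples/quantization/test_quantize.sh instead of calling example.py inline is deliberate: the quantized export now requires the aot_lib shared library to be built and loaded first, and the new script performs the buck2 build and forwards the resulting .so path through example.py's new --so_library flag.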
15 changes: 10 additions & 5 deletions examples/quantization/example.py
@@ -25,8 +25,7 @@
     XNNPACKQuantizer,
 )
 
-# TODO: maybe move this to examples/export/utils.py?
-# from ..export.export_example import export_to_ff
+from ..export.export_example import export_to_pte
 
 from ..models import MODEL_NAME_TO_MODEL
 
@@ -51,8 +50,7 @@ def quantize(model_name, model, example_inputs):
     m = convert_pt2e(m)
     print("quantized model:", m)
     # make sure we can export to flat buffer
-    # Note: this is not working yet due to missing out variant ops for quantize_per_tensor/dequantize_per_tensor ops
-    # aten = export_to_ff(model_name, m, copy.deepcopy(example_inputs))
+    export_to_pte(model_name, m, copy.deepcopy(example_inputs))
 
@@ -114,9 +112,16 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
         default=False,
         help="flag for verifying XNNPACKQuantizer against fx graph mode quantization",
     )
+    parser.add_argument(
+        "-s",
+        "--so_library",
+        required=False,
+        help="shared library for quantized operators",
+    )
 
     args = parser.parse_args()
 
+    if args.so_library:
+        torch.ops.load_library(args.so_library)
     if not args.verify and args.model_name not in QUANT_MODEL_NAME_TO_MODEL:
         raise RuntimeError(
             f"Model {args.model_name} is not a valid name. or not quantizable right now, "
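
The new --so_library flag is the piece that makes the export work: EXIR needs out-variant kernels for quantize_per_tensor/dequantize_per_tensor at export time (the gap the removed comment described), and torch.ops.load_library registers them with the PyTorch dispatcher before export_to_pte runs. A minimal sketch of the resulting flow (the model, the .so path, and the capture API here are illustrative; the PT2E import paths have moved between PyTorch releases):

import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

# Register the out-variant quantized kernels first (illustrative path;
# buck2 prints the real one, see test_quantize.sh below).
torch.ops.load_library("buck-out/.../aot_lib.so")

class TinyModel(torch.nn.Module):  # stand-in for the example models
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)

model = TinyModel().eval()
example_inputs = (torch.randn(1, 8),)

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())

# Capture, insert observers, calibrate, then convert to a quantized graph.
m = torch.export.export_for_training(model, example_inputs).module()
m = prepare_pt2e(m, quantizer)
m(*example_inputs)  # calibration run
m = convert_pt2e(m)
# example.py then hands the converted graph to export_to_pte(...).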
21 changes: 21 additions & 0 deletions examples/quantization/test_quantize.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Test the end-to-end quantization flow.
+
+set -e
+
+# TODO(larryliu0820): Add CMake build
+test_buck2_quantization() {
+  echo "Building quantized ops shared library"
+  SO_LIB=$(buck2 build //kernels/quantized:aot_lib --show-output | grep "buck-out" | cut -d" " -f2)
+
+  echo "Run example.py"
+  python -m "examples.quantization.example" --so_library="$SO_LIB" --model_name="$1"
+}
+
+test_buck2_quantization "$1"

Contributor comment on the TODO above: This is probably a TODO to add a cmake build here later for completeness.
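
For reference, buck2 build --show-output prints the built target followed by its output path, so the grep/cut pipeline simply extracts the path of the generated aot_lib shared library, which is then handed to example.py via the new --so_library flag. Outside CI, an invocation like bash examples/quantization/test_quantize.sh mv2 should exercise the same flow, assuming mv2 is one of the names in QUANT_MODEL_NAME_TO_MODEL.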
13 changes: 12 additions & 1 deletion kernels/quantized/targets.bzl
@@ -1,5 +1,5 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
-load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib")
+load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib", "exir_custom_ops_aot_lib")
 
 def define_common_targets():
     runtime.export_file(
 
@@ -15,6 +15,17 @@ def define_common_targets():
         define_static_targets = True,
     )
 
+    # lib used to register quantized ops into EXIR
+    exir_custom_ops_aot_lib(
+        name = "aot_lib",
+        yaml_target = ":quantized.yaml",
+        visibility = ["//executorch/..."],
+        kernels = [":quantized_operators_aten"],
+        deps = [
+            ":all_quantized_ops",
+        ],
+    )
+
     for aten_mode in (True, False):
         aten_suffix = "_aten" if aten_mode else ""
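
The aot_lib target is what the script above builds: exir_custom_ops_aot_lib packages the kernels declared in quantized.yaml into a shared library that registers the quantized ops with the PyTorch dispatcher when loaded. A quick sanity check, assuming the ops live in the quantized_decomposed namespace referenced elsewhere in this PR and expose .out overloads (both are assumptions here):

import torch

# Illustrative path; test_quantize.sh extracts the real one from buck2.
torch.ops.load_library("buck-out/.../aot_lib.so")

# If registration succeeded, the out variants resolve in the dispatcher,
# which is what EXIR needs when serializing the quantized graph.
print(torch.ops.quantized_decomposed.quantize_per_tensor.out)
print(torch.ops.quantized_decomposed.dequantize_per_tensor.out)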
@@ -123,6 +123,11 @@ def define_op_library(name, deps, android_deps, aten_target, _allow_third_party_
         deps = [
             "//executorch/runtime/kernel:kernel_includes" + aten_suffix,
         ] + deps,
+        # WARNING: using a deprecated API to avoid being built into a shared
+        # library. When the .so is loaded dynamically, we don't want it to
+        # depend on other shared libraries, because then we would have to
+        # specify a library directory path.
+        force_static = True,
         # link_whole is necessary because the operators register themselves
         # via static initializers that run at program startup.
         # @lint-ignore BUCKLINT link_whole