
Register quantized ops into quantization example #85

Closed · wants to merge 1 commit
7 changes: 2 additions & 5 deletions .ci/scripts/test.sh
@@ -35,16 +35,13 @@ test_model() {
     buck2 run //examples/executor_runner:executor_runner -- --model_path "./${MODEL_NAME}.pte"
   elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
     CMAKE_OUTPUT_DIR=cmake-out
-    ./"${CMAKE_OUTPUT_DIR}"/executor_runner --model_path "./${MODEL_NAME}.pte"
+    ./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte"
   else
     echo "Invalid build tool ${BUILD_TOOL}. Only buck2 and cmake are supported atm"
     exit 1
   fi
 }
 
-test_quantized_model() {
-  python -m examples.quantization.example --model_name="${MODEL_NAME}"
-}
 
 which python
 
@@ -53,7 +50,7 @@ echo "Testing ${MODEL_NAME} with ${BUILD_TOOL}..."
 test_model
 
 if [[ "${QUANTIZATION}" == true ]]; then
-  test_quantized_model
+  bash examples/quantization/test_quantize.sh "${MODEL_NAME}"
 else
   echo "The model ${MODEL_NAME} doesn't support quantization yet"
 fi
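
Delegating to examples/quantization/test_quantize.sh instead of calling example.py inline is deliberate: the quantized export now requires the aot_lib shared library to be built and loaded first, and the new script performs the buck2 build and forwards the resulting .so path through example.py's new --so_library flag.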
15 changes: 10 additions & 5 deletions examples/quantization/example.py
@@ -25,8 +25,7 @@
     XNNPACKQuantizer,
 )
 
-# TODO: maybe move this to examples/export/utils.py?
-# from ..export.export_example import export_to_ff
+from ..export.export_example import export_to_pte
 
 from ..models import MODEL_NAME_TO_MODEL
 
@@ -51,8 +50,7 @@ def quantize(model_name, model, example_inputs):
     m = convert_pt2e(m)
     print("quantized model:", m)
     # make sure we can export to flat buffer
-    # Note: this is not working yet due to missing out variant ops for quantize_per_tensor/dequantize_per_tensor ops
-    # aten = export_to_ff(model_name, m, copy.deepcopy(example_inputs))
+    export_to_pte(model_name, m, copy.deepcopy(example_inputs))
 
@@ -114,9 +112,16 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
         default=False,
         help="flag for verifying XNNPACKQuantizer against fx graph mode quantization",
     )
+    parser.add_argument(
+        "-s",
+        "--so_library",
+        required=False,
+        help="shared library for quantized operators",
+    )
 
     args = parser.parse_args()
 
+    if args.so_library:
+        torch.ops.load_library(args.so_library)
     if not args.verify and args.model_name not in QUANT_MODEL_NAME_TO_MODEL:
         raise RuntimeError(
             f"Model {args.model_name} is not a valid name. or not quantizable right now, "
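
The new --so_library flag is the piece that makes the export work: EXIR needs out-variant kernels for quantize_per_tensor/dequantize_per_tensor at export time (the gap the removed comment described), and torch.ops.load_library registers them with the PyTorch dispatcher before export_to_pte runs. A minimal sketch of the resulting flow (the model, the .so path, and the capture API here are illustrative; the PT2E import paths have moved between PyTorch releases):

import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

# Register the out-variant quantized kernels first (illustrative path;
# buck2 prints the real one, see test_quantize.sh below).
torch.ops.load_library("buck-out/.../aot_lib.so")

class TinyModel(torch.nn.Module):  # stand-in for the example models
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)

model = TinyModel().eval()
example_inputs = (torch.randn(1, 8),)

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())

# Capture, insert observers, calibrate, then convert to a quantized graph.
m = torch.export.export_for_training(model, example_inputs).module()
m = prepare_pt2e(m, quantizer)
m(*example_inputs)  # calibration run
m = convert_pt2e(m)
# example.py then hands the converted graph to export_to_pte(...).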
21 changes: 21 additions & 0 deletions examples/quantization/test_quantize.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Test the end-to-end quantization flow.
+
+set -e
+
+# TODO(larryliu0820): Add CMake build
+test_buck2_quantization() {
+  echo "Building quantized ops shared library"
+  SO_LIB=$(buck2 build //kernels/quantized:aot_lib --show-output | grep "buck-out" | cut -d" " -f2)
+
+  echo "Run example.py"
+  python -m "examples.quantization.example" --so_library="$SO_LIB" --model_name="$1"
+}
+
+test_buck2_quantization "$1"

Contributor comment on the TODO above: This is probably a TODO to add a cmake build here later for completeness.
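
For reference, buck2 build --show-output prints the built target followed by its output path, so the grep/cut pipeline simply extracts the path of the generated aot_lib shared library, which is then handed to example.py via the new --so_library flag. Outside CI, an invocation like bash examples/quantization/test_quantize.sh mv2 should exercise the same flow, assuming mv2 is one of the names in QUANT_MODEL_NAME_TO_MODEL.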
13 changes: 12 additions & 1 deletion kernels/quantized/targets.bzl
@@ -1,5 +1,5 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
-load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib")
+load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib", "exir_custom_ops_aot_lib")
 
 def define_common_targets():
     runtime.export_file(
 
@@ -15,6 +15,17 @@ def define_common_targets():
         define_static_targets = True,
     )
 
+    # lib used to register quantized ops into EXIR
+    exir_custom_ops_aot_lib(
+        name = "aot_lib",
+        yaml_target = ":quantized.yaml",
+        visibility = ["//executorch/..."],
+        kernels = [":quantized_operators_aten"],
+        deps = [
+            ":all_quantized_ops",
+        ],
+    )
+
     for aten_mode in (True, False):
         aten_suffix = "_aten" if aten_mode else ""
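
The aot_lib target is what the script above builds: exir_custom_ops_aot_lib packages the kernels declared in quantized.yaml into a shared library that registers the quantized ops with the PyTorch dispatcher when loaded. A quick sanity check, assuming the ops live in the quantized_decomposed namespace referenced elsewhere in this PR and expose .out overloads (both are assumptions here):

import torch

# Illustrative path; test_quantize.sh extracts the real one from buck2.
torch.ops.load_library("buck-out/.../aot_lib.so")

# If registration succeeded, the out variants resolve in the dispatcher,
# which is what EXIR needs when serializing the quantized graph.
print(torch.ops.quantized_decomposed.quantize_per_tensor.out)
print(torch.ops.quantized_decomposed.dequantize_per_tensor.out)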
@@ -123,6 +123,11 @@ def define_op_library(name, deps, android_deps, aten_target, _allow_third_party_
         deps = [
             "//executorch/runtime/kernel:kernel_includes" + aten_suffix,
         ] + deps,
+        # WARNING: using a deprecated API to avoid being built into a shared
+        # library. When the .so is loaded dynamically, we don't want it to
+        # depend on other shared libraries, because then we would have to
+        # specify a library directory path.
+        force_static = True,
         # link_whole is necessary because the operators register themselves
         # via static initializers that run at program startup.
         # @lint-ignore BUCKLINT link_whole