Skip to content

XNNPACK: faster 4-bit packing #2649

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,6 @@ option(EXECUTORCH_BUILD_ARM_BAREMETAL

option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)

option(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL "Build the AOT util library" OFF)

option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
OFF)

Expand Down Expand Up @@ -372,10 +370,6 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
target_compile_options(executor_runner PUBLIC ${_common_compile_options})
endif()

if(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/aot_util)
endif()

# Add googletest if any test targets should be built
if(EXECUTORCH_BUILD_GTESTS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/googletest)
Expand Down
1 change: 0 additions & 1 deletion backends/xnnpack/operators/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,5 @@ runtime.python_library(
"//executorch/backends/xnnpack/utils:xnnpack_utils",
"//executorch/exir:graph_module",
"//executorch/exir/backend:backend_details",
"//executorch/extension/aot_util:aot_util",
],
)
49 changes: 11 additions & 38 deletions backends/xnnpack/operators/node_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
# LICENSE file in the root directory of this source tree.

import ctypes
import sys

from pathlib import Path
from typing import cast, Dict, List, Optional, Tuple

import torch
Expand Down Expand Up @@ -449,18 +447,6 @@ def define_tensor(
if quant_params is not None:
vals_to_ids[quant_params.q_input] = id_out

@staticmethod
def find_aot_util_path() -> str:
# Look for .so installed by wheel (OSS). TODO(gjcomer) Improve this.
rel_path = "executorch/extension/aot_util/libaot_util.so"
for sys_path in sys.path:
so_path = Path(sys_path) / rel_path
if so_path.exists():
return str(so_path.absolute().as_posix())

# Fall back to buck.
return "//executorch/extension/aot_util:aot_util"

@staticmethod
def convert_to_qc4w(inp: torch.Tensor) -> torch.Tensor:
"""
Expand All @@ -478,37 +464,24 @@ def convert_to_qc4w(inp: torch.Tensor) -> torch.Tensor:
# Assuming we have a 2d tensor
if inp.ndim != 2:
inp = inp.squeeze()
assert (
inp.ndim == 2
), f"convert_to_qc4w: expecting input tensor to be 2d, got {inp.ndim}"
oc, ic = inp.shape
assert (
inp.ndim == 2
), f"convert_to_qc4w: expecting input tensor to be 2d, got {inp.ndim}"

# pad ic
if ic % 2 != 0:
if inp.shape[-1] % 2 != 0:
inp = F.pad(input=inp, pad=(0, 1, 0, 0), mode="constant", value=0)

# Shape after padding
oc, ic = inp.shape
assert ic % 2 == 0, "convert_to_qc4w: expecting ic to be even"

# Adjust inp tensor for zp
inp = inp.to(dtype=torch.uint8) + 8

# prepare result tensor
ric = int((ic + 1) / 2)
result = torch.zeros([oc, ric], dtype=torch.uint8)

try:
aot_path = NodeVisitor.find_aot_util_path()
torch.ops.load_library(aot_path)
result = torch.ops.xnnpack.convert_to_qc4w(inp)
except:
# Fallback to python implementation
# TODO Warn the user? They might be developing in-tree and didn't install,
# in which case, this will be very slow for large models.
for o in range(oc):
for i in range(ric):
j = 2 * i
result[o][i] = inp[o][j]
result[o][i] += inp[o][j + 1] << 4

return result
# Prepare the Result tensor
inp = inp.contiguous().view(-1)
return (inp[1::2] << 4 | inp[::2]).view(oc, int(ic / 2))

def get_serialized_buffer_index(
self,
Expand Down
12 changes: 0 additions & 12 deletions backends/xnnpack/test/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,3 @@ runtime.python_test(
"//executorch/backends/xnnpack:xnnpack_preprocess",
],
)

runtime.python_test(
name = "test_custom_convert_qc4w_op",
srcs = ["ops/test_custom_convert_to_qc4w.py"],
deps = [
"//caffe2:torch",
"//executorch/extension/aot_util:aot_util",
],
external_deps = [
"libtorch",
],
)
54 changes: 0 additions & 54 deletions backends/xnnpack/test/ops/test_custom_convert_to_qc4w.py

This file was deleted.

2 changes: 0 additions & 2 deletions build/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ function(executorch_print_configuration_summary)
" EXECUTORCH_BUILD_COREML : ${EXECUTORCH_BUILD_COREML}")
message(STATUS " EXECUTORCH_BUILD_EXECUTOR_RUNNER : "
"${EXECUTORCH_BUILD_EXECUTOR_RUNNER}")
message(STATUS " EXECUTORCH_BUILD_EXTENSION_AOT_UTIL : "
"${EXECUTORCH_BUILD_EXTENSION_AOT_UTIL}")
message(STATUS " EXECUTORCH_BUILD_EXTENSION_DATA_LOADER : "
"${EXECUTORCH_BUILD_EXTENSION_DATA_LOADER}")
message(STATUS " EXECUTORCH_BUILD_EXTENSION_MODULE : "
Expand Down
11 changes: 0 additions & 11 deletions build/cmake_deps.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,6 @@ filters = [

# ---------------------------------- core end ----------------------------------
# ---------------------------------- extension start ----------------------------------

[targets.extension_aot_util]
buck_targets = [
"//extension/aot_util:aot_util",
]
filters = [
".cpp$",
]
deps = [
"executorch",
]
[targets.extension_data_loader]
buck_targets = [
"//extension/data_loader:buffer_data_loader",
Expand Down
68 changes: 0 additions & 68 deletions extension/aot_util/CMakeLists.txt

This file was deleted.

12 changes: 0 additions & 12 deletions extension/aot_util/TARGETS

This file was deleted.

32 changes: 0 additions & 32 deletions extension/aot_util/convert_to_qc4w.cpp

This file was deleted.

5 changes: 0 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,6 @@ def run(self):
}
ext_modules = []

if os.environ.get("EXECUTORCH_BUILD_AOT_UTIL", "ON") == "ON":
ext_modules.append(
CMakeExtension("executorch.extension.aot_util.aot_util", "extension/aot_util")
)

if os.environ.get("EXECUTORCH_BUILD_PYBIND", "OFF") == "ON":
ext_modules.append(CMakeExtension("executorch.extension.pybindings.portable_lib"))

Expand Down