Commit 07621c9

rebase to pacify github
[ghstack-poisoned]
2 parents: 089e5ce + 0e35c30

107 files changed (+2256, -775 lines)

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-01f1cc44cbbfdf6307aa01b803a4ee22f9ade946
+5616fa4a68718ead203314a3467f7dd9547153ae

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 0 additions & 1 deletion
@@ -40,7 +40,6 @@ cmake --build cmake-out -j16 --target install --config Release

 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
-    -DCMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
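
Note: the deleted `-DCMAKE_PREFIX_PATH` flag shelled out to `distutils.sysconfig.get_python_lib`, and `distutils` was removed from the standard library in Python 3.12, so that lookup fails on current interpreters. If the site-packages prefix were still needed, a rough modern equivalent (a sketch, not part of this commit) would be:

    import sysconfig

    # sysconfig replaces distutils.sysconfig; "purelib" is the site-packages dir
    print(sysconfig.get_paths()["purelib"])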

.ci/scripts/utils.sh

Lines changed: 1 addition & 2 deletions
@@ -158,8 +158,7 @@ build_executorch_runner() {
 cmake_install_executorch_lib() {
   echo "Installing libexecutorch.a and libportable_kernels.a"
   clean_executorch_install_folders
-  retry cmake -DBUCK2="$BUCK" \
-              -DCMAKE_INSTALL_PREFIX=cmake-out \
+  retry cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
               -DCMAKE_BUILD_TYPE=Release \
               -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
               -Bcmake-out .

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
@@ -371,7 +371,7 @@ jobs:
           size=${arr[4]}
           # threshold=48120 on devserver with gcc11.4
           # todo(lfq): update once binary size is below 50kb.
-          threshold="51408"
+          threshold="55504"
           if [[ "$size" -le "$threshold" ]]; then
             echo "Success $size <= $threshold"
           else
@@ -406,7 +406,7 @@ jobs:
           output=$(ls -la cmake-out/test/size_test)
           arr=($output)
           size=${arr[4]}
-          threshold="47560"
+          threshold="51656"
           if [[ "$size" -le "$threshold" ]]; then
             echo "Success $size <= $threshold"
           else
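
Context: `arr=($output)` word-splits the `ls -la` line, so `${arr[4]}` is the file size in bytes. Both thresholds grow by exactly 4096 bytes (one 4 KiB page), and the todo above still applies since the new ceilings remain above 50 kB.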

backends/arm/quantizer/quantization_annotator.py

Lines changed: 35 additions & 10 deletions
@@ -95,7 +95,10 @@ def _is_ok_for_quantization(
            continue

        for n_arg in _as_list(node.args[quant_property.index]):
-            assert isinstance(n_arg, Node)
+            if not isinstance(n_arg, Node):
+                raise TypeError(
+                    f"n_arg must be a Node instance, got {type(n_arg).__name__!r}"
+                )
            if not is_ok_for_quantization(n_arg, gm):  # type: ignore[attr-defined]
                logger.debug(
                    f'could not quantize node due to input "{node}": '
@@ -108,7 +111,10 @@ def _is_ok_for_quantization(


 def _annotate_input(node: Node, quant_property: _QuantProperty):
-    assert not is_annotated(node)
+    if is_annotated(node):
+        raise RuntimeError(
+            f"Cannot annotate input: node '{node.name}' is already annotated"
+        )
    if quant_property.optional and (
        quant_property.index >= len(node.args)
        or node.args[quant_property.index] is None
@@ -120,17 +126,28 @@ def _annotate_input(node: Node, quant_property: _QuantProperty):
        _as_list(quant_property.qspec),
        strict=True,
    ):
-        assert isinstance(n_arg, Node)
+        if not isinstance(n_arg, Node):
+            raise TypeError(
+                f"n_arg must be a Node instance, got {type(n_arg).__name__!r}"
+            )
        annotate_input_qspec_map(node, n_arg, qspec)
        if quant_property.mark_annotated:
            mark_node_as_annotated(n_arg)  # type: ignore[attr-defined]


 def _annotate_output(node: Node, quant_property: _QuantProperty):
-    assert not is_annotated(node)
-    assert not quant_property.mark_annotated
-    assert not quant_property.optional
-    assert quant_property.index == 0, "Only one output annotation supported currently"
+    if is_annotated(node):
+        raise RuntimeError(
+            f"Cannot annotate output: node '{node.name}' is already annotated"
+        )
+    if quant_property.mark_annotated:
+        raise ValueError(
+            "quant_property.mark_annotated must be False for output annotation"
+        )
+    if quant_property.optional:
+        raise ValueError("quant_property.optional must be False for output annotation")
+    if quant_property.index != 0:
+        raise ValueError("Only one output annotation supported currently")

    annotate_output_qspec(node, quant_property.qspec)

@@ -145,7 +162,9 @@ def _match_pattern(

    Each 'pattern' element is composed of a list of disjunctive nodes types.
    """
-    assert len(pattern) > 0, "No pattern provided"
+    if len(pattern) < 1:
+        raise ValueError("No pattern provided")
+
    if filter_fn is not None:
        if not filter_fn(node):
            return False
@@ -417,8 +436,14 @@ def any_or_hardtanh_min_zero(n: Node):
        torch.ops.aten.concatenate.default,
        torch.ops.aten.stack.default,
    ):
-        assert isinstance(node.args[0], list)
-        assert len(node.args[0]) != 0
+        # first argument should be a non-empty list of nodes
+        if not isinstance(node.args[0], list):
+            raise TypeError(
+                "Expected node.args[0] to be a list, got "
+                f"{type(node.args[0]).__name__!r}"
+            )
+        if len(node.args[0]) == 0:
+            raise ValueError("Expected non-empty list for node.args[0]")

        shared_qspec = SharedQuantizationSpec((node.args[0][0], node))
        quant_properties.quant_inputs = [
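
The pattern throughout this file swaps `assert` for explicit exceptions. The practical difference: assert statements are stripped when Python runs with `-O`, so the checks would silently vanish in optimized builds, and a bare `AssertionError` carries no message. A minimal sketch of the before/after shape (the `Node` import mirrors the file above):

    from torch.fx import Node

    def check_arg(n_arg):
        # Before: removed entirely under `python -O`
        assert isinstance(n_arg, Node)

    def check_arg_explicit(n_arg):
        # After: always enforced, with a typed, descriptive error
        if not isinstance(n_arg, Node):
            raise TypeError(
                f"n_arg must be a Node instance, got {type(n_arg).__name__!r}"
            )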

backends/arm/scripts/build_executorch.sh

Lines changed: 1 addition & 40 deletions
@@ -54,47 +54,9 @@ source ${setup_path_script}

 et_build_dir="${et_build_root}/cmake-out"

-# Used for flatcc host excutable if Devtools is used
-et_build_host_dir=${et_build_root}/cmake-out-host-tools
-
 set -x
 cd "${et_root_dir}"

-if [ "$build_with_etdump" = true ] ; then
-    ( set +x ;
-      echo "--------------------------------------------------------------------------------" ;
-      echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_build_host_dir}/bin/flatcc" ;
-      echo "--------------------------------------------------------------------------------" )
-
-    # Build host flatcc bin
-    # This is a way to work around that the flatcc executable get build for target (e.g. Arm) later
-    # and get replaced. flatcc is a tool used on the host for etdump and BundleIO handling.
-    # The way to solve this is to generate it once for the host, then copy it to ${et_build_host_dir}/bin
-    # and later point that out with -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc later.
-
-    cmake \
-        -DCMAKE_INSTALL_PREFIX=${et_build_host_dir} \
-        -DCMAKE_BUILD_TYPE=${build_type} \
-        -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
-        -DEXECUTORCH_ENABLE_LOGGING=ON \
-        -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-        -DEXECUTORCH_BUILD_DEVTOOLS=ON \
-        -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-        -DFLATCC_ALLOW_WERROR=OFF \
-        -B"${et_build_host_dir}" \
-        "${et_root_dir}"
-
-    # third-party/flatcc/bin/flatcc gets build already in the in the cmake config step above
-    # so there is no cmake building step done
-
-    # Copy host flatcc excutable so it's saved when we build for target (Arm) later
-    et_build_host_dir=$(realpath ${et_build_host_dir})
-    mkdir -p ${et_build_host_dir}/bin
-    cp third-party/flatcc/bin/flatcc ${et_build_host_dir}/bin
-fi
-
 ( set +x ;
   echo "--------------------------------------------------------------------------------" ;
   echo "Build ExecuTorch target libs ${build_type} into '${et_build_dir}'" ;
@@ -111,8 +73,7 @@ if [ "$build_with_etdump" = true ] ; then
    build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \
                             -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
                             -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \
-                             -DFLATCC_ALLOW_WERROR=OFF \
-                             -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc "
+                             -DFLATCC_ALLOW_WERROR=OFF "
 fi

 echo "Building with Devtools: ${build_devtools_flags} ${build_with_etdump_flags}"

backends/arm/test/tester/analyze_output_utils.py

Lines changed: 16 additions & 8 deletions
@@ -154,6 +154,13 @@ def print_error_diffs(
        output_str += f"BATCH {n}\n"
        result_batch = result[n, :, :, :]
        reference_batch = reference[n, :, :, :]
+
+        if reference_batch.dtype == torch.bool or result_batch.dtype == torch.bool:
+            mismatches = (reference_batch != result_batch).sum().item()
+            total = reference_batch.numel()
+            output_str += f"(BOOLEAN tensor) {mismatches} / {total} elements differ ({mismatches / total:.2%})\n"
+            continue
+
        is_close = torch.allclose(result_batch, reference_batch, rtol, atol)
        if is_close:
            output_str += ".\n"
@@ -180,14 +187,15 @@ def print_error_diffs(
        output_str += _print_elements(
            result[n, :, :, :], reference[n, :, :, :], C, H, W, rtol, atol
        )
-
-    reference_range = torch.max(reference) - torch.min(reference)
-    diff = torch.abs(reference - result).flatten()
-    diff = diff[diff.nonzero()]
-    if not len(diff) == 0:
-        diff_percent = diff / reference_range
-        output_str += "\nMEAN MEDIAN MAX MIN (error as % of reference output range)\n"
-        output_str += f"{torch.mean(diff_percent):<8.2%} {torch.median(diff_percent):<8.2%} {torch.max(diff_percent):<8.2%} {torch.min(diff_percent):<8.2%}\n"
+    # Only compute numeric error metrics if tensor is not boolean
+    if reference.dtype != torch.bool and result.dtype != torch.bool:
+        reference_range = torch.max(reference) - torch.min(reference)
+        diff = torch.abs(reference - result).flatten()
+        diff = diff[diff.nonzero()]
+        if not len(diff) == 0:
+            diff_percent = diff / reference_range
+            output_str += "\nMEAN MEDIAN MAX MIN (error as % of reference output range)\n"
+            output_str += f"{torch.mean(diff_percent):<8.2%} {torch.median(diff_percent):<8.2%} {torch.max(diff_percent):<8.2%} {torch.min(diff_percent):<8.2%}\n"

    # Over-engineer separators to match output width
    lines = output_str.split("\n")
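
The new early-exit exists because the numeric path cannot handle booleans: `torch.allclose` and `torch.abs(reference - result)` both need arithmetic dtypes, and subtraction on `torch.bool` tensors raises a RuntimeError. The mismatch count is computable directly, as in this self-contained sketch of the same logic:

    import torch

    ref = torch.tensor([True, False, True, True])
    res = torch.tensor([True, True, True, False])

    # Element-wise inequality works on bool tensors; subtraction does not
    mismatches = (ref != res).sum().item()
    total = ref.numel()
    print(f"(BOOLEAN tensor) {mismatches} / {total} elements differ ({mismatches / total:.2%})")
    # (BOOLEAN tensor) 2 / 4 elements differ (50.00%)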

backends/mediatek/partitioner.py

Lines changed: 1 addition & 0 deletions
@@ -81,6 +81,7 @@ def ops_to_not_decompose(
            torch.ops.aten.upsample_bilinear2d.vec,
            torch.ops.aten.upsample_nearest2d.default,
            torch.ops.aten.upsample_nearest2d.vec,
+            torch.ops.aten._safe_softmax.default,
        ]
        return (ops_not_decompose, None)
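
`aten._safe_softmax` is the softmax variant that recent PyTorch export inserts inside `scaled_dot_product_attention` (it returns zeros instead of NaN for fully-masked rows). Listing it here keeps export from decomposing it, presumably so the MediaTek backend can lower the attention pattern as a unit.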

backends/mediatek/scripts/mtk_build.sh

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ rm -rf cmake-android-out && mkdir cmake-android-out && cd cmake-android-out
 cmake -DBUCK2="$BUCK_PATH" \
      -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
      -DANDROID_ABI=arm64-v8a \
+      -DANDROID_PLATFORM=android-26 \
      -DEXECUTORCH_BUILD_NEURON=ON \
      -DNEURON_BUFFER_ALLOCATOR_LIB="$NEURON_BUFFER_ALLOCATOR_LIB" \
      ..
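
`-DANDROID_PLATFORM=android-26` pins the NDK toolchain's minimum supported API level to 26 (Android 8.0); without it, the toolchain falls back to its own default, which varies across NDK releases.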

backends/vulkan/_passes/fuse_quantized_ops.py

Lines changed: 4 additions & 1 deletion
@@ -17,6 +17,7 @@
 from executorch.exir import ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.passes import dead_code_elimination_pass

 #################
 ## linear_qcnw ##
@@ -224,6 +225,8 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        )

        graph_module.recompile()
-        graph_module = super().call(graph_module).graph_module
+        dead_code_elimination_pass(graph_module)

+        # Re-trace the graph since new nodes were (potentially) inserted
+        graph_module = super().call(graph_module).graph_module
        return PassResult(graph_module, True)
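
Running dead-code elimination after the fusion presumably clears out the nodes orphaned by the rewrite before the graph is re-traced. For readers unfamiliar with the mechanism, plain `torch.fx` exposes the same idea directly (a sketch of the general technique, not of executorch's `dead_code_elimination_pass` itself):

    import torch
    import torch.fx

    def f(x: torch.Tensor) -> torch.Tensor:
        dead = x * 2  # no users after tracing -> removable
        return torch.relu(x)

    gm = torch.fx.symbolic_trace(f)
    gm.graph.eliminate_dead_code()  # drops side-effect-free nodes with no users
    gm.recompile()
    print(gm.code)  # the `x * 2` node is gone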

backends/vulkan/_passes/int4_weight_only_quantizer.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 import torch
 import torch.nn.functional as F

-from torchao.quantization.GPTQ import _check_linear_int4_k
+from torchao.quantization.GPTQ.GPTQ import _check_linear_int4_k
 from torchao.quantization.unified import Quantizer
 from torchao.quantization.utils import groupwise_affine_quantize_tensor
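
The one-line change evidently tracks a torchao-side reorganization: `_check_linear_int4_k` now lives in a `GPTQ` module nested inside the `GPTQ` package, so the old flat import path no longer resolves.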

backends/vulkan/_passes/tag_memory_meta_pass.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,6 @@
 # LICENSE file in the root directory of this source tree.

 import logging
-from copy import deepcopy
 from typing import Any, Optional, Set

 import executorch.backends.vulkan.utils as utils
@@ -22,6 +21,7 @@
 from executorch.exir.dialects._ops import ops as exir_ops

 from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.tensor import TensorSpec

 logger: logging.Logger = logging.getLogger("")
 logger.setLevel(logging.INFO)
@@ -52,7 +52,7 @@ def insert_transition_node(
        (arg,),
    )
    clone_node.meta["val"] = arg.meta["val"]
-    clone_node.meta["spec"] = deepcopy(arg.meta["spec"])
+    clone_node.meta["spec"] = TensorSpec.from_tensor(clone_node.meta["val"])
    clone_node.meta["spec"].const = False
    set_memory_metadata(clone_node, storage, layout)
    arg.replace_all_uses_with(clone_node, lambda x, y=node: x == y)
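
Instead of deep-copying the producer's spec, the transition node now derives a fresh `TensorSpec` from its own fake tensor (`meta["val"]`). That avoids carrying over stale fields from the source spec that do not apply to the clone, while `spec.const = False` is still set explicitly.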

backends/vulkan/op_registry.py

Lines changed: 25 additions & 8 deletions
@@ -230,6 +230,14 @@ def update_features_impl(op: OpKey):
        exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
        # Symbolic integer ops
        torch.ops.aten.sym_size.int,
+        operator.add,
+        operator.lt,
+        operator.gt,
+        operator.ge,
+        operator.le,
+        # Guard and assert ops
+        torch.ops.aten._assert_scalar.default,
+        torch.ops.aten.sym_constrain_range_for_size.default,
    ]
 )
 def register_ephemeral_op(features: OpFeatures):
@@ -500,7 +508,12 @@ def register_sdpa_with_kv_cache_op(features: OpFeatures):
    return features


-@update_features(["llama::update_cache", "llama::custom_sdpa"])
+@update_features(
+    [
+        "llama::update_cache",
+        "llama::custom_sdpa",
+    ]
+)
 def register_sdpa_ops(features: OpFeatures):
    features.resize_fn = False
    features.buffer_impl = False
@@ -520,8 +533,17 @@ def register_rotary_emb_op(features: OpFeatures):
    return features


-@update_features(exir_ops.edge.aten.view_copy.default)
-def register_view_op(features: OpFeatures):
+@update_features(
+    [
+        exir_ops.edge.aten.clone.default,
+        exir_ops.edge.aten.permute.default,
+        exir_ops.edge.aten.permute_copy.default,
+        exir_ops.edge.aten.select_copy.int,
+        exir_ops.edge.aten.slice_copy.Tensor,
+        exir_ops.edge.aten.view_copy.default,
+    ]
+)
+def register_view_ops(features: OpFeatures):
    features.texture_impl = TextureImplFeatures(
        valid_packed_dims=all_packed_dims,
    )
@@ -538,10 +560,8 @@ def register_view_op(features: OpFeatures):
    # Indexing and lookup
    exir_ops.edge.aten.flip.default,
    exir_ops.edge.aten.index_select.default,
-    exir_ops.edge.aten.select_copy.int,
    # Tensor creation
    exir_ops.edge.aten.arange.start_step,
-    exir_ops.edge.aten.clone.default,
    exir_ops.edge.aten.constant_pad_nd.default,
    exir_ops.edge.aten.full.default,
    exir_ops.edge.aten.full_like.default,
@@ -564,12 +584,9 @@ def register_ported_op(features: OpFeatures):
 # Ops ported from PyTorch Vulkan backend. These ops are in a separate registry becasue they support all packed dimensions
 @update_features(
    [
-        # Indexing and lookup
-        exir_ops.edge.aten.slice_copy.Tensor,
        # Shape Manipulation
        exir_ops.edge.aten.squeeze_copy.dims,
        exir_ops.edge.aten.unsqueeze_copy.default,
-        exir_ops.edge.aten.permute_copy.default,
        # Tensor combination
        exir_ops.edge.aten.cat.default,
        exir_ops.edge.aten.repeat.default,
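
The `operator.add`/`operator.lt`/etc. entries are the plain functions from Python's `operator` module; `torch.export` emits them as `call_function` targets when sizes are symbolic (for example, comparing or adding `SymInt`s), and the two guard ops back dynamic-shape assertions. Registering them as ephemeral lets such graphs pass the partitioner without requiring a Vulkan kernel.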

backends/vulkan/partitioner/vulkan_partitioner.py

Lines changed: 4 additions & 3 deletions
@@ -146,10 +146,11 @@ def op_node_is_compatible(  # noqa: C901: Function is too complex
    def node_is_compatible(
        self, node: torch.fx.Node, features: Optional[OpFeatures] = None
    ) -> Tuple[bool, str]:
-        if utils.is_symint_node(node):
-            return node.target in vulkan_supported_ops, "Op is compatible"
-        elif utils.is_tensor_node(node):
+        if utils.is_tensor_node(node):
            return self.op_node_is_compatible(node, features=features)
+        # For non-tensor nodes, just check if the op is registered
+        elif hasattr(node, "target"):
+            return node.target in vulkan_supported_ops, "Op is compatible"

        return False, f"Unsupported node type: {node.format_node()}"
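Note the reordering: tensor nodes are now checked first, and the old `is_symint_node` special case is subsumed by the `hasattr(node, "target")` branch, which every `torch.fx.Node` satisfies, so any non-tensor node is now judged purely by registry membership.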
backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 9 additions & 0 deletions
@@ -449,6 +449,15 @@ ValueRef ComputeGraph::add_symint(const int32_t val) {
   return idx;
 }

+ValueRef ComputeGraph::get_or_add_value_for_int(const int64_t val) {
+  for (int i = 0; i < values_.size(); ++i) {
+    if (values_.at(i).isInt() && values_.at(i).toInt() == val) {
+      return i;
+    }
+  }
+  return add_scalar(val);
+}
+
 ValueRef ComputeGraph::set_input_tensor(
     const ValueRef idx,
     const bool use_staging) {
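
`get_or_add_value_for_int` deduplicates integer values with a linear scan over the value list before falling back to `add_scalar`. That lookup is O(n) in the number of graph values, which is presumably acceptable because it runs only at graph-build time and keeps repeated literal ints (sizes, dims) from bloating `values_`.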
