Skip to content

Commit b8fffbc

Browse files
committed
band-aid unittest-buck on "Remove ExecuTorch copy of Vectorized"
All uses are outside ExecuTorch core, so we can just use ATen Vectorized. Differential Revision: [D66396016](https://our.internmc.facebook.com/intern/diff/D66396016/) [ghstack-poisoned]
2 parents 213d8c4 + 65dc8ed commit b8fffbc

File tree

287 files changed

+2948
-2567
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

287 files changed

+2948
-2567
lines changed

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ cmake_install_executorch_libraries() {
156156
-DCMAKE_INSTALL_PREFIX=cmake-out \
157157
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
158158
-DEXECUTORCH_BUILD_QNN="$QNN" \
159-
-DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
160-
-Bcmake-out .
159+
-DQNN_SDK_ROOT="$QNN_SDK_ROOT"
161160
cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
162161
}
163162

.ci/scripts/unittest-buck2.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ buck2 query "//backends/apple/... + //backends/example/... + \
1515
//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
1616
//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
1717

18+
# TODO: optimized ops are unbuildable because they now use ATen; put
19+
# them back after we can use PyTorch in OSS buck.
1820
UNBUILDABLE_OPTIMIZED_OPS_REGEX="_elu|gelu|fft|log_softmax"
19-
BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
21+
BUILDABLE_OPTIMIZED_OPS= #$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
2022

2123
# TODO: build prim_ops_test_cpp again once supported_features works in
2224
# OSS buck.
@@ -25,7 +27,9 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
2527
# //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
2628
# //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
2729
for op in "build" "test"; do
28-
buck2 $op $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
30+
buck2 $op $BUILDABLE_OPTIMIZED_OPS \
31+
//examples/selective_build:select_all_dtype_selective_lib_portable_lib \
32+
//kernels/portable/... \
2933
$BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
3034
//runtime/executor: //runtime/kernel/... //runtime/platform/...
3135
done

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ jobs:
262262
output=$(ls -la ${elf})
263263
arr=($output)
264264
size=${arr[4]}
265-
threshold="103068" # ~100KiB
265+
threshold="103268" # ~100KiB
266266
echo "size: $size, threshold: $threshold"
267267
if [[ "$size" -le "$threshold" ]]; then
268268
echo "Success $size <= $threshold"

.lintrunner.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,6 @@ exclude_patterns = [
271271
'examples/**',
272272
'exir/verification/bindings.cpp',
273273
'extension/**',
274-
# Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
275-
'kernels/portable/cpu/util/elementwise_util.h',
276-
'kernels/portable/cpu/util/math_util.h',
277-
'kernels/portable/cpu/util/vectorized_math.h',
278274
'kernels/optimized/**',
279275
'runtime/core/exec_aten/**',
280276
# Want to be able to keep c10 in sync with PyTorch core.

CMakeLists.txt

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -514,17 +514,6 @@ if(EXECUTORCH_BUILD_CORTEX_M)
514514
endif()
515515

516516
if(EXECUTORCH_BUILD_DEVTOOLS)
517-
if(NOT EXECUTORCH_BUILD_ARM_BAREMETAL)
518-
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER
519-
ON
520-
CACHE BOOL "EXECUTORCH_BUILD_EXTENSION_DATA_LOADER" FORCE
521-
)
522-
else()
523-
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER
524-
OFF
525-
CACHE BOOL "EXECUTORCH_BUILD_EXTENSION_DATA_LOADER" FORCE
526-
)
527-
endif()
528517
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools)
529518
endif()
530519

@@ -565,6 +554,10 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
565554
endif()
566555

567556
if(EXECUTORCH_BUILD_PYBIND)
557+
558+
# Add codegen tools subdirectory for selective_build pybind module
559+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/codegen/tools)
560+
568561
if(NOT EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
569562
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/data_loader)
570563
endif()

backends/arm/quantizer/arm_quantizer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,9 @@ def set_module_name(
247247
quantizer.set_module_name("blocks.sub"), it will quantize all supported operator/operator
248248
patterns in the submodule with this module name with the given `quantization_config`
249249
"""
250-
assert (
251-
quantization_config is not None
252-
), " quantization_config == None is not supported yet"
250+
# Validate that quantization_config is provided
251+
if quantization_config is None:
252+
raise ValueError("quantization_config == None is not supported yet")
253253
self.module_name_config[module_name] = quantization_config
254254
return self
255255

backends/arm/quantizer/quantization_config.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,30 +29,40 @@ def get_input_act_qspec(self) -> QuantizationSpec | None:
2929
"""Returns QuantizationSpec 'input_activation' after asserting that input_activation.qscheme is valid."""
3030
if self.input_activation is None:
3131
return None
32-
assert self.input_activation.qscheme in [
32+
# Validate that input_activation uses a supported qscheme
33+
if self.input_activation.qscheme not in [
3334
torch.per_tensor_affine,
3435
torch.per_tensor_symmetric,
35-
], f"Unsupported quantization_spec {self.input_activation} for input_activation."
36+
]:
37+
raise ValueError(
38+
f"Unsupported quantization_spec {self.input_activation} for input_activation."
39+
)
3640
return self.input_activation
3741

3842
def get_output_act_qspec(self) -> QuantizationSpec | None:
3943
"""Returns QuantizationSpec 'output_activation' after asserting that output_activation.qscheme is valid."""
4044
if self.output_activation is None:
4145
return None
42-
assert self.output_activation.qscheme in [
46+
# Validate that output_activation uses a supported qscheme
47+
if self.output_activation.qscheme not in [
4348
torch.per_tensor_affine,
4449
torch.per_tensor_symmetric,
45-
], f"Unsupported quantization_spec {self.output_activation} for output_activation."
50+
]:
51+
raise ValueError(
52+
f"Unsupported quantization_spec {self.output_activation} for output_activation."
53+
)
4654
return self.output_activation
4755

4856
def get_weight_qspec(self) -> QuantizationSpec | None:
4957
"""Returns QuantizationSpec 'weight' after asserting that weight.qscheme is valid."""
5058
if self.weight is None:
5159
return None
52-
assert self.weight.qscheme in [
60+
# Validate that weight uses a supported qscheme
61+
if self.weight.qscheme not in [
5362
torch.per_tensor_symmetric,
5463
torch.per_channel_symmetric,
55-
], f"Unsupported quantization_spec {self.weight} for weight"
64+
]:
65+
raise ValueError(f"Unsupported quantization_spec {self.weight} for weight")
5666
return self.weight
5767

5868
def get_bias_qspec(self, node: torch.fx.Node) -> QuantizationSpec | None:
@@ -61,11 +71,11 @@ def get_bias_qspec(self, node: torch.fx.Node) -> QuantizationSpec | None:
6171
def _derive_qparams_fn(
6272
obs_or_fqs: list[ObserverOrFakeQuantize],
6373
) -> tuple[torch.Tensor, torch.Tensor]:
64-
assert (
65-
len(obs_or_fqs) == 2
66-
), "Expecting two obs/fqs, one for activation and one for weight, got: {}".format(
67-
len(obs_or_fqs)
68-
)
74+
# Validate expected number of observers/fake-quantizes
75+
if len(obs_or_fqs) != 2:
76+
raise ValueError(
77+
f"Expecting two obs/fqs, one for activation and one for weight, got: {len(obs_or_fqs)}"
78+
)
6979
act_obs_or_fq = obs_or_fqs[0]
7080
weight_obs_or_fq = obs_or_fqs[1]
7181
act_scale, act_zp = act_obs_or_fq.calculate_qparams()
@@ -94,9 +104,11 @@ def _derive_qparams_fn(
94104

95105
if self.bias is None:
96106
return None
97-
assert (
98-
self.bias.dtype == torch.float
99-
), "Only float dtype for bias is supported for bias right now"
107+
# Validate that bias dtype is floating-point
108+
if self.bias.dtype != torch.float:
109+
raise ValueError(
110+
"Only float dtype for bias is supported for bias right now"
111+
)
100112
return self.bias
101113

102114
def get_fixed_qspec(

backends/arm/runtime/EthosUBackend.cpp

Lines changed: 10 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -261,24 +261,12 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
261261
event_tracer,
262262
"+EthosUBackend::execute()handles.input.permute_CHW_to_HWC()");
263263
// permuted byte copy CHW to HWC
264-
int c, h, w;
265-
if (tensor_in.dim() == 4) {
266-
c = tensor_in.size(1);
267-
h = tensor_in.size(2);
268-
w = tensor_in.size(3);
269-
} else if (tensor_in.dim() == 5) {
270-
c = tensor_in.size(2);
271-
h = tensor_in.size(3);
272-
w = tensor_in.size(4);
273-
} else {
274-
ET_LOG(
275-
Error,
276-
"Unsupported input tensor dimension %d, expected 4 or 5",
277-
tensor_in.dim());
278-
return Error::InvalidProgram;
279-
}
280264
permute_CHW_to_HWC(
281-
tensor_in.mutable_data_ptr<char>(), scratch_addr, c, h, w);
265+
tensor_in.mutable_data_ptr<char>(),
266+
scratch_addr,
267+
tensor_in.size(1),
268+
tensor_in.size(2),
269+
tensor_in.size(3));
282270
} else if (both_char or both_int or both_short) {
283271
EXECUTORCH_PROF_SCOPE(
284272
event_tracer, "+EthosUBackend::execute()handles.input.memcpy()");
@@ -376,24 +364,12 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
376364
"+EthosUBackend::execute()handles.output.permute_HWC_to_CHW()");
377365

378366
char* output_address = (char*)output_addr;
379-
int c, h, w;
380-
if (tensor_out.dim() == 4) {
381-
c = tensor_out.size(1);
382-
h = tensor_out.size(2);
383-
w = tensor_out.size(3);
384-
} else if (tensor_out.dim() == 5) {
385-
c = tensor_out.size(2);
386-
h = tensor_out.size(3);
387-
w = tensor_out.size(4);
388-
} else {
389-
ET_LOG(
390-
Error,
391-
"Unsupported output tensor dimension %d, expected 4 or 5",
392-
tensor_out.dim());
393-
return Error::InvalidProgram;
394-
}
395367
permute_HWC_to_CHW(
396-
output_address, tensor_out.mutable_data_ptr<char>(), c, h, w);
368+
output_address,
369+
tensor_out.mutable_data_ptr<char>(),
370+
tensor_out.size(1),
371+
tensor_out.size(2),
372+
tensor_out.size(3));
397373
} else {
398374
EXECUTORCH_PROF_SCOPE(
399375
event_tracer, "+EthosUBackend::execute()handles.output.move()");
@@ -454,14 +430,6 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
454430
if (permuted_shape) {
455431
ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
456432
}
457-
} else if (tensor.dim() == 5) {
458-
// Same as above, but for 5D tensors.
459-
permuted_shape = tensor.size(0) == io->shape[0] &&
460-
tensor.size(1) == io->shape[1] && tensor.size(2) == io->shape[4] &&
461-
tensor.size(3) == io->shape[2] && tensor.size(4) == io->shape[3];
462-
if (permuted_shape) {
463-
ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
464-
}
465433
}
466434
*is_permuted = permuted_shape;
467435
return Error::Ok;

backends/arm/test/models/test_llama.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
TosaPipelineMI,
2323
)
2424

25+
from executorch.examples.models.llama.config.llm_config import LlmConfig
2526
from executorch.examples.models.llama.export_llama_lib import (
2627
build_args_parser,
2728
get_llama_model,
@@ -89,8 +90,9 @@ def prepare_model(self):
8990
]
9091
parser = build_args_parser()
9192
args = parser.parse_args(args)
93+
llm_config = LlmConfig.from_args(args)
9294

93-
llama_model, llama_inputs, llama_meta = get_llama_model(args)
95+
llama_model, llama_inputs, llama_meta = get_llama_model(llm_config)
9496

9597
return llama_model, llama_inputs, llama_meta
9698

backends/cadence/fusion_g3/operators/op_clamp.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@
2121
#include <executorch/kernels/portable/cpu/util/math_util.h>
2222
#include <executorch/runtime/kernel/kernel_includes.h>
2323

24-
using ::executorch::aten::optional;
2524
using ::executorch::aten::Scalar;
2625
using ::executorch::aten::ScalarType;
2726
using ::executorch::aten::Tensor;
2827
using ::executorch::runtime::canCast;
2928
using ::executorch::runtime::Error;
3029
using ::executorch::runtime::KernelRuntimeContext;
30+
using std::optional;
3131

3232
namespace cadence {
3333
namespace impl {

backends/cadence/fusion_g3/operators/op_dequantize.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ using ::executorch::runtime::Error;
2424
using ::executorch::runtime::KernelRuntimeContext;
2525

2626
template <typename T>
27-
using optional = ::executorch::aten::optional<T>;
27+
using optional = std::optional<T>;
2828
/* ScalarType in Executorch do not have support for below data types.
2929
* So, creating a placeholder for these data types. Once, ScalarTypes is
3030
* updated to have support for below data types, these can be removed and
@@ -51,7 +51,7 @@ void check_dequantize_per_tensor_args(
5151
int64_t quant_min,
5252
int64_t quant_max,
5353
ScalarType dtype,
54-
::executorch::aten::optional<ScalarType>& out_dtype,
54+
std::optional<ScalarType>& out_dtype,
5555
Tensor& out) {
5656
ET_CHECK_MSG(
5757
input.scalar_type() == ScalarType::Byte ||
@@ -93,7 +93,7 @@ Tensor& dequantize_impl(
9393
float* scale_data,
9494
int* zero_point_data,
9595
int* axis,
96-
::executorch::aten::optional<ScalarType> out_dtype) {
96+
std::optional<ScalarType> out_dtype) {
9797
const ::executorch::aten::ArrayRef<Tensor::SizesType> input_size =
9898
input.sizes();
9999

@@ -260,8 +260,8 @@ Tensor& dequantize_impl(
260260
}
261261
}
262262

263-
::executorch::aten::optional<::executorch::aten::ArrayRef<int64_t>>
264-
optional_dim_list{::executorch::aten::ArrayRef<int64_t>{
263+
std::optional<::executorch::aten::ArrayRef<int64_t>> optional_dim_list{
264+
::executorch::aten::ArrayRef<int64_t>{
265265
dims, size_t(input.dim() - 1)}};
266266

267267
// Actual dequantization logic
@@ -466,8 +466,8 @@ Tensor& dequantize_impl(
466466
}
467467
}
468468

469-
::executorch::aten::optional<::executorch::aten::ArrayRef<int64_t>>
470-
optional_dim_list{::executorch::aten::ArrayRef<int64_t>{
469+
std::optional<::executorch::aten::ArrayRef<int64_t>> optional_dim_list{
470+
::executorch::aten::ArrayRef<int64_t>{
471471
dims, size_t(input.dim() - 1)}};
472472

473473
// Actual dequantization logic
@@ -600,7 +600,7 @@ Tensor& dequantize_per_tensor_tensor_args_out(
600600
int64_t quant_min,
601601
int64_t quant_max,
602602
ScalarType dtype,
603-
::executorch::aten::optional<ScalarType> out_dtype,
603+
std::optional<ScalarType> out_dtype,
604604
Tensor& out) {
605605
#ifdef OP_ARG_CHECK
606606
ET_CHECK_MSG(
@@ -639,12 +639,12 @@ Tensor& dequantize_per_channel_out(
639639
KernelRuntimeContext& context,
640640
const Tensor& input,
641641
const Tensor& scale,
642-
const ::executorch::aten::optional<Tensor>& opt_zero_points,
642+
const std::optional<Tensor>& opt_zero_points,
643643
int64_t axis,
644644
int64_t quant_min,
645645
int64_t quant_max,
646646
ScalarType dtype,
647-
::executorch::aten::optional<ScalarType> out_dtype,
647+
std::optional<ScalarType> out_dtype,
648648
Tensor& out) {
649649
if (axis < 0) {
650650
axis += executorch::runtime::nonzero_dim(input);

backends/cadence/fusion_g3/operators/op_div.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@
1919
#include <executorch/runtime/kernel/kernel_includes.h>
2020
#include <executorch/runtime/platform/assert.h>
2121

22-
using ::executorch::aten::optional;
2322
using ::executorch::aten::Scalar;
2423
using ::executorch::aten::ScalarType;
25-
using ::executorch::aten::string_view;
2624
using ::executorch::aten::Tensor;
2725
using ::executorch::runtime::canCast;
2826
using ::executorch::runtime::Error;
2927
using ::executorch::runtime::KernelRuntimeContext;
28+
using std::optional;
29+
using std::string_view;
3030

3131
namespace cadence {
3232
namespace impl {
@@ -686,4 +686,4 @@ Tensor& div_scalar_mode_out(
686686
} // namespace native
687687
} // namespace G3
688688
} // namespace impl
689-
} // namespace cadence
689+
} // namespace cadence

backends/cadence/fusion_g3/operators/op_mean.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
#include <executorch/runtime/platform/assert.h>
1818

1919
using ::executorch::aten::ArrayRef;
20-
using ::executorch::aten::optional;
2120
using ::executorch::aten::ScalarType;
2221
using ::executorch::aten::Tensor;
2322
using ::executorch::runtime::Error;
2423
using ::executorch::runtime::KernelRuntimeContext;
24+
using std::optional;
2525

2626
namespace cadence {
2727
namespace impl {

0 commit comments

Comments
 (0)