
Commit aa90c26

Update base for Update on "Dont quantize the current token for attention"
Differential Revision: [D63497872](https://our.internmc.facebook.com/intern/diff/D63497872/) [ghstack-poisoned]
2 parents c8b3e00 + fe0e676 commit aa90c26

70 files changed: +1423 −1645 lines


.ci/scripts/test_llama.sh

Lines changed: 1 addition & 1 deletion
@@ -188,7 +188,7 @@ EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
 EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
 if [[ "${XNNPACK}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
+  EXPORT_ARGS="${EXPORT_ARGS} -X --xnnpack-extended-ops -qmode 8da4w -G 128"
 fi
 if [[ "${CUSTOM}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"

backends/cadence/aot/compiler.py

Lines changed: 1 addition & 2 deletions
@@ -30,7 +30,6 @@
 )
 from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
 from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge
-from torch._export import capture_pre_autograd_graph
 from torch.ao.quantization.pt2e.export_utils import model_is_exported
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

@@ -58,7 +57,7 @@ def convert_pt2(
     """

     # Export with dynamo
-    model_gm = capture_pre_autograd_graph(model, inputs)
+    model_gm = torch.export.export_for_training(model, inputs).module()

     if model_gm_has_SDPA(model_gm): # pyre-fixme[6]
         # Decompose SDPA
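This hunk replaces the deprecated capture_pre_autograd_graph entry point with torch.export.export_for_training, whose .module() result is what the PT2E prepare/convert passes expect. Below is a minimal sketch of that flow; the TinyLinear module, example inputs, and the XNNPACKQuantizer choice are illustrative assumptions, not taken from this commit (the Cadence flow uses its own quantizer).

# Sketch only: how export_for_training slots into a PT2E quantization flow.
# TinyLinear, the inputs, and XNNPACKQuantizer are assumptions for illustration.
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

class TinyLinear(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)

model = TinyLinear().eval()
inputs = (torch.randn(1, 8),)

# Replacement for capture_pre_autograd_graph(model, inputs):
# export_for_training returns an ExportedProgram; .module() yields the
# GraphModule consumed by prepare_pt2e/convert_pt2e.
model_gm = torch.export.export_for_training(model, inputs).module()

quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
prepared = prepare_pt2e(model_gm, quantizer)
prepared(*inputs)  # calibration run
quantized = convert_pt2e(prepared)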

backends/cadence/cadence_runner/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
@@ -44,7 +44,9 @@ find_package(
   gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party
 )

-add_executable(cadence_runner cadence_runner.cpp)
+add_executable(cadence_runner
+  ${EXECUTORCH_ROOT}/examples/devtools/example_runner/example_runner.cpp
+)
 target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)

 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)

backends/cadence/cadence_runner/TARGETS

Lines changed: 0 additions & 8 deletions
This file was deleted.

backends/cadence/cadence_runner/cadence_runner.cpp

Lines changed: 0 additions & 298 deletions
This file was deleted.

backends/cadence/cadence_runner/targets.bzl

Lines changed: 0 additions & 29 deletions
This file was deleted.

backends/cadence/reference/operators/quantized_conv_out.cpp

Lines changed: 0 additions & 2 deletions
@@ -9,8 +9,6 @@
 #include <executorch/backends/cadence/reference/kernels/kernels.h>

 #include <executorch/runtime/kernel/kernel_includes.h>
-#include <algorithm>
-#include <cmath>

 namespace impl {
 namespace reference {

backends/cadence/reference/operators/quantized_layer_norm.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99
#include <executorch/backends/cadence/reference/kernels/kernels.h>
1010
#include <executorch/runtime/kernel/kernel_includes.h>
1111

12-
#include <algorithm>
1312
#include <cmath>
14-
#include <tuple>
1513

1614
using Tensor = exec_aten::Tensor;
1715
using executorch::runtime::KernelRuntimeContext;
