Skip to content

Commit 35387f6

Browse files
cccclai authored and facebook-github-bot committed
prefill model (pytorch#5807)
Summary: python -m executorch.examples.models.llama2.export_llama --disable_dynamic_shape --qnn --pt2e_quantize qnn_16a4w Segfault error stacktrace ``` [INFO] [Qnn ExecuTorch]: Initialize Qnn backend parameters for Qnn executorch backend type 2 [INFO] [Qnn ExecuTorch]: Caching: Caching is in SAVE MODE. [WARNING] [Qnn ExecuTorch]: Qnn API version 2.19.0 is used. The version is tested against 2.18.0. [INFO] [Qnn ExecuTorch]: Running level=3 optimization. AddressSanitizer:DEADLYSIGNAL ================================================================= ==1523599==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000020 (pc 0x7f1585ee38e2 bp 0x7f16d5ab8800 sp 0x7ffed19ab8b0 T0) ==1523599==The signal is caused by a READ memory access. ==1523599==Hint: address points to the zero page. SCARINESS: 10 (null-deref) #0 0x7f1585ee38e2 (/home/chenlai/fbsource/third-party/qualcomm/qnn/qnn-2.26/lib/x86_64-linux-clang/libQnnHtp.so+0x2ce38e2) (BuildId: bc3ab8ddc89a0e65) #1 0x7f1585dd8926 (/home/chenlai/fbsource/third-party/qualcomm/qnn/qnn-2.26/lib/x86_64-linux-clang/libQnnHtp.so+0x2bd8926) (BuildId: bc3ab8ddc89a0e65) pytorch#2 0x7f15844d1161 (/home/chenlai/fbsource/third-party/qualcomm/qnn/qnn-2.26/lib/x86_64-linux-clang/libQnnHtp.so+0x12d1161) (BuildId: bc3ab8ddc89a0e65) pytorch#3 0x7f15844dcac6 (/home/chenlai/fbsource/third-party/qualcomm/qnn/qnn-2.26/lib/x86_64-linux-clang/libQnnHtp.so+0x12dcac6) (BuildId: bc3ab8ddc89a0e65) pytorch#4 0x7f15844d245b (/home/chenlai/fbsource/third-party/qualcomm/qnn/qnn-2.26/lib/x86_64-linux-clang/libQnnHtp.so+0x12d245b) (BuildId: bc3ab8ddc89a0e65) pytorch#5 0x7f15b9bc7b21 in auto torch::executor::qnn::QnnInterface::qnn_backend_validate_op_config<void*, Qnn_OpConfig_t>(void*, Qnn_OpConfig_t) const fbcode/executorch/backends/qualcomm/runtime/backends/QnnFunctionInterface.h:39 pytorch#6 0x7f15b9bc7682 in torch::executor::qnn::QnnBackend::BackendValidateOpConfig(Qnn_OpConfig_t const&) 
fbcode/executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h:41 pytorch#7 0x7f15b9bc7115 in torch::executor::qnn::QnnManager::IsNodeSupportedByBackend(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&) fbcode/executorch/backends/qualcomm/runtime/QnnManager.cpp:450 pytorch#8 0x7f15b9dd44ee in torch::executor::qnn::PyQnnManager::IsNodeSupportedByBackend(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&) fbcode/executorch/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h:57 pytorch#9 0x7f15b9e5b986 in pybind11::cpp_function::cpp_function<bool, torch::executor::qnn::PyQnnManager, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, pybind11::is_method, pybind11::sibling>(bool (torch::executor::qnn::PyQnnManager::*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&)::operator()(torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&) const fbsource/pybind11/pybind11.h:84 pytorch#10 0x7f15b9e5b8b5 in bool pybind11::detail::argument_loader<torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&>::call_impl<bool, pybind11::cpp_function::cpp_function<bool, torch::executor::qnn::PyQnnManager, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, 
std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, pybind11::is_method, pybind11::sibling>(bool (torch::executor::qnn::PyQnnManager::*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&)&, 0ul, 1ul, pybind11::detail::void_type>(torch::executor::qnn::PyQnnManager&&, std::integer_sequence<unsigned long, 0ul, 1ul>, pybind11::detail::void_type&&) && fbsource/pybind11/cast.h:2042 pytorch#11 0x7f15b9e53831 in std::enable_if<!std::is_void<bool>::value, bool>::type pybind11::detail::argument_loader<torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&>::call<bool, pybind11::detail::void_type, pybind11::cpp_function::cpp_function<bool, torch::executor::qnn::PyQnnManager, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, pybind11::is_method, pybind11::sibling>(bool (torch::executor::qnn::PyQnnManager::*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&)&>(pybind11::cpp_function::cpp_function<bool, torch::executor::qnn::PyQnnManager, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, 
pybind11::is_method, pybind11::sibling>(bool (torch::executor::qnn::PyQnnManager::*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&)&) && fbsource/pybind11/cast.h:2014 pytorch#12 0x7f15b9e53454 in void pybind11::cpp_function::initialize<pybind11::cpp_function::cpp_function<bool, torch::executor::qnn::PyQnnManager, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, pybind11::is_method, pybind11::sibling>(bool (torch::executor::qnn::PyQnnManager::*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), bool, torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, pybind11::is_method, pybind11::sibling>(bool&&, torch::executor::qnn::PyQnnManager (*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(pybind11::detail::function_call&)::operator()(pybind11::detail::function_call&) const fbsource/pybind11/pybind11.h:193 pytorch#13 0x7f15b9e530d3 in void pybind11::cpp_function::initialize<pybind11::cpp_function::cpp_function<bool, 
torch::executor::qnn::PyQnnManager, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, pybind11::is_method, pybind11::sibling>(bool (torch::executor::qnn::PyQnnManager::*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), bool, torch::executor::qnn::PyQnnManager*, std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&, pybind11::name, pybind11::is_method, pybind11::sibling>(bool&&, torch::executor::qnn::PyQnnManager (*)(std::vector<std::shared_ptr<torch::executor::qnn::OpWrapper>, std::allocator<std::shared_ptr<torch::executor::qnn::OpWrapper>>>&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(pybind11::detail::function_call&)::__invoke(pybind11::detail::function_call&) fbsource/pybind11/pybind11.h:170 pytorch#14 0x7f15b9d8f707 in pybind11::cpp_function::dispatcher(_object*, _object*, _object*) fbsource/pybind11/pybind11.h:767 pytorch#15 0x327141 in cfunction_call(_object*, _object*, _object*) (.__uniq.281047882695835599676768160755749362799) (/usr/local/fbcode/platform010/bin/python3.10+0x327141) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#16 0x349630 in _PyObject_MakeTpCall (/usr/local/fbcode/platform010/bin/python3.10+0x349630) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#17 0x5897d4 in method_vectorcall(_object*, _object* const*, unsigned long, _object*) (.__uniq.243338978568352371442406765225626566013.llvm.6236606370933165261) (/usr/local/fbcode/platform010/bin/python3.10+0x5897d4) (BuildId: 
a620038add613fd8585eb50983ca8e455d54738e) pytorch#18 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#19 0x331421 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x331421) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#20 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#21 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#22 0x3313f2 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3313f2) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#23 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#24 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#25 0x3313f2 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3313f2) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#26 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#27 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#28 0x3313f2 in _PyEval_EvalFrameDefault 
(/usr/local/fbcode/platform010/bin/python3.10+0x3313f2) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#29 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#30 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#31 0x331577 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x331577) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#32 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#33 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#34 0x3313f2 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3313f2) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#35 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#36 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#37 0x3313f2 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3313f2) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#38 0x39b8ca in _PyEval_Vector (/usr/local/fbcode/platform010/bin/python3.10+0x39b8ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#39 0x39ad7d in _PyObject_FastCallDictTstate 
(/usr/local/fbcode/platform010/bin/python3.10+0x39ad7d) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#40 0x3c8b72 in slot_tp_call(_object*, _object*, _object*) (.__uniq.235726554139783955843240177532338160225) (/usr/local/fbcode/platform010/bin/python3.10+0x3c8b72) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#41 0x392ca8 in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x392ca8) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#42 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#43 0x39b8ca in _PyEval_Vector (/usr/local/fbcode/platform010/bin/python3.10+0x39b8ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#44 0x331b18 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x331b18) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#45 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#46 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#47 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#48 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#49 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#50 0x3313f2 
in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3313f2) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#51 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#52 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#53 0x3313f2 in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3313f2) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#54 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#55 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#56 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#57 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#58 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#59 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#60 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#61 0x3928df in call_function(_ts*, 
PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#62 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#63 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#64 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#65 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#66 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#67 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#68 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#69 0x327547 in _PyFunction_Vectorcall (/usr/local/fbcode/platform010/bin/python3.10+0x327547) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#70 0x3928df in call_function(_ts*, PyTraceInfo*, _object***, long, _object*) (.__uniq.79849310599369217189729546442812793949) (/usr/local/fbcode/platform010/bin/python3.10+0x3928df) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#71 0x3314ca in _PyEval_EvalFrameDefault (/usr/local/fbcode/platform010/bin/python3.10+0x3314ca) (BuildId: 
a620038add613fd8585eb50983ca8e455d54738e) pytorch#72 0x39b8ca in _PyEval_Vector (/usr/local/fbcode/platform010/bin/python3.10+0x39b8ca) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#73 0x431565 in PyEval_EvalCode (/usr/local/fbcode/platform010/bin/python3.10+0x431565) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#74 0x431447 in run_mod(_mod*, _object*, _object*, _object*, PyCompilerFlags*, _arena*) (.__uniq.251861886623903963524397139660542440724.llvm.17622910512627074885) (/usr/local/fbcode/platform010/bin/python3.10+0x431447) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#75 0x4e3054 in pyrun_file(_IO_FILE*, _object*, int, _object*, _object*, int, PyCompilerFlags*) (.__uniq.251861886623903963524397139660542440724) (/usr/local/fbcode/platform010/bin/python3.10+0x4e3054) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#76 0x4e2b54 in _PyRun_SimpleFileObject (/usr/local/fbcode/platform010/bin/python3.10+0x4e2b54) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#77 0x4e28f1 in _PyRun_AnyFileObject (/usr/local/fbcode/platform010/bin/python3.10+0x4e28f1) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#78 0x4d4a54 in Py_RunMain (/usr/local/fbcode/platform010/bin/python3.10+0x4d4a54) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#79 0x4d286b in pymain_main(_PyArgv*) (.__uniq.297908980262787110426434251325078884054) (/usr/local/fbcode/platform010/bin/python3.10+0x4d286b) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#80 0x4d2759 in Py_BytesMain (/usr/local/fbcode/platform010/bin/python3.10+0x4d2759) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) pytorch#81 0x7f19e282c656 in __libc_start_call_main (/usr/local/fbcode/platform010/lib/libc.so.6+0x2c656) (BuildId: 93cdceeb8322234c38e1f2c93ad0ff10c7632fa6) pytorch#82 0x7f19e282c717 in __libc_start_main@GLIBC_2.2.5 (/usr/local/fbcode/platform010/lib/libc.so.6+0x2c717) (BuildId: 93cdceeb8322234c38e1f2c93ad0ff10c7632fa6) 
pytorch#83 0x553d90 in _start (/usr/local/fbcode/platform010/bin/python3.10+0x553d90) (BuildId: a620038add613fd8585eb50983ca8e455d54738e) AddressSanitizer can not provide additional info. AddressSanitizer: SEGV (/home/chenlai/fbsource/third-party/qualcomm/qnn/qnn-2.26/lib/x86_64-linux-clang/libQnnHtp.so+0x2ce38e2) (BuildId: bc3ab8ddc89a0e65) ==1523599==ABORTING ``` Differential Revision: D63736779
1 parent 13408b9 commit 35387f6

File tree

16 files changed

+758
-522
lines changed

16 files changed

+758
-522
lines changed

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,3 +118,29 @@ def annotate_matmul_input1(node: Node, quantization_config: QuantizationConfig):
118118
if "SDPA" in full_qualified_name:
119119
annotate_matmul(node, quantization_config_16a8w)
120120
annotate_matmul_input1(node.args[1], quantization_config_8a8w)
121+
122+
123+
def custom_annotate_matmul_16a8w(gm: torch.fx.GraphModule):
124+
"""
125+
Annotate matmul op with 16a8w quantization config
126+
"""
127+
128+
def annotate_matmul(node: Node, quantization_config: QuantizationConfig):
129+
input_qspec_map = {}
130+
input_act = node.args[0]
131+
input_spec = quantization_config.input_activation
132+
input_qspec_map[input_act] = input_spec
133+
input_act1 = node.args[1]
134+
input_spec1 = quantization_config.weight
135+
input_qspec_map[input_act1] = input_spec1
136+
node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
137+
input_qspec_map=input_qspec_map,
138+
output_qspec=quantization_config.output_activation,
139+
_annotated=True,
140+
)
141+
142+
# Annotate 16a8w for matmul op to get better performance
143+
quantization_config_16a8w = get_16a8w_qnn_ptq_config()
144+
for node in gm.graph.nodes:
145+
if node.op == "call_function" and node.target == torch.ops.aten.matmul.default:
146+
annotate_matmul(node, quantization_config_16a8w)

backends/qualcomm/serialization/qnn_compile_spec_schema.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ class QcomChipset(IntEnum):
3434
SM8475 = 42 # v69
3535
SM8550 = 43 # v73
3636
SM8650 = 57 # v75
37+
SSG2115P = 46 # v73... I wish I can know where the number comes from...
3738

3839

3940
@dataclass
@@ -47,6 +48,7 @@ class SocInfo:
4748
QcomChipset.SM8475: SocInfo(QcomChipset.SM8475, HtpInfo(HtpArch.V69, 8)),
4849
QcomChipset.SM8550: SocInfo(QcomChipset.SM8550, HtpInfo(HtpArch.V73, 8)),
4950
QcomChipset.SM8650: SocInfo(QcomChipset.SM8650, HtpInfo(HtpArch.V75, 8)),
51+
QcomChipset.SSG2115P: SocInfo(QcomChipset.SSG2115P, HtpInfo(HtpArch.V73, 2)),
5052
}
5153

5254

backends/qualcomm/serialization/schema.fbs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ enum QcomChipset: int {
3232
SM8450 = 36,
3333
SM8475 = 42,
3434
SM8550 = 43,
35+
SSG2115P = 46,
3536
SM8650 = 57,
3637
}
3738

@@ -170,7 +171,7 @@ table QnnExecuTorchOptions {
170171

171172
/// Profiling level of the delegate and the backend. Default is off.
172173
profile_level:QnnExecuTorchProfileLevel;
173-
174+
174175
/// Enables usage of shared buffer between application and backend for graph I/O.
175176
shared_buffer:bool;
176177

backends/qualcomm/tests/utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ class TestQNN(unittest.TestCase):
118118
model: QcomChipset = None
119119
compiler_specs: List[CompileSpec] = None
120120
arch_table = {
121+
"SSG2115P": QcomChipset.SSG2115P,
121122
"SM8650": QcomChipset.SM8650,
122123
"SM8550": QcomChipset.SM8550,
123124
"SM8475": QcomChipset.SM8475,

examples/models/llama2/export_llama_lib.py

Lines changed: 27 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -53,21 +53,23 @@
5353
get_quant_embedding_transform,
5454
get_quant_weight_transform,
5555
)
56-
from .source_transformation.quantized_kv_cache import (
57-
replace_kv_cache_with_quantized_kv_cache,
58-
)
56+
57+
# from .source_transformation.quantized_kv_cache import (
58+
# replace_kv_cache_with_quantized_kv_cache,
59+
# )
5960
from .source_transformation.rms_norm import replace_rms_norm_with_native_rms_norm
6061

6162
from .source_transformation.rope import materialze_broadcast_of_rope_freq_cis
62-
from .source_transformation.sdpa import (
63-
replace_causal_mask,
64-
replace_kv_cache_with_coreml_kv_cache,
65-
replace_kv_cache_with_simple_kv_cache,
66-
replace_sdpa_with_coreml_sdpa,
67-
replace_sdpa_with_custom_op,
68-
replace_sdpa_with_flex_sdpa,
69-
replace_sdpa_with_simple_sdpa,
70-
)
63+
64+
# from .source_transformation.sdpa import (
65+
# replace_causal_mask,
66+
# replace_kv_cache_with_coreml_kv_cache,
67+
# replace_kv_cache_with_simple_kv_cache,
68+
# replace_sdpa_with_coreml_sdpa,
69+
# replace_sdpa_with_custom_op,
70+
# replace_sdpa_with_flex_sdpa,
71+
# replace_sdpa_with_simple_sdpa,
72+
# )
7173

7274
IS_FBCODE = True # os.environ.get("FBCODE_PLATFORM", False)
7375
FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
@@ -910,23 +912,20 @@ def _get_source_transforms( # noqa
910912
assert args.use_kv_cache, "quantize_kv_cache requires use_kv_cache=True"
911913
transforms.append(replace_kv_cache_with_quantized_kv_cache)
912914

915+
if args.qnn:
916+
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`
917+
from executorch.backends.qualcomm.utils.utils import convert_linear_to_conv2d
918+
919+
# transforms.append(replace_kv_cache_with_simple_kv_cache)
920+
# transforms.append(replace_sdpa_with_flex_sdpa)
921+
# transforms.append(replace_causal_mask)
922+
transforms.append(replace_rms_norm_with_native_rms_norm)
923+
if args.optimized_rotation_path:
924+
transforms.append(fuse_layer_norms)
925+
transforms.append(get_model_with_r1_r2(args.optimized_rotation_path))
926+
transforms.append(convert_linear_to_conv2d)
913927
if args.use_kv_cache:
914-
if args.qnn:
915-
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`
916-
from executorch.backends.qualcomm.utils.utils import (
917-
convert_linear_to_conv2d,
918-
)
919-
920-
transforms.append(replace_kv_cache_with_simple_kv_cache)
921-
transforms.append(replace_sdpa_with_flex_sdpa)
922-
transforms.append(replace_causal_mask)
923-
transforms.append(replace_rms_norm_with_native_rms_norm)
924-
if args.optimized_rotation_path:
925-
transforms.append(fuse_layer_norms)
926-
transforms.append(get_model_with_r1_r2(args.optimized_rotation_path))
927-
transforms.append(convert_linear_to_conv2d)
928-
929-
elif args.mps:
928+
if args.mps:
930929
# Currently mps doesn't support sdpa op, use the simpler decomposition
931930
# to get free perf gain.
932931
transforms.append(replace_sdpa_with_simple_sdpa)

0 commit comments

Comments
 (0)