Skip to content

Commit ee56505

Browse files
committed
Update base for Update on "[ET][EZ] Enable operator<< for Half tensor data"
Useful for debugging `Half` (i.e. `fp16`) models, when we have `EValue`s that are `Half` dtype and we do the following:

```
std::cout << "===== INPUT =====" << std::endl;
for (EValue& v : inputs) {
  std::cout << v << std::endl;
}
std::cout << "===== OUTPUT =====" << std::endl;
for (EValue& v : outputs) {
  std::cout << v << std::endl;
}
```

## Before

```
===== INPUT =====
tensor(sizes=[1, 3, 96, 72], [<unhandled scalar type 5>])
===== OUTPUT =====
tensor(sizes=[1, 2, 96, 72], [<unhandled scalar type 5>])
```

## After

```
===== INPUT =====
tensor(sizes=[1, 3, 96, 72], [0.279785, 0.271484, 0.364746, ..., 0.150391, 0.836426, 0.019043])
===== OUTPUT =====
tensor(sizes=[1, 2, 96, 72], [18.2344, -10.0938, 1.35059, ..., -33.6875, 4.07422, -22.5312])
```

Differential Revision: [D57977366](https://our.internmc.facebook.com/intern/diff/D57977366/)

[ghstack-poisoned]
2 parents f89090e + e650dd9 commit ee56505

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2034
-600
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
2+
#!/bin/bash
3+
# Copyright (c) Meta Platforms, Inc. and affiliates.
4+
# All rights reserved.
5+
#
6+
# This source code is licensed under the BSD-style license found in the
7+
# LICENSE file in the root directory of this source tree.
8+
9+
set -ex
10+
11+
# Download the prebuilt SwiftShader archive, unpack it under /tmp/swiftshader,
# and export the environment variables that point at its Vulkan ICD manifest
# and shared libraries.
#
# Takes no arguments. Exports VK_ICD_FILENAMES and LD_LIBRARY_PATH.
install_swiftshader() {
  _https_amazon_aws=https://ossci-android.s3.amazonaws.com
  _swiftshader_archive=swiftshader-abe07b943-prebuilt.tar.gz
  _swiftshader_dir=/tmp/swiftshader
  mkdir -p "${_swiftshader_dir}"

  _tmp_archive="/tmp/${_swiftshader_archive}"

  # --fail makes curl exit non-zero on HTTP errors so `set -e` aborts the
  # script instead of handing an error page to tar.
  curl --silent --show-error --location --fail --retry 3 \
    --output "${_tmp_archive}" "${_https_amazon_aws}/${_swiftshader_archive}"

  tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"

  export VK_ICD_FILENAMES="${_swiftshader_dir}/swiftshader/build/Linux/vk_swiftshader_icd.json"
  # Prepend instead of overwrite: this script is sourced into the calling
  # shell, so replacing LD_LIBRARY_PATH would drop any library directories the
  # caller had already configured. The `:+` expansion avoids a trailing colon
  # (which would add the current directory) when the variable is unset or empty.
  export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
}
27+
28+
# Download the LunarG Vulkan SDK tarball for the requested release, unpack it
# under /tmp/vulkansdk, and append the SDK's x86_64/bin directory to PATH.
#
# Arguments:
#   $1 - SDK version string; it is used both in the download URL and as the
#        name of the unpacked top-level directory.
install_vulkan_sdk() {
  VULKAN_SDK_VERSION="$1"
  _vulkan_sdk_url="https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"

  _vulkan_sdk_dir=/tmp/vulkansdk
  mkdir -p "${_vulkan_sdk_dir}"

  _tmp_archive="/tmp/vulkansdk.tar.gz"

  # Same download flags as the SwiftShader fetch: quiet, follow redirects,
  # fail on HTTP errors, retry transient failures.
  curl --silent --show-error --location --fail --retry 3 \
    --output "${_tmp_archive}" "${_vulkan_sdk_url}"

  tar -C "${_vulkan_sdk_dir}" -xzf "${_tmp_archive}"

  # Appended (not prepended), so tools already on PATH keep precedence.
  export PATH="${PATH}:${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64/bin/"
}
44+
45+
VULKAN_SDK_VERSION="1.2.198.1"
46+
47+
install_swiftshader
48+
install_vulkan_sdk "${VULKAN_SDK_VERSION}"

.github/workflows/_unittest.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ jobs:
2828
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
2929
conda activate "${CONDA_ENV}"
3030
31+
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
32+
source .ci/scripts/setup-vulkan-linux-deps.sh
33+
3134
# Setup MacOS dependencies as there is no Docker support on MacOS atm
3235
PYTHON_EXECUTABLE=python \
3336
EXECUTORCH_BUILD_PYBIND=ON \
@@ -37,6 +40,7 @@ jobs:
3740
# Run pytest with coverage
3841
pytest -n auto --cov=./ --cov-report=xml
3942
# Run gtest
43+
LLVM_PROFDATA=llvm-profdata-12 LLVM_COV=llvm-cov-12 \
4044
test/run_oss_cpp_tests.sh
4145
4246
macos:
@@ -66,4 +70,5 @@ jobs:
6670
# Run pytest with coverage
6771
${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
6872
# Run gtest
73+
LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \
6974
${CONDA_RUN} test/run_oss_cpp_tests.sh

.github/workflows/trunk.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
- release/*
88
tags:
99
- ciflow/trunk/*
10+
pull_request:
11+
paths:
12+
- .ci/scripts/**
1013
workflow_dispatch:
1114

1215
concurrency:

CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,23 @@ else()
343343
set(CMAKE_TOOLCHAIN_ANDROID OFF)
344344
endif()
345345

346+
# Add code coverage flags to supported compilers.
#
# GNU uses gcov-style instrumentation (--coverage); Clang-family compilers use
# source-based coverage (-fprofile-instr-generate -fcoverage-mapping). Any
# other compiler aborts configuration, because a "coverage" build that silently
# produces no coverage data is worse than a hard failure.
if(EXECUTORCH_USE_CPP_CODE_COVERAGE)
  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path")
    string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path")
  elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping")
    string(APPEND CMAKE_CXX_FLAGS
           " -fprofile-instr-generate -fcoverage-mapping"
    )
  else()
    # `ERROR` is not a message() mode keyword; it would be printed as part of
    # an ordinary notice and configuration would continue. FATAL_ERROR stops
    # processing and generation.
    message(FATAL_ERROR
            "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported"
    )
  endif()
endif()
362+
346363
# EXECUTORCH_BUILD_HOST_TARGETS: Option to control the building of host-only
347364
# tools like `flatc`, along with example executables like `executor_runner` and
348365
# libraries that it uses, like `gflags`. Disabling this can be helpful when

backends/qualcomm/builders/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
op_mul,
3434
op_pad,
3535
op_pow,
36+
op_prelu,
3637
op_quantize,
3738
op_relu,
3839
op_reshape,
@@ -42,6 +43,7 @@
4243
op_skip_ops,
4344
op_slice_copy,
4445
op_softmax,
46+
op_space_to_depth,
4547
op_sqrt,
4648
op_squeeze,
4749
op_sub,
@@ -50,6 +52,7 @@
5052
op_transpose,
5153
op_unsqueeze,
5254
op_upsample_bilinear2d,
55+
op_upsample_nearest2d,
5356
)
5457

5558
__all__ = [
@@ -75,11 +78,13 @@
7578
op_layer_norm,
7679
op_linear,
7780
op_log_softmax,
81+
op_matmul,
7882
op_max_pool2d,
7983
op_mean_dim,
8084
op_mul,
8185
op_pad,
8286
op_pow,
87+
op_prelu,
8388
op_quantize,
8489
op_relu,
8590
op_reshape,
@@ -89,6 +94,7 @@
8994
op_skip_ops,
9095
op_slice_copy,
9196
op_softmax,
97+
op_space_to_depth,
9298
op_squeeze,
9399
op_sqrt,
94100
op_sub,
@@ -97,5 +103,5 @@
97103
op_transpose,
98104
op_unsqueeze,
99105
op_upsample_bilinear2d,
100-
op_matmul,
106+
op_upsample_nearest2d,
101107
]
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
from typing import Dict
7+
8+
import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
9+
10+
import torch
11+
from executorch.exir.dialects._ops import ops as exir_ops
12+
13+
from .node_visitor import get_parameter, NodeVisitor, register_node_visitor
14+
from .qnn_constants import OpPRelu, QNN_OP_PACKAGE_NAME_QTI_AISW
15+
16+
17+
@register_node_visitor
18+
class PReLU(NodeVisitor):
19+
target = ["aten.leaky_relu.default", "aten.prelu.default"]
20+
21+
def __init__(self, *args) -> None:
22+
super().__init__(*args)
23+
24+
def define_node(
25+
self,
26+
node: torch.fx.Node,
27+
nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
28+
) -> PyQnnWrapper.PyQnnOpWrapper:
29+
input_node = node.args[0]
30+
input_tensor = self.get_tensor(input_node, node)
31+
prelu_inp_tensor_wrapper = self.define_tensor(
32+
input_node,
33+
input_tensor,
34+
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
35+
nodes_to_wrappers,
36+
is_input_tensor=True,
37+
)
38+
39+
if node.target.__name__ == "aten.leaky_relu.default":
40+
coeff = 1e-2 if len(node.args) < 2 else node.args[1]
41+
coeff_tensor = torch.full(input_tensor.shape, coeff).to(torch.float32)
42+
else:
43+
coeff_node = node.args[1]
44+
coeff_tensor = torch.zeros(input_node.meta["val"].shape)
45+
coeff = get_parameter(coeff_node, self.edge_program)
46+
# per-channel activation
47+
if coeff_node.meta["val"].shape[0] > 1:
48+
for i in range(input_node.meta["val"].shape[1]):
49+
coeff_tensor = coeff_tensor.index_fill(
50+
1, torch.tensor([i]), coeff[i]
51+
)
52+
if "axis_order" in input_node.meta:
53+
axis_order = input_node.meta["axis_order"]
54+
coeff_tensor = coeff_tensor.permute(dims=axis_order).contiguous()
55+
# simple min-max quantization
56+
coeff = torch.max(coeff).item()
57+
else:
58+
coeff = coeff.item()
59+
coeff_tensor = torch.full(input_tensor.shape, coeff).to(torch.float32)
60+
61+
# 'graph', 'name', 'op', 'target', 'args', and 'kwargs'
62+
scalar_node = torch.fx.Node(
63+
node.graph,
64+
node.name + "_runtime_scalar",
65+
"call_function",
66+
exir_ops.edge.aten.full.default,
67+
(), # args
68+
{}, # kwargs
69+
)
70+
if pow_quant_attrs := node.meta.get("quant_attrs"):
71+
quant_attrs = pow_quant_attrs.copy()
72+
quant_range = quant_attrs["quant_max"] - quant_attrs["quant_min"]
73+
# coeff is guaranteed to be positive
74+
quant_attrs["zero_point"] = 0
75+
quant_attrs["scale"] = coeff / quant_range
76+
scalar_node.meta["quant_attrs"] = quant_attrs
77+
78+
scalar_tensor_wrapper = self.define_tensor(
79+
scalar_node,
80+
coeff_tensor,
81+
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
82+
nodes_to_wrappers,
83+
is_input_tensor=True,
84+
)
85+
prelu_input_tensors = [prelu_inp_tensor_wrapper, scalar_tensor_wrapper]
86+
87+
output_tensor = self.get_tensor(node, node)
88+
output_tensor_wrapper = self.define_tensor(
89+
node,
90+
output_tensor,
91+
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
92+
nodes_to_wrappers,
93+
is_input_tensor=False,
94+
)
95+
prelu_output_tensors = [output_tensor_wrapper]
96+
97+
prelu_op = PyQnnWrapper.PyQnnOpWrapper(
98+
node.name,
99+
QNN_OP_PACKAGE_NAME_QTI_AISW,
100+
OpPRelu.op_name,
101+
)
102+
prelu_op.AddInputTensors(prelu_input_tensors)
103+
prelu_op.AddOutputTensors(prelu_output_tensors)
104+
105+
return prelu_op
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Dict
8+
9+
import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
10+
11+
import numpy as np
12+
import torch
13+
14+
from .node_visitor import NodeVisitor, register_node_visitor
15+
from .qnn_constants import OpSpaceToDepth, QNN_OP_PACKAGE_NAME_QTI_AISW
16+
17+
18+
@register_node_visitor
class SpaceToDepthVisitor(NodeVisitor):
    """Node visitor that emits the QNN op named by ``OpSpaceToDepth.op_name``.

    Registered for ``aten.pixel_unshuffle.default``.
    """

    target = ["aten.pixel_unshuffle.default"]

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        """Build the op wrapper for ``node``.

        Args:
            node: The pixel_unshuffle node being lowered.
            nodes_to_wrappers: Mapping handed through to ``define_tensor`` for
                the input and output tensors.

        Returns:
            The op wrapper with one input, one output, a two-element
            ``block_size`` tensor parameter and a ``mode`` scalar parameter
            set to CRD.
        """
        tensor_type = PyQnnWrapper.Qnn_TensorType_t
        data_type = PyQnnWrapper.Qnn_DataType_t

        source_node = node.args[0]
        source = self.get_tensor(source_node, node)
        source_wrapper = self.define_tensor(
            source_node,
            source,
            tensor_type.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
            is_input_tensor=True,
        )

        result = self.get_tensor(node, node)
        result_wrapper = self.define_tensor(
            node,
            result,
            tensor_type.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
            is_input_tensor=False,
        )

        # Block size is the input/output extent ratio on axes 1 and 2.
        # NOTE(review): presumably these are the spatial axes of the tensors
        # returned by get_tensor - confirm the layout against NodeVisitor.
        ratios = [source.shape[axis] / result.shape[axis] for axis in (1, 2)]
        block_size = np.array(ratios, dtype=np.uint32)
        block_size_shape = [2]

        space_to_depth_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpSpaceToDepth.op_name,
        )
        space_to_depth_op.AddInputTensors([source_wrapper])
        space_to_depth_op.AddOutputTensors([result_wrapper])
        space_to_depth_op.AddTensorParam(
            OpSpaceToDepth.param_block_size,
            data_type.QNN_DATATYPE_UINT_32,
            block_size.ndim,
            block_size_shape,
            block_size,
            True,
        )
        space_to_depth_op.AddScalarParam(
            OpSpaceToDepth.param_mode,
            data_type.QNN_DATATYPE_UINT_32,
            {"data": np.uint32(OpSpaceToDepth.Mode.CRD)},
        )

        return space_to_depth_op

backends/qualcomm/builders/op_upsample_bilinear2d.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def define_node(
6060
reisze_bilinear_op.AddScalarParam(
6161
OpResizeBilinear.param_half_pixel_centers,
6262
PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_BOOL_8,
63-
{"data": True},
63+
{"data": not node.args[2]},
6464
)
6565

6666
return reisze_bilinear_op

0 commit comments

Comments
 (0)