Skip to content

Commit 8c4427c

Browse files
Enable permute_memory_to_nhwc for corstone300 unittests
Differential Revision: D61480408
Pull Request resolved: #4773
1 parent 65d552f commit 8c4427c

File tree

7 files changed

+85
-49
lines changed

7 files changed

+85
-49
lines changed

backends/arm/runtime/ArmBackendEthosU.cpp

Lines changed: 62 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,9 @@ class ArmBackend final : public PyTorchBackendInterface {
148148
if (both_char and permuted_input_shape) {
149149
// permuted byte copy CHW to HWC
150150
permute_CHW_to_HWC(
151-
scratch_addr,
152151
tensor_in.mutable_data_ptr<char>(),
152+
scratch_addr,
153+
tensor_in.size(1),
153154
tensor_in.size(2),
154155
tensor_in.size(3));
155156
} else if (both_char or both_int) {
@@ -204,13 +205,31 @@ class ArmBackend final : public PyTorchBackendInterface {
204205
// Process input EValue into scratch
205206
// Outputs are in the index immediately after inputs
206207
auto tensor_out = args[handles.inputs->count + i]->toTensor();
207-
for (int j = 0; j < tensor_out.numel(); j++) {
208-
if (tensor_out.scalar_type() == ScalarType::Char) {
209-
char* output_address = (char*)output_addr;
210-
tensor_out.mutable_data_ptr<char>()[j] = output_address[j];
211-
} else {
212-
int* output_address = (int*)output_addr;
213-
tensor_out.mutable_data_ptr<int>()[j] = output_address[j];
208+
bool permuted_output_shape;
209+
ET_CHECK_OK_OR_RETURN_ERROR(check_requires_permute(
210+
i,
211+
tensor_out,
212+
&handles.outputs->io[i],
213+
execution_handle->permuted_io_flag,
214+
&permuted_output_shape));
215+
if (tensor_out.scalar_type() == ScalarType::Char and
216+
permuted_output_shape) {
217+
char* output_address = (char*)output_addr;
218+
permute_HWC_to_CHW(
219+
output_address,
220+
tensor_out.mutable_data_ptr<char>(),
221+
tensor_out.size(1),
222+
tensor_out.size(2),
223+
tensor_out.size(3));
224+
} else {
225+
for (int j = 0; j < tensor_out.numel(); j++) {
226+
if (tensor_out.scalar_type() == ScalarType::Char) {
227+
char* output_address = (char*)output_addr;
228+
tensor_out.mutable_data_ptr<char>()[j] = output_address[j];
229+
} else {
230+
int* output_address = (int*)output_addr;
231+
tensor_out.mutable_data_ptr<int>()[j] = output_address[j];
232+
}
214233
}
215234
}
216235
}
@@ -225,51 +244,62 @@ class ArmBackend final : public PyTorchBackendInterface {
225244
private:
226245
Error check_requires_permute(
227246
int index,
228-
const exec_aten::Tensor tensor_in,
229-
VelaIO* input,
247+
const exec_aten::Tensor tensor,
248+
VelaIO* io,
230249
bool permuted_io_flag,
231250
bool* is_permuted) const {
232-
bool permuted_input_shape = false;
233-
if (tensor_in.dim() == 4) {
251+
bool permuted_shape = false;
252+
if (tensor.dim() == 4) {
234253
// special case for NHWC workaround in AOT; as the compilation has
235254
// permuted to channel last in an undetectable way, we assume here
236-
// that the application has similarly permuted any input tensors.
237-
permuted_input_shape = tensor_in.size(0) == input->shape[0] &&
238-
tensor_in.size(1) == input->shape[3] &&
239-
tensor_in.size(2) == input->shape[1] &&
240-
tensor_in.size(3) == input->shape[2];
241-
if (permuted_input_shape) {
242-
ET_LOG(Info, "Tensor input %d will be permuted", index);
255+
// that the application has similarly permuted any input/output tensors.
256+
permuted_shape = tensor.size(0) == io->shape[0] &&
257+
tensor.size(1) == io->shape[3] && tensor.size(2) == io->shape[1] &&
258+
tensor.size(3) == io->shape[2];
259+
if (permuted_shape) {
260+
ET_LOG(Info, "Tensor input/output %d will be permuted", index);
243261
}
244-
if (permuted_io_flag != permuted_input_shape) {
245-
ET_LOG(Error, "Permute compile flag and permuted input don't agree");
262+
if (permuted_io_flag != permuted_shape) {
263+
ET_LOG(
264+
Error,
265+
"Permute compile flag and permuted input/output don't agree");
246266
return Error::InvalidProgram;
247267
}
248268
}
249-
if (!permuted_input_shape) {
269+
if (!permuted_shape) {
250270
// Error check matching shapes in the general case
251-
for (int i = 0; i < tensor_in.dim(); i++) {
252-
if (tensor_in.size(i) != input->shape[i]) {
253-
ET_LOG(Error, "Tensor input %d mismatched shape", index);
271+
for (int i = 0; i < tensor.dim(); i++) {
272+
if (tensor.size(i) != io->shape[i]) {
273+
ET_LOG(Error, "Tensor input/output %d mismatched shape", index);
254274
ET_LOG(
255275
Error,
256276
"dimension %d mismatch, %zd != %d",
257277
index,
258-
tensor_in.size(i),
259-
input->shape[i]);
278+
tensor.size(i),
279+
io->shape[i]);
260280
return Error::InvalidProgram;
261281
}
262282
}
263283
}
264-
*is_permuted = permuted_input_shape;
284+
*is_permuted = permuted_shape;
265285
return Error::Ok;
266286
}
267287

268-
void permute_CHW_to_HWC(char* input, char* output, int H, int W) const {
288+
void permute_CHW_to_HWC(char* input, char* output, int C, int H, int W)
289+
const {
269290
for (int i = 0; i != H * W; ++i) {
270-
output[i * 3 + 0] = input[i + 0 * W * H];
271-
output[i * 3 + 1] = input[i + 1 * W * H];
272-
output[i * 3 + 2] = input[i + 2 * W * H];
291+
for (int j = 0; j < C; ++j) {
292+
output[i * C + j] = input[i + j * W * H];
293+
}
294+
}
295+
}
296+
297+
void permute_HWC_to_CHW(char* input, char* output, int C, int H, int W)
298+
const {
299+
for (int i = 0; i != H * W; ++i) {
300+
for (int j = 0; j < C; ++j) {
301+
output[i + j * W * H] = input[i * C + j];
302+
}
273303
}
274304
}
275305
};

backends/arm/test/ops/test_add.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ class Add2(torch.nn.Module):
3737
torch.FloatTensor([1, 2, 3, 5, 7]),
3838
(torch.FloatTensor([2, 1, 2, 1, 10])),
3939
),
40-
(torch.ones(1, 1, 4, 4), torch.ones(1, 1, 4, 4)),
40+
(torch.ones(1, 10, 4, 6), torch.ones(1, 10, 4, 6)),
4141
(torch.randn(1, 1, 4, 4), torch.ones(1, 1, 4, 1)),
42-
(torch.randn(1, 1, 4, 4), torch.randn(1, 1, 4, 1)),
42+
(torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4)),
4343
(10000 * torch.randn(1, 1, 4, 4), torch.randn(1, 1, 4, 1)),
4444
]
4545

@@ -101,7 +101,7 @@ def _test_add_u55_BI_pipeline(
101101
ArmTester(
102102
module,
103103
example_inputs=test_data,
104-
compile_spec=common.get_u55_compile_spec(),
104+
compile_spec=common.get_u55_compile_spec(permute_memory_to_nhwc=True),
105105
)
106106
.quantize()
107107
.export()

backends/arm/test/ops/test_conv.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7-
import logging
87
import unittest
98

109
from typing import List, Tuple, Union
@@ -15,9 +14,6 @@
1514
from executorch.backends.arm.test.tester.arm_tester import ArmTester
1615
from parameterized import parameterized
1716

18-
logger = logging.getLogger(__name__)
19-
logger.setLevel(logging.INFO)
20-
2117

2218
class Conv2d(torch.nn.Module):
2319
"""

backends/arm/test/ops/test_linear.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,17 @@
2626
(
2727
"model_linear_rank1_zeros",
2828
torch.zeros(10),
29-
10,
29+
15,
3030
),
3131
(
3232
"model_linear_rank1_ones",
3333
torch.ones(10),
34-
10,
34+
15,
3535
),
3636
(
3737
"model_linear_rank1_negative_ones",
3838
torch.ones(10) * (-1),
39-
10,
39+
20,
4040
),
4141
(
4242
"model_linear_rank1_rand",
@@ -46,12 +46,12 @@
4646
(
4747
"model_linear_rank1_negative_large_rand",
4848
torch.rand(10) * (-100),
49-
10,
49+
30,
5050
),
5151
(
5252
"model_linear_rank1_large_randn",
53-
torch.randn(10) * 100,
54-
10,
53+
torch.randn(15) * 100,
54+
20,
5555
),
5656
]
5757

backends/arm/test/runner_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,9 +265,12 @@ def run_corstone300(
265265
raise RuntimeError(
266266
f"Corstone simulation failed, log: \n {result_stdout}\n{result.stderr.decode()}"
267267
)
268+
elif "E [" in result_stdout:
269+
logger.error(result_stdout)
268270

269271
tosa_ref_output = np.fromfile(out_path_with_suffix, dtype=np.float32)
270-
tosa_ref_output = torch.from_numpy(tosa_ref_output).reshape(inputs[0].shape)
272+
output_shape = self.output_node.args[0][0].meta["val"].shape
273+
tosa_ref_output = torch.from_numpy(tosa_ref_output).reshape(output_shape)
271274
return [tosa_ref_output]
272275

273276
def run_tosa_ref_model(

backends/arm/test/tester/arm_tester.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,10 @@ def run_method_and_compare_outputs(
252252
if isinstance(arg, tuple) and isinstance(arg[0], torch.Tensor):
253253
test_input.extend(list(arg))
254254

255-
if is_nhwc:
255+
if (
256+
is_nhwc
257+
and test_stage == self.stages[self.stage_name(tester.ToExecutorch)]
258+
):
256259
test_input = self.transpose_data_format(test_input, "NHWC")
257260

258261
input_shapes = [
@@ -263,7 +266,10 @@ def run_method_and_compare_outputs(
263266

264267
reference_output = reference_stage.run_artifact(reference_input)
265268
test_output = tuple(test_stage.run_artifact(test_input))
266-
if is_nhwc:
269+
if (
270+
is_nhwc
271+
and test_stage == self.stages[self.stage_name(tester.ToExecutorch)]
272+
):
267273
test_output = self.transpose_data_format(test_output, "NCHW")
268274

269275
self._compare_outputs(

examples/arm/executor_runner/arm_executor_runner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ void et_pal_emit_log_message(
7171
size_t line,
7272
const char* message,
7373
ET_UNUSED size_t length) {
74-
fprintf(stderr, "%c executorch:%s:%zu] %s\n", level, filename, line, message);
74+
fprintf(
75+
stderr, "%c [executorch:%s:%zu] %s\n", level, filename, line, message);
7576
}
7677

7778
namespace {

0 commit comments

Comments (0)