
Commit 1913faf

Update base for Update on "upgrade lm_eval to 0.4.5"
We have been using a pretty old `lm_eval` version, which blocks us from upgrading other libraries like `transformers` and blocks other work; see, for example, #6489. In newer versions of `lm_eval`, `pretrainedModel` becomes a required parameter, whereas in 0.4.2 it defaulted to `gpt2` when not provided. This PR upgrades our `lm_eval` version to the latest release, 0.4.5, and explicitly sets `pretrainedModel` to its original default value, `gpt2`.

Test Plan: Run eval before and after this PR and confirm the perplexity number stays roughly the same.

<img width="682" alt="Screenshot 2024-10-28 at 12 22 45 PM" src="https://github.com/user-attachments/assets/f7bccc55-ad5a-4f90-8eae-eefdd8e9997a">

Differential Revision: [D65079913](https://our.internmc.facebook.com/intern/diff/D65079913)

[ghstack-poisoned]
2 parents 85d3ff6 + 2c32bf3 commit 1913faf
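
For context, a hedged sketch of what pinning the old default looks like against lm_eval's 0.4.x Python API. The wrapper and task names here are illustrative; the PR's own eval entry point is not shown on this page.

```python
import lm_eval
from lm_eval.models.huggingface import HFLM

# From 0.4.5 the pretrained model must be passed explicitly; in 0.4.2 an
# unset model silently fell back to gpt2, so the old default is pinned here.
lm = HFLM(pretrained="gpt2")

# "wikitext" is just an example perplexity task for the before/after check.
results = lm_eval.simple_evaluate(model=lm, tasks=["wikitext"])
```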

40 files changed: +872 −39 lines

.github/workflows/ghstack_land.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -11,6 +11,7 @@ on:
       - 'gh/kimishpatel/[0-9]+/base'
       - 'gh/kirklandsign/[0-9]+/base'
       - 'gh/larryliu0820/[0-9]+/base'
+      - 'gh/lucylq/[0-9]+/base'
       - 'gh/manuelcandales/[0-9]+/base'
       - 'gh/mcr229/[0-9]+/base'
       - 'gh/swolchok/[0-9]+/base'
```

README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -92,7 +92,7 @@ tools.
 ├── runtime # Core C++ runtime.
 |  ├── backend # Backend delegate runtime APIs.
 |  ├── core # Core structures used across all levels of the runtime.
-|  ├── executor # Model loading, initalization, and execution.
+|  ├── executor # Model loading, initialization, and execution.
 |  ├── kernel # Kernel registration and management.
 |  ├── platform # Layer between architecture specific code and portable C++.
 ├── schema # ExecuTorch PTE file format flatbuffer
```

backends/arm/arm_partitioner.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -62,6 +62,7 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
             exir_ops.edge.aten.relu.default,
             exir_ops.edge.aten.rsqrt.default,
             exir_ops.edge.aten._softmax.default,
+            exir_ops.edge.aten.select_copy.int,
             exir_ops.edge.aten._log_softmax.default,
             exir_ops.edge.aten.slice_copy.Tensor,
             exir_ops.edge.aten.sub.Tensor,
```

backends/arm/operators/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -28,6 +28,7 @@
     op_relu,
     op_repeat,
     op_rsqrt,
+    op_select,
     op_sigmoid,
     op_slice,
     op_squeeze,
```

backends/arm/operators/op_select.py

Lines changed: 69 additions & 0 deletions
```diff
@@ -0,0 +1,69 @@
+# Copyright 2024 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List
+
+import serializer.tosa_serializer as ts
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+
+from executorch.backends.arm.tosa_mapping import TosaArg
+
+from executorch.backends.arm.tosa_utils import build_reshape, tosa_shape
+from serializer.tosa_serializer import TosaOp
+from torch.fx import Node
+
+
+@register_node_visitor
+class SelectVisitor(NodeVisitor):
+    target = "aten.select_copy.int"
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+        is_quant_node: bool,
+    ) -> None:
+
+        assert len(inputs) == 3
+        input_node, dim, index = inputs
+        shape = input_node.shape
+        rank = len(shape)
+
+        dim = dim.number % rank if dim.number < 0 else dim.number
+        index = index.number % rank if index.number < 0 else index.number
+
+        # For aten.select_copy, rank(output) == rank(input) - 1,
+        # but for TOSA, rank(in) == rank(out).
+        # Add an intermediate with the same rank as the input.
+        expanded_shape = tuple(1 if i == dim else shape[i] for i in range(rank))
+        expanded_shape = tosa_shape(expanded_shape, input_node.dim_order)
+
+        output_reshaped = tosa_graph.addIntermediate(
+            expanded_shape, ts.DType.INT8 if is_quant_node else output.dtype
+        )
+
+        attr_slice = ts.TosaSerializerAttribute()
+
+        start_attr = [index if i == dim else 0 for i in input_node.dim_order]
+        size_attr = [
+            1 if i == dim else input_node.shape[i] for i in input_node.dim_order
+        ]
+
+        attr_slice.SliceAttribute(start_attr, size_attr)
+
+        tosa_graph.addOperator(
+            TosaOp.Op().SLICE, [input_node.name], [output_reshaped.name], attr_slice
+        )
+
+        # Reshape back to original rank of output.
+        build_reshape(tosa_graph, output_reshaped.name, output.shape, output.name)
```
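
The SLICE-then-RESHAPE lowering in this visitor can be sanity-checked in plain PyTorch. A minimal sketch of the equivalence it relies on, with arbitrary shapes and `narrow` standing in for TOSA SLICE:

```python
import torch

# TOSA's SLICE keeps rank, while aten.select_copy drops the selected dim,
# so the visitor slices to a singleton dim and then reshapes it away.
x = torch.rand(5, 3, 20)
dim, index = 1, 2

sliced = x.narrow(dim, index, 1)   # shape (5, 1, 20): rank preserved, like SLICE
reshaped = sliced.reshape(5, 20)   # singleton dim removed, like RESHAPE

assert torch.equal(reshaped, torch.select_copy(x, dim, index))
```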

backends/arm/quantizer/quantization_annotation/generic_annotator.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -34,6 +34,8 @@
     # torch.ops.aten.view_as_real.default,
     # torch.ops.aten.view_as_real_copy.default,
     torch.ops.aten.view_copy.default,
+    torch.ops.aten.select.int,
+    torch.ops.aten.select_copy.int,
     torch.ops.aten.slice.Tensor,
     torch.ops.aten.slice_copy.Tensor,
     # 'concat' should be handled separately as it has a sequence of inputs and
```
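
The select ops join a list of view-like ops here; presumably (an assumption from the surrounding list, not stated in the diff) the generic annotator gives such ops a quantization spec shared with their input. A small sketch of why that is safe:

```python
import torch

# select/select_copy only pick out a sub-tensor; no values are rescaled, so
# (under the shared-qspec assumption above) the output can reuse the input's
# quantization parameters.
q = torch.randint(-128, 128, (5, 3, 20), dtype=torch.int8)
row = torch.select_copy(q, 0, 2)

assert torch.equal(row, q[2])  # identical int8 values: same scale/zero-point
```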

backends/arm/test/ops/test_select.py

Lines changed: 198 additions & 0 deletions
```diff
@@ -0,0 +1,198 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.backend.compile_spec_schema import CompileSpec
+from parameterized import parameterized
+
+test_data_t = tuple[torch.Tensor, int, int]
+
+test_data_suite: list[tuple[test_data_t]] = [
+    # (test_data, dim, index)
+    ((torch.zeros(5, 3, 20), -1, 0),),
+    ((torch.zeros(5, 3, 20), 0, -1),),
+    ((torch.zeros(5, 3, 20), 0, 4),),
+    ((torch.ones(10, 10, 10), 0, 2),),
+    ((torch.rand(5, 3, 20, 2), 0, 2),),
+    ((torch.rand(10, 10) - 0.5, 0, 0),),
+    ((torch.randn(10) + 10, 0, 1),),
+    ((torch.randn(10) - 10, 0, 2),),
+    ((torch.arange(-16, 16, 0.2), 0, 1),),
+]
+
+
+class TestSelect(unittest.TestCase):
+    class SelectCopy(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+
+        def forward(self, x, dim: int, index: int):
+            return torch.select_copy(x, dim=dim, index=index)
+
+    class SelectInt(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+
+        def forward(self, x, dim: int, index: int):
+            return torch.select(x, dim=dim, index=index)
+
+    def _test_select_tosa_MI_pipeline(
+        self,
+        module: torch.nn.Module,
+        test_data: test_data_t,
+        export_target: str,
+    ):
+        # For 4D tensors, do not permute to NHWC
+        permute = False if len(test_data[0].shape) == 4 else True
+        (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=common.get_tosa_compile_spec(
+                    permute_memory_to_nhwc=permute
+                ),
+            )
+            .export()
+            .check([export_target])
+            .check_not(["torch.ops.quantized_decomposed"])
+            .to_edge()
+            .partition()
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=test_data)
+        )
+
+    def _test_select_tosa_BI_pipeline(
+        self,
+        module: torch.nn.Module,
+        test_data: test_data_t,
+        export_target: str,
+    ):
+        # For 4D tensors, do not permute to NHWC
+        permute = False if len(test_data[0].shape) == 4 else True
+        (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=common.get_tosa_compile_spec(
+                    permute_memory_to_nhwc=permute
+                ),
+            )
+            .quantize()
+            .export()
+            .check([export_target])
+            .check(["torch.ops.quantized_decomposed"])
+            .to_edge()
+            .partition()
+            .dump_artifact()
+            .dump_operator_distribution()
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=test_data)
+        )
+
+    def _test_select_ethos_BI_pipeline(
+        self,
+        compile_spec: list[CompileSpec],
+        module: torch.nn.Module,
+        test_data: test_data_t,
+        export_target: str,
+    ):
+        (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=compile_spec,
+            )
+            .quantize()
+            .export()
+            .check([export_target])
+            .check(["torch.ops.quantized_decomposed"])
+            .to_edge()
+            .partition()
+            .dump_artifact()
+            .dump_operator_distribution()
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+        )
+
+    def _test_select_tosa_u55_BI_pipeline(
+        self, module: torch.nn.Module, test_data: test_data_t, export_target: str
+    ):
+        # For 4D tensors, do not permute to NHWC
+        permute = False if len(test_data[0].shape) == 4 else True
+        self._test_select_ethos_BI_pipeline(
+            common.get_u55_compile_spec(permute_memory_to_nhwc=permute),
+            module,
+            test_data,
+            export_target,
+        )
+
+    def _test_select_tosa_u85_BI_pipeline(
+        self, module: torch.nn.Module, test_data: test_data_t, export_target: str
+    ):
+        # For 4D tensors, do not permute to NHWC
+        permute = False if len(test_data[0].shape) == 4 else True
+        self._test_select_ethos_BI_pipeline(
+            common.get_u85_compile_spec(permute_memory_to_nhwc=permute),
+            module,
+            test_data,
+            export_target,
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_copy_tosa_MI(self, test_data: test_data_t):
+        self._test_select_tosa_MI_pipeline(
+            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_int_tosa_MI(self, test_data: test_data_t):
+        self._test_select_tosa_MI_pipeline(
+            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_copy_tosa_BI(self, test_data: test_data_t):
+        self._test_select_tosa_BI_pipeline(
+            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_int_tosa_BI(self, test_data: test_data_t):
+        self._test_select_tosa_BI_pipeline(
+            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_copy_tosa_u55_BI(self, test_data: test_data_t):
+        self._test_select_tosa_u55_BI_pipeline(
+            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_int_tosa_u55_BI(self, test_data: test_data_t):
+        self._test_select_tosa_u55_BI_pipeline(
+            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_copy_tosa_u85_BI(self, test_data: test_data_t):
+        self._test_select_tosa_u85_BI_pipeline(
+            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_select_int_tosa_u85_BI(self, test_data: test_data_t):
+        self._test_select_tosa_u85_BI_pipeline(
+            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
+        )
```

backends/cadence/aot/ops_registrations.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -188,7 +188,7 @@ def quantized_relu_meta(
     out_multiplier: torch.Tensor,
     out_shift: torch.Tensor,
 ) -> torch.Tensor:
-    return X.new_empty(X.size(), dtype=torch.uint8)
+    return X.new_empty(X.size(), dtype=X.dtype)
 
 
 @register_fake("cadence::quantized_matmul")
```

backends/cadence/hifi/operators/dequantize_per_tensor.cpp

Lines changed: 4 additions & 1 deletion
```diff
@@ -45,7 +45,10 @@ void dequantize_per_tensor_out(
     const int32_t* input_data = input.const_data_ptr<int32_t>();
     dequantize<int32_t>(out_data, input_data, scale, zero_point, numel);
   } else {
-    ET_CHECK_MSG(false, "Unhandled input dtype %hhd", input.scalar_type());
+    ET_CHECK_MSG(
+        false,
+        "Unhandled input dtype %hhd",
+        static_cast<int8_t>(input.scalar_type()));
   }
 }
```

backends/cadence/hifi/operators/quantize_per_tensor.cpp

Lines changed: 4 additions & 1 deletion
```diff
@@ -49,7 +49,10 @@ void quantize_per_tensor_out(
     cadence::impl::HiFi::kernels::quantize<int32_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else {
-    ET_CHECK_MSG(false, "Unhandled input dtype %hhd", out.scalar_type());
+    ET_CHECK_MSG(
+        false,
+        "Unhandled output dtype %hhd",
+        static_cast<int8_t>(out.scalar_type()));
   }
 }
```

backends/cadence/reference/operators/quantized_conv_out.cpp

Lines changed: 5 additions & 0 deletions
```diff
@@ -248,6 +248,11 @@ void quantized_conv_out(
         output_scale,
         (int8_t)output_zero_point,
         per_tensor_quantized);
+  } else {
+    ET_CHECK_MSG(
+        false,
+        "Unhandled input dtype %hhd",
+        static_cast<int8_t>(input.scalar_type()));
   }
 }
```
