
Commit 0193cff

Update base for Update on "Only include base64.h in tiktoken.cpp"
Because it is only used inside `tiktoken.cpp`. Differential Revision: [D59664316](https://our.internmc.facebook.com/intern/diff/D59664316/) [ghstack-poisoned]
2 parents 853a937 + 4b45264 commit 0193cff


47 files changed: +1201 −739 lines

.github/workflows/android.yml

Lines changed: 15 additions & 16 deletions
@@ -27,8 +27,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
       matrix:
-        include:
-          - build-tool: buck2
+        tiktoken: [OFF, ON]
     with:
       # NB: The example model dl3 requires lots of memory (T161064121)
       runner: linux.12xlarge
@@ -44,30 +43,30 @@ jobs:
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"

-      BUILD_TOOL=${{ matrix.build-tool }}
       # Setup MacOS dependencies as there is no Docker support on MacOS atm
-      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
+      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
       # Build Android library
+      export EXECUTORCH_USE_TIKTOKEN=${{ matrix.tiktoken }}
       bash build/build_android_library.sh
       # Build Android demo app
       bash build/test_android_ci.sh

-      mkdir -p artifacts-to-be-uploaded
-      mkdir -p artifacts-to-be-uploaded/arm64-v8a/
-      mkdir -p artifacts-to-be-uploaded/x86_64/
+      mkdir -p artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN
+      mkdir -p artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/arm64-v8a/
+      mkdir -p artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/x86_64/
       # Copy the jar to S3
-      cp extension/android/build/libs/executorch.jar artifacts-to-be-uploaded/
+      cp extension/android/build/libs/executorch.jar artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
       # Copy the app and its test suite to S3
-      cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/
-      cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk artifacts-to-be-uploaded/
+      cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
+      cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
       # Also copy the libraries
-      cp cmake-out-android-arm64-v8a/lib/*.a artifacts-to-be-uploaded/arm64-v8a/
-      cp cmake-out-android-arm64-v8a/extension/android/*.so artifacts-to-be-uploaded/arm64-v8a/
-      cp cmake-out-android-x86_64/lib/*.a artifacts-to-be-uploaded/x86_64/
-      cp cmake-out-android-x86_64/extension/android/*.so artifacts-to-be-uploaded/x86_64/
+      cp cmake-out-android-arm64-v8a/lib/*.a artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/arm64-v8a/
+      cp cmake-out-android-arm64-v8a/extension/android/*.so artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/arm64-v8a/
+      cp cmake-out-android-x86_64/lib/*.a artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/x86_64/
+      cp cmake-out-android-x86_64/extension/android/*.so artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/x86_64/
       # Copyp AAR to S3
-      cp executorch.aar artifacts-to-be-uploaded/
-      cp executorch-llama.aar artifacts-to-be-uploaded/
+      cp executorch.aar artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
+      cp executorch-llama.aar artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/

   # Upload the app and its test suite to S3 so that they can be downloaded by the test job
   upload-artifacts:

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -485,7 +485,7 @@ if(MAX_KERNEL_NUM)
   )
 endif()

-if(EXECUTORCH_BUILD_PYBIND)
+if(EXECUTORCH_BUILD_PYBIND AND APPLE)
   # shared version
   add_library(
     executorch_no_prim_ops_shared SHARED ${_executorch_no_prim_ops__srcs}

backends/apple/coreml/runtime/inmemoryfs/inmemory_filesystem.cpp

Lines changed: 0 additions & 6 deletions
@@ -115,12 +115,6 @@ InMemoryFileSystem::InMemoryNode* get_node(InMemoryFileSystem::InMemoryNode* nod
     return node;
 }

-std::string toString(time_t time) {
-    constexpr auto format = "%Y-%m-%dT%TZ";
-    std::stringstream stream;
-    stream << std::put_time(gmtime(&time), format);
-    return stream.str();
-}

 time_t toTime(const std::string& str) {
     constexpr auto format = "%Y-%m-%dT%TZ";

backends/cadence/aot/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ python_library(
         "compiler.py",
     ],
     deps = [
+        "fbsource//third-party/pypi/pyre-extensions:pyre-extensions",
         ":passes",
         ":utils",
         "//caffe2:torch",

backends/cadence/aot/compiler.py

Lines changed: 9 additions & 3 deletions
@@ -18,9 +18,13 @@
     ReplaceSqueezeAndUnsqueezeWithViewPass,
 )
 from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
-from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+from executorch.backends.cadence.aot.quantizer.quantizer import (
+    CadenceGenericQuantizer,
+    CadenceQuantizer,
+)
 from executorch.backends.cadence.aot.utils import model_is_quantized
 from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge
+from pyre_extensions import assert_is_instance
 from torch._export import capture_pre_autograd_graph
 from torch.ao.quantization.pt2e.export_utils import model_is_exported
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
@@ -53,8 +57,10 @@ def quantize_pt2(
     converted_model = convert_pt2e(prepared_model)

     # Get patterns and apply fusion of dq -> op -> q to qop
-    # pyre-fixme[16]: Pyre doesn't get that CadenceQuantizer has a patterns attribute
-    patterns = [q.pattern for q in quantizer.quantizers]
+    patterns = [
+        assert_is_instance(q, CadenceGenericQuantizer).pattern
+        for q in quantizer.quantizers
+    ]
     QuantFusion(patterns)(converted_model)

     return converted_model
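
The `assert_is_instance` change replaces a blanket `# pyre-fixme[16]` suppression with a check that narrows each entry of `quantizer.quantizers` to `CadenceGenericQuantizer` for both Pyre and the runtime. A minimal sketch of the idiom, assuming pyre-extensions is installed and using stand-in classes rather than the real quantizer types:

```python
# Minimal sketch of the assert_is_instance idiom; BaseQuantizer and
# PatternQuantizer are illustrative stand-ins, not ExecuTorch classes.
from typing import List

from pyre_extensions import assert_is_instance


class BaseQuantizer:
    """Stand-in for the generic quantizer base class."""


class PatternQuantizer(BaseQuantizer):
    """Stand-in for a quantizer that carries a fusion pattern."""

    def __init__(self, pattern: str) -> None:
        self.pattern = pattern


quantizers: List[BaseQuantizer] = [PatternQuantizer("dq -> linear -> q")]

# Instead of silencing Pyre with a fixme, assert the concrete subclass.
# assert_is_instance returns the value typed as PatternQuantizer (and raises
# at runtime if the instance has the wrong type), so `.pattern` type-checks.
patterns = [assert_is_instance(q, PatternQuantizer).pattern for q in quantizers]
print(patterns)  # ['dq -> linear -> q']
```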

backends/cadence/aot/quantizer/TARGETS

Lines changed: 3 additions & 0 deletions
@@ -17,6 +17,7 @@ python_library(
     srcs = [
         "patterns.py",
     ],
+    typing = True,
     deps = [
         ":utils",
         "//caffe2:torch",
@@ -28,7 +29,9 @@ python_library(
     srcs = [
         "quantizer.py",
     ],
+    typing = True,
     deps = [
+        "fbsource//third-party/pypi/pyre-extensions:pyre-extensions",
         ":patterns",
         ":utils",
         "//caffe2:torch",

backends/cadence/aot/quantizer/fusion_pass.py

Lines changed: 3 additions & 4 deletions
@@ -11,6 +11,7 @@
 import torch
 from executorch.backends.cadence.aot.quantizer.patterns import (
     AddmmPattern,
+    BmmPattern,
     Conv1dPattern,
     Conv2dPattern,
     LayerNormFunctionalPattern,
@@ -361,9 +362,7 @@ def call(self, graph_module: fx.GraphModule) -> PassResult: # noqa: C901
                 inputs_inputs + weights_inputs + other_inputs + bias_inputs
             )
             kwargs = {}
-            if isinstance(pattern, Conv1dPattern) or isinstance(
-                pattern, Conv2dPattern
-            ):
+            if isinstance(pattern, (Conv1dPattern, Conv2dPattern)):
                 args, kwargs = get_args_and_kwargs_conv(
                     graph_module,
                     inputs_inputs,
@@ -396,7 +395,7 @@ def call(self, graph_module: fx.GraphModule) -> PassResult: # noqa: C901
                     other_inputs,
                     quant_node,
                 )
-            elif isinstance(pattern, MatmulPattern):
+            elif isinstance(pattern, (BmmPattern, MatmulPattern)):
                 args, kwargs = get_args_and_kwargs_matmul(
                     inputs_inputs,
                     dequants_inputs,
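
The fusion-pass change does two things: it imports the new `BmmPattern` and folds it into the matmul branch, while switching to `isinstance` with a tuple of types instead of chained checks. A simplified sketch of that dispatch, using stand-in pattern classes and handler names rather than the real fusion-pass helpers:

```python
# Stand-in pattern classes; the real ones come from
# executorch.backends.cadence.aot.quantizer.patterns.
class Conv1dPattern: ...
class Conv2dPattern: ...
class BmmPattern: ...
class MatmulPattern: ...


def pick_arg_builder(pattern: object) -> str:
    # isinstance accepts a tuple of types, which is what the diff switches to:
    # one branch covers both conv variants, another covers bmm and matmul.
    if isinstance(pattern, (Conv1dPattern, Conv2dPattern)):
        return "get_args_and_kwargs_conv"
    if isinstance(pattern, (BmmPattern, MatmulPattern)):
        return "get_args_and_kwargs_matmul"
    return "unhandled"


print(pick_arg_builder(BmmPattern()))     # get_args_and_kwargs_matmul
print(pick_arg_builder(Conv1dPattern()))  # get_args_and_kwargs_conv
```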

backends/cadence/aot/quantizer/patterns.py

Lines changed: 50 additions & 19 deletions
@@ -4,14 +4,17 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

+# pyre-strict
+
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import Any, Callable, List, Optional, Tuple, Type, Union
+from typing import Callable, List, Optional, Tuple, Type, Union

 import torch
 from executorch.backends.cadence.aot.quantizer.utils import get_bias_qparams

 from torch import fx
+from torch._ops import OpOverload
 from torch.ao.quantization.quantizer import (
     DerivedQuantizationSpec,
     SharedQuantizationSpec,
@@ -44,18 +47,22 @@ class PartitionAnchors:

 class QuantizationPattern(ABC):
     @abstractmethod
-    def partition_types(self):
+    def partition_types(
+        self,
+    ) -> Union[List[Type[torch.nn.Module]], List[Callable[..., torch.Tensor]]]:
         """
         List of types to be passed to find_sequential_partitions.
         """
         pass

     @abstractmethod
-    def get_anchors(self, gm, fused_partition) -> Optional[PartitionAnchors]:
+    def get_anchors(
+        self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
+    ) -> Optional[PartitionAnchors]:
         pass

     @abstractmethod
-    def replacement_op(self) -> Callable[..., Any]:
+    def replacement_op(self) -> OpOverload:
         """
         Operator (most likely a custom one) that this partition should be fused into in
         the backend. Refer to the QuantFusion pass for examples.
@@ -91,10 +98,30 @@ def get_anchors(
             output=[(addmm_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_linear


+class BmmPattern(QuantizationPattern):
+    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
+        return [torch.bmm]
+
+    def get_anchors(
+        self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
+    ) -> PartitionAnchors:
+        bmm_node = fused_partition[0].nodes[-1]
+
+        return PartitionAnchors(
+            inputs=[(bmm_node, 0), (bmm_node, 1)],
+            weights=[],
+            biases=[],
+            output=[(bmm_node,)],
+        )
+
+    def replacement_op(self) -> OpOverload:
+        return torch.ops.cadence.quantized_matmul.default
+
+
 class Conv1dPattern(QuantizationPattern):
     def partition_types(self) -> List[Type[torch.nn.Module]]:
         return [torch.nn.Conv1d]
@@ -129,7 +156,7 @@ def get_anchors(
             output=[(conv1d_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_conv.default


@@ -167,15 +194,17 @@ def get_anchors(
             output=[(conv2d_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_conv.default


 class LayerNormPattern(QuantizationPattern):
-    def partition_types(self):
+    def partition_types(self) -> List[Type[torch.nn.Module]]:
         return [torch.nn.LayerNorm]

-    def get_anchors(self, gm, fused_partition) -> PartitionAnchors:
+    def get_anchors(
+        self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
+    ) -> PartitionAnchors:
         layer_norm_node = fused_partition[0].nodes[-1]

         # Weights and biases are used as fp32 by our kernel, so they are
@@ -189,15 +218,17 @@ def get_anchors(self, gm, fused_partition) -> PartitionAnchors:
             output=[(layer_norm_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_layer_norm.default


 class LayerNormFunctionalPattern(QuantizationPattern):
-    def partition_types(self):
+    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
         return [torch.nn.functional.layer_norm]

-    def get_anchors(self, gm, fused_partition) -> PartitionAnchors:
+    def get_anchors(
+        self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
+    ) -> PartitionAnchors:
         layer_norm_node = fused_partition[0].nodes[-1]

         others = [(layer_norm_node, 1)]
@@ -221,7 +252,7 @@ def get_anchors(self, gm, fused_partition) -> PartitionAnchors:
             output=[(layer_norm_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_layer_norm.default


@@ -259,12 +290,12 @@ def get_anchors(
             output=[(linear_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_linear.default


 class LinearFunctionalPattern(QuantizationPattern):
-    def partition_types(self):
+    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
         return [torch.nn.functional.linear]

     def get_anchors(
@@ -297,12 +328,12 @@ def get_anchors(
             output=[(linear_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_linear.default


 class MatmulPattern(QuantizationPattern):
-    def partition_types(self):
+    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
         return [torch.matmul]

     def get_anchors(
@@ -317,7 +348,7 @@ def get_anchors(
             output=[(matmul_node,)],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_matmul.default


@@ -339,5 +370,5 @@ def get_anchors(
             ],
         )

-    def replacement_op(self):
+    def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_relu.default
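
Every pattern in this file implements the same three hooks: `partition_types()` tells the partitioner which ops or modules to look for, `get_anchors()` marks which inputs and outputs get quantization specs, and `replacement_op()` names the fused kernel. The toy driver below mirrors that contract for the newly added bmm case, assuming only that PyTorch is available; `ToyBmmPattern` and the simplified `Anchors` container are illustrative stand-ins, not code from this repository:

```python
from dataclasses import dataclass, field
from typing import Any, Callable, List, Tuple

import torch


@dataclass
class Anchors:
    # Simplified stand-in for PartitionAnchors: (node, input_index) pairs for
    # inputs and a (node,) tuple for the output.
    inputs: List[Tuple[Any, int]] = field(default_factory=list)
    weights: List[Tuple[Any, int]] = field(default_factory=list)
    biases: List[Tuple[Any, int]] = field(default_factory=list)
    output: List[Tuple[Any, ...]] = field(default_factory=list)


class ToyBmmPattern:
    """Illustrative stand-in following the same contract as BmmPattern."""

    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
        # What the partitioner searches the FX graph for.
        return [torch.bmm]

    def get_anchors(self, bmm_node: Any) -> Anchors:
        # bmm has two activation inputs and no weights or biases, so only the
        # inputs and the output are anchored for quantization.
        return Anchors(inputs=[(bmm_node, 0), (bmm_node, 1)], output=[(bmm_node,)])

    def replacement_op(self) -> str:
        # The real pattern returns torch.ops.cadence.quantized_matmul.default.
        return "cadence::quantized_matmul"


pattern = ToyBmmPattern()
print(pattern.partition_types())        # [<built-in method bmm ...>]
print(pattern.get_anchors("bmm_node"))  # anchors both inputs and the output
print(pattern.replacement_op())         # cadence::quantized_matmul
```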
