Skip to content

Commit 83e7624

Browse files
authored
[retry] Use pytorch-labs/tokenizers and remove tokenizer/ (#1401) (#1443)
* [retry] Use pytorch-labs/tokenizers and remove tokenizer/ (#1401)

  Summary: Retry of #1401

  Test Plan: Re-run the repro command in #1413:

  ```
  python3 torchchat.py generate llama3.2-1b-base --prompt "write me a story about a boy and his bear"
  ```

  Reviewers: Subscribers: Tasks: Tags:

* Use latest commit

  Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:

* Try enabling _GLIBCXX_USE_CXX11_ABI

  Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:

* Fix PUBLIC issue

  Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent 019f76f commit 83e7624

File tree

14 files changed

+245
-1183
lines changed

14 files changed

+245
-1183
lines changed

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -922,7 +922,7 @@ jobs:
922922
path: |
923923
./et-build
924924
./torchchat/utils/scripts
925-
key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
925+
key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh', '**/build_native.sh') }}
926926
- if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
927927
continue-on-error: true
928928
run: |
@@ -1033,7 +1033,7 @@ jobs:
10331033
10341034
# Pull submodules (re2, abseil) for Tiktoken
10351035
git submodule sync
1036-
git submodule update --init
1036+
git submodule update --init --recursive
10371037
./runner/build_android.sh
10381038
echo "Tests complete."
10391039

.gitmodules

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,3 @@
1-
[submodule "tokenizer/third-party/abseil-cpp"]
2-
path = tokenizer/third-party/abseil-cpp
3-
url = https://github.com/abseil/abseil-cpp.git
4-
[submodule "tokenizer/third-party/re2"]
5-
path = tokenizer/third-party/re2
6-
url = https://github.com/google/re2.git
7-
[submodule "tokenizer/third-party/sentencepiece"]
8-
path = tokenizer/third-party/sentencepiece
9-
url = https://github.com/google/sentencepiece.git
1+
[submodule "runner/third-party/tokenizers"]
2+
path = runner/third-party/tokenizers
3+
url = https://github.com/pytorch-labs/tokenizers

CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,18 +7,21 @@ ELSE()
77
ENDIF()
88

99
project(Torchchat)
10+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
1011

1112
# include tokenizer
12-
add_subdirectory(tokenizer)
13+
add_subdirectory(runner/third-party/tokenizers)
1314

1415
# include et_run executable
1516
include(runner/et.cmake)
1617
if(TARGET et_run)
17-
target_link_libraries(et_run PUBLIC tokenizer microkernels-prod)
18+
target_link_libraries(et_run PUBLIC tokenizers microkernels-prod)
19+
target_include_directories(et_run PUBLIC runner/third-party/tokenizers/include)
1820
endif()
1921

2022
# include aoti_run executable
2123
include(runner/aoti.cmake)
2224
if(TARGET aoti_run)
23-
target_link_libraries(aoti_run tokenizer)
25+
target_link_libraries(aoti_run tokenizers)
26+
target_include_directories(aoti_run PUBLIC runner/third-party/tokenizers/include)
2427
endif()

0 commit comments

Comments
 (0)