Skip to content

Commit c2405a9

Browse files
larryliu0820 authored and facebook-github-bot committed
Move headers from include/ to include/pytorch/tokenizers/ (#20)
Summary: Pull Request resolved: #20 Mostly for avoiding internal confusion. Reviewed By: lucylq Differential Revision: D69677244
1 parent bba6759 commit c2405a9

29 files changed

+40
-40
lines changed
File renamed without changes.

include/detail/bpe_tokenizer_base.h renamed to include/pytorch/tokenizers/bpe_tokenizer_base.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
#include <re2/re2.h>
2222

2323
// Local
24-
#include "result.h"
25-
#include "tokenizer.h"
24+
#include <pytorch/tokenizers/result.h>
25+
#include <pytorch/tokenizers/tokenizer.h>
2626

2727
namespace tokenizers {
2828
namespace detail {

include/error.h renamed to include/pytorch/tokenizers/error.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313

1414
#pragma once
1515

16+
#include <pytorch/tokenizers/log.h>
1617
#include <stdint.h>
17-
#include "log.h"
1818

1919
namespace tokenizers {
2020

include/hf_tokenizer.h renamed to include/pytorch/tokenizers/hf_tokenizer.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
#include <re2/re2.h>
2020

2121
// Local
22-
#include "detail/bpe_tokenizer_base.h"
23-
#include "error.h"
24-
#include "pre_tokenizer.h"
25-
#include "result.h"
26-
#include "token_decoder.h"
22+
#include <pytorch/tokenizers/bpe_tokenizer_base.h>
23+
#include <pytorch/tokenizers/error.h>
24+
#include <pytorch/tokenizers/pre_tokenizer.h>
25+
#include <pytorch/tokenizers/result.h>
26+
#include <pytorch/tokenizers/token_decoder.h>
2727

2828
namespace tokenizers {
2929
class HFTokenizer : public detail::BPETokenizerBase {

include/llama2c_tokenizer.h renamed to include/pytorch/tokenizers/llama2c_tokenizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
*/
88
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
99
#pragma once
10+
#include <pytorch/tokenizers/tokenizer.h>
1011
#include <memory>
11-
#include "tokenizer.h"
1212

1313
namespace tokenizers {
1414

include/log.h renamed to include/pytorch/tokenizers/log.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include <cstdarg>
1717
#include <cstddef>
18+
#include <cstdint>
1819
#include <cstdio>
1920
#include <cstdlib>
2021

File renamed without changes.

include/result.h renamed to include/pytorch/tokenizers/result.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313

1414
#pragma once
1515

16+
#include <pytorch/tokenizers/error.h>
1617
#include <cassert>
1718
#include <new>
1819
#include <utility>
19-
#include "error.h"
2020

2121
namespace tokenizers {
2222

include/sentencepiece.h renamed to include/pytorch/tokenizers/sentencepiece.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
// A tokenizer that works with sentencepiece. Used by Llama2.
1111
#pragma once
1212

13+
#include <pytorch/tokenizers/tokenizer.h>
1314
#include <memory>
1415
#include <vector>
1516
#include "sentencepiece_processor.h"
16-
#include "tokenizer.h"
1717
namespace tokenizers {
1818

1919
struct TokenIndex {

include/tiktoken.h renamed to include/pytorch/tokenizers/tiktoken.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
#include "re2/re2.h"
1818

1919
// Local
20-
#include "detail/bpe_tokenizer_base.h"
21-
#include "result.h"
22-
#include "tokenizer.h"
20+
#include <pytorch/tokenizers/bpe_tokenizer_base.h>
21+
#include <pytorch/tokenizers/result.h>
22+
#include <pytorch/tokenizers/tokenizer.h>
2323

2424
namespace tokenizers {
2525

File renamed without changes.

include/tokenizer.h renamed to include/pytorch/tokenizers/tokenizer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313

1414
#pragma once
1515

16+
#include <pytorch/tokenizers/error.h>
17+
#include <pytorch/tokenizers/result.h>
1618
#include <string>
1719
#include <vector>
18-
#include "error.h"
19-
#include "result.h"
2020

2121
namespace tokenizers {
2222

src/bpe_tokenizer_base.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#include "detail/bpe_tokenizer_base.h"
10+
#include <pytorch/tokenizers/bpe_tokenizer_base.h>
1111

1212
// Standard
1313
#include <inttypes.h>

src/hf_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#include "hf_tokenizer.h"
10+
#include <pytorch/tokenizers/hf_tokenizer.h>
1111

1212
// Standard
1313
#include <filesystem>

src/llama2c_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
9-
#include "llama2c_tokenizer.h"
9+
#include <pytorch/tokenizers/llama2c_tokenizer.h>
1010
#include <cstring>
1111

1212
namespace tokenizers {

src/pre_tokenizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
* This source code is licensed under the BSD-style license found in the
66
* LICENSE file in the root directory of this source tree.
77
*/
8-
#include "pre_tokenizer.h"
8+
9+
// Local
10+
#include <pytorch/tokenizers/pre_tokenizer.h>
11+
#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
912

1013
// Standard
1114
#include <algorithm>
@@ -15,9 +18,6 @@
1518
// Third Party
1619
#include <nlohmann/json.hpp>
1720

18-
// Local
19-
#include "unicode.h"
20-
2121
using json = nlohmann::json;
2222

2323
namespace tokenizers {

src/sentencepiece.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
// A tokenizer that works with sentencepiece.
1010

11-
#include "sentencepiece.h"
11+
#include <pytorch/tokenizers/sentencepiece.h>
1212
#include <cinttypes>
1313
#include <string>
1414
#include "third_party/absl/strings/str_replace.h"

src/tiktoken.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525
limitations under the License.
2626
*************************************************************************/
2727

28-
#include "tiktoken.h"
28+
#include <pytorch/tokenizers/base64.h>
29+
#include <pytorch/tokenizers/tiktoken.h>
2930
#include <cinttypes>
3031
#include <fstream>
3132
#include <limits>
32-
#include "base64.h"
3333
#include "re2/re2.h"
3434

3535
namespace tokenizers {

src/token_decoder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#include "token_decoder.h"
10+
#include <pytorch/tokenizers/token_decoder.h>
1111

1212
// Standard
1313
#include <cstdarg>
@@ -16,7 +16,7 @@
1616
#include <nlohmann/json.hpp>
1717

1818
// Local
19-
#include "unicode.h"
19+
#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
2020

2121
using json = nlohmann::json;
2222

targets.bzl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,12 @@ def define_common_targets():
1111
runtime.cxx_library(
1212
name = "headers",
1313
exported_headers = subdir_glob([
14-
("include", "*.h"),
15-
("include", "**/*.h"),
14+
("include", "pytorch/tokenizers/*.h"),
1615
]),
17-
header_namespace = "",
1816
visibility = [
1917
"@EXECUTORCH_CLIENTS",
2018
],
19+
header_namespace = "",
2120
)
2221

2322
runtime.cxx_library(
@@ -66,7 +65,7 @@ def define_common_targets():
6665
"third-party/llama.cpp-unicode/src/unicode-data.cpp",
6766
],
6867
exported_headers = subdir_glob([
69-
("third-party/llama.cpp-unicode/include", "*.h"),
68+
("include", "pytorch/tokenizers/third-party/llama.cpp-unicode/*.h"),
7069
]),
7170
header_namespace = "",
7271
)

test/test_base64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#include "base64.h"
9+
#include <pytorch/tokenizers/base64.h>
1010
#include "gtest/gtest.h"
1111

1212
namespace tokenizers {

test/test_llama2c_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include <TestResourceUtils/TestResourceUtils.h>
1111
#endif
1212
#include <gtest/gtest.h>
13-
#include "llama2c_tokenizer.h"
13+
#include <pytorch/tokenizers/llama2c_tokenizer.h>
1414

1515
using namespace ::testing;
1616

test/test_pre_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#include <re2/re2.h>
1313

1414
// Local
15-
#include "pre_tokenizer.h"
15+
#include <pytorch/tokenizers/pre_tokenizer.h>
1616

1717
using json = nlohmann::json;
1818
using namespace tokenizers;

test/test_sentencepiece.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <TestResourceUtils/TestResourceUtils.h>
1212
#endif
1313
#include <gtest/gtest.h>
14-
#include "sentencepiece.h"
14+
#include <pytorch/tokenizers/sentencepiece.h>
1515

1616
namespace tokenizers {
1717

test/test_tiktoken.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <TestResourceUtils/TestResourceUtils.h>
1212
#endif
1313
#include <gtest/gtest.h>
14-
#include "tiktoken.h"
14+
#include <pytorch/tokenizers/tiktoken.h>
1515

1616
using namespace ::testing;
1717

third-party/llama.cpp-unicode/src/unicode-data.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ SOFTWARE.
2727

2828
// generated with scripts/gen-unicode-data.py
2929

30-
#include "unicode-data.h"
30+
#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h>
3131

3232
#include <cstdint>
3333
#include <unordered_map>

third-party/llama.cpp-unicode/src/unicode.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ SOFTWARE.
2929
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
3030
#endif
3131

32-
#include "unicode.h"
33-
#include "unicode-data.h"
32+
#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
33+
#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h>
3434

3535
#include <algorithm>
3636
#include <cassert>

0 commit comments

Comments (0)