Skip to content

Commit f483d83

Browse files
larryliu0820 authored and facebook-github-bot committed
Move headers from include/ to include/pytorch/tokenizers/
Summary: Mostly for avoiding internal confusion. Differential Revision: D69677244
1 parent bba6759 commit f483d83

25 files changed: +21 −22 lines changed
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

src/bpe_tokenizer_base.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#include "detail/bpe_tokenizer_base.h"
10+
#include <pytorch/tokenizers/bpe_tokenizer_base.h>
1111

1212
// Standard
1313
#include <inttypes.h>

src/hf_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#include "hf_tokenizer.h"
10+
#include <pytorch/tokenizers/hf_tokenizer.h>
1111

1212
// Standard
1313
#include <filesystem>

src/llama2c_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
9-
#include "llama2c_tokenizer.h"
9+
#include <pytorch/tokenizers/llama2c_tokenizer.h>
1010
#include <cstring>
1111

1212
namespace tokenizers {

src/pre_tokenizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
* This source code is licensed under the BSD-style license found in the
66
* LICENSE file in the root directory of this source tree.
77
*/
8-
#include "pre_tokenizer.h"
8+
9+
// Local
10+
#include <pytorch/tokenizers/pre_tokenizer.h>
11+
#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
912

1013
// Standard
1114
#include <algorithm>
@@ -15,9 +18,6 @@
1518
// Third Party
1619
#include <nlohmann/json.hpp>
1720

18-
// Local
19-
#include "unicode.h"
20-
2121
using json = nlohmann::json;
2222

2323
namespace tokenizers {

src/sentencepiece.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
// A tokenizer that works with sentencepiece.
1010

11-
#include "sentencepiece.h"
11+
#include <pytorch/tokenizers/sentencepiece.h>
1212
#include <cinttypes>
1313
#include <string>
1414
#include "third_party/absl/strings/str_replace.h"

src/tiktoken.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525
limitations under the License.
2626
*************************************************************************/
2727

28-
#include "tiktoken.h"
28+
#include <pytorch/tokenizers/base64.h>
29+
#include <pytorch/tokenizers/tiktoken.h>
2930
#include <cinttypes>
3031
#include <fstream>
3132
#include <limits>
32-
#include "base64.h"
3333
#include "re2/re2.h"
3434

3535
namespace tokenizers {

src/token_decoder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#include "token_decoder.h"
10+
#include <pytorch/tokenizers/token_decoder.h>
1111

1212
// Standard
1313
#include <cstdarg>
@@ -16,7 +16,7 @@
1616
#include <nlohmann/json.hpp>
1717

1818
// Local
19-
#include "unicode.h"
19+
#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
2020

2121
using json = nlohmann::json;
2222

targets.bzl

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ def define_common_targets():
1111
runtime.cxx_library(
1212
name = "headers",
1313
exported_headers = subdir_glob([
14-
("include", "*.h"),
15-
("include", "**/*.h"),
14+
("include", "pytorch/tokenizers/*.h"),
15+
("include", "pytorch/tokenizers/**/*.h"),
1616
]),
17-
header_namespace = "",
1817
visibility = [
1918
"@EXECUTORCH_CLIENTS",
2019
],
20+
header_namespace = "",
2121
)
2222

2323
runtime.cxx_library(
@@ -66,9 +66,8 @@ def define_common_targets():
6666
"third-party/llama.cpp-unicode/src/unicode-data.cpp",
6767
],
6868
exported_headers = subdir_glob([
69-
("third-party/llama.cpp-unicode/include", "*.h"),
69+
("third-party/llama.cpp-unicode/include", "pytorch/tokenizers/third-party/llama.cpp-unicode/*.h"),
7070
]),
71-
header_namespace = "",
7271
)
7372

7473
runtime.cxx_library(

test/test_base64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#include "base64.h"
9+
#include <pytorch/tokenizers/base64.h>
1010
#include "gtest/gtest.h"
1111

1212
namespace tokenizers {

test/test_llama2c_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include <TestResourceUtils/TestResourceUtils.h>
1111
#endif
1212
#include <gtest/gtest.h>
13-
#include "llama2c_tokenizer.h"
13+
#include <pytorch/tokenizers/llama2c_tokenizer.h>
1414

1515
using namespace ::testing;
1616

test/test_pre_tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#include <re2/re2.h>
1313

1414
// Local
15-
#include "pre_tokenizer.h"
15+
#include <pytorch/tokenizers/pre_tokenizer.h>
1616

1717
using json = nlohmann::json;
1818
using namespace tokenizers;

test/test_sentencepiece.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <TestResourceUtils/TestResourceUtils.h>
1212
#endif
1313
#include <gtest/gtest.h>
14-
#include "sentencepiece.h"
14+
#include <pytorch/tokenizers/sentencepiece.h>
1515

1616
namespace tokenizers {
1717

test/test_tiktoken.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <TestResourceUtils/TestResourceUtils.h>
1212
#endif
1313
#include <gtest/gtest.h>
14-
#include "tiktoken.h"
14+
#include <pytorch/tokenizers/tiktoken.h>
1515

1616
using namespace ::testing;
1717

0 commit comments

Comments (0)