@@ -77,7 +77,6 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "mixtral-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1", },
     {"name": "refact",      "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
     {"name": "command-r",   "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
-    {"name": "qwen",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen-7B", },
     {"name": "qwen2",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
     {"name": "olmo",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
     {"name": "dbrx",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
@@ -126,28 +125,11 @@ def download_file_with_auth(url, token, save_path):
     logger.info(f"Downloading {name} to {model_name_or_path}")

     # model and repo urls are not the same
-    # url = "https://huggingface.co/Qwen/Qwen-tokenizer/raw/main/tokenizer.json"
-    if name == "qwen":  # qwen is an outlier and will raise a FileNotFoundError
-        # override the tokenizer path
-        model_tokenizer_path = f"{model_name_or_path}/qwen.tiktoken"
-        # fetch the qwens BPE tokenizer
-        download_file_with_auth(
-            url="https://huggingface.co/Qwen/Qwen-7B/raw/main/qwen.tiktoken",
-            token=token,
-            save_path=model_tokenizer_path
-        )
-        # fetch qwens tokenizer script; this is required.
-        download_file_with_auth(
-            url="https://huggingface.co/Qwen/Qwen-7B/raw/main/tokenization_qwen.py",
-            token=token,
-            save_path=f"{model_name_or_path}/tokenization_qwen.py"
-        )
-    else:  # Get the models tokenizer
-        download_file_with_auth(
-            url=f"{url_resolve}/tokenizer.json",
-            token=token,
-            save_path=model_tokenizer_path
-        )
+    download_file_with_auth(
+        url=f"{url_resolve}/tokenizer.json",
+        token=token,
+        save_path=model_tokenizer_path
+    )

     # Get the models hyper params
     download_file_with_auth(