@@ -102,8 +102,7 @@ def download_file_with_auth(url, token, save_path):
102
102
repo = model ["repo" ]
103
103
tokt = model ["tokt" ]
104
104
105
- # set url paths
106
- url_main = f"{ repo } /raw/main"
105
+ # NOTE: Always download via the resolve URL; the raw URL can return a small Git LFS pointer file instead of the actual file
107
106
url_resolve = f"{ repo } /resolve/main"
108
107
109
108
# set dir paths
@@ -138,28 +137,18 @@ def download_file_with_auth(url, token, save_path):
138
137
)
139
138
else : # Get the models tokenizer
140
139
download_file_with_auth (
141
- url = f"{ url_main } /tokenizer.json" ,
140
+ url = f"{ url_resolve } /tokenizer.json" ,
142
141
token = token ,
143
142
save_path = model_tokenizer_path
144
143
)
145
144
146
145
# Get the model's hyperparameters
147
146
download_file_with_auth (
148
- url = f"{ url_main } /config.json" ,
147
+ url = f"{ url_resolve } /config.json" ,
149
148
token = token ,
150
149
save_path = f"{ model_name_or_path } /config.json"
151
150
)
152
151
153
- # if downloaded file is less than 1KB, we likely need to download an LFS instead
154
- if os .path .getsize (model_tokenizer_path ) < 1024 :
155
- # remove the file
156
- os .remove (model_tokenizer_path )
157
- download_file_with_auth (
158
- url = f"{ url_resolve } /tokenizer.json" ,
159
- token = token ,
160
- save_path = model_tokenizer_path
161
- )
162
-
163
152
# Handle sentencepiece tokenizer
164
153
if tokt == TOKENIZER_TYPE .SPM :
165
154
download_file_with_auth (
@@ -170,7 +159,7 @@ def download_file_with_auth(url, token, save_path):
170
159
171
160
# Get the tokenizer config
172
161
download_file_with_auth (
173
- url = f"{ url_main } /tokenizer_config.json" ,
162
+ url = f"{ url_resolve } /tokenizer_config.json" ,
174
163
token = token ,
175
164
save_path = f"{ model_name_or_path } /tokenizer_config.json"
176
165
)
0 commit comments