patch: Apply fix for downloading related model files

teleprint-me · teleprint-me · commit de3d9e3b3326 · 2024-05-08T18:50:28.000-04:00
diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
@@ -102,8 +102,7 @@ def download_file_with_auth(url, token, save_path):
     repo = model["repo"]
     tokt = model["tokt"]
 
-    # set url paths
-    url_main = f"{repo}/raw/main"
+    # NOTE: Always download via resolve; raw can return a tiny LFS pointer instead of the file
     url_resolve = f"{repo}/resolve/main"
 
     # set dir paths
@@ -138,28 +137,18 @@ def download_file_with_auth(url, token, save_path):
         )
     else:  # Get the models tokenizer
         download_file_with_auth(
-            url=f"{url_main}/tokenizer.json",
+            url=f"{url_resolve}/tokenizer.json",
             token=token,
             save_path=model_tokenizer_path
         )
 
     # Get the models hyper params
     download_file_with_auth(
-        url=f"{url_main}/config.json",
+        url=f"{url_resolve}/config.json",
         token=token,
         save_path=f"{model_name_or_path}/config.json"
     )
 
-    # if downloaded file is less than 1KB, we likely need to download an LFS instead
-    if os.path.getsize(model_tokenizer_path) < 1024:
-        # remove the file
-        os.remove(model_tokenizer_path)
-        download_file_with_auth(
-            url=f"{url_resolve}/tokenizer.json",
-            token=token,
-            save_path=model_tokenizer_path
-        )
-
     # Handle sentencepiece tokenizer
     if tokt == TOKENIZER_TYPE.SPM:
         download_file_with_auth(
@@ -170,7 +159,7 @@ def download_file_with_auth(url, token, save_path):
 
     # Get the tokenizer config
     download_file_with_auth(
-        url=f"{url_main}/tokenizer_config.json",
+        url=f"{url_resolve}/tokenizer_config.json",
         token=token,
         save_path=f"{model_name_or_path}/tokenizer_config.json"
     )