Add support for Hugging Face's Ollama-compatible protocol

ericcurtin · ericcurtin · commit fccbb95f4184 · 2025-01-27T15:12:45.000Z
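When an hf://, huggingface:// or hf.co/ model reference names a repository without a file path, fetch https://huggingface.co/v2/<repo>/manifests/latest and download the GGUF file named in the manifest's ggufFile.rfilename field.

Reference: https://huggingface.co/docs/hub/en/ollama

Signed-off-by: Eric Curtin <ecurtin@redhat.com>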
diff --git a/examples/run/run.cpp b/examples/run/run.cpp
@@ -563,8 +563,8 @@ class LlamaData {
 
   private:
 #ifdef LLAMA_USE_CURL
-    int download(const std::string & url, const std::vector<std::string> & headers, const std::string & output_file,
-                 const bool progress, std::string * response_str = nullptr) {
+    int download(const std::string & url, const std::string & output_file, const bool progress,
+                 const std::vector<std::string> & headers = {}, std::string * response_str = nullptr) {
         HttpClient http;
         if (http.init(url, headers, output_file, progress, response_str)) {
             return 1;
@@ -573,28 +573,45 @@ class LlamaData {
         return 0;
     }
 #else
-    int download(const std::string &, const std::vector<std::string> &, const std::string &, const bool,
+    int download(const std::string &, const std::string &, const bool, const std::vector<std::string> & = {},
                  std::string * = nullptr) {
         printe("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
         return 1;
     }
 #endif
 
-    int huggingface_dl(const std::string & model, const std::vector<std::string> headers, const std::string & bn) {
+    int huggingface_dl(const std::string & model, const std::string & bn) {
         // Find the second occurrence of '/' after protocol string
         size_t pos = model.find('/');
         pos        = model.find('/', pos + 1);
+        std::string              hfr, hff;
+        std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
+        std::string              url;
         if (pos == std::string::npos) {
-            return 1;
+            std::string tag = "latest";
+            url             = "https://huggingface.co/v2/" + model + "/manifests/" + tag;
+            std::string manifest_str;
+            const int   ret = download(url, "", false, headers, &manifest_str);
+            if (ret) {
+                return ret;
+            }
+
+            nlohmann::json manifest = nlohmann::json::parse(manifest_str);
+            hfr                     = model;
+            hff                     = manifest["ggufFile"]["rfilename"];
+        } else {
+            hfr = model.substr(0, pos);
+            hff = model.substr(pos + 1);
+            printf("%s\n%s\n", hfr.c_str(), hff.c_str());
+            url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
         }
 
-        const std::string hfr = model.substr(0, pos);
-        const std::string hff = model.substr(pos + 1);
-        const std::string url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
-        return download(url, headers, bn, true);
+        url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
+        return download(url, bn, true, headers);
     }
 
-    int ollama_dl(std::string & model, const std::vector<std::string> headers, const std::string & bn) {
+    int ollama_dl(std::string & model, const std::string & bn) {
+        const std::vector<std::string> headers = { "Accept: application/vnd.docker.distribution.manifest.v2+json" };
         if (model.find('/') == std::string::npos) {
             model = "library/" + model;
         }
@@ -608,7 +625,7 @@ class LlamaData {
 
         std::string manifest_url = "https://registry.ollama.ai/v2/" + model + "/manifests/" + model_tag;
         std::string manifest_str;
-        const int   ret = download(manifest_url, headers, "", false, &manifest_str);
+        const int   ret = download(manifest_url, "", false, {}, &manifest_str);
         if (ret) {
             return ret;
         }
@@ -623,7 +640,7 @@ class LlamaData {
         }
 
         std::string blob_url = "https://registry.ollama.ai/v2/" + model + "/blobs/" + layer;
-        return download(blob_url, headers, bn, true);
+        return download(blob_url, bn, true, headers);
     }
 
     std::string basename(const std::string & path) {
@@ -653,22 +670,18 @@ class LlamaData {
             return ret;
         }
 
-        const std::string              bn      = basename(model_);
-        const std::vector<std::string> headers = { "--header",
-                                                   "Accept: application/vnd.docker.distribution.manifest.v2+json" };
+        const std::string bn = basename(model_);
         if (string_starts_with(model_, "hf://") || string_starts_with(model_, "huggingface://")) {
             rm_until_substring(model_, "://");
-            ret = huggingface_dl(model_, headers, bn);
+            ret = huggingface_dl(model_, bn);
         } else if (string_starts_with(model_, "hf.co/")) {
             rm_until_substring(model_, "hf.co/");
-            ret = huggingface_dl(model_, headers, bn);
-        } else if (string_starts_with(model_, "ollama://")) {
-            rm_until_substring(model_, "://");
-            ret = ollama_dl(model_, headers, bn);
+            ret = huggingface_dl(model_, bn);
         } else if (string_starts_with(model_, "https://")) {
-            ret = download(model_, headers, bn, true);
-        } else {
-            ret = ollama_dl(model_, headers, bn);
+            ret = download(model_, bn, true);
+        } else {  // ollama:// or nothing
+            rm_until_substring(model_, "://");
+            ret = ollama_dl(model_, bn);
         }
 
         model_ = bn;