@@ -162,6 +162,10 @@ struct common_hf_file_res {

#ifdef LLAMA_USE_CURL

+bool common_has_curl() {
+    return true;
+}
+
#ifdef __linux__
#include <linux/limits.h>
#elif defined(_WIN32)
@@ -527,64 +531,89 @@ static bool common_download_model(
    return true;
}

-/**
- * Allow getting the HF file from the HF repo with tag (like ollama), for example:
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
- * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
- *
- * Return pair of <repo, file> (with "repo" already having tag removed)
- *
- * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
- */
-static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
-    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
-    std::string tag = parts.size() > 1 ? parts.back() : "latest";
-    std::string hf_repo = parts[0];
-    if (string_split<std::string>(hf_repo, '/').size() != 2) {
-        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
-    }
-
-    // fetch model info from Hugging Face Hub API
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
    curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
    curl_slist_ptr http_headers;
-    std::string res_str;
+    std::vector<char> res_buffer;

-    std::string model_endpoint = get_model_endpoint();
-
-    std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
+    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
    typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
    auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
-        static_cast<std::string *>(data)->append((char *) ptr, size * nmemb);
+        auto data_vec = static_cast<std::vector<char> *>(data);
+        data_vec->insert(data_vec->end(), (char *) ptr, (char *) ptr + size * nmemb);
        return size * nmemb;
    };
    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
-    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
#if defined(_WIN32)
    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
#endif
-    if (!bearer_token.empty()) {
-        std::string auth_header = "Authorization: Bearer " + bearer_token;
-        http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
+    if (params.timeout > 0) {
+        curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
+    }
+    if (params.max_size > 0) {
+        curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
    }
-    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
-    http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
+    for (const auto & header : params.headers) {
+        http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
+    }
    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);

    CURLcode res = curl_easy_perform(curl.get());

    if (res != CURLE_OK) {
-        throw std::runtime_error("error: cannot make GET request to HF API");
+        std::string error_msg = curl_easy_strerror(res);
+        throw std::runtime_error("error: cannot make GET request: " + error_msg);
    }

    long res_code;
-    std::string ggufFile = "";
-    std::string mmprojFile = "";
    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
+
+    return { res_code, std::move(res_buffer) };
+}
+
+/**
+ * Allow getting the HF file from the HF repo with tag (like ollama), for example:
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
+ * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
+ *
+ * Return pair of <repo, file> (with "repo" already having tag removed)
+ *
+ * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
+ */
+static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
+    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
+    std::string tag = parts.size() > 1 ? parts.back() : "latest";
+    std::string hf_repo = parts[0];
+    if (string_split<std::string>(hf_repo, '/').size() != 2) {
+        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
+    }
+
+    std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
+
+    // headers
+    std::vector<std::string> headers;
+    headers.push_back("Accept: application/json");
+    if (!bearer_token.empty()) {
+        headers.push_back("Authorization: Bearer " + bearer_token);
+    }
+    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
+    // User-Agent header is already set in common_remote_get_content, no need to set it here
+
+    // make the request
+    common_remote_params params;
+    params.headers = headers;
+    auto res = common_remote_get_content(url, params);
+    long res_code = res.first;
+    std::string res_str(res.second.data(), res.second.size());
+    std::string ggufFile;
+    std::string mmprojFile;
+
    if (res_code == 200) {
        // extract ggufFile.rfilename in json, using regex
        {
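
Note (reviewer sketch, not part of the patch): a minimal example of how a caller other than common_get_hf_file might use the new common_remote_get_content helper. It assumes common_remote_params keeps the headers/timeout/max_size members exercised above; the exact member types and defaults are not visible in this diff.

// Hypothetical caller: fetch a small JSON document over HTTP(S).
static std::string fetch_json_example(const std::string & url, const std::string & token) {
    common_remote_params params;
    params.headers.push_back("Accept: application/json");
    if (!token.empty()) {
        params.headers.push_back("Authorization: Bearer " + token);
    }
    params.timeout  = 30;          // seconds, forwarded to CURLOPT_TIMEOUT (assumed numeric member)
    params.max_size = 1024 * 1024; // bytes, forwarded to CURLOPT_MAXFILESIZE (assumed numeric member)

    // may throw std::runtime_error on transport errors (see above)
    auto res = common_remote_get_content(url, params);
    if (res.first != 200) {
        throw std::runtime_error("unexpected HTTP status: " + std::to_string(res.first));
    }
    return std::string(res.second.data(), res.second.size());
}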
@@ -618,6 +647,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_

#else

+bool common_has_curl() {
+    return false;
+}
+
static bool common_download_file_single(const std::string &, const std::string &, const std::string &) {
    LOG_ERR("error: built without CURL, cannot download model from internet\n");
    return false;
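
Note (reviewer sketch, not part of the patch): with both build variants of common_has_curl() in place, a caller can feature-detect network support instead of falling into the stubbed-out paths. The surrounding variables here are hypothetical.

// Hypothetical call site: bail out early when the binary was built without CURL.
if (!common_has_curl()) {
    LOG_ERR("%s: built without CURL, cannot fetch '%s'\n", __func__, url.c_str());
    return false;
}
// ... otherwise proceed with common_remote_get_content(url, params) or a model download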
@@ -640,6 +673,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
    return {};
}

+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
+    throw std::runtime_error("error: built without CURL, cannot download model from the internet");
+}
+
#endif // LLAMA_USE_CURL

//
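
Note (reviewer sketch, not part of the patch): unlike the other non-CURL stubs, which return false or an empty result, the common_remote_get_content stub throws. A caller that must keep working in CURL-less builds would presumably catch it (or check common_has_curl() first), for example:

// Hypothetical guard: treat a CURL-less build as "no remote data" instead of aborting.
std::pair<long, std::vector<char>> res = { 0, {} };
try {
    res = common_remote_get_content(url, common_remote_params());
} catch (const std::exception & e) {
    LOG_WRN("remote fetch skipped: %s\n", e.what());
}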