Skip to content

Commit 0741014

Browse files
committed
common: llama_load_model_from_url support split
1 parent 18ff6ca commit 0741014

File tree

3 files changed

+122
-43
lines changed

3 files changed

+122
-43
lines changed

common/common.cpp

Lines changed: 116 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "common.h"
22
#include "llama.h"
3+
#include "ggml.h"
34

45
#include <algorithm>
56
#include <cassert>
@@ -39,6 +40,7 @@
3940
#endif
4041
#if defined(LLAMA_USE_CURL)
4142
#include <curl/curl.h>
43+
#include <curl/easy.h>
4244
#endif
4345

4446
#if defined(_MSC_VER)
@@ -61,7 +63,6 @@
6163
#else
6264
#include <sys/syslimits.h>
6365
#endif
64-
#define LLAMA_CURL_MAX_PATH_LENGTH PATH_MAX
6566
#define LLAMA_CURL_MAX_HEADER_LENGTH 256
6667
#endif // LLAMA_USE_CURL
6768

@@ -1653,24 +1654,25 @@ void llama_batch_add(
16531654

16541655
#ifdef LLAMA_USE_CURL
16551656

1656-
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
1657-
struct llama_model_params params) {
1658-
// Basic validation of the model_url
1659-
if (!model_url || strlen(model_url) == 0) {
1660-
fprintf(stderr, "%s: invalid model_url\n", __func__);
1661-
return NULL;
1657+
// Mask the userinfo (user:password) portion of a URL so it can be logged safely,
// e.g. "https://user:secret@host/file" -> "https://********@host/file".
// Returns the URL unchanged when it has no scheme separator or no userinfo.
static std::string llama_download_hide_password_in_url(const std::string & url) {
    std::size_t protocol_pos = url.find("://");
    if (protocol_pos == std::string::npos) {
        return url; // Malformed URL: no scheme separator
    }

    // A userinfo component, if present, is terminated by an '@' that appears
    // BEFORE the first '/' of the path (RFC 3986 authority syntax). An '@'
    // inside the path (e.g. ".../file@2.gguf") must NOT trigger masking.
    std::size_t path_pos = url.find('/', protocol_pos + 3);
    std::size_t at_pos   = url.find('@', protocol_pos + 3);
    if (at_pos == std::string::npos || (path_pos != std::string::npos && at_pos > path_pos)) {
        return url; // No userinfo in URL
    }

    return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
}
1670+
1671+
static bool llama_download_file(CURL * curl, const char * url, const char * path) {
1672+
bool force_download = false;
1673+
16721674
// Set the URL, allow to follow http redirection
1673-
curl_easy_setopt(curl, CURLOPT_URL, model_url);
1675+
curl_easy_setopt(curl, CURLOPT_URL, url);
16741676
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
16751677
#if defined(_WIN32)
16761678
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
@@ -1680,24 +1682,24 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
16801682

16811683
// Check if the file already exists locally
16821684
struct stat model_file_info;
1683-
auto file_exists = (stat(path_model, &model_file_info) == 0);
1685+
auto file_exists = (stat(path, &model_file_info) == 0);
16841686

16851687
// If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files
16861688
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1687-
char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1688-
snprintf(etag_path, sizeof(etag_path), "%s.etag", path_model);
1689+
char etag_path[PATH_MAX] = {0};
1690+
snprintf(etag_path, sizeof(etag_path), "%s.etag", path);
16891691

16901692
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1691-
char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1692-
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path_model);
1693+
char last_modified_path[PATH_MAX] = {0};
1694+
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path);
16931695

16941696
if (file_exists) {
16951697
auto * f_etag = fopen(etag_path, "r");
16961698
if (f_etag) {
16971699
if (!fgets(etag, sizeof(etag), f_etag)) {
16981700
fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path);
16991701
} else {
1700-
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, etag_path, etag);
1702+
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, etag_path, etag);
17011703
}
17021704
fclose(f_etag);
17031705
}
@@ -1707,7 +1709,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
17071709
if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) {
17081710
fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path);
17091711
} else {
1710-
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, last_modified_path,
1712+
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, last_modified_path,
17111713
last_modified);
17121714
}
17131715
fclose(f_last_modified);
@@ -1747,38 +1749,42 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
17471749
if (res != CURLE_OK) {
17481750
curl_easy_cleanup(curl);
17491751
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
1750-
return NULL;
1752+
return false;
17511753
}
17521754

17531755
long http_code = 0;
17541756
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
17551757
if (http_code != 200) {
17561758
// HEAD not supported, we don't know if the file has changed
17571759
// force trigger downloading
1758-
file_exists = false;
1760+
force_download = true;
17591761
fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
17601762
}
17611763
}
17621764

17631765
// If the ETag or the Last-Modified headers are different: trigger a new download
1764-
if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
1765-
char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1766-
snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
1766+
bool should_download = !file_exists
1767+
|| force_download
1768+
|| (strlen(headers.etag) > 0 && strcmp(etag, headers.etag) != 0)
1769+
|| (strlen(headers.last_modified) > 0 && strcmp(last_modified, headers.last_modified) != 0);
1770+
if (should_download) {
1771+
char path_temporary[PATH_MAX] = {0};
1772+
snprintf(path_temporary, sizeof(path_temporary), "%s.downloadInProgress", path);
17671773
if (file_exists) {
1768-
fprintf(stderr, "%s: deleting previous downloaded model file: %s\n", __func__, path_model);
1769-
if (remove(path_model) != 0) {
1774+
fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path);
1775+
if (remove(path) != 0) {
17701776
curl_easy_cleanup(curl);
1771-
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path_model);
1772-
return NULL;
1777+
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path);
1778+
return false;
17731779
}
17741780
}
17751781

17761782
// Set the output file
1777-
auto * outfile = fopen(path_model_temporary, "wb");
1783+
auto * outfile = fopen(path_temporary, "wb");
17781784
if (!outfile) {
17791785
curl_easy_cleanup(curl);
1780-
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
1781-
return NULL;
1786+
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path);
1787+
return false;
17821788
}
17831789

17841790
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
@@ -1793,14 +1799,14 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
17931799
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
17941800

17951801
// start the download
1796-
fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
1797-
model_url, path_model, headers.etag, headers.last_modified);
1802+
fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
1803+
llama_download_hide_password_in_url(url).c_str(), path, headers.etag, headers.last_modified);
17981804
auto res = curl_easy_perform(curl);
17991805
if (res != CURLE_OK) {
18001806
fclose(outfile);
18011807
curl_easy_cleanup(curl);
18021808
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
1803-
return NULL;
1809+
return false;
18041810
}
18051811

18061812
long http_code = 0;
@@ -1809,7 +1815,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
18091815
fclose(outfile);
18101816
curl_easy_cleanup(curl);
18111817
fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
1812-
return NULL;
1818+
return false;
18131819
}
18141820

18151821
// Clean up
@@ -1821,7 +1827,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
18211827
if (etag_file) {
18221828
fputs(headers.etag, etag_file);
18231829
fclose(etag_file);
1824-
fprintf(stderr, "%s: model etag saved %s: %s\n", __func__, etag_path, headers.etag);
1830+
fprintf(stderr, "%s: file etag saved %s: %s\n", __func__, etag_path, headers.etag);
18251831
}
18261832
}
18271833

@@ -1831,16 +1837,85 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
18311837
if (last_modified_file) {
18321838
fputs(headers.last_modified, last_modified_file);
18331839
fclose(last_modified_file);
1834-
fprintf(stderr, "%s: model last modified saved %s: %s\n", __func__, last_modified_path,
1840+
fprintf(stderr, "%s: file last modified saved %s: %s\n", __func__, last_modified_path,
18351841
headers.last_modified);
18361842
}
18371843
}
18381844

1839-
if (rename(path_model_temporary, path_model) != 0) {
1845+
if (rename(path_temporary, path) != 0) {
18401846
curl_easy_cleanup(curl);
1841-
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
1847+
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary, path);
1848+
return false;
1849+
}
1850+
}
1851+
1852+
return true;
1853+
}
1854+
1855+
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
1856+
struct llama_model_params params) {
1857+
// Basic validation of the model_url
1858+
if (!model_url || strlen(model_url) == 0) {
1859+
fprintf(stderr, "%s: invalid model_url\n", __func__);
1860+
return NULL;
1861+
}
1862+
1863+
// Initialize libcurl
1864+
auto * curl = curl_easy_init();
1865+
1866+
if (!curl) {
1867+
fprintf(stderr, "%s: error initializing libcurl\n", __func__);
1868+
return NULL;
1869+
}
1870+
1871+
if (!llama_download_file(curl, model_url, path_model)) {
1872+
return NULL;
1873+
}
1874+
1875+
// check for additional GGUFs split to download
1876+
int n_split = 0;
1877+
{
1878+
struct gguf_init_params gguf_params = {
1879+
/*.no_alloc = */ true,
1880+
/*.ctx = */ NULL,
1881+
};
1882+
auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
1883+
if (!ctx_gguf) {
1884+
fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model);
1885+
return NULL;
1886+
}
1887+
1888+
auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT);
1889+
if (key_n_split >= 0) {
1890+
n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
1891+
}
1892+
1893+
gguf_free(ctx_gguf);
1894+
}
1895+
1896+
auto basename = [](const std::string& pathname)->std::string {
1897+
return pathname.substr(pathname.find_last_of("/\\") + 1);
1898+
};
1899+
auto dirname = [](const std::string& pathname)->std::string {
1900+
return pathname.substr(0,pathname.find_last_of("/\\"));
1901+
};
1902+
1903+
if (n_split > 1) {
1904+
char split_path[PATH_MAX] = {0};
1905+
strncpy(split_path, path_model, sizeof(split_path) - 1);
1906+
char split_prefix[PATH_MAX] = {0};
1907+
if (!llama_split_prefix(split_prefix, split_path, strlen(split_path), 0, n_split)) {
1908+
fprintf(stderr, "\n%s: unexpected input file name: %s"
1909+
" n_split=%d\n", __func__, split_path, n_split);
18421910
return NULL;
18431911
}
1912+
for (int idx = 1; idx < n_split; idx++) {
1913+
llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split);
1914+
auto split_url = dirname(model_url) + "/" + basename(split_path);
1915+
if (!llama_download_file(curl, split_url.c_str(), split_path)) {
1916+
return NULL;
1917+
}
1918+
}
18441919
}
18451920

18461921
curl_easy_cleanup(curl);

common/common.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,3 +302,9 @@ struct llama_control_vector_load_info {
302302
// Load control vectors, scale each by strength, and add them together.
303303
// On error, returns {-1, empty}
304304
llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
305+
306+
//
307+
// Split utils
308+
//
309+
static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = "split.no";
310+
static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = "split.count";

examples/gguf-split/gguf-split.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ enum split_operation : uint8_t {
2626
SPLIT_OP_MERGE,
2727
};
2828

29-
static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = "split.no";
30-
static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = "split.count";
3129
static const char * const LLM_KV_GENERAL_SPLIT_N_TENSORS = "split.tensors.count";
3230

3331
struct split_params {

0 commit comments

Comments
 (0)