Skip to content

Commit f68d6a2

Browse files
phymbert and ggerganov
authored and committed
common: llama_load_model_from_url using --model-url (ggml-org#6098)
* common: llama_load_model_from_url with libcurl dependency Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 0bb088b commit f68d6a2

File tree

16 files changed

+399
-57
lines changed

16 files changed

+399
-57
lines changed

.github/workflows/build.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,28 @@ jobs:
4848
CC=gcc-8 make tests -j $(nproc)
4949
make test -j $(nproc)
5050
51+
ubuntu-focal-make-curl:
52+
runs-on: ubuntu-20.04
53+
54+
steps:
55+
- name: Clone
56+
id: checkout
57+
uses: actions/checkout@v3
58+
59+
- name: Dependencies
60+
id: depends
61+
run: |
62+
sudo apt-get update
63+
sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev
64+
65+
- name: Build
66+
id: make_build
67+
env:
68+
LLAMA_FATAL_WARNINGS: 1
69+
LLAMA_CURL: 1
70+
run: |
71+
CC=gcc-8 make -j $(nproc)
72+
5173
ubuntu-latest-cmake:
5274
runs-on: ubuntu-latest
5375

.github/workflows/server.yml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ jobs:
5757
cmake \
5858
python3-pip \
5959
wget \
60-
language-pack-en
60+
language-pack-en \
61+
libcurl4-openssl-dev
6162
6263
- name: Build
6364
id: cmake_build
@@ -67,6 +68,7 @@ jobs:
6768
cmake .. \
6869
-DLLAMA_NATIVE=OFF \
6970
-DLLAMA_BUILD_SERVER=ON \
71+
-DLLAMA_CURL=ON \
7072
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
7173
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
7274
cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
@@ -101,12 +103,21 @@ jobs:
101103
with:
102104
fetch-depth: 0
103105

106+
- name: libCURL
107+
id: get_libcurl
108+
env:
109+
CURL_VERSION: 8.6.0_6
110+
run: |
111+
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
112+
mkdir $env:RUNNER_TEMP/libcurl
113+
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
114+
104115
- name: Build
105116
id: cmake_build
106117
run: |
107118
mkdir build
108119
cd build
109-
cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
120+
cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
110121
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
111122
112123
- name: Python setup
@@ -120,6 +131,11 @@ jobs:
120131
run: |
121132
pip install -r examples/server/tests/requirements.txt
122133
134+
- name: Copy Libcurl
135+
id: prepare_libcurl
136+
run: |
137+
cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
138+
123139
- name: Tests
124140
id: server_integration_tests
125141
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some
9999
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
100100
set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
101101
"llama: max. batch size for using peer access")
102+
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
102103
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
103104
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
104105
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,11 @@ include scripts/get-flags.mk
595595
CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
596596
endif
597597

598+
ifdef LLAMA_CURL
599+
override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
600+
override LDFLAGS := $(LDFLAGS) -lcurl
601+
endif
602+
598603
#
599604
# Print build information
600605
#

common/CMakeLists.txt

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,17 @@ if (BUILD_SHARED_LIBS)
6868
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
6969
endif()
7070

71+
set(LLAMA_COMMON_EXTRA_LIBS build_info)
72+
73+
# Use curl to download model url
74+
if (LLAMA_CURL)
75+
find_package(CURL REQUIRED)
76+
add_definitions(-DLLAMA_USE_CURL)
77+
include_directories(${CURL_INCLUDE_DIRS})
78+
find_library(CURL_LIBRARY curl REQUIRED)
79+
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
80+
endif ()
81+
7182
target_include_directories(${TARGET} PUBLIC .)
7283
target_compile_features(${TARGET} PUBLIC cxx_std_11)
73-
target_link_libraries(${TARGET} PRIVATE build_info PUBLIC llama)
84+
target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama)

common/common.cpp

Lines changed: 237 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
#include <sys/stat.h>
3838
#include <unistd.h>
3939
#endif
40+
#if defined(LLAMA_USE_CURL)
41+
#include <curl/curl.h>
42+
#endif
4043

4144
#if defined(_MSC_VER)
4245
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -50,6 +53,18 @@
5053
#define GGML_USE_CUBLAS_SYCL_VULKAN
5154
#endif
5255

56+
#if defined(LLAMA_USE_CURL)
57+
#ifdef __linux__
58+
#include <linux/limits.h>
59+
#elif defined(_WIN32)
60+
#define PATH_MAX MAX_PATH
61+
#else
62+
#include <sys/syslimits.h>
63+
#endif
64+
#define LLAMA_CURL_MAX_PATH_LENGTH PATH_MAX
65+
#define LLAMA_CURL_MAX_HEADER_LENGTH 256
66+
#endif // LLAMA_USE_CURL
67+
5368
int32_t get_num_physical_cores() {
5469
#ifdef __linux__
5570
// enumerate the set of thread siblings, num entries is num cores
@@ -644,6 +659,13 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
644659
}
645660
params.model = argv[i];
646661
}
662+
if (arg == "-mu" || arg == "--model-url") {
663+
if (++i >= argc) {
664+
invalid_param = true;
665+
break;
666+
}
667+
params.model_url = argv[i];
668+
}
647669
if (arg == "-md" || arg == "--model-draft") {
648670
arg_found = true;
649671
if (++i >= argc) {
@@ -1368,6 +1390,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
13681390
printf(" layer range to apply the control vector(s) to, start and end inclusive\n");
13691391
printf(" -m FNAME, --model FNAME\n");
13701392
printf(" model path (default: %s)\n", params.model.c_str());
1393+
printf(" -mu MODEL_URL, --model-url MODEL_URL\n");
1394+
printf(" model download url (default: %s)\n", params.model_url.c_str());
13711395
printf(" -md FNAME, --model-draft FNAME\n");
13721396
printf(" draft model for speculative decoding\n");
13731397
printf(" -ld LOGDIR, --logdir LOGDIR\n");
@@ -1613,10 +1637,222 @@ void llama_batch_add(
16131637
batch.n_tokens++;
16141638
}
16151639

1640+
#ifdef LLAMA_USE_CURL
1641+
1642+
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
1643+
struct llama_model_params params) {
1644+
// Basic validation of the model_url
1645+
if (!model_url || strlen(model_url) == 0) {
1646+
fprintf(stderr, "%s: invalid model_url\n", __func__);
1647+
return NULL;
1648+
}
1649+
1650+
// Initialize libcurl globally
1651+
auto curl = curl_easy_init();
1652+
1653+
if (!curl) {
1654+
fprintf(stderr, "%s: error initializing libcurl\n", __func__);
1655+
return NULL;
1656+
}
1657+
1658+
// Set the URL, allow to follow http redirection
1659+
curl_easy_setopt(curl, CURLOPT_URL, model_url);
1660+
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
1661+
#if defined(_WIN32)
1662+
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
1663+
// operating system. Currently implemented under MS-Windows.
1664+
curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
1665+
#endif
1666+
1667+
// Check if the file already exists locally
1668+
struct stat model_file_info;
1669+
auto file_exists = (stat(path_model, &model_file_info) == 0);
1670+
1671+
// If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files
1672+
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1673+
char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1674+
snprintf(etag_path, sizeof(etag_path), "%s.etag", path_model);
1675+
1676+
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1677+
char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1678+
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path_model);
1679+
1680+
if (file_exists) {
1681+
auto * f_etag = fopen(etag_path, "r");
1682+
if (f_etag) {
1683+
if (!fgets(etag, sizeof(etag), f_etag)) {
1684+
fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path);
1685+
} else {
1686+
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, etag_path, etag);
1687+
}
1688+
fclose(f_etag);
1689+
}
1690+
1691+
auto * f_last_modified = fopen(last_modified_path, "r");
1692+
if (f_last_modified) {
1693+
if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) {
1694+
fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path);
1695+
} else {
1696+
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, last_modified_path,
1697+
last_modified);
1698+
}
1699+
fclose(f_last_modified);
1700+
}
1701+
}
1702+
1703+
// Send a HEAD request to retrieve the etag and last-modified headers
1704+
struct llama_load_model_from_url_headers {
1705+
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1706+
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
1707+
};
1708+
llama_load_model_from_url_headers headers;
1709+
{
1710+
typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
1711+
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
1712+
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
1713+
1714+
const char * etag_prefix = "etag: ";
1715+
if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) {
1716+
strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF
1717+
}
1718+
1719+
const char * last_modified_prefix = "last-modified: ";
1720+
if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) {
1721+
strncpy(headers->last_modified, buffer + strlen(last_modified_prefix),
1722+
n_items - strlen(last_modified_prefix) - 2); // Remove CRLF
1723+
}
1724+
return n_items;
1725+
};
1726+
1727+
curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
1728+
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
1729+
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
1730+
curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headers);
1731+
1732+
CURLcode res = curl_easy_perform(curl);
1733+
if (res != CURLE_OK) {
1734+
curl_easy_cleanup(curl);
1735+
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
1736+
return NULL;
1737+
}
1738+
1739+
long http_code = 0;
1740+
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
1741+
if (http_code != 200) {
1742+
// HEAD not supported, we don't know if the file has changed
1743+
// force trigger downloading
1744+
file_exists = false;
1745+
fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
1746+
}
1747+
}
1748+
1749+
// If the ETag or the Last-Modified headers are different: trigger a new download
1750+
if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
1751+
char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
1752+
snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
1753+
if (file_exists) {
1754+
fprintf(stderr, "%s: deleting previous downloaded model file: %s\n", __func__, path_model);
1755+
if (remove(path_model) != 0) {
1756+
curl_easy_cleanup(curl);
1757+
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path_model);
1758+
return NULL;
1759+
}
1760+
}
1761+
1762+
// Set the output file
1763+
auto * outfile = fopen(path_model_temporary, "wb");
1764+
if (!outfile) {
1765+
curl_easy_cleanup(curl);
1766+
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
1767+
return NULL;
1768+
}
1769+
1770+
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
1771+
auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
1772+
return fwrite(data, size, nmemb, (FILE *)fd);
1773+
};
1774+
curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
1775+
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
1776+
curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile);
1777+
1778+
// display download progress
1779+
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
1780+
1781+
// start the download
1782+
fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
1783+
model_url, path_model, headers.etag, headers.last_modified);
1784+
auto res = curl_easy_perform(curl);
1785+
if (res != CURLE_OK) {
1786+
fclose(outfile);
1787+
curl_easy_cleanup(curl);
1788+
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
1789+
return NULL;
1790+
}
1791+
1792+
long http_code = 0;
1793+
curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &http_code);
1794+
if (http_code < 200 || http_code >= 400) {
1795+
fclose(outfile);
1796+
curl_easy_cleanup(curl);
1797+
fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
1798+
return NULL;
1799+
}
1800+
1801+
// Clean up
1802+
fclose(outfile);
1803+
1804+
// Write the new ETag to the .etag file
1805+
if (strlen(headers.etag) > 0) {
1806+
auto * etag_file = fopen(etag_path, "w");
1807+
if (etag_file) {
1808+
fputs(headers.etag, etag_file);
1809+
fclose(etag_file);
1810+
fprintf(stderr, "%s: model etag saved %s: %s\n", __func__, etag_path, headers.etag);
1811+
}
1812+
}
1813+
1814+
// Write the new lastModified to the .lastModified file
1815+
if (strlen(headers.last_modified) > 0) {
1816+
auto * last_modified_file = fopen(last_modified_path, "w");
1817+
if (last_modified_file) {
1818+
fputs(headers.last_modified, last_modified_file);
1819+
fclose(last_modified_file);
1820+
fprintf(stderr, "%s: model last modified saved %s: %s\n", __func__, last_modified_path,
1821+
headers.last_modified);
1822+
}
1823+
}
1824+
1825+
if (rename(path_model_temporary, path_model) != 0) {
1826+
curl_easy_cleanup(curl);
1827+
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
1828+
return NULL;
1829+
}
1830+
}
1831+
1832+
curl_easy_cleanup(curl);
1833+
1834+
return llama_load_model_from_file(path_model, params);
1835+
}
1836+
1837+
#else
1838+
1839+
struct llama_model * llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/,
1840+
struct llama_model_params /*params*/) {
1841+
fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
1842+
return nullptr;
1843+
}
1844+
1845+
#endif // LLAMA_USE_CURL
1846+
16161847
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
16171848
auto mparams = llama_model_params_from_gpt_params(params);
16181849

1619-
llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
1850+
llama_model * model = nullptr;
1851+
if (!params.model_url.empty()) {
1852+
model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), mparams);
1853+
} else {
1854+
model = llama_load_model_from_file(params.model.c_str(), mparams);
1855+
}
16201856
if (model == NULL) {
16211857
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
16221858
return std::make_tuple(nullptr, nullptr);

0 commit comments

Comments
 (0)