37 | 37 | #include <sys/stat.h>
38 | 38 | #include <unistd.h>
39 | 39 | #endif
| 40 | +#if defined(LLAMA_USE_CURL) |
| 41 | +#include <curl/curl.h> |
| 42 | +#endif |
40 | 43 |
41 | 44 | #if defined(_MSC_VER)
42 | 45 | #pragma warning(disable: 4244 4267) // possible loss of data
50 | 53 | #define GGML_USE_CUBLAS_SYCL_VULKAN
51 | 54 | #endif
52 | 55 |
| 56 | +#if defined(LLAMA_USE_CURL) |
| 57 | +#ifdef __linux__ |
| 58 | +#include <linux/limits.h> |
| 59 | +#elif defined(_WIN32) |
| 60 | +#define PATH_MAX MAX_PATH |
| 61 | +#else |
| 62 | +#include <sys/syslimits.h> |
| 63 | +#endif |
| 64 | +#define LLAMA_CURL_MAX_PATH_LENGTH PATH_MAX |
| 65 | +#define LLAMA_CURL_MAX_HEADER_LENGTH 256 |
| 66 | +#endif // LLAMA_USE_CURL |
| 67 | + |
53 | 68 | int32_t get_num_physical_cores() {
54 | 69 | #ifdef __linux__
55 | 70 | // enumerate the set of thread siblings, num entries is num cores
@@ -644,6 +659,13 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
644 | 659 | }
645 | 660 | params.model = argv[i];
646 | 661 | }
| 662 | + if (arg == "-mu" || arg == "--model-url") { |
| 663 | + if (++i >= argc) { |
| 664 | + invalid_param = true; |
| 665 | + break; |
| 666 | + } |
| 667 | + params.model_url = argv[i]; |
| 668 | + } |
647 | 669 | if (arg == "-md" || arg == "--model-draft") {
648 | 670 | arg_found = true;
649 | 671 | if (++i >= argc) {
@@ -1368,6 +1390,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
1368 | 1390 | printf(" layer range to apply the control vector(s) to, start and end inclusive\n");
1369 | 1391 | printf(" -m FNAME, --model FNAME\n");
1370 | 1392 | printf(" model path (default: %s)\n", params.model.c_str());
| 1393 | + printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); |
| 1394 | + printf(" model download url (default: %s)\n", params.model_url.c_str()); |
1371 | 1395 | printf(" -md FNAME, --model-draft FNAME\n");
1372 | 1396 | printf(" draft model for speculative decoding\n");
1373 | 1397 | printf(" -ld LOGDIR, --logdir LOGDIR\n");
@@ -1613,10 +1637,222 @@ void llama_batch_add(
1613 | 1637 | batch.n_tokens++;
1614 | 1638 | }
1615 | 1639 |
| 1640 | +#ifdef LLAMA_USE_CURL |
| 1641 | + |
| 1642 | +struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, |
| 1643 | + struct llama_model_params params) { |
| 1644 | + // Basic validation of the model_url |
| 1645 | + if (!model_url || strlen(model_url) == 0) { |
| 1646 | + fprintf(stderr, "%s: invalid model_url\n", __func__); |
| 1647 | + return NULL; |
| 1648 | + } |
| 1649 | + |
| 1650 | + // Initialize a libcurl easy handle |
| 1651 | + auto curl = curl_easy_init(); |
| 1652 | + |
| 1653 | + if (!curl) { |
| 1654 | + fprintf(stderr, "%s: error initializing libcurl\n", __func__); |
| 1655 | + return NULL; |
| 1656 | + } |
| 1657 | + |
| 1658 | + // Set the URL and allow following HTTP redirects |
| 1659 | + curl_easy_setopt(curl, CURLOPT_URL, model_url); |
| 1660 | + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); |
| 1661 | +#if defined(_WIN32) |
| 1662 | + // CURLSSLOPT_NATIVE_CA tells libcurl to use the standard certificate store of |
| 1663 | + // the operating system. Currently implemented under MS-Windows. |
| 1664 | + curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); |
| 1665 | +#endif |
| 1666 | + |
| 1667 | + // Check if the file already exists locally |
| 1668 | + struct stat model_file_info; |
| 1669 | + auto file_exists = (stat(path_model, &model_file_info) == 0); |
| 1670 | + |
| 1671 | + // If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files |
| 1672 | + char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; |
| 1673 | + char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; |
| 1674 | + snprintf(etag_path, sizeof(etag_path), "%s.etag", path_model); |
| 1675 | + |
| 1676 | + char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; |
| 1677 | + char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; |
| 1678 | + snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path_model); |
| 1679 | + |
| 1680 | + if (file_exists) { |
| 1681 | + auto * f_etag = fopen(etag_path, "r"); |
| 1682 | + if (f_etag) { |
| 1683 | + if (!fgets(etag, sizeof(etag), f_etag)) { |
| 1684 | + fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path); |
| 1685 | + } else { |
| 1686 | + fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, etag_path, etag); |
| 1687 | + } |
| 1688 | + fclose(f_etag); |
| 1689 | + } |
| 1690 | + |
| 1691 | + auto * f_last_modified = fopen(last_modified_path, "r"); |
| 1692 | + if (f_last_modified) { |
| 1693 | + if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) { |
| 1694 | + fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path); |
| 1695 | + } else { |
| 1696 | + fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, last_modified_path, |
| 1697 | + last_modified); |
| 1698 | + } |
| 1699 | + fclose(f_last_modified); |
| 1700 | + } |
| 1701 | + } |
| 1702 | + |
| 1703 | + // Send a HEAD request to retrieve the etag and last-modified headers |
| 1704 | + struct llama_load_model_from_url_headers { |
| 1705 | + char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; |
| 1706 | + char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; |
| 1707 | + }; |
| 1708 | + llama_load_model_from_url_headers headers; |
| 1709 | + { |
| 1710 | + typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); |
| 1711 | + auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { |
| 1712 | + llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata; |
| 1713 | + |
| 1714 | + const char * etag_prefix = "etag: "; |
| 1715 | + if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) { |
| 1716 | + strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF |
| 1717 | + } |
| 1718 | + |
| 1719 | + const char * last_modified_prefix = "last-modified: "; |
| 1720 | + if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) { |
| 1721 | + strncpy(headers->last_modified, buffer + strlen(last_modified_prefix), |
| 1722 | + n_items - strlen(last_modified_prefix) - 2); // Remove CRLF |
| 1723 | + } |
| 1724 | + return n_items; |
| 1725 | + }; |
| 1726 | + |
| 1727 | + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb |
| 1728 | + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress |
| 1729 | + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback)); |
| 1730 | + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headers); |
| 1731 | + |
| 1732 | + CURLcode res = curl_easy_perform(curl); |
| 1733 | + if (res != CURLE_OK) { |
| 1734 | + curl_easy_cleanup(curl); |
| 1735 | + fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); |
| 1736 | + return NULL; |
| 1737 | + } |
| 1738 | + |
| 1739 | + long http_code = 0; |
| 1740 | + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); |
| 1741 | + if (http_code != 200) { |
| 1742 | + // HEAD not supported, so we don't know whether the file has changed; |
| 1743 | + // force a fresh download |
| 1744 | + file_exists = false; |
| 1745 | + fprintf(stderr, "%s: HEAD request returned invalid http status code: %ld\n", __func__, http_code); |
| 1746 | + } |
| 1747 | + } |
| 1748 | + |
| 1749 | + // If the ETag or the Last-Modified headers are different: trigger a new download |
| 1750 | + if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { |
| 1751 | + char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; |
| 1752 | + snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model); |
| 1753 | + if (file_exists) { |
| 1754 | + fprintf(stderr, "%s: deleting previously downloaded model file: %s\n", __func__, path_model); |
| 1755 | + if (remove(path_model) != 0) { |
| 1756 | + curl_easy_cleanup(curl); |
| 1757 | + fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path_model); |
| 1758 | + return NULL; |
| 1759 | + } |
| 1760 | + } |
| 1761 | + |
| 1762 | + // Set the output file |
| 1763 | + auto * outfile = fopen(path_model_temporary, "wb"); |
| 1764 | + if (!outfile) { |
| 1765 | + curl_easy_cleanup(curl); |
| 1766 | + fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); |
| 1767 | + return NULL; |
| 1768 | + } |
| 1769 | + |
| 1770 | + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); |
| 1771 | + auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { |
| 1772 | + return fwrite(data, size, nmemb, (FILE *)fd); |
| 1773 | + }; |
| 1774 | + curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); |
| 1775 | + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback)); |
| 1776 | + curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile); |
| 1777 | + |
| 1778 | + // display download progress |
| 1779 | + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); |
| 1780 | + |
| 1781 | + // start the download |
| 1782 | + fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, |
| 1783 | + model_url, path_model, headers.etag, headers.last_modified); |
| 1784 | + auto res = curl_easy_perform(curl); |
| 1785 | + if (res != CURLE_OK) { |
| 1786 | + fclose(outfile); |
| 1787 | + curl_easy_cleanup(curl); |
| 1788 | + fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); |
| 1789 | + return NULL; |
| 1790 | + } |
| 1791 | + |
| 1792 | + long http_code = 0; |
| 1793 | + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); |
| 1794 | + if (http_code < 200 || http_code >= 400) { |
| 1795 | + fclose(outfile); |
| 1796 | + curl_easy_cleanup(curl); |
| 1797 | + fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code); |
| 1798 | + return NULL; |
| 1799 | + } |
| 1800 | + |
| 1801 | + // Clean up |
| 1802 | + fclose(outfile); |
| 1803 | + |
| 1804 | + // Write the new ETag to the .etag file |
| 1805 | + if (strlen(headers.etag) > 0) { |
| 1806 | + auto * etag_file = fopen(etag_path, "w"); |
| 1807 | + if (etag_file) { |
| 1808 | + fputs(headers.etag, etag_file); |
| 1809 | + fclose(etag_file); |
| 1810 | + fprintf(stderr, "%s: model etag saved %s: %s\n", __func__, etag_path, headers.etag); |
| 1811 | + } |
| 1812 | + } |
| 1813 | + |
| 1814 | + // Write the new Last-Modified value to the .lastModified file |
| 1815 | + if (strlen(headers.last_modified) > 0) { |
| 1816 | + auto * last_modified_file = fopen(last_modified_path, "w"); |
| 1817 | + if (last_modified_file) { |
| 1818 | + fputs(headers.last_modified, last_modified_file); |
| 1819 | + fclose(last_modified_file); |
| 1820 | + fprintf(stderr, "%s: model last modified saved %s: %s\n", __func__, last_modified_path, |
| 1821 | + headers.last_modified); |
| 1822 | + } |
| 1823 | + } |
| 1824 | + |
| 1825 | + if (rename(path_model_temporary, path_model) != 0) { |
| 1826 | + curl_easy_cleanup(curl); |
| 1827 | + fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model); |
| 1828 | + return NULL; |
| 1829 | + } |
| 1830 | + } |
| 1831 | + |
| 1832 | + curl_easy_cleanup(curl); |
| 1833 | + |
| 1834 | + return llama_load_model_from_file(path_model, params); |
| 1835 | +} |
| 1836 | + |
| 1837 | +#else |
| 1838 | + |
| 1839 | +struct llama_model * llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/, |
| 1840 | + struct llama_model_params /*params*/) { |
| 1841 | + fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from a URL is not supported.\n", __func__); |
| 1842 | + return nullptr; |
| 1843 | +} |
| 1844 | + |
| 1845 | +#endif // LLAMA_USE_CURL |
| 1846 | + |
1616 | 1847 | std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
1617 | 1848 | auto mparams = llama_model_params_from_gpt_params(params);
1618 | 1849 |
1619 | | - llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams); |
| 1850 | + llama_model * model = nullptr; |
| 1851 | + if (!params.model_url.empty()) { |
| 1852 | + model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), mparams); |
| 1853 | + } else { |
| 1854 | + model = llama_load_model_from_file(params.model.c_str(), mparams); |
| 1855 | + } |
1620 | 1856 | if (model == NULL) {
1621 | 1857 | fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
1622 | 1858 | return std::make_tuple(nullptr, nullptr);
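For context, a minimal sketch of how the new helper might be exercised directly. This is a hypothetical standalone example, not part of the change: it assumes llama_load_model_from_url is declared in common.h next to this definition, and the URL and local path are placeholders. On the command line, the same code path is reached with an invocation along the lines of ./main -mu <model url> -m <local cache path> ..., using the -mu/--model-url flag added in the parsing hunk above.

    #include "common.h"
    #include "llama.h"

    #include <cstdio>

    int main() {
        // Placeholder URL and local cache path, for illustration only.
        const char * url  = "https://example.com/models/ggml-model-q4_0.gguf";
        const char * path = "ggml-model-q4_0.gguf";

        llama_model_params mparams = llama_model_default_params();

        // Downloads the file (or reuses the cached copy when the stored ETag /
        // Last-Modified values still match the server's), then loads it via
        // llama_load_model_from_file.
        llama_model * model = llama_load_model_from_url(url, path, mparams);
        if (model == NULL) {
            fprintf(stderr, "failed to download or load the model\n");
            return 1;
        }

        llama_free_model(model);
        return 0;
    }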