@@ -43,6 +43,25 @@ std::initializer_list<enum llama_example> mmproj_examples = {
43
43
// TODO: add LLAMA_EXAMPLE_SERVER when it's ready
44
44
};
45
45
46
+ static std::string read_file (const std::string & fname) {
47
+ std::ifstream file (fname);
48
+ if (!file) {
49
+ throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , fname.c_str ()));
50
+ }
51
+ std::string content ((std::istreambuf_iterator<char >(file)), std::istreambuf_iterator<char >());
52
+ file.close ();
53
+ return content;
54
+ }
55
+
56
+ static void write_file (const std::string & fname, const std::string & content) {
57
+ std::ofstream file (fname);
58
+ if (!file) {
59
+ throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , fname.c_str ()));
60
+ }
61
+ file << content;
62
+ file.close ();
63
+ }
64
+
46
65
common_arg & common_arg::set_examples (std::initializer_list<enum llama_example> examples) {
47
66
this ->examples = std::move (examples);
48
67
return *this ;
@@ -200,9 +219,11 @@ struct curl_slist_ptr {
200
219
201
220
static bool curl_perform_with_retry (const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
202
221
int remaining_attempts = max_attempts;
222
+ char * method = nullptr ;
223
+ curl_easy_getinfo (curl, CURLINFO_EFFECTIVE_METHOD, &method);
203
224
204
225
while (remaining_attempts > 0 ) {
205
- LOG_INF (" %s: Trying to download from %s (attempt %d of %d)...\n " , __func__ , url.c_str (), max_attempts - remaining_attempts + 1 , max_attempts);
226
+ LOG_INF (" %s: %s %s (attempt %d of %d)...\n " , __func__ , method , url.c_str (), max_attempts - remaining_attempts + 1 , max_attempts);
206
227
207
228
CURLcode res = curl_easy_perform (curl);
208
229
if (res == CURLE_OK) {
@@ -213,6 +234,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
213
234
LOG_WRN (" %s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n " , __func__, curl_easy_strerror (res), exponential_backoff_delay);
214
235
215
236
remaining_attempts--;
237
+ if (remaining_attempts == 0 ) break ;
216
238
std::this_thread::sleep_for (std::chrono::milliseconds (exponential_backoff_delay));
217
239
}
218
240
@@ -231,8 +253,6 @@ static bool common_download_file_single(const std::string & url, const std::stri
231
253
return false ;
232
254
}
233
255
234
- bool force_download = false ;
235
-
236
256
// Set the URL, allow to follow http redirection
237
257
curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
238
258
curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
@@ -256,7 +276,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
256
276
257
277
// If the file exists, check its JSON metadata companion file.
258
278
std::string metadata_path = path + " .json" ;
259
- nlohmann::json metadata;
279
+ nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
260
280
std::string etag;
261
281
std::string last_modified;
262
282
@@ -266,7 +286,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
266
286
if (metadata_in.good ()) {
267
287
try {
268
288
metadata_in >> metadata;
269
- LOG_INF (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (), metadata.dump ().c_str ());
289
+ LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (), metadata.dump ().c_str ());
270
290
if (metadata.contains (" url" ) && metadata.at (" url" ).is_string ()) {
271
291
auto previous_url = metadata.at (" url" ).get <std::string>();
272
292
if (previous_url != url) {
@@ -296,7 +316,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
296
316
};
297
317
298
318
common_load_model_from_url_headers headers;
319
+ bool head_request_ok = false ;
320
+ bool should_download = !file_exists; // by default, we should download if the file does not exist
299
321
322
+ // get ETag to see if the remote file has changed
300
323
{
301
324
typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t , size_t , void *);
302
325
auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
@@ -325,23 +348,28 @@ static bool common_download_file_single(const std::string & url, const std::stri
325
348
curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
326
349
curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
327
350
328
- bool was_perform_successful = curl_perform_with_retry (url, curl.get (), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
351
+ // we only allow retrying once for HEAD requests
352
+ // this is for the use case of running offline (no internet), where retrying can be annoying
353
+ bool was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 );
329
354
if (!was_perform_successful) {
330
- return false ;
355
+ head_request_ok = false ;
331
356
}
332
357
333
358
long http_code = 0 ;
334
359
curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
335
- if (http_code ! = 200 ) {
336
- // HEAD not supported, we don't know if the file has changed
337
- // force trigger downloading
338
- force_download = true ;
339
- LOG_ERR ( " %s: HEAD invalid http status code received: %ld \n " , __func__, http_code) ;
360
+ if (http_code = = 200 ) {
361
+ head_request_ok = true ;
362
+ } else {
363
+ LOG_WRN ( " %s: HEAD invalid http status code received: %ld \n " , __func__, http_code) ;
364
+ head_request_ok = false ;
340
365
}
341
366
}
342
367
343
- bool should_download = !file_exists || force_download;
344
- if (!should_download) {
368
+ // if head_request_ok is false, we don't have the etag or last-modified headers
369
+ // we leave should_download as-is, which is true if the file does not exist
370
+ if (head_request_ok) {
371
+ // check if ETag or Last-Modified headers are different
372
+ // if it is, we need to download the file again
345
373
if (!etag.empty () && etag != headers.etag ) {
346
374
LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__, etag.c_str (), headers.etag .c_str ());
347
375
should_download = true ;
@@ -350,6 +378,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
350
378
should_download = true ;
351
379
}
352
380
}
381
+
353
382
if (should_download) {
354
383
std::string path_temporary = path + " .downloadInProgress" ;
355
384
if (file_exists) {
@@ -424,13 +453,15 @@ static bool common_download_file_single(const std::string & url, const std::stri
424
453
{" etag" , headers.etag },
425
454
{" lastModified" , headers.last_modified }
426
455
});
427
- std::ofstream (metadata_path) << metadata.dump (4 );
428
- LOG_INF (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
456
+ write_file (metadata_path, metadata.dump (4 ) );
457
+ LOG_DBG (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
429
458
430
459
if (rename (path_temporary.c_str (), path.c_str ()) != 0 ) {
431
460
LOG_ERR (" %s: unable to rename file: %s to %s\n " , __func__, path_temporary.c_str (), path.c_str ());
432
461
return false ;
433
462
}
463
+ } else {
464
+ LOG_INF (" %s: using cached file: %s\n " , __func__, path.c_str ());
434
465
}
435
466
436
467
return true ;
@@ -605,16 +636,37 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
605
636
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
606
637
// User-Agent header is already set in common_remote_get_content, no need to set it here
607
638
639
+ // we use "=" to avoid clashing with other components, while still being allowed on Windows
640
+ std::string cached_response_fname = " manifest=" + hf_repo + " =" + tag + " .json" ;
641
+ string_replace_all (cached_response_fname, " /" , " _" );
642
+ std::string cached_response_path = fs_get_cache_file (cached_response_fname);
643
+
608
644
// make the request
609
645
common_remote_params params;
610
646
params.headers = headers;
611
- auto res = common_remote_get_content (url, params);
612
- long res_code = res.first ;
613
- std::string res_str (res.second .data (), res.second .size ());
647
+ long res_code = 0 ;
648
+ std::string res_str;
649
+ bool use_cache = false ;
650
+ try {
651
+ auto res = common_remote_get_content (url, params);
652
+ res_code = res.first ;
653
+ res_str = std::string (res.second .data (), res.second .size ());
654
+ } catch (const std::exception & e) {
655
+ LOG_WRN (" error: failed to get manifest: %s\n " , e.what ());
656
+ LOG_WRN (" try reading from cache\n " );
657
+ // try to read from cache
658
+ try {
659
+ res_str = read_file (cached_response_path);
660
+ res_code = 200 ;
661
+ use_cache = true ;
662
+ } catch (const std::exception & e) {
663
+ throw std::runtime_error (" error: failed to get manifest (check your internet connection)" );
664
+ }
665
+ }
614
666
std::string ggufFile;
615
667
std::string mmprojFile;
616
668
617
- if (res_code == 200 ) {
669
+ if (res_code == 200 || res_code == 304 ) {
618
670
// extract ggufFile.rfilename in json, using regex
619
671
{
620
672
std::regex pattern (" \" ggufFile\" [\\ s\\ S]*?\" rfilename\"\\ s*:\\ s*\" ([^\" ]+)\" " );
@@ -631,6 +683,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
631
683
mmprojFile = match[1 ].str ();
632
684
}
633
685
}
686
+ if (!use_cache) {
687
+ // if not using cached response, update the cache file
688
+ write_file (cached_response_path, res_str);
689
+ }
634
690
} else if (res_code == 401 ) {
635
691
throw std::runtime_error (" error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token" );
636
692
} else {
@@ -1142,6 +1198,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
1142
1198
fprintf (stderr, " %s\n " , ex.what ());
1143
1199
ctx_arg.params = params_org;
1144
1200
return false ;
1201
+ } catch (std::exception & ex) {
1202
+ fprintf (stderr, " %s\n " , ex.what ());
1203
+ exit (1 ); // for other exceptions, we exit with status code 1
1145
1204
}
1146
1205
1147
1206
return true ;
@@ -1442,13 +1501,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1442
1501
{" -f" , " --file" }, " FNAME" ,
1443
1502
" a file containing the prompt (default: none)" ,
1444
1503
[](common_params & params, const std::string & value) {
1445
- std::ifstream file (value);
1446
- if (!file) {
1447
- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
1448
- }
1504
+ params.prompt = read_file (value);
1449
1505
// store the external file name in params
1450
1506
params.prompt_file = value;
1451
- std::copy (std::istreambuf_iterator<char >(file), std::istreambuf_iterator<char >(), back_inserter (params.prompt ));
1452
1507
if (!params.prompt .empty () && params.prompt .back () == ' \n ' ) {
1453
1508
params.prompt .pop_back ();
1454
1509
}
@@ -1458,11 +1513,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1458
1513
{" -sysf" , " --system-prompt-file" }, " FNAME" ,
1459
1514
" a file containing the system prompt (default: none)" ,
1460
1515
[](common_params & params, const std::string & value) {
1461
- std::ifstream file (value);
1462
- if (!file) {
1463
- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
1464
- }
1465
- std::copy (std::istreambuf_iterator<char >(file), std::istreambuf_iterator<char >(), back_inserter (params.system_prompt ));
1516
+ params.system_prompt = read_file (value);
1466
1517
if (!params.system_prompt .empty () && params.system_prompt .back () == ' \n ' ) {
1467
1518
params.system_prompt .pop_back ();
1468
1519
}
@@ -1887,15 +1938,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1887
1938
{" --grammar-file" }, " FNAME" ,
1888
1939
" file to read grammar from" ,
1889
1940
[](common_params & params, const std::string & value) {
1890
- std::ifstream file (value);
1891
- if (!file) {
1892
- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
1893
- }
1894
- std::copy (
1895
- std::istreambuf_iterator<char >(file),
1896
- std::istreambuf_iterator<char >(),
1897
- std::back_inserter (params.sampling .grammar )
1898
- );
1941
+ params.sampling .grammar = read_file (value);
1899
1942
}
1900
1943
).set_sparam ());
1901
1944
add_opt (common_arg (
@@ -2815,14 +2858,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
2815
2858
" list of built-in templates:\n %s" , list_builtin_chat_templates ().c_str ()
2816
2859
),
2817
2860
[](common_params & params, const std::string & value) {
2818
- std::ifstream file (value);
2819
- if (!file) {
2820
- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
2821
- }
2822
- std::copy (
2823
- std::istreambuf_iterator<char >(file),
2824
- std::istreambuf_iterator<char >(),
2825
- std::back_inserter (params.chat_template ));
2861
+ params.chat_template = read_file (value);
2826
2862
}
2827
2863
).set_examples ({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_CHAT_TEMPLATE_FILE" ));
2828
2864
add_opt (common_arg (
0 commit comments