@@ -1114,7 +1114,7 @@ struct llama_mlock {
            suggest = false;
        }

-        fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
+        LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
                size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
        return false;
    }
@@ -1123,7 +1123,7 @@ struct llama_mlock {

    static void raw_unlock(void * addr, size_t size) {
        if (munlock(addr, size)) {
-            fprintf(stderr, "warning: failed to munlock buffer: %s\n", std::strerror(errno));
+            LLAMA_LOG_WARN("warning: failed to munlock buffer: %s\n", std::strerror(errno));
        }
    }
#elif defined(_WIN32)
@@ -1141,7 +1141,7 @@ struct llama_mlock {
                return true;
            }
            if (tries == 2) {
-                fprintf(stderr, "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
+                LLAMA_LOG_WARN("warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
                    len, size, llama_format_win_err(GetLastError()).c_str());
                return false;
            }
@@ -1150,7 +1150,7 @@ struct llama_mlock {
            // set size and try again.
            SIZE_T min_ws_size, max_ws_size;
            if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
-                fprintf(stderr, "warning: GetProcessWorkingSetSize failed: %s\n",
+                LLAMA_LOG_WARN("warning: GetProcessWorkingSetSize failed: %s\n",
                    llama_format_win_err(GetLastError()).c_str());
                return false;
            }
@@ -1163,7 +1163,7 @@ struct llama_mlock {
            min_ws_size += increment;
            max_ws_size += increment;
            if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
-                fprintf(stderr, "warning: SetProcessWorkingSetSize failed: %s\n",
+                LLAMA_LOG_WARN("warning: SetProcessWorkingSetSize failed: %s\n",
                    llama_format_win_err(GetLastError()).c_str());
                return false;
            }
@@ -1172,7 +1172,7 @@ struct llama_mlock {

    static void raw_unlock(void * ptr, size_t len) {
        if (!VirtualUnlock(ptr, len)) {
-            fprintf(stderr, "warning: failed to VirtualUnlock buffer: %s\n",
+            LLAMA_LOG_WARN("warning: failed to VirtualUnlock buffer: %s\n",
                llama_format_win_err(GetLastError()).c_str());
        }
    }
@@ -1184,7 +1184,7 @@ struct llama_mlock {
    }

    bool raw_lock(const void * addr, size_t len) const {
-        fprintf(stderr, "warning: mlock not supported on this system\n");
+        LLAMA_LOG_WARN("warning: mlock not supported on this system\n");
        return false;
    }

@@ -2085,13 +2085,13 @@ namespace GGUFMeta {
                    __func__, override_type_to_str(override->tag), override->key);
                switch (override->tag) {
                    case LLAMA_KV_OVERRIDE_BOOL:  {
-                        printf("%s\n", override->bool_value ? "true" : "false");
+                        LLAMA_LOG_INFO("%s\n", override->bool_value ? "true" : "false");
                    } break;
                    case LLAMA_KV_OVERRIDE_INT:   {
-                        printf("%" PRId64 "\n", override->int_value);
+                        LLAMA_LOG_INFO("%" PRId64 "\n", override->int_value);
                    } break;
                    case LLAMA_KV_OVERRIDE_FLOAT: {
-                        printf("%.6f\n", override->float_value);
+                        LLAMA_LOG_INFO("%.6f\n", override->float_value);
                    } break;
                    default:
                        // Shouldn't be possible to end up here, but just in case...
@@ -6993,7 +6993,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
                    if (match + special_token.length() > raw_text_base_offset + raw_text_base_length) break;

#ifdef PRETOKENIZERDEBUG
-                    fprintf(stderr, "FF: (%ld %ld %ld) '%s'\n", raw_text->length(), raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
+                    LLAMA_LOG_WARN("FF: (%ld %ld %ld) '%s'\n", raw_text->length(), raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
#endif
                    auto source = std::distance(buffer.begin(), it);

@@ -7006,7 +7006,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
                        buffer.emplace_after(it, (*raw_text), left_reminder_offset, left_reminder_length);

#ifdef PRETOKENIZERDEBUG
-                        fprintf(stderr, "FL: (%ld %ld) '%s'\n", left_reminder_offset, left_reminder_length, raw_text->substr(left_reminder_offset, left_reminder_length).c_str());
+                        LLAMA_LOG_WARN("FL: (%ld %ld) '%s'\n", left_reminder_offset, left_reminder_length, raw_text->substr(left_reminder_offset, left_reminder_length).c_str());
#endif
                        it++;
                    }
@@ -7022,7 +7022,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
                        buffer.emplace_after(it, (*raw_text), right_reminder_offset, right_reminder_length);

#ifdef PRETOKENIZERDEBUG
-                        fprintf(stderr, "FR: (%ld %ld) '%s'\n", right_reminder_offset, right_reminder_length, raw_text->substr(right_reminder_offset, right_reminder_length).c_str());
+                        LLAMA_LOG_WARN("FR: (%ld %ld) '%s'\n", right_reminder_offset, right_reminder_length, raw_text->substr(right_reminder_offset, right_reminder_length).c_str());
#endif

                        it++;
@@ -7038,7 +7038,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
                            raw_text_base_length = right_reminder_length;

#ifdef PRETOKENIZERDEBUG
-                            fprintf(stderr, "RR: (%ld %ld) '%s'\n", raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
+                            LLAMA_LOG_WARN("RR: (%ld %ld) '%s'\n", raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
#endif
                        } else {
                            if (source == 0) {
@@ -7095,7 +7095,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                }

#ifdef PRETOKENIZERDEBUG
-                fprintf(stderr, "TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+                LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
#endif
                llm_tokenizer_spm tokenizer(vocab);
                llama_escape_whitespace(raw_text);
@@ -7116,7 +7116,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length);

#ifdef PRETOKENIZERDEBUG
-                fprintf(stderr, "TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+                LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
#endif
                llm_tokenizer_bpe tokenizer(vocab);
                tokenizer.tokenize(raw_text, output);
@@ -8641,7 +8641,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
    if (params->imatrix) {
        imatrix_data = static_cast<const std::unordered_map<std::string, std::vector<float>>*>(params->imatrix);
        if (imatrix_data) {
-            printf("================================ Have weights data with %d entries\n",int(imatrix_data->size()));
+            LLAMA_LOG_INFO("================================ Have weights data with %d entries\n",int(imatrix_data->size()));
        }
    }

@@ -8764,23 +8764,23 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
        if (imatrix_data) {
            auto it = imatrix_data->find(tensor->name);
            if (it == imatrix_data->end()) {
-                printf("\n====== %s: did not find weights for %s\n", __func__, tensor->name);
+                LLAMA_LOG_INFO("\n====== %s: did not find weights for %s\n", __func__, tensor->name);
            } else {
                if (it->second.size() == (size_t)tensor->ne[0]) {
                    imatrix = it->second.data();
                } else {
-                    printf("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
+                    LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
                            int(it->second.size()), int(tensor->ne[0]), tensor->name);
                }
            }
        }
        if ((new_type == GGML_TYPE_IQ2_XXS ||
             new_type == GGML_TYPE_IQ2_XS  ||
            (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
-            fprintf(stderr, "\n\n============================================================\n");
-            fprintf(stderr, "Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
-            fprintf(stderr, "The result will be garbage, so bailing out\n");
-            fprintf(stderr, "============================================================\n\n");
+            LLAMA_LOG_ERROR("\n\n============================================================\n");
+            LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
+            LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
+            LLAMA_LOG_ERROR("============================================================\n\n");
            throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
        }

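
Note on the pattern: every hunk above routes a direct fprintf(stderr, ...) or printf(...) call through the LLAMA_LOG_INFO / LLAMA_LOG_WARN / LLAMA_LOG_ERROR macros instead. As an illustrative sketch only (log_internal, log_callback, and the LOG_* names below are hypothetical stand-ins, not the actual llama.cpp definitions), such macros typically forward a severity level plus printf-style arguments to one internal sink that a host application can redirect:

// Sketch of a level-tagged logging sink behind printf-style macros.
// Hypothetical names; shown only to illustrate the pattern of the change above.
#include <cstdarg>
#include <cstdio>

enum log_level { LOG_LEVEL_INFO, LOG_LEVEL_WARN, LOG_LEVEL_ERROR };

// Callback type so an embedding application can capture or silence output.
typedef void (*log_callback)(log_level level, const char * text, void * user_data);

static void log_callback_default(log_level /*level*/, const char * text, void * /*user_data*/) {
    fputs(text, stderr);  // default sink: stderr, like the old fprintf calls
}

static log_callback g_log_callback  = log_callback_default;
static void *       g_log_user_data = nullptr;

static void log_internal(log_level level, const char * format, ...) {
    char buffer[1024];
    va_list args;
    va_start(args, format);
    vsnprintf(buffer, sizeof(buffer), format, args);  // format once, then hand off
    va_end(args);
    g_log_callback(level, buffer, g_log_user_data);
}

#define LOG_INFO(...)  log_internal(LOG_LEVEL_INFO,  __VA_ARGS__)
#define LOG_WARN(...)  log_internal(LOG_LEVEL_WARN,  __VA_ARGS__)
#define LOG_ERROR(...) log_internal(LOG_LEVEL_ERROR, __VA_ARGS__)

int main() {
    LOG_WARN("warning: failed to mlock %zu-byte buffer\n", (size_t) 4096);
    return 0;
}

Funneling all messages through a single sink like this is what lets a caller swap in its own callback, so library output can be filtered by level or redirected instead of being written straight to stdout/stderr at each call site.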