
Commit 5047dd3

llama : use _impl suffix instead of _internal (#11060)
ggml-ci
1 parent 46e3556 commit 5047dd3

2 files changed: 18 additions, 18 deletions

src/llama-quant.cpp

Lines changed: 10 additions & 10 deletions
@@ -22,7 +22,7 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }

-struct quantize_state_internal {
+struct quantize_state_impl {
     const llama_model & model;
     const llama_model_quantize_params * params;

@@ -43,13 +43,13 @@ struct quantize_state_internal {
     // used to figure out if a model shares tok_embd with the output weight
     bool has_output = false;

-    quantize_state_internal(const llama_model & model, const llama_model_quantize_params * params)
+    quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params)
         : model(model)
         , params(params)
         {}
 };

-static void llama_tensor_dequantize_internal(
+static void llama_tensor_dequantize_impl(
     struct ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
     const size_t nelements, const int nthread
 ) {
@@ -121,7 +121,7 @@ static void llama_tensor_dequantize_internal(
     workers.clear();
 }

-static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) {
+static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) {
     const std::string name = ggml_get_name(tensor);

     // TODO: avoid hardcoded tensor names - use the TN_* constants
@@ -410,7 +410,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     return new_type;
 }

-static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
+static size_t llama_tensor_quantize_impl(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
     if (nthread < 2) {
         // single-thread
         size_t new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
@@ -464,7 +464,7 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
     return new_size;
 }

-static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
+static void llama_model_quantize_impl(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
     ggml_type default_type;
     llama_ftype ftype = params->ftype;

@@ -534,7 +534,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     llm_load_hparams(ml, model);
     llm_load_stats (ml, model);

-    struct quantize_state_internal qs(model, params);
+    struct quantize_state_impl qs(model, params);

     if (params->only_copy) {
         ftype = model.ftype;
@@ -837,7 +837,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else if (ggml_is_quantized(tensor->type) && !params->allow_requantize) {
             throw std::runtime_error(format("requantizing from type %s is disabled", ggml_type_name(tensor->type)));
         } else {
-            llama_tensor_dequantize_internal(tensor, f32_conv_buf, workers, nelements, nthread);
+            llama_tensor_dequantize_impl(tensor, f32_conv_buf, workers, nelements, nthread);
             f32_data = (float *) f32_conv_buf.data();
         }

@@ -866,7 +866,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 void * new_data_03 = (char *)new_data + ggml_row_size(new_type, n_per_row) * i03 * nrows;
                 const float * imatrix_03 = imatrix ? imatrix + i03 * n_per_row : nullptr;

-                new_size += llama_tensor_quantize_internal(new_type, f32_data_03, new_data_03, chunk_size, nrows, n_per_row, imatrix_03, workers, nthread_use);
+                new_size += llama_tensor_quantize_impl(new_type, f32_data_03, new_data_03, chunk_size, nrows, n_per_row, imatrix_03, workers, nthread_use);
             }
             LLAMA_LOG_INFO("size = %8.2f MiB -> %8.2f MiB\n", ggml_nbytes(tensor)/1024.0/1024.0, new_size/1024.0/1024.0);
         }
@@ -919,7 +919,7 @@ uint32_t llama_model_quantize(
         const char * fname_out,
         const llama_model_quantize_params * params) {
     try {
-        llama_model_quantize_internal(fname_inp, fname_out, params);
+        llama_model_quantize_impl(fname_inp, fname_out, params);
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: failed to quantize: %s\n", __func__, err.what());
         return 1;
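
This file only renames file-local (static) helpers and the internal quantization state struct; the exported entry point llama_model_quantize keeps its name and signature, so callers need no changes. As a caller-side illustration, here is a minimal sketch (not part of this commit; the file names, thread count, and the LLAMA_FTYPE_MOSTLY_Q4_K_M choice are placeholders):

    #include "llama.h"

    #include <cstdio>

    int main() {
        // Start from the library's default quantization parameters.
        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype   = LLAMA_FTYPE_MOSTLY_Q4_K_M; // target type (placeholder choice)
        params.nthread = 4;                         // worker threads used by the quantizer

        // Public API unchanged by this commit; internally it now forwards to
        // llama_model_quantize_impl() instead of llama_model_quantize_internal().
        const uint32_t rc = llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &params);
        if (rc != 0) {
            std::fprintf(stderr, "quantization failed (rc = %u)\n", rc);
            return 1;
        }
        return 0;
    }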

src/llama.cpp

Lines changed: 8 additions & 8 deletions
@@ -10717,7 +10717,7 @@ static enum ggml_status llama_graph_compute(
 // return positive int on warning
 // return negative int on error
 //
-static int llama_decode_internal(
+static int llama_decode_impl(
         llama_context & lctx,
         llama_batch inp_batch) {

@@ -11052,7 +11052,7 @@ static int llama_decode_internal(
 // return positive int on warning
 // return negative int on error
 //
-static int llama_encode_internal(
+static int llama_encode_impl(
         llama_context & lctx,
         llama_batch inp_batch) {

@@ -11234,7 +11234,7 @@ static int llama_encode_internal(
 }

 // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
-static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
+static void llama_kv_cache_defrag_impl(struct llama_context & lctx) {
     auto & kv_self = lctx.kv_self;

     const auto & hparams = lctx.model.hparams;
@@ -11454,7 +11454,7 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
     //LLAMA_LOG_INFO("(tmp log) KV defrag time: %.3f ms\n", (t_end - t_start)/1000.0);
 }

-static void llama_kv_cache_update_internal(struct llama_context & lctx) {
+static void llama_kv_cache_update_impl(struct llama_context & lctx) {
     bool need_reserve = false;

     if (lctx.kv_self.has_shift) {
@@ -11490,7 +11490,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) {

     // defragment the KV cache if needed
     if (lctx.kv_self.do_defrag) {
-        llama_kv_cache_defrag_internal(lctx);
+        llama_kv_cache_defrag_impl(lctx);

         need_reserve = true;

@@ -12191,7 +12191,7 @@ void llama_kv_cache_defrag(struct llama_context * ctx) {
 }

 void llama_kv_cache_update(struct llama_context * ctx) {
-    llama_kv_cache_update_internal(*ctx);
+    llama_kv_cache_update_impl(*ctx);
 }

 bool llama_kv_cache_can_shift(struct llama_context * ctx) {
@@ -12203,7 +12203,7 @@ bool llama_kv_cache_can_shift(struct llama_context * ctx) {
 int32_t llama_encode(
         struct llama_context * ctx,
         struct llama_batch batch) {
-    const int ret = llama_encode_internal(*ctx, batch);
+    const int ret = llama_encode_impl(*ctx, batch);
     if (ret != 0) {
         LLAMA_LOG_ERROR("%s: failed to encode, ret = %d\n", __func__, ret);
     }
@@ -12214,7 +12214,7 @@ int32_t llama_encode(
 int32_t llama_decode(
         struct llama_context * ctx,
         struct llama_batch batch) {
-    const int ret = llama_decode_internal(*ctx, batch);
+    const int ret = llama_decode_impl(*ctx, batch);
     if (ret != 0) {
         LLAMA_LOG_ERROR("%s: failed to decode, ret = %d\n", __func__, ret);
     }
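
The same pattern holds throughout this file: the exported C functions (llama_decode, llama_encode, llama_kv_cache_update) keep their names and simply forward to the renamed static _impl functions, so nothing changes at the public API boundary. Condensed from the hunks above, the wrapper idiom looks roughly like this (the body of the _impl function is elided):

    // Static implementation detail, now suffixed _impl instead of _internal.
    static int llama_decode_impl(llama_context & lctx, llama_batch inp_batch) {
        // ... the actual decoding work lives here ...
        return 0;
    }

    // Thin public wrapper: unchanged name, logs a failure and returns the code as-is.
    int32_t llama_decode(struct llama_context * ctx, struct llama_batch batch) {
        const int ret = llama_decode_impl(*ctx, batch);
        if (ret != 0) {
            LLAMA_LOG_ERROR("%s: failed to decode, ret = %d\n", __func__, ret);
        }
        return ret;
    }

Keeping the logging in the thin public wrapper means each failure is reported once at the API boundary, while the _impl functions can return error codes freely.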
