
Commit fe4e9e2

ggerganov authored and NeoZhangJianyu committed
llama : use _impl suffix instead of _internal (ggml-org#11060)
ggml-ci
1 parent 4254ce5 commit fe4e9e2

File tree: 2 files changed, +18 -18 lines (src/llama-quant.cpp, src/llama.cpp)

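The change is a mechanical rename: file-local helpers keep internal linkage (static) but use an _impl suffix instead of _internal. A minimal sketch of the resulting convention, with hypothetical names that are not part of this commit:

    // Hypothetical illustration of the naming convention; not code from this commit.
    // File-local workers are static and carry the _impl suffix; the public
    // function is a thin wrapper that forwards to them.
    static int do_work_impl(int x) {
        return x + 1;
    }

    int do_work(int x) {
        return do_work_impl(x);
    }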

src/llama-quant.cpp (10 additions, 10 deletions)

@@ -22,7 +22,7 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
-struct quantize_state_internal {
+struct quantize_state_impl {
     const llama_model & model;
     const llama_model_quantize_params * params;
 
@@ -43,13 +43,13 @@ struct quantize_state_internal {
     // used to figure out if a model shares tok_embd with the output weight
     bool has_output = false;
 
-    quantize_state_internal(const llama_model & model, const llama_model_quantize_params * params)
+    quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params)
         : model(model)
         , params(params)
     {}
 };
 
-static void llama_tensor_dequantize_internal(
+static void llama_tensor_dequantize_impl(
     struct ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
     const size_t nelements, const int nthread
 ) {
@@ -121,7 +121,7 @@ static void llama_tensor_dequantize_internal(
     workers.clear();
 }
 
-static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) {
+static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) {
     const std::string name = ggml_get_name(tensor);
 
     // TODO: avoid hardcoded tensor names - use the TN_* constants
@@ -410,7 +410,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     return new_type;
 }
 
-static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
+static size_t llama_tensor_quantize_impl(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
     if (nthread < 2) {
         // single-thread
         size_t new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
@@ -464,7 +464,7 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
     return new_size;
 }
 
-static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
+static void llama_model_quantize_impl(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
     ggml_type default_type;
     llama_ftype ftype = params->ftype;
 
@@ -534,7 +534,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     llm_load_hparams(ml, model);
     llm_load_stats (ml, model);
 
-    struct quantize_state_internal qs(model, params);
+    struct quantize_state_impl qs(model, params);
 
     if (params->only_copy) {
         ftype = model.ftype;
@@ -837,7 +837,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else if (ggml_is_quantized(tensor->type) && !params->allow_requantize) {
             throw std::runtime_error(format("requantizing from type %s is disabled", ggml_type_name(tensor->type)));
         } else {
-            llama_tensor_dequantize_internal(tensor, f32_conv_buf, workers, nelements, nthread);
+            llama_tensor_dequantize_impl(tensor, f32_conv_buf, workers, nelements, nthread);
             f32_data = (float *) f32_conv_buf.data();
         }
 
@@ -866,7 +866,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 void * new_data_03 = (char *)new_data + ggml_row_size(new_type, n_per_row) * i03 * nrows;
                 const float * imatrix_03 = imatrix ? imatrix + i03 * n_per_row : nullptr;
 
-                new_size += llama_tensor_quantize_internal(new_type, f32_data_03, new_data_03, chunk_size, nrows, n_per_row, imatrix_03, workers, nthread_use);
+                new_size += llama_tensor_quantize_impl(new_type, f32_data_03, new_data_03, chunk_size, nrows, n_per_row, imatrix_03, workers, nthread_use);
             }
             LLAMA_LOG_INFO("size = %8.2f MiB -> %8.2f MiB\n", ggml_nbytes(tensor)/1024.0/1024.0, new_size/1024.0/1024.0);
         }
@@ -919,7 +919,7 @@ uint32_t llama_model_quantize(
         const char * fname_out,
         const llama_model_quantize_params * params) {
     try {
-        llama_model_quantize_internal(fname_inp, fname_out, params);
+        llama_model_quantize_impl(fname_inp, fname_out, params);
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: failed to quantize: %s\n", __func__, err.what());
         return 1;
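The final hunk shows the shape of the public entry point after the rename: llama_model_quantize remains the exception boundary, forwarding to the renamed static worker and converting any exception into a non-zero status. A condensed sketch of that wrapper (the trailing return 0 is implied by the surrounding code, not visible in this hunk):

    uint32_t llama_model_quantize(
            const char * fname_inp,
            const char * fname_out,
            const llama_model_quantize_params * params) {
        try {
            // forwards to the file-local worker, renamed from llama_model_quantize_internal
            llama_model_quantize_impl(fname_inp, fname_out, params);
        } catch (const std::exception & err) {
            LLAMA_LOG_ERROR("%s: failed to quantize: %s\n", __func__, err.what());
            return 1;
        }
        return 0;
    }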

src/llama.cpp (8 additions, 8 deletions)

@@ -10726,7 +10726,7 @@ static enum ggml_status llama_graph_compute(
 // return positive int on warning
 // return negative int on error
 //
-static int llama_decode_internal(
+static int llama_decode_impl(
         llama_context & lctx,
         llama_batch inp_batch) {
 
@@ -11061,7 +11061,7 @@ static int llama_decode_internal(
 // return positive int on warning
 // return negative int on error
 //
-static int llama_encode_internal(
+static int llama_encode_impl(
         llama_context & lctx,
         llama_batch inp_batch) {
 
@@ -11243,7 +11243,7 @@ static int llama_encode_internal(
 }
 
 // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
-static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
+static void llama_kv_cache_defrag_impl(struct llama_context & lctx) {
     auto & kv_self = lctx.kv_self;
 
     const auto & hparams = lctx.model.hparams;
@@ -11463,7 +11463,7 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
     //LLAMA_LOG_INFO("(tmp log) KV defrag time: %.3f ms\n", (t_end - t_start)/1000.0);
 }
 
-static void llama_kv_cache_update_internal(struct llama_context & lctx) {
+static void llama_kv_cache_update_impl(struct llama_context & lctx) {
     bool need_reserve = false;
 
     if (lctx.kv_self.has_shift) {
@@ -11499,7 +11499,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) {
 
     // defragment the KV cache if needed
     if (lctx.kv_self.do_defrag) {
-        llama_kv_cache_defrag_internal(lctx);
+        llama_kv_cache_defrag_impl(lctx);
 
         need_reserve = true;
 
@@ -12200,7 +12200,7 @@ void llama_kv_cache_defrag(struct llama_context * ctx) {
 }
 
 void llama_kv_cache_update(struct llama_context * ctx) {
-    llama_kv_cache_update_internal(*ctx);
+    llama_kv_cache_update_impl(*ctx);
 }
 
 bool llama_kv_cache_can_shift(struct llama_context * ctx) {
@@ -12212,7 +12212,7 @@ bool llama_kv_cache_can_shift(struct llama_context * ctx) {
 int32_t llama_encode(
         struct llama_context * ctx,
         struct llama_batch batch) {
-    const int ret = llama_encode_internal(*ctx, batch);
+    const int ret = llama_encode_impl(*ctx, batch);
     if (ret != 0) {
         LLAMA_LOG_ERROR("%s: failed to encode, ret = %d\n", __func__, ret);
     }
@@ -12223,7 +12223,7 @@ int32_t llama_encode(
 int32_t llama_decode(
         struct llama_context * ctx,
         struct llama_batch batch) {
-    const int ret = llama_decode_internal(*ctx, batch);
+    const int ret = llama_decode_impl(*ctx, batch);
    if (ret != 0) {
         LLAMA_LOG_ERROR("%s: failed to decode, ret = %d\n", __func__, ret);
     }
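The llama.cpp side follows the same pattern: the exported llama_decode and llama_encode forward to the renamed static _impl functions and log non-zero return codes. A condensed sketch of the decode wrapper as it reads after this change (the trailing return of ret is implied by the surrounding code, not shown in the hunk):

    int32_t llama_decode(
            struct llama_context * ctx,
            struct llama_batch batch) {
        // forwards to the file-local implementation, renamed from llama_decode_internal
        const int ret = llama_decode_impl(*ctx, batch);
        if (ret != 0) {
            LLAMA_LOG_ERROR("%s: failed to decode, ret = %d\n", __func__, ret);
        }
        return ret;
    }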
