Commit 42ddf48

llama : revert enum name changes from this PR
ggml-ci
1 parent 5f5b1b5 commit 42ddf48

6 files changed: 55 additions & 55 deletions

common/common.cpp

Lines changed: 6 additions & 6 deletions
@@ -295,9 +295,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             std::string value(argv[i]);
-            /**/ if (value == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
-            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
-            else if (value == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
+            /**/ if (value == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_NONE; }
+            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_LINEAR; }
+            else if (value == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_YARN; }
             else { invalid_param = true; break; }
         } else if (arg == "--rope-scale") {
             if (++i >= argc) {
@@ -630,11 +630,11 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
             std::string arg_next = argv[i];
             if (arg_next == "none") {
-                params.split_mode = LLAMA_SPLIT_MODE_NONE;
+                params.split_mode = LLAMA_SPLIT_NONE;
             } else if (arg_next == "layer") {
-                params.split_mode = LLAMA_SPLIT_MODE_LAYER;
+                params.split_mode = LLAMA_SPLIT_LAYER;
             } else if (arg_next == "row") {
-                params.split_mode = LLAMA_SPLIT_MODE_ROW;
+                params.split_mode = LLAMA_SPLIT_ROW;
             } else {
                 invalid_param = true;
                 break;
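
The same three-way string-to-enum mapping recurs in examples/server/server.cpp and examples/llama-bench/llama-bench.cpp below. As a point of reference only, here is a hypothetical helper (not part of this commit or the repository) that captures the `--split-mode` mapping once, using the reverted names; the function name is illustrative:

    #include <string>
    #include "llama.h"

    // Illustrative only: one place for the "none"/"layer"/"row" -> llama_split_mode
    // mapping that the three command-line parsers touched by this commit each repeat.
    static bool parse_split_mode(const std::string & s, llama_split_mode & out) {
        if      (s == "none")  { out = LLAMA_SPLIT_NONE;  }
        else if (s == "layer") { out = LLAMA_SPLIT_LAYER; }
        else if (s == "row")   { out = LLAMA_SPLIT_ROW;   }
        else                   { return false; }
        return true;
    }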

common/common.h

Lines changed: 2 additions & 2 deletions
@@ -61,7 +61,7 @@ struct gpt_params {
     float   p_split            = 0.1f;  // speculative decoding split probability
     int32_t n_gpu_layers       = -1;    // number of layers to store in VRAM (-1 - use default)
     int32_t n_gpu_layers_draft = -1;    // number of layers to store in VRAM for the draft model (-1 - use default)
-    llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
+    llama_split_mode split_mode = LLAMA_SPLIT_LAYER; // how to split the model across GPUs
     int32_t main_gpu           = 0;     // the GPU that is used for scratch and small tensors
     float   tensor_split[128]  = {0};   // how split tensors should be distributed across GPUs
     int32_t n_beams            = 0;     // if non-zero then use beam search of given width.
@@ -75,7 +75,7 @@ struct gpt_params {
     float   yarn_beta_fast     = 32.0f; // YaRN low correction dim
     float   yarn_beta_slow     = 1.0f;  // YaRN high correction dim
     int32_t yarn_orig_ctx      = 0;     // YaRN original context length
-    int32_t rope_scaling_type  = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+    int32_t rope_scaling_type  = LLAMA_ROPE_SCALING_UNSPECIFIED;
     ggml_numa_strategy numa    = GGML_NUMA_STRATEGY_DISABLED;

     // // sampling parameters

examples/llama-bench/llama-bench.cpp

Lines changed: 7 additions & 7 deletions
@@ -157,9 +157,9 @@ static const char * output_format_str(output_formats format) {

 static const char * split_mode_str(llama_split_mode mode) {
     switch (mode) {
-        case LLAMA_SPLIT_MODE_NONE:  return "none";
-        case LLAMA_SPLIT_MODE_LAYER: return "layer";
-        case LLAMA_SPLIT_MODE_ROW:   return "row";
+        case LLAMA_SPLIT_NONE:  return "none";
+        case LLAMA_SPLIT_LAYER: return "layer";
+        case LLAMA_SPLIT_ROW:   return "row";
         default: GGML_ASSERT(!"invalid split mode");
     }
 }
@@ -193,7 +193,7 @@ static const cmd_params cmd_params_defaults = {
     /* type_v        */ {GGML_TYPE_F16},
     /* n_threads     */ {get_num_physical_cores()},
     /* n_gpu_layers  */ {99},
-    /* split_mode    */ {LLAMA_SPLIT_MODE_LAYER},
+    /* split_mode    */ {LLAMA_SPLIT_LAYER},
     /* main_gpu      */ {0},
     /* no_kv_offload */ {false},
     /* mul_mat_q     */ {true},
@@ -358,11 +358,11 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             for (const auto & m : p) {
                 llama_split_mode mode;
                 if (m == "none") {
-                    mode = LLAMA_SPLIT_MODE_NONE;
+                    mode = LLAMA_SPLIT_NONE;
                 } else if (m == "layer") {
-                    mode = LLAMA_SPLIT_MODE_LAYER;
+                    mode = LLAMA_SPLIT_LAYER;
                 } else if (m == "row") {
-                    mode = LLAMA_SPLIT_MODE_ROW;
+                    mode = LLAMA_SPLIT_ROW;
                 } else {
                     invalid_param = true;
                     break;

examples/server/server.cpp

Lines changed: 6 additions & 6 deletions
@@ -2082,9 +2082,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                break;
            }
            std::string value(argv[i]);
-            /**/ if (value == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
-            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
-            else if (value == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
+            /**/ if (value == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_NONE; }
+            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_LINEAR; }
+            else if (value == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_YARN; }
            else { invalid_param = true; break; }
        }
        else if (arg == "--rope-freq-base")
@@ -2208,15 +2208,15 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
            std::string arg_next = argv[i];
            if (arg_next == "none")
            {
-                params.split_mode = LLAMA_SPLIT_MODE_NONE;
+                params.split_mode = LLAMA_SPLIT_NONE;
            }
            else if (arg_next == "layer")
            {
-                params.split_mode = LLAMA_SPLIT_MODE_LAYER;
+                params.split_mode = LLAMA_SPLIT_LAYER;
            }
            else if (arg_next == "row")
            {
-                params.split_mode = LLAMA_SPLIT_MODE_ROW;
+                params.split_mode = LLAMA_SPLIT_ROW;
            }
            else {
                invalid_param = true;

llama.cpp

Lines changed: 23 additions & 23 deletions
@@ -850,9 +850,9 @@ struct LLM_TN {
 //

 static std::map<int32_t, const char *> LLAMA_ROPE_SCALING_TYPES = {
-    { LLAMA_ROPE_SCALING_TYPE_NONE,   "none"   },
-    { LLAMA_ROPE_SCALING_TYPE_LINEAR, "linear" },
-    { LLAMA_ROPE_SCALING_TYPE_YARN,   "yarn"   },
+    { LLAMA_ROPE_SCALING_NONE,   "none"   },
+    { LLAMA_ROPE_SCALING_LINEAR, "linear" },
+    { LLAMA_ROPE_SCALING_YARN,   "yarn"   },
 };

 static int32_t llama_rope_scaling_type_from_string(const std::string & name) {
@@ -862,7 +862,7 @@ static int32_t llama_rope_scaling_type_from_string(const std::string & name) {
         }
     }

-    return LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+    return LLAMA_ROPE_SCALING_UNSPECIFIED;
 }

 static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
@@ -1581,7 +1581,7 @@ struct llama_hparams {
     bool causal_attn = true;
     bool need_kq_pos = false;

-    enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
+    enum llama_pooling_type pooling_type = LLAMA_POOLING_NONE;
     enum llama_rope_type    rope_type    = LLAMA_ROPE_TYPE_NONE;

     bool operator!=(const llama_hparams & other) const {
@@ -3007,7 +3007,7 @@ static void llm_load_hparams(
     std::string rope_scaling("linear");
     ml.get_key(LLM_KV_ROPE_SCALING_TYPE, rope_scaling, false);
     hparams.rope_scaling_type_train = llama_rope_scaling_type_from_string(rope_scaling);
-    GGML_ASSERT(hparams.rope_scaling_type_train != LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED);
+    GGML_ASSERT(hparams.rope_scaling_type_train != LLAMA_ROPE_SCALING_UNSPECIFIED);

     // rope_freq_scale (inverse of the kv) is optional
     float ropescale = 0.0f;
@@ -3655,7 +3655,7 @@ static bool llm_load_tensors(
         model.buft_layer[i] = llama_default_buffer_type_cpu(true);
     }

-    if (split_mode == LLAMA_SPLIT_MODE_LAYER) {
+    if (split_mode == LLAMA_SPLIT_LAYER) {
         // calculate the split points
         int device_count = llama_get_device_count();
         bool all_zero = tensor_split == nullptr || std::all_of(tensor_split, tensor_split + device_count, [](float x) { return x == 0.0f; });
@@ -3694,10 +3694,10 @@ static bool llm_load_tensors(
         }
     } else {
         ggml_backend_buffer_type_t split_buft;
-        if (split_mode == LLAMA_SPLIT_MODE_ROW) {
+        if (split_mode == LLAMA_SPLIT_ROW) {
             split_buft = llama_default_buffer_type_split(main_gpu, tensor_split);
         } else {
-            // LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_LAYER in backends where it is not supported
+            // LLAMA_SPLIT_NONE or LLAMA_SPLIT_LAYER in backends where it is not supported
             split_buft = llama_default_buffer_type_offload(main_gpu);
         }
         // assign the repeating layers
@@ -5028,7 +5028,7 @@ struct llm_build_context {
         n_kv             (worst_case ? n_ctx            : kv_self.n),
         kv_head          (worst_case ? n_ctx - n_tokens : kv_self.head),
         n_orig_ctx       (cparams.n_yarn_orig_ctx),
-        pooling_type     (cparams.do_pooling ? hparams.pooling_type : LLAMA_POOLING_TYPE_NONE),
+        pooling_type     (cparams.do_pooling ? hparams.pooling_type : LLAMA_POOLING_NONE),
         rope_type        (hparams.rope_type),
         cb               (cb),
         buf_compute_meta (lctx.buf_compute_meta) {
@@ -6011,12 +6011,12 @@ struct llm_build_context {
         cur = inpL;

         // pooling layer
-        if (pooling_type == LLAMA_POOLING_TYPE_MEAN) {
+        if (pooling_type == LLAMA_POOLING_MEAN) {
             cur = ggml_mul_mat(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, cur)), inp_mean);
-        } else if (pooling_type == LLAMA_POOLING_TYPE_CLS) {
+        } else if (pooling_type == LLAMA_POOLING_CLS) {
             cur = ggml_get_rows(ctx0, cur, inp_cls);
         } else {
-            GGML_ASSERT(pooling_type == LLAMA_POOLING_TYPE_NONE && "Invalid pooling type");
+            GGML_ASSERT(pooling_type == LLAMA_POOLING_NONE && "Invalid pooling type");
         }
         cb(cur, "result_embd", -1);

@@ -7684,7 +7684,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         }
     }

-    if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {
+    if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_MEAN) {
         const int64_t n_tokens = batch.n_tokens;

         GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_mean->buffer));
@@ -7712,7 +7712,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         }
     }

-    if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {
+    if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_CLS) {
         const int64_t n_tokens = batch.n_tokens;

         GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_cls->buffer));
@@ -11286,7 +11286,7 @@ static int llama_apply_lora_from_file_internal(
 struct llama_model_params llama_model_default_params() {
     struct llama_model_params result = {
         /*.n_gpu_layers                =*/ 0,
-        /*.split_mode                  =*/ LLAMA_SPLIT_MODE_LAYER,
+        /*.split_mode                  =*/ LLAMA_SPLIT_LAYER,
         /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ nullptr,
         /*.progress_callback           =*/ nullptr,
@@ -11312,7 +11312,7 @@ struct llama_context_params llama_context_default_params() {
         /*.n_batch                     =*/ 512,
         /*.n_threads                   =*/ GGML_DEFAULT_N_THREADS, // TODO: better default
         /*.n_threads_batch             =*/ GGML_DEFAULT_N_THREADS,
-        /*.rope_scaling_type           =*/ LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
+        /*.rope_scaling_type           =*/ LLAMA_ROPE_SCALING_UNSPECIFIED,
         /*.rope_freq_base              =*/ 0.0f,
         /*.rope_freq_scale             =*/ 0.0f,
         /*.yarn_ext_factor             =*/ -1.0f,
@@ -11500,16 +11500,16 @@ struct llama_context * llama_new_context_with_model(
     cparams.cb_eval_user_data = params.cb_eval_user_data;

     auto rope_scaling_type = params.rope_scaling_type;
-    if (rope_scaling_type == LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED) {
+    if (rope_scaling_type == LLAMA_ROPE_SCALING_UNSPECIFIED) {
         rope_scaling_type = hparams.rope_scaling_type_train;
     }

-    if (rope_scaling_type == LLAMA_ROPE_SCALING_TYPE_NONE) {
+    if (rope_scaling_type == LLAMA_ROPE_SCALING_NONE) {
         cparams.rope_freq_scale = 1.0f; // never scale if scaling type is none
     }

     if (cparams.yarn_ext_factor < 0.0f) { // negative indicates 'not set'
-        cparams.yarn_ext_factor = rope_scaling_type == LLAMA_ROPE_SCALING_TYPE_YARN ? 1.0f : 0.0f;
+        cparams.yarn_ext_factor = rope_scaling_type == LLAMA_ROPE_SCALING_YARN ? 1.0f : 0.0f;
     }

     if (params.seed == LLAMA_DEFAULT_SEED) {
@@ -11543,8 +11543,8 @@ struct llama_context * llama_new_context_with_model(
     }
 #elif defined(GGML_USE_CUBLAS)
     if (model->n_gpu_layers > 0) {
-        // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
-        if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
+        // with split_mode LLAMA_SPLIT_NONE or LLAMA_SPLIT_ROW, only the main GPU backend is used
+        if (model->split_mode == LLAMA_SPLIT_NONE || model->split_mode == LLAMA_SPLIT_ROW) {
             ggml_backend_t backend = ggml_backend_cuda_init(model->main_gpu);
             if (backend == nullptr) {
                 LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, model->main_gpu);
@@ -11553,7 +11553,7 @@ struct llama_context * llama_new_context_with_model(
             }
             ctx->backends.push_back(backend);
         } else {
-            // LLAMA_SPLIT_MODE_LAYER requires a backend for each GPU
+            // LLAMA_SPLIT_LAYER requires a backend for each GPU
             for (int device = 0; device < ggml_backend_cuda_get_device_count(); ++device) {
                 ggml_backend_t backend = ggml_backend_cuda_init(device);
                 if (backend == nullptr) {

llama.h

Lines changed: 11 additions & 11 deletions
@@ -114,23 +114,23 @@ extern "C" {
     };

     enum llama_rope_scaling_type {
-        LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED = -1,
-        LLAMA_ROPE_SCALING_TYPE_NONE        = 0,
-        LLAMA_ROPE_SCALING_TYPE_LINEAR      = 1,
-        LLAMA_ROPE_SCALING_TYPE_YARN        = 2,
-        LLAMA_ROPE_SCALING_TYPE_MAX_VALUE   = LLAMA_ROPE_SCALING_TYPE_YARN,
+        LLAMA_ROPE_SCALING_UNSPECIFIED = -1,
+        LLAMA_ROPE_SCALING_NONE        = 0,
+        LLAMA_ROPE_SCALING_LINEAR      = 1,
+        LLAMA_ROPE_SCALING_YARN        = 2,
+        LLAMA_ROPE_SCALING_MAX_VALUE   = LLAMA_ROPE_SCALING_YARN,
     };

     enum llama_pooling_type {
-        LLAMA_POOLING_TYPE_NONE = 0,
-        LLAMA_POOLING_TYPE_MEAN = 1,
-        LLAMA_POOLING_TYPE_CLS  = 2,
+        LLAMA_POOLING_NONE = 0,
+        LLAMA_POOLING_MEAN = 1,
+        LLAMA_POOLING_CLS  = 2,
     };

     enum llama_split_mode {
-        LLAMA_SPLIT_MODE_NONE  = 0, // single GPU
-        LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
-        LLAMA_SPLIT_MODE_ROW   = 2, // split rows across GPUs
+        LLAMA_SPLIT_NONE  = 0, // single GPU
+        LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs
+        LLAMA_SPLIT_ROW   = 2, // split rows across GPUs
     };

     typedef struct llama_token_data {
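
For context, a minimal caller-side sketch (not part of this commit) showing the reverted public names from llama.h in use; the model path and layer count are placeholders, and backend initialization and most error handling are omitted for brevity:

    #include "llama.h"

    int main() {
        // model-level options: offload layers and split layers/KV across GPUs
        llama_model_params mparams = llama_model_default_params();
        mparams.n_gpu_layers = 99;                 // placeholder value
        mparams.split_mode   = LLAMA_SPLIT_LAYER;  // reverted name (was LLAMA_SPLIT_MODE_LAYER)

        // context-level options: leave rope scaling unspecified so the value the
        // model was trained with (from the GGUF metadata) is used
        llama_context_params cparams = llama_context_default_params();
        cparams.rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED;

        llama_model * model = llama_load_model_from_file("model.gguf", mparams); // placeholder path
        if (model == nullptr) {
            return 1;
        }
        llama_context * ctx = llama_new_context_with_model(model, cparams);

        // ... use ctx ...

        llama_free(ctx);
        llama_free_model(model);
        return 0;
    }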
