Commit 89b2a43

llama : cont k-shift refactoring + normalize type names
ggml-ci
1 parent dd39219 commit 89b2a43

File tree: 6 files changed (+199, -202 lines)
common/common.cpp

Lines changed: 6 additions & 6 deletions

@@ -295,9 +295,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             std::string value(argv[i]);
-            /**/ if (value == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_NONE; }
-            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_LINEAR; }
-            else if (value == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_YARN; }
+            /**/ if (value == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
+            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
+            else if (value == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
             else { invalid_param = true; break; }
         } else if (arg == "--rope-scale") {
             if (++i >= argc) {
@@ -630,11 +630,11 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
             std::string arg_next = argv[i];
             if (arg_next == "none") {
-                params.split_mode = LLAMA_SPLIT_NONE;
+                params.split_mode = LLAMA_SPLIT_MODE_NONE;
             } else if (arg_next == "layer") {
-                params.split_mode = LLAMA_SPLIT_LAYER;
+                params.split_mode = LLAMA_SPLIT_MODE_LAYER;
             } else if (arg_next == "row") {
-                params.split_mode = LLAMA_SPLIT_ROW;
+                params.split_mode = LLAMA_SPLIT_MODE_ROW;
             } else {
                 invalid_param = true;
                 break;
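
The commit normalizes enum value names by inserting the category word into each constant: LLAMA_ROPE_SCALING_* becomes LLAMA_ROPE_SCALING_TYPE_*, and LLAMA_SPLIT_* becomes LLAMA_SPLIT_MODE_*. For out-of-tree callers still on the old spellings, a thin compatibility header is one way to bridge the rename. This is a hypothetical sketch, not something this commit provides; it assumes the new values are declared in llama.h.

    // compat_names.h: hypothetical shim, not part of llama.cpp
    #pragma once
    #include "llama.h"

    // pre-rename identifiers mapped onto the normalized ones
    #define LLAMA_ROPE_SCALING_NONE   LLAMA_ROPE_SCALING_TYPE_NONE
    #define LLAMA_ROPE_SCALING_LINEAR LLAMA_ROPE_SCALING_TYPE_LINEAR
    #define LLAMA_ROPE_SCALING_YARN   LLAMA_ROPE_SCALING_TYPE_YARN

    #define LLAMA_SPLIT_NONE          LLAMA_SPLIT_MODE_NONE
    #define LLAMA_SPLIT_LAYER         LLAMA_SPLIT_MODE_LAYER
    #define LLAMA_SPLIT_ROW           LLAMA_SPLIT_MODE_ROW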

common/common.h

Lines changed: 2 additions & 2 deletions

@@ -61,7 +61,7 @@ struct gpt_params {
     float   p_split            = 0.1f; // speculative decoding split probability
     int32_t n_gpu_layers       = -1;   // number of layers to store in VRAM (-1 - use default)
     int32_t n_gpu_layers_draft = -1;   // number of layers to store in VRAM for the draft model (-1 - use default)
-    llama_split_mode split_mode = LLAMA_SPLIT_LAYER; // how to split the model across GPUs
+    llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
     int32_t main_gpu           = 0;    // the GPU that is used for scratch and small tensors
     float   tensor_split[128]  = {0};  // how split tensors should be distributed across GPUs
     int32_t n_beams            = 0;    // if non-zero then use beam search of given width.
@@ -75,7 +75,7 @@ struct gpt_params {
     float   yarn_beta_fast     = 32.0f; // YaRN low correction dim
     float   yarn_beta_slow     = 1.0f;  // YaRN high correction dim
     int32_t yarn_orig_ctx      = 0;     // YaRN original context length
-    int32_t rope_scaling_type  = LLAMA_ROPE_SCALING_UNSPECIFIED;
+    int32_t rope_scaling_type  = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
     ggml_numa_strategy numa    = GGML_NUMA_STRATEGY_DISABLED;

     // // sampling parameters
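
A minimal usage sketch of the renamed defaults, assuming gpt_params from common.h as shown above; the particular overrides are illustrative only.

    #include "common.h"

    int main() {
        gpt_params params;                                        // split_mode defaults to LLAMA_SPLIT_MODE_LAYER
        params.split_mode        = LLAMA_SPLIT_MODE_ROW;          // split tensors row-wise across GPUs instead
        params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN;  // request YaRN RoPE scaling explicitly
        return 0;
    }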

examples/llama-bench/llama-bench.cpp

Lines changed: 7 additions & 7 deletions

@@ -157,9 +157,9 @@ static const char * output_format_str(output_formats format) {

 static const char * split_mode_str(llama_split_mode mode) {
     switch (mode) {
-        case LLAMA_SPLIT_NONE:  return "none";
-        case LLAMA_SPLIT_LAYER: return "layer";
-        case LLAMA_SPLIT_ROW:   return "row";
+        case LLAMA_SPLIT_MODE_NONE:  return "none";
+        case LLAMA_SPLIT_MODE_LAYER: return "layer";
+        case LLAMA_SPLIT_MODE_ROW:   return "row";
         default: GGML_ASSERT(!"invalid split mode");
     }
 }
@@ -193,7 +193,7 @@ static const cmd_params cmd_params_defaults = {
     /* type_v        */ {GGML_TYPE_F16},
     /* n_threads     */ {get_num_physical_cores()},
     /* n_gpu_layers  */ {99},
-    /* split_mode    */ {LLAMA_SPLIT_LAYER},
+    /* split_mode    */ {LLAMA_SPLIT_MODE_LAYER},
     /* main_gpu      */ {0},
     /* no_kv_offload */ {false},
     /* mul_mat_q     */ {true},
@@ -358,11 +358,11 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
         for (const auto & m : p) {
             llama_split_mode mode;
             if (m == "none") {
-                mode = LLAMA_SPLIT_NONE;
+                mode = LLAMA_SPLIT_MODE_NONE;
             } else if (m == "layer") {
-                mode = LLAMA_SPLIT_LAYER;
+                mode = LLAMA_SPLIT_MODE_LAYER;
             } else if (m == "row") {
-                mode = LLAMA_SPLIT_ROW;
+                mode = LLAMA_SPLIT_MODE_ROW;
             } else {
                 invalid_param = true;
                 break;
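
split_mode_str() above maps a llama_split_mode value back to its CLI spelling, while the parse loop does the reverse inline. A hypothetical free-standing inverse, assuming only the names visible in this diff:

    #include <string>
    #include "llama.h"

    // hypothetical inverse of split_mode_str(); returns false for unknown spellings
    static bool split_mode_from_str(const std::string & s, llama_split_mode & mode) {
        if (s == "none")  { mode = LLAMA_SPLIT_MODE_NONE;  return true; }
        if (s == "layer") { mode = LLAMA_SPLIT_MODE_LAYER; return true; }
        if (s == "row")   { mode = LLAMA_SPLIT_MODE_ROW;   return true; }
        return false;
    }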

examples/server/server.cpp

Lines changed: 6 additions & 6 deletions

@@ -2082,9 +2082,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                 break;
             }
             std::string value(argv[i]);
-            /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_NONE; }
-            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_LINEAR; }
-            else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_YARN; }
+            /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
+            else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
+            else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
             else { invalid_param = true; break; }
         }
         else if (arg == "--rope-freq-base")
@@ -2208,15 +2208,15 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
         std::string arg_next = argv[i];
         if (arg_next == "none")
         {
-            params.split_mode = LLAMA_SPLIT_NONE;
+            params.split_mode = LLAMA_SPLIT_MODE_NONE;
         }
         else if (arg_next == "layer")
         {
-            params.split_mode = LLAMA_SPLIT_LAYER;
+            params.split_mode = LLAMA_SPLIT_MODE_LAYER;
         }
         else if (arg_next == "row")
         {
-            params.split_mode = LLAMA_SPLIT_ROW;
+            params.split_mode = LLAMA_SPLIT_MODE_ROW;
         }
         else {
             invalid_param = true;
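
The rope-scaling string mapping here mirrors the one in common.cpp, so a shared helper could remove the duplication. A minimal sketch under that assumption; the helper name is hypothetical and only the constants come from this diff.

    #include <string>
    #include "llama.h"

    // hypothetical shared helper for the --rope-scaling argument
    static int32_t rope_scaling_type_from_str(const std::string & value, bool & ok) {
        ok = true;
        if (value == "none")   { return LLAMA_ROPE_SCALING_TYPE_NONE; }
        if (value == "linear") { return LLAMA_ROPE_SCALING_TYPE_LINEAR; }
        if (value == "yarn")   { return LLAMA_ROPE_SCALING_TYPE_YARN; }
        ok = false;
        return LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
    }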
