Commit 2b33896

ggml : refactor rope norm/neox (#7634)
* ggml : unify rope norm/neox (CPU)
* ggml : fix compile warning
* ggml : remove GLM rope mode ggml-ci
* metal : better rope implementation ggml-ci
* cuda : better rope implementation ggml-ci
* naming : n_orig_ctx -> n_ctx_orig ggml-ci
* dev : add reminders to update backends ggml-ci
* vulkan : fix ggml_rope_ext() usage
* cuda : fix array size + indents ggml-ci
1 parent 9973e81 commit 2b33896

19 files changed: +452 -699 lines
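In short, the refactor drops the separate n_ctx argument from the rope operators (it was only relevant to the GLM rope mode, which is removed) and renames n_orig_ctx to n_ctx_orig. A sketch of the resulting change to ggml_rope_ext(), reconstructed from the call sites updated below; the parameter names are illustrative and the authoritative declaration is the one in ggml.h of this commit:

    // before: both a GLM-era n_ctx and the YaRN reference length n_orig_ctx
    // struct ggml_tensor * ggml_rope_ext(ctx, a, pos, freq_factors,
    //         n_dims, mode, n_ctx, n_orig_ctx,
    //         freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
    //
    // after: a single context-length parameter, renamed to n_ctx_orig
    // struct ggml_tensor * ggml_rope_ext(ctx, a, pos, freq_factors,
    //         n_dims, mode, n_ctx_orig,
    //         freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);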

examples/baby-llama/baby-llama.cpp

Lines changed: 6 additions & 6 deletions
@@ -522,8 +522,8 @@ static struct ggml_tensor * forward(
     // wk shape [n_embd, n_embd, 1, 1]
     // Qcur shape [n_embd/n_head, n_head, N, 1]
     // Kcur shape [n_embd/n_head, n_head, N, 1]
-    struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0, 0);
-    struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0, 0);
+    struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0);
+    struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0);

     // store key and value to memory
     {
@@ -759,8 +759,8 @@ static struct ggml_tensor * forward_batch(
     // wk shape [n_embd, n_embd, 1, 1]
     // Qcur shape [n_embd/n_head, n_head, N, n_batch]
     // Kcur shape [n_embd/n_head, n_head, N, n_batch]
-    struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0, 0);
-    struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0, 0);
+    struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0);
+    struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0);
     assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch);
     assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch);

@@ -1056,7 +1056,7 @@ static struct ggml_tensor * forward_lora(
                     model->layers[il].wqb,
                     cur)),
                 n_embd/n_head, n_head, N),
-            KQ_pos, n_rot, 0, 0);
+            KQ_pos, n_rot, 0);
     struct ggml_tensor * Kcur = ggml_rope(ctx0,
         ggml_reshape_3d(ctx0,
             ggml_mul_mat(ctx0,
@@ -1065,7 +1065,7 @@ static struct ggml_tensor * forward_lora(
                     model->layers[il].wkb,
                     cur)),
                 n_embd/n_head, n_head, N),
-            KQ_pos, n_rot, 0, 0);
+            KQ_pos, n_rot, 0);

     // store key and value to memory
     {
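For reference, a minimal sketch of the simplified plain-rope call used above, assuming the post-commit ggml.h; the helper name apply_rope and the shapes in the comments are illustrative, not part of the commit:

    #include "ggml.h"

    // Plain RoPE after this commit: the trailing n_ctx argument is gone, so the
    // call takes only the tensor, the positions, n_rot and the mode.
    static struct ggml_tensor * apply_rope(struct ggml_context * ctx,
                                           struct ggml_tensor  * cur,   // e.g. [n_embd/n_head, n_head, N]
                                           struct ggml_tensor  * pos,   // per-token positions (GGML_TYPE_I32)
                                           int                   n_rot) {
        const int rope_mode = 0; // 0 = "norm" (interleaved) layout; 2 selects the NeoX layout
        return ggml_rope(ctx, cur, pos, n_rot, rope_mode);
    }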

examples/convert-legacy-llama.py

Lines changed: 6 additions & 6 deletions
@@ -176,7 +176,7 @@ class Params:
     rope_scaling_type: gguf.RopeScalingType | None = None
     f_rope_freq_base: float | None = None
     f_rope_scale: float | None = None
-    n_orig_ctx: int | None = None
+    n_ctx_orig: int | None = None
     rope_finetuned: bool | None = None

     ftype: GGMLFileType | None = None
@@ -226,7 +226,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
        with open(config_path) as f:
            config = json.load(f)

-        rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
+        rope_scaling_type = f_rope_scale = n_ctx_orig = rope_finetuned = None
        rope_scaling = config.get("rope_scaling")

        if rope_scaling is not None and (typ := rope_scaling.get("type")):
@@ -236,7 +236,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
                rope_scaling_type = gguf.RopeScalingType.LINEAR
            elif typ == "yarn":
                rope_scaling_type = gguf.RopeScalingType.YARN
-                n_orig_ctx = rope_scaling['original_max_position_embeddings']
+                n_ctx_orig = rope_scaling['original_max_position_embeddings']
                rope_finetuned = rope_scaling['finetuned']
            else:
                raise NotImplementedError(f'Unknown rope scaling type: {typ}')
@@ -272,7 +272,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
            f_rope_freq_base = config.get("rope_theta"),
            rope_scaling_type = rope_scaling_type,
            f_rope_scale = f_rope_scale,
-            n_orig_ctx = n_orig_ctx,
+            n_ctx_orig = n_ctx_orig,
            rope_finetuned = rope_finetuned,
        )

@@ -864,8 +864,8 @@ def add_meta_arch(self, params: Params) -> None:
        self.gguf.add_rope_scaling_type(params.rope_scaling_type)
        self.gguf.add_rope_scaling_factor(params.f_rope_scale)

-        if params.n_orig_ctx is not None:
-            self.gguf.add_rope_scaling_orig_ctx_len(params.n_orig_ctx)
+        if params.n_ctx_orig is not None:
+            self.gguf.add_rope_scaling_orig_ctx_len(params.n_ctx_orig)

        if params.rope_finetuned is not None:
            self.gguf.add_rope_scaling_finetuned(params.rope_finetuned)

examples/finetune/finetune.cpp

Lines changed: 1 addition & 1 deletion
@@ -564,7 +564,7 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
         const int rope_mode = 0;

         return ggml_rope_ext(ctx,
-            t, KQ_pos, nullptr, n_rot, rope_mode, n_ctx, 0,
+            t, KQ_pos, nullptr, n_rot, rope_mode, n_ctx,
             rope_freq_base, rope_freq_scale, 0.0f, 1.0f, 0.0f, 0.0f
         );
     };
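An annotated version of the updated call above may help map the positional arguments onto the new parameter order. This is a sketch: the comments describe the parameters as understood from the post-commit API, so treat them as an assumption rather than documentation:

    return ggml_rope_ext(ctx,
        t,                  // tensor to rotate (query/key heads)
        KQ_pos,             // per-token positions (GGML_TYPE_I32)
        nullptr,            // optional frequency-factor tensor, unused here
        n_rot,              // number of rotated dimensions
        rope_mode,          // 0 = "norm" layout (NeoX layout would be mode 2)
        n_ctx,              // passed as n_ctx_orig, the YaRN reference context length
        rope_freq_base,     // theta base
        rope_freq_scale,    // linear frequency scale
        0.0f,               // ext_factor  (YaRN mix; 0 disables the ramp)
        1.0f,               // attn_factor (attention magnitude scale)
        0.0f, 0.0f          // beta_fast, beta_slow (YaRN correction range)
    );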

examples/train-text-from-scratch/train-text-from-scratch.cpp

Lines changed: 1 addition & 1 deletion
@@ -302,7 +302,7 @@ static struct ggml_tensor * llama_build_train_graphs(
         const int rope_mode = 0;

         return ggml_rope_ext(
-            ctx, t, KQ_pos, nullptr, n_rot, rope_mode, n_ctx, 0, rope_freq_base, rope_freq_scale, 0.0f, 1.0f, 0.0f, 0.0f
+            ctx, t, KQ_pos, nullptr, n_rot, rope_mode, n_ctx, rope_freq_base, rope_freq_scale, 0.0f, 1.0f, 0.0f, 0.0f
         );
     };
