Commit d446902

naming : n_orig_ctx -> n_ctx_orig
ggml-ci
1 parent 814d57d commit d446902

14 files changed, +137 -137 lines changed

convert.py

Lines changed: 6 additions & 6 deletions

@@ -174,7 +174,7 @@ class Params:
     rope_scaling_type: gguf.RopeScalingType | None = None
     f_rope_freq_base: float | None = None
     f_rope_scale: float | None = None
-    n_orig_ctx: int | None = None
+    n_ctx_orig: int | None = None
     rope_finetuned: bool | None = None

     ftype: GGMLFileType | None = None
@@ -224,7 +224,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         with open(config_path) as f:
             config = json.load(f)

-        rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
+        rope_scaling_type = f_rope_scale = n_ctx_orig = rope_finetuned = None
         rope_scaling = config.get("rope_scaling")

         if rope_scaling is not None and (typ := rope_scaling.get("type")):
@@ -234,7 +234,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
                 rope_scaling_type = gguf.RopeScalingType.LINEAR
             elif typ == "yarn":
                 rope_scaling_type = gguf.RopeScalingType.YARN
-                n_orig_ctx = rope_scaling['original_max_position_embeddings']
+                n_ctx_orig = rope_scaling['original_max_position_embeddings']
                 rope_finetuned = rope_scaling['finetuned']
             else:
                 raise NotImplementedError(f'Unknown rope scaling type: {typ}')
@@ -270,7 +270,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
             f_rope_freq_base  = config.get("rope_theta"),
             rope_scaling_type = rope_scaling_type,
             f_rope_scale      = f_rope_scale,
-            n_orig_ctx        = n_orig_ctx,
+            n_ctx_orig        = n_ctx_orig,
             rope_finetuned    = rope_finetuned,
         )

@@ -1162,8 +1162,8 @@ def add_meta_arch(self, params: Params) -> None:
         self.gguf.add_rope_scaling_type(params.rope_scaling_type)
         self.gguf.add_rope_scaling_factor(params.f_rope_scale)

-        if params.n_orig_ctx is not None:
-            self.gguf.add_rope_scaling_orig_ctx_len(params.n_orig_ctx)
+        if params.n_ctx_orig is not None:
+            self.gguf.add_rope_scaling_orig_ctx_len(params.n_ctx_orig)

         if params.rope_finetuned is not None:
             self.gguf.add_rope_scaling_finetuned(params.rope_finetuned)
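For reference, the `rope_scaling` block this parser consumes comes straight from a Hugging Face `config.json`. A minimal sketch of the YaRN case, assuming a hypothetical config (the key names match the hunks above; the values are illustrative only):

```python
import gguf  # gguf-py, as imported by convert.py

# hypothetical excerpt of a YaRN-finetuned model's config.json
config = {
    "rope_scaling": {
        "type": "yarn",
        "original_max_position_embeddings": 4096,  # -> n_ctx_orig
        "finetuned": True,                         # -> rope_finetuned
    }
}

rope_scaling_type = f_rope_scale = n_ctx_orig = rope_finetuned = None
rope_scaling = config.get("rope_scaling")
if rope_scaling is not None and (typ := rope_scaling.get("type")):
    if typ == "yarn":
        rope_scaling_type = gguf.RopeScalingType.YARN
        n_ctx_orig = rope_scaling['original_max_position_embeddings']
        rope_finetuned = rope_scaling['finetuned']

print(n_ctx_orig, rope_finetuned)  # 4096 True
```

The writer then records this value in the GGUF metadata via `add_rope_scaling_orig_ctx_len`, as in the last hunk above.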

ggml-cuda/rope.cu

Lines changed: 2 additions & 2 deletions

@@ -209,7 +209,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const int n_dims     = ((int32_t *) dst->op_params)[1];
     const int mode       = ((int32_t *) dst->op_params)[2];
     //const int n_ctx    = ((int32_t *) dst->op_params)[3];
-    const int n_orig_ctx = ((int32_t *) dst->op_params)[4];
+    const int n_ctx_orig = ((int32_t *) dst->op_params)[4];

     // RoPE alteration for extended context
     float freq_base;
@@ -236,7 +236,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     }

     rope_corr_dims corr_dims;
-    ggml_rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims.v);
+    ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims.v);

     // compute
     if (is_neox) {
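Every backend touched by this commit decodes the same `dst->op_params` scratch area at fixed 4-byte slots; only the name of slot 4 changes. A standalone sketch of that layout using Python's `struct` module (the slot indices come from the hunks in this commit; the sample values are arbitrary):

```python
import struct

# int32 slots: [1] n_dims, [2] mode, [3] n_ctx (GLM RoPE only), [4] n_ctx_orig
# float slots: [5] freq_base, [6] freq_scale, [7] ext_factor,
#              [8] attn_factor, [9] beta_fast, [10] beta_slow
op_params = struct.pack(
    "<5i6f",
    0, 128, 0, 0, 4096,                 # slot 0 is not read in these hunks
    10000.0, 1.0, 0.0, 1.0, 32.0, 1.0,
)

ints   = struct.unpack_from("<5i", op_params, 0)
floats = struct.unpack_from("<6f", op_params, 5 * 4)
n_dims, mode, n_ctx_orig = ints[1], ints[2], ints[4]
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow = floats
print(n_dims, n_ctx_orig, freq_base)  # 128 4096 10000.0
```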

ggml-kompute.cpp

Lines changed: 5 additions & 5 deletions

@@ -1183,7 +1183,7 @@ static void ggml_vk_rope(
     const std::shared_ptr<kp::Tensor>& inB,
     const std::shared_ptr<kp::Tensor>& out,
     uint32_t inAOff, uint32_t inBOff, uint32_t outOff,
-    ggml_type src0t, int32_t n_dims, int32_t mode, int32_t n_orig_ctx,
+    ggml_type src0t, int32_t n_dims, int32_t mode, int32_t n_ctx_orig,
     float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow,
     int32_t ne01, int32_t ne02, int32_t ne03,
     uint32_t nb00, uint32_t nb01, uint32_t nb02, uint32_t nb03,
@@ -1212,14 +1212,14 @@ static void ggml_vk_rope(

     struct PushConstants {
         uint32_t inAOff, inBOff, outOff;
-        int32_t n_dims, mode, n_orig_ctx;
+        int32_t n_dims, mode, n_ctx_orig;
         float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
         uint32_t nb00, nb01, nb02, nb03;
         int32_t ne0;
         uint32_t nb0, nb1, nb2, nb3;
     } pushConsts {
         safe_divide(inAOff, type_size), safe_divide(inBOff, 4), safe_divide(outOff, type_size),
-        n_dims, mode, n_orig_ctx,
+        n_dims, mode, n_ctx_orig,
         freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow,
         nb00, nb01, nb02, nb03,
         ne0,
@@ -1686,7 +1686,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
             const int n_dims = ((int32_t *) dst->op_params)[1];
             const int mode   = ((int32_t *) dst->op_params)[2];
             // skip 3, n_ctx used in GLM RoPE, unimplemented in Vulkan
-            const int n_orig_ctx = ((int32_t *) dst->op_params)[4];
+            const int n_ctx_orig = ((int32_t *) dst->op_params)[4];

             float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
             memcpy(&freq_base, (int32_t *) dst->op_params + 5, sizeof(float));
@@ -1696,7 +1696,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
             memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
             memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
             ggml_vk_rope(
-                seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, src0t, n_dims, mode, n_orig_ctx,
+                seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, src0t, n_dims, mode, n_ctx_orig,
                 freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow,
                 ne01, ne02, ne03, nb00, nb01, nb02, nb03, ne0, nb0, nb1, nb2, nb3
             );

ggml-metal.m

Lines changed: 2 additions & 2 deletions

@@ -2283,7 +2283,7 @@ static enum ggml_status ggml_metal_graph_compute(
             const int n_dims = ((int32_t *) dst->op_params)[1];
             const int mode   = ((int32_t *) dst->op_params)[2];
             // skip 3, n_ctx, used in GLM RoPE, unimplemented in metal
-            const int n_orig_ctx = ((int32_t *) dst->op_params)[4];
+            const int n_ctx_orig = ((int32_t *) dst->op_params)[4];

             float freq_base;
             float freq_scale;
@@ -2344,7 +2344,7 @@ static enum ggml_status ggml_metal_graph_compute(
             [encoder setBytes:&nb3        length:sizeof(uint64_t) atIndex:19];
             [encoder setBytes:&n_past     length:sizeof(     int) atIndex:20];
             [encoder setBytes:&n_dims     length:sizeof(     int) atIndex:21];
-            [encoder setBytes:&n_orig_ctx length:sizeof(     int) atIndex:22];
+            [encoder setBytes:&n_ctx_orig length:sizeof(     int) atIndex:22];
             [encoder setBytes:&freq_base  length:sizeof(   float) atIndex:23];
             [encoder setBytes:&freq_scale length:sizeof(   float) atIndex:24];
             [encoder setBytes:&ext_factor length:sizeof(   float) atIndex:25];

ggml-metal.metal

Lines changed: 9 additions & 9 deletions

@@ -1671,16 +1671,16 @@ static void rope_yarn(

 // Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
 // `corr_fac(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
-static float rope_yarn_corr_factor(int n_dims, int n_orig_ctx, float n_rot, float base) {
-    return n_dims * log(n_orig_ctx / (n_rot * 2 * M_PI_F)) / (2 * log(base));
+static float rope_yarn_corr_factor(int n_dims, int n_ctx_orig, float n_rot, float base) {
+    return n_dims * log(n_ctx_orig / (n_rot * 2 * M_PI_F)) / (2 * log(base));
 }

 static void rope_yarn_corr_dims(
-    int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
+    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
 ) {
     // start and end correction dims
-    dims[0] = max(0.0f, floor(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_fast, freq_base)));
-    dims[1] = min(n_dims - 1.0f, ceil(rope_yarn_corr_factor(n_dims, n_orig_ctx, beta_slow, freq_base)));
+    dims[0] = max(0.0f, floor(rope_yarn_corr_factor(n_dims, n_ctx_orig, beta_fast, freq_base)));
+    dims[1] = min(n_dims - 1.0f, ceil(rope_yarn_corr_factor(n_dims, n_ctx_orig, beta_slow, freq_base)));
 }

 template<typename T>
@@ -1707,7 +1707,7 @@ kernel void kernel_rope_norm(
         constant uint64_t & nb3,
         constant      int & n_past,
         constant      int & n_dims,
-        constant      int & n_orig_ctx,
+        constant      int & n_ctx_orig,
         constant    float & freq_base,
         constant    float & freq_scale,
         constant    float & ext_factor,
@@ -1722,7 +1722,7 @@ kernel void kernel_rope_norm(
     const int64_t i1 = tgpig[0];

     float corr_dims[2];
-    rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims);
+    rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);

     device const int32_t * pos = src1;

@@ -1784,7 +1784,7 @@ kernel void kernel_rope_neox(
         constant uint64_t & nb3,
         constant      int & n_past,
         constant      int & n_dims,
-        constant      int & n_orig_ctx,
+        constant      int & n_ctx_orig,
         constant    float & freq_base,
         constant    float & freq_scale,
         constant    float & ext_factor,
@@ -1799,7 +1799,7 @@ kernel void kernel_rope_neox(
     const int64_t i1 = tgpig[0];

     float corr_dims[2];
-    rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims);
+    rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);

     device const int32_t * pos = src1;
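The renamed parameter is the model's pretraining context length, which the YaRN helpers above use to locate the correction dimensions: `rope_yarn_corr_factor` inverts the per-dimension rotation count n_ctx_orig / (2*pi * base^(2d/n_dims)) to find the dimension d that completes a given number of rotations. A direct Python transcription of the two shader helpers, handy for sanity-checking values (the example arguments below are illustrative, not taken from the diff):

```python
import math

def rope_yarn_corr_factor(n_dims: int, n_ctx_orig: int, n_rot: float, base: float) -> float:
    # dimension whose frequency completes `n_rot` rotations over n_ctx_orig positions
    return n_dims * math.log(n_ctx_orig / (n_rot * 2 * math.pi)) / (2 * math.log(base))

def rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow):
    # start and end correction dims, clamped to [0, n_dims - 1] as in the shader
    start = max(0.0, math.floor(rope_yarn_corr_factor(n_dims, n_ctx_orig, beta_fast, freq_base)))
    end   = min(n_dims - 1.0, math.ceil(rope_yarn_corr_factor(n_dims, n_ctx_orig, beta_slow, freq_base)))
    return start, end

# e.g. head dim 128, base 10000, original context 4096, betas (32, 1)
print(rope_yarn_corr_dims(128, 4096, 10000.0, 32.0, 1.0))  # (20.0, 46.0)
```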

ggml-sycl.cpp

Lines changed: 2 additions & 2 deletions

@@ -14008,7 +14008,7 @@ inline void ggml_sycl_op_rope(const ggml_tensor *src0, const ggml_tensor *src1,
     const int n_dims     = ((int32_t *) dst->op_params)[1];
     const int mode       = ((int32_t *) dst->op_params)[2];
     //const int n_ctx    = ((int32_t *) dst->op_params)[3];
-    const int n_orig_ctx = ((int32_t *) dst->op_params)[4];
+    const int n_ctx_orig = ((int32_t *) dst->op_params)[4];

     // RoPE alteration for extended context
     float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
@@ -14040,7 +14040,7 @@ inline void ggml_sycl_op_rope(const ggml_tensor *src0, const ggml_tensor *src1,
     }

     rope_corr_dims corr_dims;
-    ggml_rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims.v);
+    ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims.v);

     // compute
     if (is_neox) {

ggml-vulkan.cpp

Lines changed: 5 additions & 5 deletions

@@ -4293,7 +4293,7 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context * subctx, con
     const int n_dims = ((int32_t *) dst->op_params)[1];
     const int mode   = ((int32_t *) dst->op_params)[2];
     // const int n_ctx = ((int32_t *) dst->op_params)[3];
-    const int n_orig_ctx = ((int32_t *) dst->op_params)[4];
+    const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
     const float freq_base  = ((float *) dst->op_params)[5];
     const float freq_scale = ((float *) dst->op_params)[6];
     const float ext_factor = ((float *) dst->op_params)[7];
@@ -4304,7 +4304,7 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context * subctx, con
     const bool is_neox = mode & 2;

     float corr_dims[2];
-    ggml_rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims);
+    ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);

     if (is_neox) {
         const float theta_scale = powf(freq_base, -2.0f/n_dims);
@@ -6862,15 +6862,15 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
     } else if (tensor->op == GGML_OP_ROPE) {
         const int n_dims = ((int32_t *) tensor->op_params)[1];
         const int mode   = ((int32_t *) tensor->op_params)[2];
-        const int n_ggml_ctx      = ((int32_t *) tensor->op_params)[3];
-        const int n_orig_ggml_ctx = ((int32_t *) tensor->op_params)[4];
+        const int n_ctx_ggml      = ((int32_t *) tensor->op_params)[3];
+        const int n_ctx_orig_ggml = ((int32_t *) tensor->op_params)[4];
         float freq_base   = ((float *) tensor->op_params)[5];
         float freq_scale  = ((float *) tensor->op_params)[6];
         float ext_factor  = ((float *) tensor->op_params)[7];
         float attn_factor = ((float *) tensor->op_params)[8];
         float beta_fast   = ((float *) tensor->op_params)[9];
         float beta_slow   = ((float *) tensor->op_params)[10];
-        tensor_clone = ggml_rope_ext(ggml_ctx, src0_clone, src1_clone, src2_clone, n_dims, mode, n_ggml_ctx, n_orig_ggml_ctx, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
+        tensor_clone = ggml_rope_ext(ggml_ctx, src0_clone, src1_clone, src2_clone, n_dims, mode, n_ctx_ggml, n_ctx_orig_ggml, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
     } else if (tensor->op == GGML_OP_UNARY) {
         switch (ggml_get_unary_op(tensor)) {
             case GGML_UNARY_OP_SILU:
