Skip to content

Commit 2a03ea7

Browse files
committed
add GGML_ROPE_TYPE_MROPE, GGML_ROPE_TYPE_VISION
1 parent 5920fe5 commit 2a03ea7

File tree

6 files changed

+39
-18
lines changed

6 files changed

+39
-18
lines changed

examples/llava/clip.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
754754
if (ctx->has_qwen2vl_merger) {
755755
Q = ggml_mrope_ext(
756756
ctx0, Q, positions, nullptr,
757-
d_head/2, mrope_sections, 2 /*LLAMA_ROPE_TYPE_NEOX*/, 32768, 10000, 1, 0, 1, 32, 1);
757+
d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);
758758
}
759759
Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
760760
Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3));
@@ -767,7 +767,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
767767
if (ctx->has_qwen2vl_merger) {
768768
K = ggml_mrope_ext(
769769
ctx0, K, positions, nullptr,
770-
d_head/2, mrope_sections, 2 /*LLAMA_ROPE_TYPE_NEOX*/, 32768, 10000, 1, 0, 1, 32, 1);
770+
d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);
771771
}
772772
K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3));
773773
K = ggml_reshape_3d(ctx0, K, d_head, num_positions, n_head * batch_size);
@@ -1294,8 +1294,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
12941294
idx = get_key_idx(ctx, KEY_USE_GELU);
12951295
new_clip->use_gelu = gguf_get_val_bool(ctx, idx);
12961296

1297-
idx = get_key_idx(ctx, KEY_USE_SILU);
1298-
new_clip->use_silu = gguf_get_val_bool(ctx, idx);
1297+
try {
1298+
idx = get_key_idx(ctx, KEY_USE_SILU);
1299+
new_clip->use_silu = gguf_get_val_bool(ctx, idx);
1300+
} catch (std::runtime_error & /*e*/) {
1301+
new_clip->use_silu = false;
1302+
}
12991303

13001304
if (verbosity >= 1) {
13011305
LOG_INF("%s: text_encoder: %d\n", __func__, new_clip->has_text_encoder);

ggml/include/ggml.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,9 @@
237237
#define GGML_EXIT_SUCCESS 0
238238
#define GGML_EXIT_ABORTED 1
239239

240-
#define GGML_ROPE_TYPE_NEOX 2
240+
#define GGML_ROPE_TYPE_NEOX 2
241+
#define GGML_ROPE_TYPE_MROPE 4
242+
#define GGML_ROPE_TYPE_VISION 12
241243

242244
#define GGUF_MAGIC "GGUF"
243245

ggml/src/ggml-cuda/rope.cu

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -414,11 +414,15 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
414414
memcpy(&attn_factor, (int32_t *) dst->op_params + 8, sizeof(float));
415415
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
416416
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
417-
memcpy(&sections.v, (int32_t *) dst->op_params + 11, sizeof(int)*4);
417+
memcpy(&sections.v, (int32_t *) dst->op_params + 11, sizeof(int)*4);
418418

419-
const bool is_mrope = sections.v[0] > 0 || sections.v[1] > 0 || sections.v[2] > 0;
420-
const bool is_vision = is_mrope && sections.v[3] > 0;
421-
const bool is_neox = (mode & GGML_ROPE_TYPE_NEOX) & !(is_mrope || is_vision); // TODO: fix this with new rope type
419+
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
420+
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
421+
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
422+
423+
if (is_mrope) {
424+
GGML_ASSERT(sections.v[0] > 0 || sections.v[1] > 0 || sections.v[2] > 0);
425+
}
422426

423427
if (is_vision) {
424428
GGML_ASSERT(n_dims == ne00/2);

ggml/src/ggml.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11376,8 +11376,12 @@ static void ggml_compute_forward_rope_f32(
1137611376
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
1137711377

1137811378
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
11379-
const bool is_mrope = sections[0] > 0 || sections[1] > 0 || sections[2] > 0;
11380-
const bool is_vision = is_mrope && sections[3] > 0;
11379+
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
11380+
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
11381+
11382+
if (is_mrope) {
11383+
GGML_ASSERT(sections[0] > 0 || sections[1] > 0 || sections[2] > 0);
11384+
}
1138111385

1138211386
if (is_vision) {
1138311387
GGML_ASSERT(n_dims == ne0/2);
@@ -11556,8 +11560,12 @@ static void ggml_compute_forward_rope_f16(
1155611560
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
1155711561

1155811562
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
11559-
const bool is_mrope = sections[0] > 0 || sections[1] > 0 || sections[2] > 0;
11560-
const bool is_vision = is_mrope && sections[3] > 0;
11563+
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
11564+
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
11565+
11566+
if (is_mrope) {
11567+
GGML_ASSERT(sections[0] > 0 || sections[1] > 0 || sections[2] > 0);
11568+
}
1156111569

1156211570
if (is_vision) {
1156311571
GGML_ASSERT(n_dims == ne0/2);
@@ -11579,7 +11587,6 @@ static void ggml_compute_forward_rope_f16(
1157911587

1158011588
for (int64_t i3 = 0; i3 < ne3; i3++) {
1158111589
for (int64_t i2 = 0; i2 < ne2; i2++) {
11582-
const int64_t p = pos[i2];
1158311590

1158411591
float * cache = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32)*ith;
1158511592
if (!is_mrope) {

include/llama.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,11 @@ extern "C" {
107107
};
108108

109109
enum llama_rope_type {
110-
LLAMA_ROPE_TYPE_NONE = -1,
111-
LLAMA_ROPE_TYPE_NORM = 0,
112-
LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
110+
LLAMA_ROPE_TYPE_NONE = -1,
111+
LLAMA_ROPE_TYPE_NORM = 0,
112+
LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
113+
LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE,
114+
LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION,
113115
};
114116

115117
enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file

src/llama.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19930,7 +19930,6 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
1993019930
case LLM_ARCH_BITNET:
1993119931
case LLM_ARCH_QWEN:
1993219932
case LLM_ARCH_QWEN2:
19933-
case LLM_ARCH_QWEN2VL:
1993419933
case LLM_ARCH_QWEN2MOE:
1993519934
case LLM_ARCH_OLMOE:
1993619935
case LLM_ARCH_PHI2:
@@ -19945,6 +19944,9 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
1994519944
case LLM_ARCH_EXAONE:
1994619945
case LLM_ARCH_MINICPM3:
1994719946
return LLAMA_ROPE_TYPE_NEOX;
19947+
19948+
case LLM_ARCH_QWEN2VL:
19949+
return LLAMA_ROPE_TYPE_MROPE;
1994819950

1994919951
// all model arches should be listed explicitly here
1995019952
case LLM_ARCH_UNKNOWN:

0 commit comments

Comments (0)