
Commit a02a190: minor updates

1 parent e2e9a6c

5 files changed: +15 additions, -9 deletions

README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -110,6 +110,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM)
 - [x] [Moondream](https://huggingface.co/vikhyatk/moondream2)
 - [x] [Bunny](https://github.com/BAAI-DCAI/Bunny)
+- [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d)
 
 </details>
 
```

examples/llava/qwen2_vl_surgery.py

Lines changed: 7 additions & 2 deletions

```diff
@@ -50,7 +50,6 @@ def find_vision_tensors(qwen2vl, dtype) -> Dict[str, np.ndarray]:
             tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "q")] = wq
             tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "k")] = wk
             tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "v")] = wv
-            # breakpoint()
         elif 'merger' in name:
             if name.endswith("ln_q.weight"):
                 tensor_map['v.post_ln.weight'] = ten
@@ -97,7 +96,12 @@ def main(args):
     cfg: Qwen2VLConfig = qwen2vl.config  # type: ignore[reportAssignmentType]
     vcfg = cfg.vision_config
 
-    fname_out = "qwen2vl-vision.gguf"
+    if os.path.isdir(model_name):
+        if model_name.endswith(os.sep):
+            model_name = model_name[:-1]
+        model_name = os.path.basename(model_name)
+    fname_out = f"{model_name.replace('/', '-').lower()}-vision.gguf"
+
     fout = GGUFWriter(path=fname_out, arch="clip")
     fout.add_description("image encoder for Qwen2VL")
 
@@ -143,6 +147,7 @@ def main(args):
     fout.write_kv_data_to_file()
     fout.write_tensors_to_file()
     fout.close()
+    print("save model as: ", fname_out)
 
 
 if __name__ == "__main__":
```
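With this change the converter derives the output filename from its model argument instead of always writing `qwen2vl-vision.gguf`: a trailing path separator is stripped, a local directory is reduced to its basename, and any `/` in a Hugging Face model id is replaced by `-`. For example (the model names here are illustrative), converting `Qwen/Qwen2-VL-7B-Instruct` would now produce `qwen-qwen2-vl-7b-instruct-vision.gguf`, so conversions of different checkpoints no longer overwrite one another; the added `print` echoes the chosen name.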

ggml/src/ggml-cuda/rope.cu

Lines changed: 0 additions & 1 deletion

```diff
@@ -397,7 +397,6 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const int mode = ((int32_t *) dst->op_params)[2];
     //const int n_ctx = ((int32_t *) dst->op_params)[3];
     const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
-    // int sections[4];
     mrope_sections sections;
 
     // RoPE alteration for extended context
```
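The deleted comment is a leftover from before the four M-RoPE section sizes were wrapped in a struct. For context, a minimal sketch of how the CUDA side can recover those sizes, assuming `mrope_sections` simply wraps four ints and that they sit at `op_params[11..14]` as the ggml.c hunk below writes them:

```cpp
#include <cstdint>
#include <cstring>

// Assumption: mrope_sections wraps the four M-RoPE section sizes.
struct mrope_sections {
    int v[4];
};

// Hedged sketch: read back the section sizes that ggml_rope_impl
// packed into op_params starting at index 11 (see the ggml.c hunk).
static mrope_sections read_mrope_sections(const int32_t * op_params) {
    mrope_sections sections;
    memcpy(sections.v, op_params + 11, sizeof(int) * 4);
    return sections;
}
```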

ggml/src/ggml.c

Lines changed: 1 addition & 1 deletion

```diff
@@ -3528,7 +3528,7 @@ static struct ggml_tensor * ggml_rope_impl(
     memcpy(params + 8, &attn_factor, sizeof(float));
     memcpy(params + 9, &beta_fast, sizeof(float));
     memcpy(params + 10, &beta_slow, sizeof(float));
-    memcpy(params + 11, &sections, sizeof(int) * 4);
+    memcpy(params + 11, &sections, sizeof(int)*4);
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ROPE;
```
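This is a whitespace-only touch-up, but it pinpoints where the M-RoPE section sizes live in the packed RoPE parameter block. A hedged reconstruction of that layout, pieced together from the indices visible in this commit (mode at `[2]`, the retired `n_ctx` at `[3]`, and `n_ctx_orig` at `[4]` in rope.cu above; `attn_factor` through `sections` at `[8..14]` here); the slots marked "assumed" are not shown in this commit:

```cpp
#include <cstdint>
#include <cstring>

// Hedged sketch of ggml_rope_impl's op_params block, not the actual
// ggml code; "assumed" slots are guesses consistent with the indices
// that do appear in this commit.
void pack_rope_params(int32_t params[15],
                      int32_t n_dims, int32_t mode, int32_t n_ctx_orig,
                      float freq_base, float freq_scale, float ext_factor,
                      float attn_factor, float beta_fast, float beta_slow,
                      const int32_t sections[4]) {
    params[0] = 0;          // assumed: legacy/unused slot
    params[1] = n_dims;     // assumed: rotary dimension count
    params[2] = mode;       // RoPE mode flags (read back in rope.cu)
    params[3] = 0;          // legacy n_ctx, commented out in rope.cu
    params[4] = n_ctx_orig; // original training context (rope.cu)
    memcpy(params +  5, &freq_base,   sizeof(float)); // assumed slot
    memcpy(params +  6, &freq_scale,  sizeof(float)); // assumed slot
    memcpy(params +  7, &ext_factor,  sizeof(float)); // assumed slot
    memcpy(params +  8, &attn_factor, sizeof(float));
    memcpy(params +  9, &beta_fast,   sizeof(float));
    memcpy(params + 10, &beta_slow,   sizeof(float));
    memcpy(params + 11, sections, sizeof(int32_t) * 4); // four M-RoPE sections
}
```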

src/llama.cpp

Lines changed: 6 additions & 5 deletions

```diff
@@ -2558,6 +2558,9 @@ struct llama_hparams {
         if (this->rope_finetuned != other.rope_finetuned) return true;
         if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
         if (this->rope_sections != other.rope_sections) return true;
+        if (std::equal(std::begin(this->rope_sections),
+                       std::end(this->rope_sections),
+                       std::begin(other.rope_sections))) return true;
 
         if (this->ssm_d_conv != other.ssm_d_conv) return true;
         if (this->ssm_d_inner != other.ssm_d_inner) return true;
```
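One thing to double-check in this hunk: `std::equal` returns true when the two ranges match, while the surrounding checks return true when a value differs, and the pointer-style `!=` comparison on the preceding context line is still in place. If the intent is "return true when the section sizes changed", the element-wise test would be negated; a minimal sketch of that conventional form, assuming `rope_sections` is a four-element int array:

```cpp
#include <algorithm>
#include <cstdint>
#include <iterator>

// Hedged sketch, not the commit's code: report "changed" (true) when
// any of the four M-RoPE section sizes differs, mirroring the
// surrounding `if (... != ...) return true;` checks.
static bool rope_sections_changed(const int32_t (&a)[4], const int32_t (&b)[4]) {
    return !std::equal(std::begin(a), std::end(a), std::begin(b));
}
```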
```diff
@@ -3421,8 +3424,6 @@ struct llama_context {
     struct ggml_tensor * inp_tokens;      // I32 [n_batch]
     struct ggml_tensor * inp_embd;        // F32 [n_embd, n_batch]
     struct ggml_tensor * inp_pos;         // I32 [n_batch]
-    struct ggml_tensor * inp_pos_w;       // I32 [n_batch] second-dimension of m-rope position index
-    struct ggml_tensor * inp_pos_h;       // I32 [n_batch] third-dimension of m-rope position index
     struct ggml_tensor * inp_out_ids;     // I32 [n_outputs]
     struct ggml_tensor * inp_KQ_mask;     // F32 [kv_size, n_batch]
     struct ggml_tensor * inp_KQ_mask_swa; // F32 [kv_size, n_batch]
```
```diff
@@ -12606,7 +12607,6 @@ struct llm_build_context {
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, ubatch, model.tok_embd, cb);
 
         // inp_pos - contains the positions
-        // struct ggml_tensor * inp_pos = build_inp_pos();
         lctx.inp_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens * 4);
         cb(lctx.inp_pos, "inp_pos", -1);
         ggml_set_input(lctx.inp_pos);
```
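Note that `inp_pos` is allocated with `n_tokens * 4` entries rather than `n_tokens`: M-RoPE carries several position components per token, which is also why the separate `inp_pos_w` and `inp_pos_h` tensors could be dropped in the hunk above in favor of this single buffer. The exact component order (commonly described for Qwen2-VL as temporal, height, width, plus a spare section) is an assumption here, matching the four-element `sections` array passed to `ggml_rope_multi` below.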
```diff
@@ -12646,14 +12646,15 @@
 
                 Qcur = ggml_rope_multi(
                     ctx0,
-                    ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr,
+                    ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr,
                     n_rot, sections, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow
                 );
                 cb(Qcur, "Qcur", il);
 
                 Kcur = ggml_rope_multi(
-                    ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, nullptr,
+                    ctx0,
+                    ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, nullptr,
                     n_rot, sections, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow
                 );
```
