Skip to content

Commit cbd08b4

Browse files
committed
resolve linter, test errors
1 parent fac0345 commit cbd08b4

File tree

11 files changed

+168
-147
lines changed

11 files changed

+168
-147
lines changed

CMakePresets.json

Lines changed: 81 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,85 @@
11
{
22
"version": 4,
33
"configurePresets": [
4-
{
5-
"name": "base",
6-
"hidden": true,
7-
"generator": "Ninja",
8-
"binaryDir": "${sourceDir}/build-${presetName}",
9-
"cacheVariables": {
10-
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
11-
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
12-
}
13-
},
14-
{
15-
"name": "sycl-base",
16-
"hidden": true,
17-
"generator": "Ninja",
18-
"binaryDir": "${sourceDir}/build-${presetName}",
19-
"cacheVariables": {
20-
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
21-
"CMAKE_CXX_COMPILER": "icx",
22-
"CMAKE_C_COMPILER": "cl",
23-
"GGML_SYCL": "ON",
24-
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
25-
}
26-
},
27-
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
28-
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
29-
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
30-
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
31-
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
32-
33-
{
34-
"name": "arm64-windows-msvc", "hidden": true,
35-
"architecture": { "value": "arm64", "strategy": "external" },
36-
"toolset": { "value": "host=x64", "strategy": "external" },
37-
"cacheVariables": {
38-
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
39-
}
40-
},
41-
42-
{
43-
"name": "arm64-windows-llvm", "hidden": true,
44-
"architecture": { "value": "arm64", "strategy": "external" },
45-
"toolset": { "value": "host=x64", "strategy": "external" },
46-
"cacheVariables": {
47-
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
48-
}
49-
},
50-
51-
{
52-
"name": "arm64-apple-clang", "hidden": true,
53-
"architecture": { "value": "arm64", "strategy": "external" },
54-
"toolset": { "value": "host=x64", "strategy": "external" },
55-
"cacheVariables": {
56-
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
57-
}
58-
},
59-
60-
{ "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
61-
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
62-
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
63-
64-
{ "name": "arm64-apple-clang-debug" , "inherits": [ "base", "arm64-apple-clang", "debug" ] },
65-
{ "name": "arm64-apple-clang-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
66-
{ "name": "arm64-apple-clang+static-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
67-
68-
{ "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
69-
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
70-
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
71-
72-
{ "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
73-
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
74-
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
75-
76-
{ "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
77-
{ "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
78-
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
79-
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
80-
{
81-
"name": "x86-cuda-linux",
82-
"description": "",
83-
"displayName": "",
84-
"inherits": [
85-
"base",
86-
"debug"
87-
],
88-
"cacheVariables": {
89-
"GGML_CUDA": "1",
90-
"CUDA_PATH": "/usr/local/cuda",
91-
"CUDAToolkit_ROOT": "/usr/local/cuda",
92-
"CUDAToolkit_INCLUDE_DIR": "/usr/local/cuda/include/",
93-
"CUDAToolkit_LIBRARY_DIR": "/usr/local/cuda/lib64",
94-
"CUDA_NVCC_FLAGS": "-g -G",
95-
"CMAKE_CUDA_FLAGS_DEBUG": "-g -G",
96-
"CMAKE_CUDA_FLAGS": "-maxrregcount=40"
97-
}
98-
}
4+
{
5+
"name": "base",
6+
"hidden": true,
7+
"generator": "Ninja",
8+
"binaryDir": "${sourceDir}/build-${presetName}",
9+
"cacheVariables": {
10+
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
11+
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
12+
}
13+
},
14+
{
15+
"name": "sycl-base",
16+
"hidden": true,
17+
"generator": "Ninja",
18+
"binaryDir": "${sourceDir}/build-${presetName}",
19+
"cacheVariables": {
20+
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
21+
"CMAKE_CXX_COMPILER": "icx",
22+
"CMAKE_C_COMPILER": "cl",
23+
"GGML_SYCL": "ON",
24+
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
25+
}
26+
},
27+
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
28+
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
29+
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
30+
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
31+
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
32+
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
33+
34+
{
35+
"name": "arm64-windows-msvc", "hidden": true,
36+
"architecture": { "value": "arm64", "strategy": "external" },
37+
"toolset": { "value": "host=x64", "strategy": "external" },
38+
"cacheVariables": {
39+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
40+
}
41+
},
42+
43+
{
44+
"name": "arm64-windows-llvm", "hidden": true,
45+
"architecture": { "value": "arm64", "strategy": "external" },
46+
"toolset": { "value": "host=x64", "strategy": "external" },
47+
"cacheVariables": {
48+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
49+
}
50+
},
51+
52+
{
53+
"name": "arm64-apple-clang", "hidden": true,
54+
"architecture": { "value": "arm64", "strategy": "external" },
55+
"toolset": { "value": "host=x64", "strategy": "external" },
56+
"cacheVariables": {
57+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
58+
}
59+
},
60+
61+
{ "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
62+
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
63+
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
64+
65+
{ "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
66+
{ "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
67+
{ "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
68+
69+
{ "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
70+
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
71+
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
72+
73+
{ "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
74+
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
75+
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
76+
77+
{ "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
78+
{ "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
79+
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
80+
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
81+
82+
{ "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
83+
{ "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
9984
]
100-
}
85+
}

convert_hf_to_gguf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1991,7 +1991,7 @@ def set_vocab(self):
19911991
self._set_vocab_sentencepiece()
19921992
except FileNotFoundError:
19931993
self._set_vocab_gpt2()
1994-
1994+
19951995
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
19961996
for name, data in super().get_tensors():
19971997
if name.startswith("visual."):

examples/llava/clip.cpp

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2590,12 +2590,12 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
25902590
int* positions_data = (int*)malloc(ggml_nbytes(positions));
25912591

25922592
int ptr = 0;
2593-
for (size_t y = 0; y < ph; y+=2)
2593+
for (int y = 0; y < ph; y+=2)
25942594
{
2595-
for (size_t x = 0; x < pw; x+=2)
2595+
for (int x = 0; x < pw; x+=2)
25962596
{
2597-
for (size_t dy = 0; dy < 2; dy++) {
2598-
for (size_t dx = 0; dx < 2; dx++) {
2597+
for (int dy = 0; dy < 2; dy++) {
2598+
for (int dx = 0; dx < 2; dx++) {
25992599
positions_data[ptr] = y + dy;
26002600
positions_data[num_patches + ptr] = x + dx;
26012601
positions_data[num_patches * 2 + ptr] = y + dy;
@@ -2820,20 +2820,15 @@ bool clip_is_qwen2vl(const struct clip_ctx * ctx) {
28202820
}
28212821

28222822

2823-
bool tmp_clip_image_encode (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) {
2823+
bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) {
28242824
clip_image_f32 clip_img;
28252825
clip_img.buf.resize(h * w * 3);
2826-
for (size_t i = 0; i < h*w*3; i++)
2826+
for (int i = 0; i < h*w*3; i++)
28272827
{
28282828
clip_img.buf[i] = img[i];
28292829
}
28302830
clip_img.nx = w;
28312831
clip_img.ny = h;
2832-
// ctx->vision_model.hparams.image_size = h;
28332832
clip_image_encode(ctx, n_threads, &clip_img, vec);
28342833
return true;
28352834
}
2836-
2837-
void tmp_clip_set_layers (struct clip_ctx * ctx, int layers) {
2838-
ctx->vision_model.hparams.n_layer = layers;
2839-
}

examples/llava/clip.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,7 @@ CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out
9191
CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx);
9292
CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
9393

94-
CLIP_API bool tmp_clip_image_encode (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
95-
CLIP_API void tmp_clip_set_layers (struct clip_ctx * ctx, int layers);
94+
CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
9695

9796
#ifdef __cplusplus
9897
}

examples/llava/qwen2_vl_surgery.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
import argparse
2-
import glob
3-
import os
4-
from typing import Any, Dict
2+
from typing import Dict
53

64
import torch
5+
import numpy as np
76
from gguf import *
87
from transformers import (
9-
Qwen2VLForConditionalGeneration,
8+
Qwen2VLForConditionalGeneration,
109
Qwen2VLProcessor,
1110
AutoProcessor,
1211
Qwen2VLConfig
@@ -44,7 +43,7 @@ def find_vision_tensors(qwen2vl, dtype) -> Dict[str, np.ndarray]:
4443
else: # bias
4544
c3 = ten.shape[0]
4645
assert c3 % 3 == 0
47-
c = c3//3
46+
c = c3 // 3
4847
wq = ten[:c]
4948
wk = ten[c: c * 2]
5049
wv = ten[c * 2:]
@@ -68,7 +67,7 @@ def find_vision_tensors(qwen2vl, dtype) -> Dict[str, np.ndarray]:
6867
tensor_map["v.patch_embd.weight.1"] = ten[:, :, 1, ...]
6968
else:
7069
tensor_map[to_gguf_name(f"vision_model.{name}")] = ten
71-
70+
7271
for new_name, ten in tensor_map.items():
7372
if ten.ndim <= 1 or new_name.endswith("_norm.weight"):
7473
tensor_map[new_name] = ten.astype(np.float32)
@@ -89,16 +88,14 @@ def main(args):
8988
ftype = 1
9089
else:
9190
raise ValueError()
92-
91+
9392
model_name = args.model_name
9493
print("model_name: ", model_name)
9594
qwen2vl = Qwen2VLForConditionalGeneration.from_pretrained(
9695
model_name, torch_dtype=dtype, device_map="cpu"
9796
)
98-
cfg: Qwen2VLConfig = qwen2vl.config
97+
cfg: Qwen2VLConfig = qwen2vl.config # type: ignore[reportAssignmentType]
9998
vcfg = cfg.vision_config
100-
rope_cfg = cfg.rope_scaling
101-
10299

103100
fname_out = "qwen2vl-vision.gguf"
104101
fout = GGUFWriter(path=fname_out, arch="clip")
@@ -125,23 +122,22 @@ def main(args):
125122
fout.add_tensor(name, data)
126123

127124
fout.add_uint32("clip.vision.patch_size", vcfg.patch_size)
128-
fout.add_uint32("clip.vision.image_size", 14*40) # some reasonable size that is divable by (14*2)
125+
fout.add_uint32("clip.vision.image_size", 14 * 40) # some reasonable size that is divisible by (14*2)
129126
fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), vcfg.embed_dim)
130127
fout.add_uint32("clip.vision.projection_dim", vcfg.hidden_size)
131128
fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), vcfg.num_heads)
132129
fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6)
133130
fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), vcfg.depth)
134-
fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), 0) # BUG: not sure what this does
131+
fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), 0) # not sure what this does, put 0 here as a placeholder
135132
fout.add_name(model_name)
136133
"""
137-
HACK: Since vision rope related parameter aren't stored in the `Qwen2VLConfig,
134+
HACK: Since vision rope related parameters aren't stored in the `Qwen2VLConfig`,
138135
it will be hardcoded in the `clip_image_build_graph` from `clip.cpp`.
139136
"""
140137

141138
processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_name)
142-
# breakpoint()
143-
fout.add_array("clip.vision.image_mean", processor.image_processor.image_mean)
144-
fout.add_array("clip.vision.image_std", processor.image_processor.image_std)
139+
fout.add_array("clip.vision.image_mean", processor.image_processor.image_mean) # type: ignore[reportAttributeAccessIssue]
140+
fout.add_array("clip.vision.image_std", processor.image_processor.image_std) # type: ignore[reportAttributeAccessIssue]
145141

146142
fout.write_header_to_file()
147143
fout.write_kv_data_to_file()
@@ -154,4 +150,4 @@ def main(args):
154150
parser.add_argument("model_name", nargs='?', default="Qwen/Qwen2-VL-2B-Instruct")
155151
parser.add_argument("--data_type", nargs='?', choices=['fp32', 'fp16'], default="fp32")
156152
args = parser.parse_args()
157-
main(args)
153+
main(args)

examples/llava/qwen2vl-cli.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ static bool qwen2vl_eval_image_embed(llama_context * ctx_llama, const struct lla
2626
auto img_tokens = image_embed->n_image_pos;
2727
llama_pos mrope_pos[img_tokens * 4];
2828

29-
for (size_t y = 0; y < ph; y++)
29+
for (int y = 0; y < ph; y++)
3030
{
31-
for (size_t x = 0; x < pw; x++)
31+
for (int x = 0; x < pw; x++)
3232
{
3333
int i = y * pw + x;
3434
mrope_pos[i] = *st_pos_id;
@@ -270,7 +270,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
270270

271271
LOG("\n");
272272

273-
struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sparams);
273+
struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
274274
if (!smpl) {
275275
LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
276276
exit(1);
@@ -422,18 +422,15 @@ static void tmp_dump_img_embed(struct llava_context * ctx_llava, common_params *
422422
int ne = n_embd * 4;
423423
float vals[56 * 56 * 3];
424424
float embd[ne];
425-
// for (int i = 0; i < 3*56*56; i++)
426-
// {
427-
// vals[i] = 0.1;
428-
// }
425+
429426
for (int i = 0; i < 56*56; i++)
430427
{
431428
for (int c = 0; c < 3; c++)
432429
vals[i * 3 + c] = (float)(i % (56 * 56)) / (56*56);
433430
}
434431

435432
// auto param = &ctx_llava->ctx_clip->vision_model.hparams;
436-
tmp_clip_image_encode(ctx_llava->ctx_clip, 16, vals, 56, 56, embd);
433+
clip_encode_float_image(ctx_llava->ctx_clip, 16, vals, 56, 56, embd);
437434

438435
std::ofstream outFile("img_embed.bin", std::ios::binary);
439436
if (outFile.is_open()) {

ggml/include/ggml.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@
238238
#define GGML_EXIT_ABORTED 1
239239

240240
#define GGML_ROPE_TYPE_NEOX 2
241-
#define GGML_ROPE_TYPE_MROPE 4
242-
#define GGML_ROPE_TYPE_VISION 12
241+
#define GGML_ROPE_TYPE_MROPE 8
242+
#define GGML_ROPE_TYPE_VISION 24
243243

244244
#define GGUF_MAGIC "GGUF"
245245

0 commit comments

Comments
 (0)