Skip to content

Commit 62b7b1e

Browse files
committed
Merge branch 'master' into xsn/convert_gguf_qwen2vl
2 parents ef0bc7a + e84773a commit 62b7b1e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1516
-612
lines changed

.github/workflows/build-linux-cross.yml

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,25 @@ on:
44
workflow_call:
55

66
jobs:
7-
ubuntu-latest-riscv64-cpu-cross:
8-
runs-on: ubuntu-latest
7+
ubuntu-24-riscv64-cpu-cross:
8+
runs-on: ubuntu-24.04
99

1010
steps:
1111
- uses: actions/checkout@v4
1212
- name: Setup Riscv
1313
run: |
1414
sudo dpkg --add-architecture riscv64
15-
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
16-
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
17-
sudo apt-get clean
18-
sudo apt-get update
15+
16+
# Add arch-specific repositories for non-amd64 architectures
17+
cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
18+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
19+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
20+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
21+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
22+
EOF
23+
24+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
25+
1926
sudo apt-get install -y --no-install-recommends \
2027
build-essential \
2128
gcc-14-riscv64-linux-gnu \
@@ -40,21 +47,25 @@ jobs:
4047
4148
cmake --build build --config Release -j $(nproc)
4249
43-
ubuntu-latest-riscv64-vulkan-cross:
44-
runs-on: ubuntu-latest
50+
ubuntu-24-riscv64-vulkan-cross:
51+
runs-on: ubuntu-24.04
4552

4653
steps:
4754
- uses: actions/checkout@v4
48-
with:
49-
fetch-depth: 0
50-
5155
- name: Setup Riscv
5256
run: |
5357
sudo dpkg --add-architecture riscv64
54-
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
55-
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
56-
sudo apt-get clean
57-
sudo apt-get update
58+
59+
# Add arch-specific repositories for non-amd64 architectures
60+
cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
61+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
62+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
63+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
64+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
65+
EOF
66+
67+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
68+
5869
sudo apt-get install -y --no-install-recommends \
5970
build-essential \
6071
glslc \
@@ -82,21 +93,25 @@ jobs:
8293
8394
cmake --build build --config Release -j $(nproc)
8495
85-
ubuntu-latest-arm64-vulkan-cross:
86-
runs-on: ubuntu-latest
96+
ubuntu-24-arm64-vulkan-cross:
97+
runs-on: ubuntu-24.04
8798

8899
steps:
89100
- uses: actions/checkout@v4
90-
with:
91-
fetch-depth: 0
92-
93101
- name: Setup Arm64
94102
run: |
95103
sudo dpkg --add-architecture arm64
96-
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
97-
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
98-
sudo apt-get clean
99-
sudo apt-get update
104+
105+
# Add arch-specific repositories for non-amd64 architectures
106+
cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
107+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
108+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
109+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
110+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
111+
EOF
112+
113+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
114+
100115
sudo apt-get install -y --no-install-recommends \
101116
build-essential \
102117
glslc \

.github/workflows/build.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -601,9 +601,8 @@ jobs:
601601
-DGGML_SYCL_F16=ON
602602
cmake --build build --config Release -j $(nproc)
603603
604-
# Disabled for now due to sporadic issue syncing.
605-
# build-linux-cross:
606-
# uses: ./.github/workflows/build-linux-cross.yml
604+
build-linux-cross:
605+
uses: ./.github/workflows/build-linux-cross.yml
607606

608607
macOS-latest-cmake-ios:
609608
runs-on: macos-latest

cmake/build-info.cmake

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,20 @@ endif()
4141

4242
if(MSVC)
4343
set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
44-
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
44+
if (CMAKE_VS_PLATFORM_NAME)
45+
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
46+
else()
47+
set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
48+
endif()
4549
else()
4650
execute_process(
47-
COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
51+
COMMAND ${CMAKE_C_COMPILER} --version
4852
OUTPUT_VARIABLE OUT
4953
OUTPUT_STRIP_TRAILING_WHITESPACE
5054
)
55+
string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
5156
set(BUILD_COMPILER ${OUT})
57+
5258
execute_process(
5359
COMMAND ${CMAKE_C_COMPILER} -dumpmachine
5460
OUTPUT_VARIABLE OUT

common/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ add_custom_command(
3939
COMMENT "Generating build details from Git"
4040
COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
4141
-DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
42-
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
42+
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
43+
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
44+
-P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
4345
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
4446
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
4547
VERBATIM

common/arg.cpp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -217,13 +217,11 @@ struct curl_slist_ptr {
217217
#define CURL_MAX_RETRY 3
218218
#define CURL_RETRY_DELAY_SECONDS 2
219219

220-
static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
220+
static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds, const char * method_name) {
221221
int remaining_attempts = max_attempts;
222-
char * method = nullptr;
223-
curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_METHOD, &method);
224222

225223
while (remaining_attempts > 0) {
226-
LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
224+
LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method_name, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
227225

228226
CURLcode res = curl_easy_perform(curl);
229227
if (res == CURLE_OK) {
@@ -287,24 +285,17 @@ static bool common_download_file_single(const std::string & url, const std::stri
287285
try {
288286
metadata_in >> metadata;
289287
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
290-
if (metadata.contains("url") && metadata.at("url").is_string()) {
291-
auto previous_url = metadata.at("url").get<std::string>();
292-
if (previous_url != url) {
293-
LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
294-
return false;
295-
}
296-
}
297288
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
298289
etag = metadata.at("etag");
299290
}
300291
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
301292
last_modified = metadata.at("lastModified");
302293
}
303294
} catch (const nlohmann::json::exception & e) {
304-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
305-
return false;
295+
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
306296
}
307297
}
298+
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
308299
} else {
309300
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
310301
}
@@ -350,7 +341,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
350341

351342
// we only allow retrying once for HEAD requests
352343
// this is for the use case of using running offline (no internet), retrying can be annoying
353-
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0);
344+
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
354345
if (!was_perform_successful) {
355346
head_request_ok = false;
356347
}
@@ -432,7 +423,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
432423
// start the download
433424
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
434425
llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
435-
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
426+
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET");
436427
if (!was_perform_successful) {
437428
return false;
438429
}
@@ -2792,7 +2783,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
27922783
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP"));
27932784
add_opt(common_arg(
27942785
{"--cache-reuse"}, "N",
2795-
string_format("min chunk size to attempt reusing from the cache via KV shifting (default: %d)", params.n_cache_reuse),
2786+
string_format(
2787+
"min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
2788+
"[(card)](https://ggml.ai/f0.png)", params.n_cache_reuse
2789+
),
27962790
[](common_params & params, int value) {
27972791
params.n_cache_reuse = value;
27982792
}

convert_hf_to_gguf.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,9 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]
419419
@staticmethod
420420
def load_hparams(dir_model: Path):
421421
try:
422-
return AutoConfig.from_pretrained(dir_model).to_dict()
422+
# for security reason, we don't allow loading remote code by default
423+
# if a model need remote code, we will fallback to config.json
424+
return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
423425
except Exception as e:
424426
logger.warning(f"Failed to load model config from {dir_model}: {e}")
425427
logger.warning("Trying to load config.json instead")
@@ -1901,7 +1903,10 @@ def prepare_tensors(self):
19011903
raise ValueError(f"Unprocessed experts: {experts}")
19021904

19031905

1904-
@ModelBase.register("LlavaForConditionalGeneration")
1906+
@ModelBase.register(
1907+
"LlavaForConditionalGeneration", # pixtral
1908+
"Mistral3ForConditionalGeneration", # mistral small 3.1
1909+
)
19051910
class LlavaVisionModel(VisionModel):
19061911
img_break_tok_id = -1
19071912

@@ -1910,17 +1915,38 @@ def __init__(self, *args, **kwargs):
19101915
if self.hparams["model_type"] == "pixtral":
19111916
# layer_norm_eps is not in config.json, it is hard-coded in modeling_pixtral.py
19121917
self.hparams["layer_norm_eps"] = self.hparams.get("layer_norm_eps", 1e-5)
1913-
self.img_break_tok_id = 12 # see tokenizer_config.json
1918+
self.img_break_tok_id = self.get_token_id("[IMG_BREAK]")
1919+
logger.info(f"Image break token id: {self.img_break_tok_id}")
19141920
else:
19151921
raise ValueError(f"Unsupported model type: {self.hparams['model_type']}")
19161922

1923+
def get_token_id(self, token: str) -> int:
1924+
tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
1925+
with open(tokenizer_config_file, "r", encoding="utf-8") as f:
1926+
added_tokens_decoder = json.load(f)['added_tokens_decoder']
1927+
for id_, token_data in added_tokens_decoder.items():
1928+
if token_data["content"] == token:
1929+
return int(id_)
1930+
raise ValueError(f"Token '{token}' not found in tokenizer config.")
1931+
19171932
def set_gguf_parameters(self):
19181933
super().set_gguf_parameters()
19191934
hparams = self.hparams
19201935
if hparams["model_type"] == "pixtral":
19211936
self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.PIXTRAL)
19221937
self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
1923-
self.gguf_writer.add_vision_use_silu(True)
1938+
1939+
# hidden_act
1940+
if hparams["hidden_act"] == "silu":
1941+
self.gguf_writer.add_vision_use_silu(True)
1942+
elif hparams["hidden_act"] == "gelu":
1943+
self.gguf_writer.add_vision_use_gelu(True)
1944+
else:
1945+
raise ValueError(f"Unsupported hidden_act: {hparams['hidden_act']}")
1946+
1947+
# spatial_merge_size
1948+
if "spatial_merge_size" in self.global_config:
1949+
self.gguf_writer.add_vision_spatial_merge_size(self.global_config["spatial_merge_size"])
19241950

19251951
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
19261952
del bid # unused

examples/llava/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,11 @@ llama-mtmd-cli -hf ggml-org/Qwen2-VL-7B-Instruct-GGUF
4242
# Qwen 2.5 VL
4343
llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-3B-Instruct-GGUF
4444
llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-7B-Instruct-GGUF
45+
llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-32B-Instruct-GGUF
4546
llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-72B-Instruct-GGUF
46-
# NOTE: Qwen2.5-VL-32B text-only model is currently unusable
47+
48+
# Mistral Small 3.1 24B (IQ2_M quantization)
49+
llama-mtmd-cli -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF --chat-template mistral-v7
4750
```
4851

4952
## How it works and what is `mmproj`?
@@ -74,7 +77,8 @@ For the following models, you can use `convert_hf_to_gguf.py`with `--mmproj` fla
7477
- SmolVLM (from [HuggingFaceTB](https://huggingface.co/HuggingFaceTB))
7578
- SmolVLM2 (from [HuggingFaceTB](https://huggingface.co/HuggingFaceTB))
7679
- [Pixtral 12B](https://huggingface.co/mistral-community/pixtral-12b) - only works with `transformers`-compatible checkpoint
77-
- Qwen 2 VL and Qwen 2.5 VL
80+
- Qwen 2 VL and Qwen 2.5 VL (from [Qwen](https://huggingface.co/Qwen))
81+
- [Mistral Small 3.1 24B](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503)
7882

7983
For older models, please refer to the relevant guide for instructions on how to obtain or create them:
8084

@@ -85,3 +89,4 @@ For older models, please refer to the relevant guide for instructions on how to
8589
- [MiniCPM-V 2.6](../../docs/multimodal/minicpmv2.6.md)
8690
- [MiniCPM-o 2.6](../../docs/multimodal/minicpmo2.6.md)
8791
- [IBM Granite Vision](../../docs/multimodal/granitevision.md)
92+
- [Google Gemma 3](../../docs/multimodal/gemma3.md)

examples/llava/clip-impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#define KEY_FEATURE_LAYER "clip.vision.feature_layer"
3232
#define KEY_PROJ_SCALE_FACTOR "clip.vision.projector.scale_factor"
3333
#define KEY_PROJ_TYPE "clip.projector_type"
34+
#define KEY_SPATIAL_MERGE_SIZE "clip.vision.spatial_merge_size"
3435

3536
#define KEY_USE_GLU_MLP "clip.use_glu_mlp" // for qwen2.5vl
3637
#define KEY_USE_RMS_NORM "clip.use_rms_norm" // for qwen2.5vl
@@ -68,9 +69,11 @@
6869
#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
6970
#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s"
7071
#define TN_IMAGE_NEWLINE "model.image_newline"
72+
#define TN_MM_INP_NORM "mm.input_norm.weight"
7173
#define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3
7274
#define TN_MM_SOFT_EMB_N "mm.soft_emb_norm.weight" // gemma3
7375
#define TN_MM_PROJECTOR "mm.model.fc.weight" // idefics3
76+
#define TN_MM_PATCH_MERGER "mm.patch_merger.weight" // mistral small 3.1
7477
#define TN_TOK_IMG_BREAK "v.token_embd.img_break" // pixtral
7578

7679
// mimicpmv

0 commit comments

Comments
 (0)