Commit b472634

mtmd : remove libllava, remove clip-quantize-cli (⚠️ breaking change) (#13460)
* mtmd : remove libllava, remove clip-quantize-cli
* rm clip_model_quantize
1 parent bf79371 · commit b472634

16 files changed (+4, −977 lines)

tools/mtmd/CMakeLists.txt

Lines changed: 0 additions & 35 deletions
@@ -1,29 +1,3 @@
-# llava (legacy)
-
-add_library(llava OBJECT
-            llava.cpp
-            llava.h
-            clip.cpp
-            clip.h
-            )
-
-target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
-
-target_include_directories(llava PUBLIC .)
-target_include_directories(llava PUBLIC ../..)
-target_include_directories(llava PUBLIC ../../common)
-
-target_compile_features(llava PRIVATE cxx_std_17)
-
-add_library(llava_static STATIC $<TARGET_OBJECTS:llava>)
-if (BUILD_SHARED_LIBS)
-    set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD)
-    add_library(llava_shared SHARED $<TARGET_OBJECTS:llava>)
-    target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
-    install(TARGETS llava_shared LIBRARY)
-endif()
-
 # mtmd

 add_library(mtmd OBJECT

@@ -53,12 +27,10 @@ if (BUILD_SHARED_LIBS)
 endif()

 if (NOT MSVC)
-    target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
     target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h
 endif()

 if(TARGET BUILD_INFO)
-    add_dependencies(llava BUILD_INFO)
     add_dependencies(mtmd BUILD_INFO)
 endif()

@@ -73,10 +45,3 @@ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
-set(TARGET llama-llava-clip-quantize-cli)
-add_executable(${TARGET} clip-quantize-cli.cpp)
-set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-clip-quantize-cli)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)
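
This commit deletes the `llava`, `llava_static`, and `llava_shared` targets, so downstream CMake projects that linked against them must link `mtmd` instead. A minimal sketch of the consumer side, modeled on the `llama-mtmd-cli` target above (the `my-mtmd-tool` name and `main.cpp` source are hypothetical, not part of this commit):

add_executable(my-mtmd-tool main.cpp)   # hypothetical consumer target
# mtmd replaces the removed llava library; common supplies the shared CLI helpers
target_link_libraries(my-mtmd-tool PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(my-mtmd-tool PRIVATE cxx_std_17)   # mtmd builds as C++17, as above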

tools/mtmd/README-quantize.md

Lines changed: 0 additions & 44 deletions
This file was deleted.

tools/mtmd/README.md

Lines changed: 4 additions & 3 deletions
@@ -41,8 +41,8 @@ Built upon `clip.cpp` (similar to `llava.cpp`), `libmtmd` offers several advanta

 Multimodal projector (`mmproj`) files are specific to each model architecture.

-For the following models, you can use `convert_hf_to_gguf.py`with `--mmproj` flag to get the `mmproj` file:
-- [Gemma 3](https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d) - Note: 1B variant does not have vision support
+For the following models, you can use `convert_hf_to_gguf.py` with `--mmproj` flag to get the `mmproj` file:
+- [Gemma 3](https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d) ; See the guide [here](../../docs/multimodal/gemma3.md) - Note: 1B variant does not have vision support
 - SmolVLM (from [HuggingFaceTB](https://huggingface.co/HuggingFaceTB))
 - SmolVLM2 (from [HuggingFaceTB](https://huggingface.co/HuggingFaceTB))
 - [Pixtral 12B](https://huggingface.co/mistral-community/pixtral-12b) - only works with `transformers`-compatible checkpoint

@@ -52,11 +52,12 @@ For the following models, you can use `convert_hf_to_gguf.py`with `--mmproj` fla

 For older models, please refer to the relevant guide for instructions on how to obtain or create them:

+NOTE: conversion scripts are located under `tools/mtmd/legacy-models`
+
 - [LLaVA](../../docs/multimodal/llava.md)
 - [MobileVLM](../../docs/multimodal/MobileVLM.md)
 - [GLM-Edge](../../docs/multimodal/glmedge.md)
 - [MiniCPM-V 2.5](../../docs/multimodal/minicpmv2.5.md)
 - [MiniCPM-V 2.6](../../docs/multimodal/minicpmv2.6.md)
 - [MiniCPM-o 2.6](../../docs/multimodal/minicpmo2.6.md)
 - [IBM Granite Vision](../../docs/multimodal/granitevision.md)
-- [Google Gemma 3](../../docs/multimodal/gemma3.md)
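
As a usage note, the conversion the README describes is driven by `convert_hf_to_gguf.py` with the `--mmproj` flag named above. A hedged example invocation (the model path is hypothetical; check the script's `--help` for the authoritative options):

python convert_hf_to_gguf.py /path/to/hf-model --mmproj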

tools/mtmd/android/adb_run.sh

Lines changed: 0 additions & 53 deletions
This file was deleted.

tools/mtmd/android/build_64.sh

Lines changed: 0 additions & 8 deletions
This file was deleted.

tools/mtmd/clip-quantize-cli.cpp

Lines changed: 0 additions & 59 deletions
This file was deleted.

tools/mtmd/clip.cpp

Lines changed: 0 additions & 135 deletions
@@ -3586,141 +3586,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
     return true;
 }

-bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) {
-    assert(itype < GGML_TYPE_COUNT);
-    ggml_type type = static_cast<ggml_type>(itype);
-
-    auto * ctx_clip = clip_init(fname_inp, clip_context_params{
-        /* use_gpu */   false,
-        /* verbosity */ GGML_LOG_LEVEL_ERROR,
-    });
-
-    const auto & ctx_src = ctx_clip->ctx_gguf.get();
-    const auto & ctx_data = ctx_clip->ctx_data.get();
-
-    auto * ctx_out = gguf_init_empty();
-    gguf_set_kv(ctx_out, ctx_src);
-    gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
-    gguf_set_val_u32(ctx_out, "general.file_type", itype);
-
-    auto fout = std::ofstream(fname_out, std::ios::binary);
-
-    const int n_tensors = gguf_get_n_tensors(ctx_src);
-
-    for (int i = 0; i < n_tensors; ++i) {
-        const char * name = gguf_get_tensor_name(ctx_src, i);
-        ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
-        gguf_add_tensor(ctx_out, cur);
-    }
-
-    const size_t meta_size = gguf_get_meta_size(ctx_out);
-    for (size_t i = 0; i < meta_size; ++i) {
-        fout.put(0);
-    }
-
-    // regexes of tensor names to be quantized
-    const std::vector<std::string> k_names = {
-        ".*weight",
-    };
-
-    std::vector<uint8_t> work(512);
-    std::vector<float> conv_buf(512);
-    size_t total_size_org = 0;
-    size_t total_size_new = 0;
-
-    for (int i = 0; i < n_tensors; ++i) {
-        const std::string name = gguf_get_tensor_name(ctx_src, i);
-        ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str());
-
-        enum ggml_type new_type;
-        void * new_data;
-        size_t new_size;
-
-        bool quantize = false;
-        for (const auto & s : k_names) {
-            if (std::regex_match(name, std::regex(s))) {
-                quantize = true;
-                break;
-            }
-        }
-
-        // quantize only 2D tensors and bigger than block size
-        quantize &= (ggml_n_dims(cur) == 2) && cur->ne[0] > ggml_blck_size(type);
-
-        if (quantize) {
-            new_type = type;
-            if (new_type >= GGML_TYPE_Q2_K && name.find("embd") != std::string::npos) {
-                new_type = GGML_TYPE_Q8_0; // ggml_get_rows needs non K type
-                // LOG_ERR("%s: quantizing %s to %s\n", __func__, name.c_str(), ggml_type_name(new_type));
-            }
-            const size_t n_elms = ggml_nelements(cur);
-            float * f32_data;
-
-            switch (cur->type) {
-                case GGML_TYPE_F32:
-                    f32_data = (float *)cur->data;
-                    break;
-                case GGML_TYPE_F16:
-                    if (conv_buf.size() < n_elms) {
-                        conv_buf.resize(n_elms);
-                    }
-                    for (size_t j = 0; j < n_elms; ++j) {
-                        conv_buf[j] = ggml_fp16_to_fp32(((ggml_fp16_t *)cur->data)[j]);
-                    }
-                    f32_data = (float *)conv_buf.data();
-                    break;
-                default:
-                    LOG_ERR("%s: Please use an input file in f32 or f16\n", __func__);
-                    gguf_free(ctx_out);
-                    return false;
-            }
-
-            if (work.size() < n_elms * 4) {
-                work.resize(n_elms * 4);
-            }
-            new_data = work.data();
-
-            new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
-        } else {
-            new_type = cur->type;
-            new_data = cur->data;
-            new_size = ggml_nbytes(cur);
-        }
-        const size_t orig_size = ggml_nbytes(cur);
-        total_size_org += orig_size;
-        total_size_new += new_size;
-        gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
-        GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
-        gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
-        fout.write((const char *)new_data, new_size);
-        size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
-        for (size_t j = 0; j < pad; ++j) {
-            fout.put(0);
-        }
-
-        LOG_INF("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), ggml_n_dims(cur), quantize,
-                orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
-    }
-
-    // go back to beginning of file and write the updated metadata
-    fout.seekp(0, std::ios::beg);
-    std::vector<uint8_t> meta(meta_size);
-    gguf_get_meta_data(ctx_out, meta.data());
-    fout.write((const char *)meta.data(), meta_size);
-
-    fout.close();
-
-    clip_free(ctx_clip);
-    gguf_free(ctx_out);
-
-    {
-        LOG_INF("%s: original size  = %8.2f MB\n", __func__, total_size_org / 1024.0 / 1024.0);
-        LOG_INF("%s: quantized size = %8.2f MB\n", __func__, total_size_new / 1024.0 / 1024.0);
-    }
-
-    return true;
-}
-
 int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
     switch (ctx->proj_type) {
         case PROJECTOR_TYPE_LDP:
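
The removed `clip_model_quantize` did three things per tensor: convert f16 data to f32 when needed, quantize the rows with `ggml_quantize_chunk`, and record the new type and data in the output GGUF. A minimal sketch of just the quantization step, lifted from the deleted code above (the wrapper name `quantize_tensor_rows` is ours, not ggml API; the `ggml_quantize_chunk` call and its arguments are the ones in the diff):

#include <cstdint>
#include <vector>
#include "ggml.h"

// Quantize `nrows` rows of `ne0` f32 values each into `out`, returning the
// quantized size in bytes. Mirrors the per-tensor step of the removed
// clip_model_quantize; the wrapper itself is illustrative only.
static size_t quantize_tensor_rows(ggml_type new_type, const float * f32_data,
                                   int64_t ne0, int64_t nrows,
                                   std::vector<uint8_t> & out) {
    // worst case: quantized data is never larger than the f32 input
    out.resize((size_t) ne0 * nrows * 4);
    return ggml_quantize_chunk(new_type, f32_data, out.data(),
                               /*start*/ 0, nrows, ne0, /*imatrix*/ nullptr);
}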
