1
- #pragma once
1
+ #ifndef CLIP_H
2
+ #define CLIP_H
2
3
3
4
#include "ggml.h"
4
5
#include <stddef.h>
5
6
#include <stdint.h>
6
7
8
+ #ifdef LLAMA_SHARED
9
+ # if defined(_WIN32 ) && !defined(__MINGW32__ )
10
+ # ifdef LLAMA_BUILD
11
+ # define CLIP_API __declspec(dllexport)
12
+ # else
13
+ # define CLIP_API __declspec(dllimport)
14
+ # endif
15
+ # else
16
+ # define CLIP_API __attribute__ ((visibility ("default")))
17
+ # endif
18
+ #else
19
+ # define CLIP_API
20
+ #endif
21
+
22
+ #ifdef __cplusplus
23
+ extern "C" {
24
+ #endif
25
+
7
26
struct clip_ctx ;
8
27
9
28
struct clip_image_size {
@@ -20,80 +39,97 @@ struct clip_context_params {
20
39
enum ggml_log_level verbosity ;
21
40
};
22
41
23
- struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
42
+ // deprecated, use clip_init
43
+ CLIP_API struct clip_ctx * clip_model_load (const char * fname , int verbosity );
44
+
45
+ CLIP_API struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
24
46
25
- void clip_free (struct clip_ctx * ctx );
47
+ CLIP_API void clip_free (struct clip_ctx * ctx );
26
48
27
- size_t clip_embd_nbytes (const struct clip_ctx * ctx );
28
- size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
49
+ CLIP_API size_t clip_embd_nbytes (const struct clip_ctx * ctx );
50
+ CLIP_API size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
29
51
30
- int32_t clip_get_image_size (const struct clip_ctx * ctx );
31
- int32_t clip_get_patch_size (const struct clip_ctx * ctx );
32
- int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
52
+ CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx );
53
+ CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx );
54
+ CLIP_API int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
33
55
34
56
// TODO: should be enum, not string
35
- const char * clip_patch_merge_type (const struct clip_ctx * ctx );
57
+ CLIP_API const char * clip_patch_merge_type (const struct clip_ctx * ctx );
36
58
37
- const int32_t * clip_image_grid (const struct clip_ctx * ctx );
38
- size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
59
+ CLIP_API const int32_t * clip_image_grid (const struct clip_ctx * ctx );
60
+ CLIP_API size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
39
61
40
- int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
62
+ GGML_DEPRECATED (CLIP_API int clip_n_patches (const struct clip_ctx * ctx ),
63
+ "use clip_n_output_tokens instead" );
64
+ GGML_DEPRECATED (CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx , struct clip_image_f32 * img ),
65
+ "use clip_n_output_tokens instead" );
66
+
67
+ CLIP_API int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
41
68
42
69
// for M-RoPE, this will be the number of token positions in X and Y directions
43
70
// for other models, X will be the total number of tokens and Y will be 1
44
- int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
45
- int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
71
+ CLIP_API int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
72
+ CLIP_API int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
46
73
47
74
// this should be equal to the embedding dimension of the text model
48
- int clip_n_mmproj_embd (const struct clip_ctx * ctx );
75
+ CLIP_API int clip_n_mmproj_embd (const struct clip_ctx * ctx );
49
76
50
- int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
51
- void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
52
- struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
77
+ CLIP_API int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
78
+ CLIP_API void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
79
+ CLIP_API struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
53
80
54
- struct clip_image_size * clip_image_size_init (void );
55
- struct clip_image_u8 * clip_image_u8_init (void );
56
- struct clip_image_f32 * clip_image_f32_init (void );
57
- struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
81
+ CLIP_API struct clip_image_size * clip_image_size_init (void );
82
+ CLIP_API struct clip_image_u8 * clip_image_u8_init (void );
83
+ CLIP_API struct clip_image_f32 * clip_image_f32_init (void );
84
+ CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
58
85
59
86
// nx, ny are the output image dimensions
60
- unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
87
+ CLIP_API unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
61
88
62
- void clip_image_size_free (struct clip_image_size * img_size );
63
- void clip_image_u8_free (struct clip_image_u8 * img );
64
- void clip_image_f32_free (struct clip_image_f32 * img );
65
- void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
66
- void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
89
+ CLIP_API void clip_image_size_free (struct clip_image_size * img_size );
90
+ CLIP_API void clip_image_u8_free (struct clip_image_u8 * img );
91
+ CLIP_API void clip_image_f32_free (struct clip_image_f32 * img );
92
+ CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
93
+ CLIP_API void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
67
94
68
95
// used for accessing the underlying data of clip_image_f32_batch
69
- size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
70
- size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
71
- size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
72
- struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
96
+ CLIP_API size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
97
+ CLIP_API size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
98
+ CLIP_API size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
99
+ CLIP_API struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
73
100
74
101
/**
75
102
* Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
76
103
* The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes
77
104
*/
78
- void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
105
+ CLIP_API void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
79
106
80
- bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
107
+ CLIP_API bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
81
108
82
109
/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
83
- bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
110
+ CLIP_API bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
84
111
85
112
/** preprocess img and store the result in res_imgs; pad_to_square may be overridden to false depending on model configuration */
86
- bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
113
+ CLIP_API bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
114
+
115
+ CLIP_API struct ggml_tensor * clip_get_newline_tensor (const struct clip_ctx * ctx );
116
+
117
+ CLIP_API bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
118
+ CLIP_API bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
119
+
120
+ CLIP_API bool clip_model_quantize (const char * fname_inp , const char * fname_out , int itype );
121
+
122
+ CLIP_API int clip_is_minicpmv (const struct clip_ctx * ctx );
123
+ CLIP_API bool clip_is_glm (const struct clip_ctx * ctx );
124
+ CLIP_API bool clip_is_qwen2vl (const struct clip_ctx * ctx );
125
+ CLIP_API bool clip_is_llava (const struct clip_ctx * ctx );
126
+ CLIP_API bool clip_is_gemma3 (const struct clip_ctx * ctx );
87
127
88
- struct ggml_tensor * clip_get_newline_tensor ( const struct clip_ctx * ctx );
128
+ CLIP_API bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
89
129
90
- bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
91
- bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
92
130
93
- int clip_is_minicpmv (const struct clip_ctx * ctx );
94
- bool clip_is_glm (const struct clip_ctx * ctx );
95
- bool clip_is_qwen2vl (const struct clip_ctx * ctx );
96
- bool clip_is_llava (const struct clip_ctx * ctx );
97
- bool clip_is_gemma3 (const struct clip_ctx * ctx );
131
+ #ifdef __cplusplus
132
+ }
133
+ #endif
98
134
99
- bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
135
+ #endif // CLIP_H
0 commit comments