Skip to content

Commit ee02ad0

Browse files
clip : fix visual encoders with no CLS (#11982)
Signed-off-by: Alex-Brooks <[email protected]>
1 parent c392e50 commit ee02ad0

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

examples/llava/clip.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2712,9 +2712,13 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
2712 2712

2713 2713
if (!ctx->has_glm_projector) {
2714 2714
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
2715+
// The patches vector is used to get rows to index into the embeds with;
2716+
// we should skip dim 0 only if we have CLS to avoid going out of bounds
2717+
// when retrieving the rows.
2718+
int patch_offset = ctx->has_class_embedding ? 1 : 0;
2715 2719
int* patches_data = (int*)malloc(ggml_nbytes(patches));
2716 2720
for (int i = 0; i < num_patches; i++) {
2717-
patches_data[i] = i + 1;
2721+
patches_data[i] = i + patch_offset;
2718 2722
}
2719 2723
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
2720 2724
free(patches_data);

0 commit comments

Comments
 (0)