@@ -556,15 +556,14 @@ static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_im
556
556
}
557
557
558
558
// implementation of the 2D RoPE without adding a new op in ggml
559
+ // this is not efficient (it uses double the memory), but it works on all backends
559
560
static ggml_tensor * build_rope_2d (
560
- ggml_cgraph * gf,
561
561
ggml_context * ctx0,
562
562
ggml_tensor * cur,
563
563
ggml_tensor * pos_h,
564
564
ggml_tensor * pos_w,
565
565
const float freq_base
566
566
) {
567
- ggml_tensor * tmp;
568
567
const int64_t n_dim = cur->ne [0 ];
569
568
const int64_t n_head = cur->ne [1 ];
570
569
const int64_t n_pos = cur->ne [2 ];
@@ -573,18 +572,24 @@ static ggml_tensor * build_rope_2d(
573
572
// we will have a list of 4 inv_freq: 1e-0, 1e-1, 1e-2, 1e-3
574
573
// first half of cur will use 1e-0, 1e-2 (even)
575
574
// second half of cur will use 1e-1, 1e-3 (odd)
576
- //
577
- // for the first half, the trick here is to rotate n_dim/2, so inv_freq will be even
575
+ // the trick here is to rotate just half of n_dim, so inv_freq will automatically be even
578
576
// ^ don't ask me why, it's math! -2(2i) / n_dim == -2i / (n_dim/2)
579
577
// then for the second half, we use freq_scale to shift the inv_freq
580
578
// ^ why? replace (2i) with (2i+1) in the above equation
581
579
const float freq_scale_odd = std::pow (freq_base, (float )-2 /n_dim);
582
580
583
581
// first half
582
+ ggml_tensor * first;
584
583
{
585
- cur = ggml_rope_ext_inplace (
584
+ first = ggml_view_3d (ctx0, cur,
585
+ n_dim/2 , n_head, n_pos,
586
+ ggml_row_size (cur->type , n_dim),
587
+ ggml_row_size (cur->type , n_dim*n_head),
588
+ 0 );
589
+ // first = ggml_cont(ctx0, first);
590
+ first = ggml_rope_ext (
586
591
ctx0,
587
- cur ,
592
+ first ,
588
593
pos_h, // positions
589
594
nullptr , // freq factors
590
595
n_dim/2 , // n_dims
@@ -593,27 +598,28 @@ static ggml_tensor * build_rope_2d(
593
598
);
594
599
}
595
600
596
- // second half
601
+ // second half (computed into a separate tensor instead of in place)
602
+ ggml_tensor * second = cur;
597
603
{
598
- tmp = ggml_view_3d (ctx0, cur,
604
+ second = ggml_view_3d (ctx0, cur,
599
605
n_dim/2 , n_head, n_pos,
600
606
ggml_row_size (cur->type , n_dim),
601
607
ggml_row_size (cur->type , n_dim*n_head),
602
608
n_dim/2 * ggml_element_size (cur));
603
- tmp = ggml_rope_ext_inplace (
609
+ second = ggml_cont (ctx0, second); // copy, because ggml_rope doesn't play well with non-contiguous tensors
610
+ second = ggml_rope_ext (
604
611
ctx0,
605
- tmp ,
612
+ second ,
606
613
pos_w, // positions
607
614
nullptr , // freq factors
608
615
n_dim/2 , // n_dims
609
616
0 , 0 , freq_base,
610
617
freq_scale_odd,
611
618
0 .0f , 1 .0f , 0 .0f , 0 .0f
612
619
);
613
- // calculate inplace (modify cur directly)
614
- ggml_build_forward_expand (gf, tmp);
615
620
}
616
621
622
+ cur = ggml_concat (ctx0, first, second, 0 );
617
623
return cur;
618
624
}
619
625
@@ -682,13 +688,13 @@ static ggml_cgraph * clip_image_build_graph_pixtral(clip_ctx * ctx, const clip_i
682
688
struct ggml_tensor * Q = ggml_mul_mat (ctx0, model.layers [il].q_w , cur);
683
689
684
690
Q = ggml_reshape_3d (ctx0, Q, d_head, n_head, num_patches);
685
- Q = build_rope_2d (gf, ctx0, Q, pos_h, pos_w, hparams.rope_theta );
691
+ Q = build_rope_2d (ctx0, Q, pos_h, pos_w, hparams.rope_theta );
686
692
Q = ggml_cont (ctx0, ggml_permute (ctx0, Q, 0 , 2 , 1 , 3 ));
687
693
688
694
struct ggml_tensor * K = ggml_mul_mat (ctx0, model.layers [il].k_w , cur);
689
695
690
696
K = ggml_reshape_3d (ctx0, K, d_head, n_head, num_patches);
691
- K = build_rope_2d (gf, ctx0, K, pos_h, pos_w, hparams.rope_theta );
697
+ K = build_rope_2d (ctx0, K, pos_h, pos_w, hparams.rope_theta );
692
698
K = ggml_cont (ctx0, ggml_permute (ctx0, K, 0 , 2 , 1 , 3 ));
693
699
694
700
struct ggml_tensor * V = ggml_mul_mat (ctx0, model.layers [il].v_w , cur);
0 commit comments