@@ -4642,7 +4642,7 @@ struct llm_build_context {
4642
4642
0 );
4643
4643
cb (v_states, " v_states" , il);
4644
4644
4645
- q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
4645
+ q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
4646
4646
q_pe = ggml_rope_ext (
4647
4647
ctx0, q_pe, inp_pos, rope_factors,
4648
4648
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
@@ -4651,7 +4651,7 @@ struct llm_build_context {
4651
4651
cb (q_pe, " q_pe" , il);
4652
4652
4653
4653
// shared RoPE key
4654
- k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
4654
+ k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
4655
4655
k_pe = ggml_rope_ext (
4656
4656
ctx0, k_pe, inp_pos, rope_factors,
4657
4657
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
@@ -6496,7 +6496,7 @@ struct llm_build_context {
6496
6496
0 );
6497
6497
cb (v_states, " v_states" , il);
6498
6498
6499
- q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
6499
+ q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
6500
6500
q_pe = ggml_rope_ext (
6501
6501
ctx0, q_pe, inp_pos, nullptr ,
6502
6502
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
@@ -6505,7 +6505,7 @@ struct llm_build_context {
6505
6505
cb (q_pe, " q_pe" , il);
6506
6506
6507
6507
// shared RoPE key
6508
- k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
6508
+ k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
6509
6509
k_pe = ggml_rope_ext (
6510
6510
ctx0, k_pe, inp_pos, nullptr ,
6511
6511
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
0 commit comments