@@ -10125,24 +10125,18 @@ struct llm_build_deepseek2 : public llm_graph_context {
                     ggml_row_size(kv_cmpr_pe->type, kv_lora_rank));
             cb(k_pe, "k_pe", il);
 
-            // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
-            q_pe = ggml_cont(ctx0, q_pe);
             q_pe = ggml_rope_ext(ctx0, q_pe, inp_pos, nullptr,
                     n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor_scaled, beta_fast, beta_slow
             );
             cb(q_pe, "q_pe", il);
 
-            // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
-            k_pe = ggml_cont(ctx0, k_pe);
             k_pe = ggml_rope_ext(ctx0, k_pe, inp_pos, nullptr,
                     n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor_scaled, beta_fast, beta_slow
             );
             cb(k_pe, "k_pe", il);
 
-            // TODO: the CUDA backend used to not support non-cont. (RMS) norm, investigate removing ggml_cont
-            kv_cmpr = ggml_cont(ctx0, kv_cmpr);
             kv_cmpr = build_norm(kv_cmpr,
                     model.layers[il].attn_kv_a_norm, nullptr,
                     LLM_NORM_RMS, il);
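
Context for the change (a sketch, not part of the diff): `q_pe`, `k_pe`, and `kv_cmpr` are strided views carved out of larger fused projection tensors, so their data is not contiguous in memory. The deleted `ggml_cont` calls materialized contiguous copies solely to satisfy CUDA kernels that once rejected non-contiguous inputs for RoPE and (RMS) norm; once the backend handles strided tensors, those copies are pure overhead, which is what this change removes. A minimal sketch of the pattern, assuming the public ggml API and placeholder shapes and RoPE hyperparameters (only the view offset mirrors the real code):

```cpp
// Sketch only: shows why the ggml_cont calls existed and why they can be
// dropped. Shapes and RoPE parameters are illustrative, not DeepSeek2's.
#include "ggml.h"

static ggml_tensor * rope_strided_view(ggml_context * ctx0,
                                       ggml_tensor  * kv_cmpr_pe, // fused [kv_lora_rank + n_rot, 1, n_tokens]
                                       ggml_tensor  * inp_pos,
                                       int64_t kv_lora_rank, int64_t n_rot, int64_t n_tokens) {
    // Slice the trailing n_rot elements of each fused row: the view shares
    // the parent's storage and row stride, so it is NOT contiguous.
    ggml_tensor * k_pe = ggml_view_3d(ctx0, kv_cmpr_pe,
            n_rot, 1, n_tokens,
            kv_cmpr_pe->nb[1], kv_cmpr_pe->nb[2],
            ggml_row_size(kv_cmpr_pe->type, kv_lora_rank));
    GGML_ASSERT(!ggml_is_contiguous(k_pe));

    // Before this commit a defensive copy was inserted here because the CUDA
    // RoPE kernel once required contiguous input:
    //     k_pe = ggml_cont(ctx0, k_pe);
    // With non-contiguous RoPE supported, the strided view is fed in directly
    // (frequency/scaling arguments below are placeholders):
    return ggml_rope_ext(ctx0, k_pe, inp_pos, nullptr,
            (int) n_rot, GGML_ROPE_TYPE_NEOX, 4096, 10000.0f, 1.0f,
            0.0f, 1.0f, 32.0f, 1.0f);
}
```

The same reasoning covers the third hunk: `ggml_rms_norm`, which `build_norm` dispatches to for `LLM_NORM_RMS`, no longer needs a contiguous copy of the `kv_cmpr` view either.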