@@ -2742,6 +2742,7 @@ inline void ggml_cuda_op_mul(
2742
2742
(void ) dst;
2743
2743
(void ) src0_ddq_i;
2744
2744
(void ) i02;
2745
+ (void ) i1;
2745
2746
}
2746
2747
2747
2748
inline void ggml_cuda_op_gelu (
@@ -3037,15 +3038,15 @@ inline void ggml_cuda_op_rope(
3037
3038
const int64_t ne00 = src0->ne [0 ];
3038
3039
const int64_t i01_diff = i01_high - i01_low;
3039
3040
3040
- const int n_past = ((int32_t *) src1->data )[0 ];
3041
- const int n_dims = ((int32_t *) src1->data )[1 ];
3042
- const int mode = ((int32_t *) src1->data )[2 ];
3043
- const int n_ctx = ((int32_t *) src1->data )[3 ];
3044
-
3041
+ const int n_past = ((int32_t *) dst->op_params )[0 ];
3042
+ const int n_dims = ((int32_t *) dst->op_params )[1 ];
3043
+ const int mode = ((int32_t *) dst->op_params )[2 ];
3044
+ const int n_ctx = ((int32_t *) dst->op_params )[3 ];
3045
3045
// RoPE alteration for extended context
3046
+
3046
3047
float freq_base, freq_scale;
3047
- memcpy (&freq_base, (int32_t *) src1-> data + 4 , sizeof (float ));
3048
- memcpy (&freq_scale, (int32_t *) src1-> data + 5 , sizeof (float ));
3048
+ memcpy (&freq_base, (int32_t *) dst-> op_params + 4 , sizeof (float ));
3049
+ memcpy (&freq_scale, (int32_t *) dst-> op_params + 5 , sizeof (float ));
3049
3050
3050
3051
const float theta_scale = powf (freq_base, -2 .0f /n_dims);
3051
3052
const float p = (((mode & 1 ) == 0 ? n_past + i02 : i02)) * freq_scale;
@@ -3061,6 +3062,7 @@ inline void ggml_cuda_op_rope(
3061
3062
rope_f32_cuda (src0_ddf_i, dst_ddf_i, ne00, i01_diff, p, theta_scale, cudaStream_main);
3062
3063
}
3063
3064
3065
+ (void ) src1;
3064
3066
(void ) dst;
3065
3067
(void ) src0_ddq_i;
3066
3068
(void ) src1_ddf_i;
@@ -3079,11 +3081,12 @@ inline void ggml_cuda_op_diag_mask_inf(
3079
3081
const int64_t ne01 = src0->ne [1 ];
3080
3082
const int64_t i01_diff = i01_high - i01_low;
3081
3083
3082
- const int n_past = ((int32_t *) src1-> data )[0 ];
3084
+ const int n_past = ((int32_t *) dst-> op_params )[0 ];
3083
3085
3084
3086
// compute
3085
3087
diag_mask_inf_f32_cuda (src0_ddf_i, dst_ddf_i, ne00, i01_diff, ne01, n_past, cudaStream_main);
3086
3088
3089
+ (void ) src1;
3087
3090
(void ) dst;
3088
3091
(void ) src0_ddq_i;
3089
3092
(void ) src1_ddf_i;
@@ -3803,7 +3806,7 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
3803
3806
char * src0_ddc = (char *) src0_extra->data_device [g_main_device];
3804
3807
size_t offset = 0 ;
3805
3808
if (tensor->op == GGML_OP_VIEW) {
3806
- memcpy (&offset, tensor->src [ 2 ]-> data , sizeof (size_t ));
3809
+ memcpy (&offset, tensor->op_params , sizeof (size_t ));
3807
3810
}
3808
3811
extra = ggml_cuda_alloc_temp_tensor_extra ();
3809
3812
extra->data_device [g_main_device] = src0_ddc + offset;
0 commit comments