@@ -3936,7 +3936,7 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); }
inline static void ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; }
inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
- inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = log (x[i]); }
+ inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf (x[i]); }
inline static void ggml_vec_abs_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
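Side note on the logf change above: log() from <math.h> operates on double, so the old line promoted each float to double and truncated the result back on assignment (and can draw implicit-conversion warnings); logf() is the single-precision variant and keeps the loop entirely in float. A minimal standalone illustration of the two forms (plain C, not part of the diff):

#include <math.h>

// double-precision version: float -> double -> float round trip per element
static void vec_log_via_double(const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) y[i] = (float) log((double) x[i]);
}

// single-precision version, as used after this change
static void vec_log_f32(const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) y[i] = logf(x[i]);
}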
@@ -4009,7 +4009,6 @@ inline static float ggml_silu_backward_f32(float x, float dy) {
#ifdef GGML_SILU_FP16
inline static void ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) {
-     uint16_t t;
    for (int i = 0; i < n; ++i) {
        // we did not use x[i] to compute forward silu but its f16 equivalent
        // take derivative at f16 of x[i]:
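The removed uint16_t t; was an unused local in this path; the comments above explain the actual logic: under GGML_SILU_FP16 the forward silu is computed from the f16-rounded input, so the backward pass takes the derivative at that same rounded value rather than at x[i] itself. A rough standalone sketch of the idea (my own plain-C paraphrase, not the ggml source; the real code goes through ggml's fp16 conversion helpers, and _Float16 availability is compiler-dependent):

#include <math.h>

// silu(x) = x*sigmoid(x)  =>  silu'(x) = s*(1 + x*(1 - s)),  s = sigmoid(x)
static float silu_backward(float x, float dy) {
    const float s = 1.0f/(1.0f + expf(-x));
    return dy*s*(1.0f + x*(1.0f - s));
}

// FP16-consistent variant: round x to half precision first, then differentiate
// there, so the gradient matches the f16-based forward evaluation.
static float silu_backward_fp16(float x, float dy) {
    const float x16 = (float)(_Float16) x; // assumption: _Float16 is available
    return silu_backward(x16, dy);
}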
@@ -6841,7 +6840,7 @@ struct ggml_tensor * ggml_rope(
        int n_dims,
        int mode) {
    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, false);
- };
+ }

struct ggml_tensor * ggml_rope_inplace(
        struct ggml_context * ctx,
@@ -6850,7 +6849,7 @@ struct ggml_tensor * ggml_rope_inplace(
        int n_dims,
        int mode) {
    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, true);
- };
+ }

// ggml_rope_back
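Both rope wrappers above had a stray semicolon after the closing brace of the function body. In C that extra `;` is an empty top-level declaration: it compiles, but pedantic warning levels flag it, which is presumably why it is dropped here. A minimal illustration (not ggml code):

static int twice(int x) {
    return 2*x;
} // the definition ends at the brace; an extra ';' here would be an empty file-scope declaration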
@@ -8003,7 +8002,7 @@ static void ggml_compute_forward_add_q_f32(
    const int64_t ne00 = src0->ne[0];
    const int64_t ne01 = src0->ne[1];
    const int64_t ne02 = src0->ne[2];
-     const int64_t ne03 = src0->ne[3];
+     // const int64_t ne03 = src0->ne[3];

    const size_t nb00 = src0->nb[0];
    const size_t nb01 = src0->nb[1];
@@ -8028,7 +8027,7 @@ static void ggml_compute_forward_add_q_f32(
    quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q;

    // we don't support permuted src0 or src1
-     GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
+     GGML_ASSERT(nb00 == GGML_TYPE_SIZE[type]);
    GGML_ASSERT(nb10 == sizeof(float));

    // dst cannot be transposed or permuted
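Dropping the (int) cast here (and in the identical assert in ggml_compute_forward_add1_q_f32 further down) makes the comparison unsigned on both sides: nb00 is a size_t stride, and GGML_TYPE_SIZE appears to be a size_t table, so casting one side to int only invited sign-compare and truncation warnings. A tiny standalone sketch of the before/after (names reused for illustration only, on the assumption that both operands really are size_t):

#include <assert.h>
#include <stddef.h>

static void check_stride(size_t nb00, size_t type_size) {
    // before: assert(nb00 == (int) type_size);  // int vs size_t, sign-compare warning
    assert(nb00 == type_size);                   // after: both operands are size_t
}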
@@ -8131,9 +8130,6 @@ static void ggml_compute_forward_add1_f32(
        return;
    }

-     // scalar to add
-     const float v = *(float *) src1->data;
-
    const int ith = params->ith;
    const int nth = params->nth;
@@ -8147,11 +8143,6 @@ static void ggml_compute_forward_add1_f32(
    const size_t nb02 = src0->nb[2];
    const size_t nb03 = src0->nb[3];

-     const size_t nb10 = src1->nb[0];
-     const size_t nb11 = src1->nb[1];
-     const size_t nb12 = src1->nb[2];
-     const size_t nb13 = src1->nb[3];
-
    const size_t nb0 = dst->nb[0];
    const size_t nb1 = dst->nb[1];
    const size_t nb2 = dst->nb[2];
@@ -8177,13 +8168,13 @@ static void ggml_compute_forward_add1_f32(
            vDSP_vadd(
                    (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
                    (float *) ((char *) src1->data), 0,
-                     (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
+                     (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1 ), 1,
                    ne0);
#else
            ggml_vec_add1_f32(ne0,
                    (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1 ),
                    (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01),
-                     v );
+                     *(float *) src1->data );
#endif
        }
    }
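Taken together with the two hunks above, the scalar-add path in ggml_compute_forward_add1_f32 no longer hoists the addend into a local v (or reads the unused src1 strides); it dereferences the one-element src1 tensor directly at the call site. The Accelerate branch already broadcasts that single value by passing stride 0 for src1 to vDSP_vadd. A plain-C sketch of what the fallback branch does per row (my paraphrase of ggml_vec_add1_f32-style semantics, not the ggml source):

// z[i] = x[i] + v for one row of n elements; v is the scalar held in src1
static void vec_add1_f32(const int n, float * z, const float * x, const float v) {
    for (int i = 0; i < n; ++i) {
        z[i] = x[i] + v;
    }
}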
@@ -8348,7 +8339,7 @@ static void ggml_compute_forward_add1_q_f32(
    quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q;

    // we don't support permuted src0
-     GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
+     GGML_ASSERT(nb00 == GGML_TYPE_SIZE[type]);

    // dst cannot be transposed or permuted
    GGML_ASSERT(nb0 <= nb1);
@@ -8510,7 +8501,7 @@ static void ggml_compute_forward_acc_f32(
            vDSP_vadd(
                    (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + offset), 1,
                    (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
-                     (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + offset), 1, nc);
+                     (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1  + offset), 1, nc);
#else
            ggml_vec_add_f32(nc,
                    (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1  + offset),
@@ -9825,28 +9816,30 @@ static void ggml_compute_forward_rms_norm_back_f32(
        for (int64_t i02 = 0; i02 < ne02; i02++) {
            for (int64_t i01 = ith; i01 < ne01; i01 += nth) {
                // src1 is same shape as src0 => same indices
-                 const auto i11 = i01;
-                 const auto i12 = i02;
-                 const auto i13 = i03;
+                 const int64_t i11 = i01;
+                 const int64_t i12 = i02;
+                 const int64_t i13 = i03;
+
                const float * x  = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
                const float * dz = (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13);

-                 ggml_float sum_xx = 0.0;
+                 ggml_float sum_xx  = 0.0;
                ggml_float sum_xdz = 0.0;
+
                for (int64_t i00 = 0; i00 < ne00; i00++) {
-                     sum_xx += (ggml_float)(x[i00] * x[i00]);
+                     sum_xx  += (ggml_float)(x[i00] * x[i00]);
                    sum_xdz += (ggml_float)(x[i00] * dz[i00]);
                }

-                 const float mean = sum_xx/ne00;
+                 const float mean     = sum_xx/ne00;
                const float mean_eps = sum_xx/ne00 + eps;
-                 const float sum_eps = sum_xx + eps*ne00;
+                 const float sum_eps  = sum_xx + eps*ne00;
                const float mean_xdz = sum_xdz/ne00;
                // we could cache rms from forward pass to improve performance.
                // to do this implement ggml_rms and compose ggml_rms_norm using ggml_rms.
-                 const float rms = sqrtf(mean_eps);
-                 const float rrms = 1.0f / sqrtf(mean_eps);
-                 const float scale = -rrms/(ne00 * mean_eps); // -1/(n*rms**3)
+                 const float rms   = sqrtf(mean_eps);
+                 const float rrms  = 1.0f / sqrtf(mean_eps);
+                 const float scale = -rrms/(ne00 * mean_eps); // -1/(n*rms**3)

                {
                    // z = rms_norm(x)
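The constants rms, rrms and scale follow from differentiating the forward definition z = x / rms(x) with rms(x) = sqrt(mean(x^2) + eps), which is what the surrounding sums suggest. A short derivation of the gradient this kernel accumulates (my own worked math, hedged on that forward definition):

\[ r = \sqrt{\tfrac{1}{n}\textstyle\sum_k x_k^2 + \varepsilon}, \qquad z_i = \frac{x_i}{r} \]
\[ \frac{\partial r}{\partial x_j} = \frac{x_j}{n r}, \qquad \frac{\partial z_i}{\partial x_j} = \frac{\delta_{ij}}{r} - \frac{x_i x_j}{n r^3} \]
\[ dx_j = \sum_i dz_i \, \frac{\partial z_i}{\partial x_j} = \frac{dz_j}{r} - \frac{x_j}{n r^3} \sum_i x_i \, dz_i \]

Here 1/r is rrms, the sum over x_i*dz_i is sum_xdz, and since mean_eps = r^2 the factor -1/(n r^3) equals -rrms/(ne00 * mean_eps), i.e. the scale variable and the "-1/(n*rms**3)" comment above.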
@@ -10760,11 +10753,6 @@ static void ggml_compute_forward_set_f32(
    // src0 and dst as viewed during set
    const size_t nb0 = ggml_element_size(src0);

-     const size_t nb00 = nb0;
-     const size_t nb01 = nb1;
-     const size_t nb02 = nb2;
-     const size_t nb03 = nb3;
-
    const int im0 = (ne10 == 0 ? 0 : ne10-1);
    const int im1 = (ne11 == 0 ? 0 : ne11-1);
    const int im2 = (ne12 == 0 ? 0 : ne12-1);
@@ -11154,7 +11142,7 @@ static void ggml_compute_forward_diag_f32(
    GGML_ASSERT(ne03 == ne3);

    const int nb00 = src0->nb[0];
-     const int nb01 = src0->nb[1];
+     // const int nb01 = src0->nb[1];
    const int nb02 = src0->nb[2];
    const int nb03 = src0->nb[3];
    const int nb0 = dst->nb[0];