
Commit 689a4ec

ggml : restore var names
ggml-ci
1 parent 480440a commit 689a4ec


2 files changed: +40 -40 lines changed

ggml-common.h

Lines changed: 2 additions & 2 deletions
@@ -121,8 +121,8 @@ static_assert(sizeof(block_q8_0) == sizeof(ggml_half) + QK8_0, "wrong q8_0 block
 typedef struct {
     union {
         struct {
-            ggml_half xxxd; // delta
-            ggml_half xxxs; // d * sum(qs[i])
+            ggml_half d; // delta
+            ggml_half s; // d * sum(qs[i])
         } GGML_COMMON_AGGR;
         ggml_half2 ds;
     };
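Note (not part of the commit): the two renamed fields sit in an anonymous union with `ggml_half2 ds`, so scalar code can address the scale and the precomputed sum separately as `d` and `s`, while vectorized/GPU paths can load both halves with one `ds` access. Below is a minimal standalone sketch of that layout idea, using plain `float` instead of `ggml_half` so it builds without the ggml headers; the `_sketch` names are hypothetical.

#include <stdio.h>

// Mirrors the block_q8_1 union above with plain floats
// (ggml_half/ggml_half2 and GGML_COMMON_AGGR are left out on purpose).
typedef struct {
    union {
        struct {
            float d; // delta
            float s; // d * sum(qs[i])
        };
        float ds[2]; // the same two values, viewed as a pair
    };
} block_q8_1_sketch;

int main(void) {
    block_q8_1_sketch b;
    b.d = 0.5f;
    b.s = 8.0f;
    // d and s alias ds[0] and ds[1]; this is what lets SIMD code
    // load or store both values in a single paired access.
    printf("%f %f\n", b.ds[0], b.ds[1]); // prints 0.500000 8.000000
    return 0;
}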

ggml-quants.c

Lines changed: 38 additions & 38 deletions
@@ -954,7 +954,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
         const float d = amax / ((1 << 7) - 1);
         const float id = d ? 1.0f/d : 0.0f;

-        y[i].xxxd = GGML_FP32_TO_FP16(d);
+        y[i].d = GGML_FP32_TO_FP16(d);

         int sum = 0;

@@ -969,7 +969,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
             sum += y[i].qs[QK8_1/2 + j];
         }

-        y[i].xxxs = GGML_FP32_TO_FP16(sum*d);
+        y[i].s = GGML_FP32_TO_FP16(sum*d);
     }
 }

@@ -997,7 +997,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
         const float d = amax / ((1 << 7) - 1);
         const float id = d ? 1.0f/d : 0.0f;

-        y[i].xxxd = GGML_FP32_TO_FP16(d);
+        y[i].d = GGML_FP32_TO_FP16(d);

         int32x4_t accv = vdupq_n_s32(0);

@@ -1013,7 +1013,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
             accv = vaddq_s32(accv, vi);
         }

-        y[i].xxxs = GGML_FP32_TO_FP16(d * vaddvq_s32(accv));
+        y[i].s = GGML_FP32_TO_FP16(d * vaddvq_s32(accv));
     }
 #elif defined(__wasm_simd128__)
     for (int i = 0; i < nb; i++) {
@@ -1036,7 +1036,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
         const float d = amax / ((1 << 7) - 1);
         const float id = d ? 1.0f/d : 0.0f;

-        y[i].xxxd = GGML_FP32_TO_FP16(d);
+        y[i].d = GGML_FP32_TO_FP16(d);

         v128_t accv = wasm_i32x4_splat(0);

@@ -1052,7 +1052,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
             accv = wasm_i32x4_add(accv, vi);
         }

-        y[i].xxxs = GGML_FP32_TO_FP16(
+        y[i].s = GGML_FP32_TO_FP16(
             d * (wasm_i32x4_extract_lane(accv, 0) +
                  wasm_i32x4_extract_lane(accv, 1) +
                  wasm_i32x4_extract_lane(accv, 2) +
@@ -1081,7 +1081,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {

         // Quantize these floats
         const float d = maxScalar / 127.f;
-        y[i].xxxd = GGML_FP32_TO_FP16(d);
+        y[i].d = GGML_FP32_TO_FP16(d);
         const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f;
         const __m256 mul = _mm256_set1_ps( id );

@@ -1105,7 +1105,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {

 #if defined(__AVX2__)
         // Compute the sum of the quants and set y[i].s
-        y[i].xxxs = GGML_FP32_TO_FP16(d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3))));
+        y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3))));

         // Convert int32 to int16
         i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15
@@ -1135,7 +1135,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
         // Compute the sum of the quants and set y[i].s
         const __m128i s0 = _mm_add_epi32(_mm_add_epi32(ni0, ni1), _mm_add_epi32(ni2, ni3));
         const __m128i s1 = _mm_add_epi32(_mm_add_epi32(ni4, ni5), _mm_add_epi32(ni6, ni7));
-        y[i].xxxs = GGML_FP32_TO_FP16(d * hsum_i32_4(_mm_add_epi32(s0, s1)));
+        y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_4(_mm_add_epi32(s0, s1)));

         // Convert int32 to int16
         ni0 = _mm_packs_epi32( ni0, ni1 );
@@ -1166,7 +1166,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
         const float d = amax / ((1 << 7) - 1);
         const float id = d ? 1.0f/d : 0.0f;

-        y[i].xxxd = GGML_FP32_TO_FP16(d);
+        y[i].d = GGML_FP32_TO_FP16(d);

         vfloat32m4_t x0 = __riscv_vfmul_vf_f32m4(v_x, id, vl);

@@ -1183,7 +1183,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {

         // set y[i].s
         int sum = __riscv_vmv_x_s_i16m1_i16(vwrs);
-        y[i].xxxs = GGML_FP32_TO_FP16(sum*d);
+        y[i].s = GGML_FP32_TO_FP16(sum*d);
     }
 #else
     GGML_UNUSED(nb);
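Note (not part of the commit): the hunks above only touch field names, but they all live in the q8_1 quantization routines, which compute the same two per-block values: the scale d = amax/127 and the precomputed s = d * sum(qs). A minimal scalar sketch of that per-block computation follows, assuming the reference rounding behaviour (roundf) and the QK8_1 = 32 block size from ggml-common.h; the `_sketch` names are hypothetical.

#include <math.h>
#include <stdint.h>

#define QK8_1 32

typedef struct {
    float  d;           // delta (scale)
    float  s;           // d * sum(qs[i])
    int8_t qs[QK8_1];   // quantized values
} block_q8_1_sketch;

// Quantize one block of QK8_1 floats the way quantize_row_q8_1_reference does
// (scalar path; the SIMD variants above compute the same d and s).
static void quantize_block_q8_1_sketch(const float * x, block_q8_1_sketch * y) {
    float amax = 0.0f; // absolute max of the block
    for (int j = 0; j < QK8_1; j++) {
        const float ax = fabsf(x[j]);
        if (ax > amax) amax = ax;
    }

    const float d  = amax / ((1 << 7) - 1);  // map [-amax, amax] onto [-127, 127]
    const float id = d ? 1.0f/d : 0.0f;

    y->d = d;

    int sum = 0;
    for (int j = 0; j < QK8_1; j++) {
        y->qs[j] = (int8_t) roundf(x[j]*id);
        sum += y->qs[j];
    }

    y->s = sum*d; // consumed by the q4_1/q5_1 dot products below
}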
@@ -4086,10 +4086,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
             const block_q8_1 * restrict b_y0 = &vy0[i];
             const block_q8_1 * restrict b_y1 = &vy1[i];

-            float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->xxxs),
-                                   GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y0->xxxs),
-                                   GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y1->xxxs),
-                                   GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y1->xxxs)};
+            float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s),
+                                   GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y0->s),
+                                   GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y1->s),
+                                   GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y1->s)};
             summs0 += summs_t;

             const uint8x16_t m4b = vdupq_n_u8(0x0F);
@@ -4110,10 +4110,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
             const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16);

             // mmla into int32x4_t
-            float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->xxxd),
-                                 GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->xxxd),
-                                 GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->xxxd),
-                                 GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->xxxd)};
+            float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d),
+                                 GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d),
+                                 GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d),
+                                 GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d)};

             int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
             int8x16_t l1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
@@ -4154,7 +4154,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
         const block_q8_1 * restrict y0 = &y[i + 0];
         const block_q8_1 * restrict y1 = &y[i + 1];

-        summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->xxxs) + GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->xxxs);
+        summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s) + GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s);

         const uint8x16_t m4b = vdupq_n_u8(0x0F);

@@ -4177,8 +4177,8 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
         const int32x4_t p_0 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l), v0_0h, v1_0h);
         const int32x4_t p_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l), v0_1h, v1_1h);

-        sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->xxxd));
-        sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->xxxd));
+        sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
+        sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
     }

     *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs;
@@ -4191,9 +4191,9 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
     // Main loop
     for (int i = 0; i < nb; ++i) {
         const float d0 = GGML_FP16_TO_FP32(x[i].d);
-        const float d1 = GGML_FP16_TO_FP32(y[i].xxxd);
+        const float d1 = GGML_FP16_TO_FP32(y[i].d);

-        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].xxxs);
+        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);

         const __m256 d0v = _mm256_set1_ps( d0 );
         const __m256 d1v = _mm256_set1_ps( d1 );
@@ -4245,7 +4245,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r

         int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);

-        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].xxxd))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].xxxs);
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
     }

     *s = sumf;
@@ -4263,7 +4263,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
             sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
         }

-        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].xxxd))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].xxxs);
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
     }

     *s = sumf;
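Note (not part of the commit): the scalar hunk just above also shows why `s` is stored at all. A q4_1 block encodes x[j] ≈ d_x*q_x[j] + m_x, and a q8_1 block encodes y[j] = d_y*q_y[j] with s_y = d_y*sum(q_y). So the block dot product decomposes as

    sum_j x[j]*y[j] ≈ d_x*d_y * sum_j q_x[j]*q_y[j] + m_x * d_y * sum_j q_y[j]
                    = (d_x*d_y)*sumi + m_x*s_y

which is exactly the `sumf += (d*d)*sumi + m*s` line restored above; the q5_1 hunks that follow use the same decomposition.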
@@ -4599,8 +4599,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r

         const uint8x16_t m4b = vdupq_n_u8(0x0F);

-        summs0 += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->xxxs);
-        summs1 += GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->xxxs);
+        summs0 += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);
+        summs1 += GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s);

         // extract the 5th bit via lookup table ((b) << 4)
         memcpy(&qh0, x0->qh, sizeof(qh0));
@@ -4644,10 +4644,10 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r

         sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(
                         ggml_vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l),
-                        ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->xxxd));
+                        ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
         sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(
                         ggml_vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l),
-                        ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->xxxd));
+                        ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
     }

     *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1;
@@ -4664,7 +4664,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
         const block_q5_1 * restrict x0 = &x[i];
         const block_q8_1 * restrict y0 = &y[i];

-        summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->xxxs);
+        summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);

         const v128_t m4b = wasm_i8x16_splat(0x0F);

@@ -4711,7 +4711,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
                                wasm_i32x4_dot_i16x8(v0lfh, v1lh)),
                 wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl),
                                wasm_i32x4_dot_i16x8(v0hfh, v1hh)))),
-                    wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->xxxd))));
+                    wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d))));
     }

     *s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) +
@@ -4726,14 +4726,14 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
     for (int i = 0; i < nb; i++) {
         const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));

-        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].xxxs);
+        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);

         __m256i qx = bytes_from_nibbles_32(x[i].qs);
         __m256i bxhi = bytes_from_bits_32(x[i].qh);
         bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10));
         qx = _mm256_or_si256(qx, bxhi);

-        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].xxxd));
+        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
         const __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);

         const __m256 q = mul_sum_us8_pairs_float(qx, qy);
@@ -4753,7 +4753,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
     for (int i = 0; i < nb; i++) {
         const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));

-        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].xxxs);
+        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);

         __m256i bx_0 = bytes_from_nibbles_32(x[i].qs);
         const __m256i bxhi = bytes_from_bits_32(x[i].qh);
@@ -4767,7 +4767,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
         bxh = _mm_or_si128(bxh, bxhih);
         bx_0 = MM256_SET_M128I(bxh, bxl);

-        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].xxxd));
+        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
         const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[i].qs);

         const __m256 q = mul_sum_us8_pairs_float(bx_0, by_0);
@@ -4834,7 +4834,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r

         int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);

-        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].xxxd))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].xxxs);
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
     }

     *s = sumf;
@@ -4858,7 +4858,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
             sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
         }

-        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].xxxd))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].xxxs);
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
     }

     *s = sumf;

0 commit comments