
Commit 0c27e6f

ggml : fix loongson compile warnings (#7537)
* ggml : fix loongson compile warnings

  ggml-ci

* Fix loongarch quantize test failure. Fix an unexpected error introduced during a code rebase.

* tests : disable the json test due to lack of Python on the CI node

  ggml-ci

Co-authored-by: junchao-loongson <[email protected]>
1 parent 2e32f87 commit 0c27e6f


3 files changed: +35, -11 lines


ggml-quants.c

Lines changed: 23 additions & 3 deletions
@@ -6088,6 +6088,7 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * r
 
         const uint8_t * restrict q2 = x[i].qs;
         const int8_t * restrict q8 = y[i].qs;
+
         const __m128i mins_and_scales = __lsx_vld((const __m128i*)x[i].scales, 0);
         const __m128i scales8 = __lsx_vand_v(mins_and_scales, m4);
         const __m128i mins8 = __lsx_vand_v(__lsx_vsrli_h(mins_and_scales, 4), m4);
@@ -6807,6 +6808,8 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
     for (int i = 0; i < nb; ++i) {
 
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+        const uint8_t * restrict q3 = x[i].qs;
+        const int8_t * restrict q8 = y[i].qs;
         // Set up scales
         memcpy(aux, x[i].scales, 12);
         __m128i scales128 = lsx_set_w(
@@ -6830,8 +6833,6 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
         int is = 0;
         __m256i xvbit;
 
-        const uint8_t * restrict q3 = x[i].qs;
-        const int8_t * restrict q8 = y[i].qs;
 
         for (int j = 0; j < QK_K/128; ++j) {
             // load low 2 bits
@@ -7404,6 +7405,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
     *s = vec_extract(vsumf0, 0);
 
 #elif defined __loongarch_asx
+    GGML_UNUSED(kmask1);
+    GGML_UNUSED(kmask2);
+    GGML_UNUSED(kmask3);
 
     const __m256i m4 = __lasx_xvreplgr2vr_b(0xF);
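The GGML_UNUSED calls added here (and in the matching q5_K hunk below) mark the kmask constants as deliberately referenced, so the compiler stops flagging them as unused variables in the LASX build path. A minimal sketch of the idiom, using a hypothetical macro name rather than ggml's own definition:

/* Sketch of the conventional "unused variable" macro; MY_UNUSED is a
 * hypothetical stand-in, not necessarily how ggml.h defines GGML_UNUSED. */
#define MY_UNUSED(x) (void)(x)

static int demo(void) {
    const unsigned int kmask1 = 0x3f3f3f3f;  /* example value */
    MY_UNUSED(kmask1);                       /* cast to void: counts as a use, generates no code */
    return 0;
}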

@@ -7416,6 +7420,11 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
 
         memcpy(utmp, x[i].scales, 12);
+        utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
+        const uint32_t uaux = utmp[1] & kmask1;
+        utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
+        utmp[2] = uaux;
+        utmp[0] &= kmask1;
 
         const uint8_t * restrict q4 = x[i].qs;
         const int8_t * restrict q8 = y[i].qs;
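These five added lines are the loongarch quantize-test fix from the commit message: the LASX q4_K path copied the 12 packed scale bytes with memcpy but did not repack them into separate scale and min words the way the other SIMD paths do, so downstream code presumably read the wrong bit fields. A standalone sketch of the same repacking follows; the mask values and the resulting layout are assumptions taken from the reference K-quant path, so verify them against ggml-quants.c:

#include <stdint.h>
#include <string.h>

/* Expand the 12-byte Q4_K/Q5_K super-block scale header into four 32-bit
 * words; in the reference path utmp[0..1] are then read as eight 6-bit scales
 * and utmp[2..3] as eight 6-bit mins (one byte per value). Assumed masks:
 * kmask1 = 0x3f3f3f3f, kmask2 = 0x0f0f0f0f, kmask3 = 0x03030303. */
static void unpack_k_scales(const uint8_t packed[12], uint32_t utmp[4]) {
    const uint32_t kmask1 = 0x3f3f3f3f;
    const uint32_t kmask2 = 0x0f0f0f0f;
    const uint32_t kmask3 = 0x03030303;

    memcpy(utmp, packed, 12);
    utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
    const uint32_t uaux = utmp[1] & kmask1;
    utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
    utmp[2] = uaux;
    utmp[0] &= kmask1;
}

The q5_K hunk further down adds the identical five lines after its own memcpy.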
@@ -7455,16 +7464,17 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
 
         __m256 vd = __lasx_xvreplfr2vr_s(d);
         acc = __lasx_xvfmadd_s(vd, __lasx_xvffint_s_w(sumi), acc);
+
     }
 
     acc_m = __lsx_vfadd_s(acc_m, (__m128)__lsx_vpermi_w((__m128i)acc_m, (__m128i)acc_m, 0xee));
     __m128i tmp1 = __lsx_vinsgr2vr_w(__lsx_vldi(0), __lsx_vpickve2gr_w((__m128i)acc_m, 1), 0);
     acc_m = __lsx_vfadd_s(acc_m, (__m128)tmp1);
 
+
     ft_union fi;
     fi.i = __lsx_vpickve2gr_w(acc_m, 0);
     *s = hsum_float_8(acc) + fi.f ;
-
 #else
 
     const uint8_t * scales = (const uint8_t*)&utmp[0];
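For reading the tail of the hunk above: __lsx_vpickve2gr_w extracts lane 0 of the 128-bit partial sum acc_m as a 32-bit integer, and ft_union reinterprets those bits as a float before it is added to hsum_float_8(acc). A hypothetical stand-in for that union (the real ft_union is defined elsewhere in the ggml sources):

#include <stdint.h>

/* Hypothetical equivalent of ft_union: reinterpret the raw 32-bit lane as an
 * IEEE-754 float via the usual C union-punning idiom. */
typedef union {
    int32_t i;
    float   f;
} my_ft_union;

static float lane_bits_to_float(int32_t lane_bits) {
    my_ft_union u;
    u.i = lane_bits;  /* store the raw bits extracted from the vector lane */
    return u.f;       /* read them back as a float */
}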
@@ -8002,6 +8012,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
     *s = vec_extract(vsumf0, 0);
 
 #elif defined __loongarch_asx
+    GGML_UNUSED(kmask1);
+    GGML_UNUSED(kmask2);
+    GGML_UNUSED(kmask3);
 
     const __m256i m4 = __lasx_xvreplgr2vr_b(0xF);
     const __m128i mzero = __lsx_vldi(0);
@@ -8020,6 +8033,11 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
 
         memcpy(utmp, x[i].scales, 12);
+        utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
+        const uint32_t uaux = utmp[1] & kmask1;
+        utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
+        utmp[2] = uaux;
+        utmp[0] &= kmask1;
 
         const __m256i mins_and_scales = lasx_extu8_16(lsx_set_w(utmp[3], utmp[2], utmp[1], utmp[0]));
 
@@ -8069,10 +8087,12 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
             p16_1 = lasx_madd_h(scale_1, p16_1);
 
             sumi = __lasx_xvadd_w(sumi, __lasx_xvadd_w(p16_0, p16_1));
+
         }
 
         __m256 vd = __lasx_xvreplfr2vr_s(d);
         acc = __lasx_xvfmadd_s(vd, __lasx_xvffint_s_w(sumi), acc);
+
     }
 
     *s = hsum_float_8(acc) + summs;

ggml.c

Lines changed: 7 additions & 6 deletions
@@ -1576,11 +1576,11 @@ do { \
 
 // F16 arithmetic is not supported by AVX, so we use F32 instead
 
-#define GGML_F32Cx8 __m256
+#define GGML_F32Cx8 __m256
 #define GGML_F32Cx8_ZERO (__m256)__lasx_xvldi(0)
 #define GGML_F32Cx8_SET1(x) (__m256)__lasx_xvreplgr2vr_w((x))
 
-static inline __m256 __lasx_f32cx8_load(const ggml_fp16_t *x) {
+static inline __m256 __lasx_f32cx8_load(const ggml_fp16_t * x) {
     float tmp[8];
 
     for (int i = 0; i < 8; i++) {
@@ -1589,13 +1589,14 @@ static inline __m256 __lasx_f32cx8_load(const ggml_fp16_t *x) {
 
     return (__m256)__lasx_xvld(tmp, 0);
 }
-static inline void __lasx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
+static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) {
     float arr[8];
 
     __lasx_xvst(y, arr, 0);
 
-    for (int i = 0; i < 8; i++)
+    for (int i = 0; i < 8; i++) {
         x[i] = GGML_FP32_TO_FP16(arr[i]);
+    }
 }
 #define GGML_F32Cx8_LOAD(x) __lasx_f32cx8_load(x)
 #define GGML_F32Cx8_STORE(x, y) __lasx_f32cx8_store(x, y)
@@ -1671,7 +1672,7 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
 #define GGML_F16_STEP 32
 #define GGML_F16_EPR 4
 
-static inline __m128 __lsx_f16x4_load(ggml_fp16_t *x) {
+static inline __m128 __lsx_f16x4_load(const ggml_fp16_t * x) {
     float tmp[4];
 
     tmp[0] = GGML_FP16_TO_FP32(x[0]);
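The const qualifier added to __lsx_f16x4_load (its __lasx_f32cx8_load counterpart above already has it) plausibly addresses a discarded-qualifiers warning: call sites hand these helpers pointers into const input rows, and a non-const parameter makes GCC/Clang complain. A minimal sketch with hypothetical names, not ggml's actual code:

typedef unsigned short fp16_bits;                 /* stand-in for ggml_fp16_t */

static fp16_bits load_nonconst(fp16_bits * x)    { return x[0]; }
static fp16_bits load_const(const fp16_bits * x) { return x[0]; }

static fp16_bits first_elem(const fp16_bits * row) {
    /* load_nonconst(row);  <- warns: passing a const pointer discards the qualifier */
    return load_const(row); /* clean: parameter is const-qualified */
}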
@@ -1682,7 +1683,7 @@ static inline __m128 __lsx_f16x4_load(ggml_fp16_t *x) {
     return __lsx_vld(tmp, 0);
 }
 
-static inline void __lsx_f16x4_store(ggml_fp16_t *x, __m128 y) {
+static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
     float arr[4];
 
     __lsx_vst(y, arr, 0);

tests/CMakeLists.txt

Lines changed: 5 additions & 2 deletions
@@ -129,8 +129,11 @@ llama_target_and_test(test-rope.cpp)
 llama_target_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_target_and_test(test-autorelease.cpp LABEL "model")
 
-llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
-target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
+# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
+if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
+    llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+    target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
+endif()
 
 # dummy executable - not installed
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
