Skip to content

Commit 872c365

Browse files
committed
ggml : fix AVX build + update to new Q8_0 format
1 parent 955ef9a commit 872c365

File tree

3 files changed

+20
-8
lines changed

3 files changed

+20
-8
lines changed

Makefile

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,17 @@ endif
7474
# feel free to update the Makefile for your architecture and send a pull request or issue
7575
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
7676
# Use all CPU extensions that are available:
77-
CFLAGS += -march=native -mtune=native
77+
CFLAGS += -march=native -mtune=native
7878
CXXFLAGS += -march=native -mtune=native
79+
80+
# Use AVX only
81+
#CFLAGS += -mfma -mf16c -mavx
82+
#CXXFLAGS += -mfma -mf16c -mavx
7983
endif
8084
ifneq ($(filter ppc64%,$(UNAME_M)),)
8185
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
8286
ifneq (,$(findstring POWER9,$(POWER9_M)))
83-
CFLAGS += -mcpu=power9
87+
CFLAGS += -mcpu=power9
8488
CXXFLAGS += -mcpu=power9
8589
endif
8690
# Require c++23's std::byteswap for big-endian support.
@@ -114,7 +118,7 @@ ifdef LLAMA_GPROF
114118
CXXFLAGS += -pg
115119
endif
116120
ifneq ($(filter aarch64%,$(UNAME_M)),)
117-
CFLAGS += -mcpu=native
121+
CFLAGS += -mcpu=native
118122
CXXFLAGS += -mcpu=native
119123
endif
120124
ifneq ($(filter armv6%,$(UNAME_M)),)

ggml.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,14 @@ static inline int hsum_i32_8(const __m256i a) {
468468
return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32));
469469
}
470470

471+
// horizontally add 4 int32_t
static inline int hsum_i32_4(const __m128i a) {
    // fold the upper two lanes onto the lower two: [a0+a2, a1+a3, a2+a0, a3+a1]
    const __m128i folded  = _mm_add_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
    // swap the two surviving partial sums so one add yields the total in lane 0
    const __m128i swapped = _mm_shuffle_epi32(folded, _MM_SHUFFLE(2, 3, 0, 1));
    return _mm_cvtsi128_si32(_mm_add_epi32(folded, swapped));
}
478+
471479
#if __AVX2__ || __AVX512F__
472480
// Unpack 32 4-bit fields into 32 bytes
473481
// The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
@@ -1381,7 +1389,6 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
13811389
y[i].s1 = d * sum1;
13821390
}
13831391
#elif defined(__AVX2__) || defined(__AVX__)
1384-
// TODO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
13851392
for (int i = 0; i < nb; i++) {
13861393
// Load elements into 4 AVX vectors
13871394
__m256 v0 = _mm256_loadu_ps( x );
@@ -1460,7 +1467,8 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
14601467
// Compute the sum of the quants and set y[i].s
14611468
const __m128i s0 = _mm_add_epi32(_mm_add_epi32(ni0, ni1), _mm_add_epi32(ni2, ni3));
14621469
const __m128i s1 = _mm_add_epi32(_mm_add_epi32(ni4, ni5), _mm_add_epi32(ni6, ni7));
1463-
y[i].s = d * hsum_i32_8(_mm256_set_m128i(s1, s0));
1470+
y[i].s0 = d * hsum_i32_4(s0);
1471+
y[i].s1 = d * hsum_i32_4(s1);
14641472

14651473
// Convert int32 to int16
14661474
ni0 = _mm_packs_epi32( ni0, ni1 );

llama.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
6868
{ MODEL_65B, 512ull * MB },
6969
};
7070
return _MEM_REQ_SCRATCH1;
71-
};
71+
}
7272

7373
// 2*n_embd*n_ctx*n_layer*sizeof(float16)
7474
static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
@@ -80,7 +80,7 @@ static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
8080
{ MODEL_65B, 5120ull * MB },
8181
};
8282
return _MEM_REQ_KV_SELF;
83-
};
83+
}
8484

8585
// this is mostly needed for temporary mul_mat buffers to dequantize the data
8686
// not actually needed if BLAS is disabled
@@ -93,7 +93,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
9393
{ MODEL_65B, 1536ull * MB },
9494
};
9595
return _MEM_REQ_EVAL;
96-
};
96+
}
9797

9898
// default hparams (LLaMA 7B)
9999
struct llama_hparams {

0 commit comments

Comments
 (0)