File tree Expand file tree Collapse file tree 5 files changed +15
-7
lines changed Expand file tree Collapse file tree 5 files changed +15
-7
lines changed Original file line number Diff line number Diff line change @@ -290,9 +290,9 @@ if (GGML_CPU_ALL_VARIANTS)
290
290
ggml_add_cpu_backend_variant (haswell AVX F16C AVX2 FMA )
291
291
ggml_add_cpu_backend_variant (skylakex AVX F16C AVX2 FMA AVX512 )
292
292
ggml_add_cpu_backend_variant (icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI )
293
+ ggml_add_cpu_backend_variant (alderlake AVX F16C AVX2 FMA AVX_VNNI )
293
294
if (NOT MSVC )
294
- # MSVC doesn't support AVX-VNNI or AMX
295
- ggml_add_cpu_backend_variant (alderlake AVX F16C AVX2 FMA AVX_VNNI )
295
+ # MSVC doesn't support AMX
296
296
ggml_add_cpu_backend_variant (sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8 )
297
297
endif ()
298
298
else ()
Original file line number Diff line number Diff line change @@ -215,8 +215,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
215
215
list (APPEND ARCH_DEFINITIONS GGML_SSE42 )
216
216
endif ()
217
217
if (GGML_AVX_VNNI )
218
- # MSVC generates AVX512 with AVX-VNNI intrinsics even with /arch:AVX2
219
- #list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
218
+ list (APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI )
220
219
endif ()
221
220
else ()
222
221
if (GGML_NATIVE )
Original file line number Diff line number Diff line change @@ -194,9 +194,12 @@ static inline __m256i sum_i16_pairs_int32x8(const __m256i x) {
194
194
}
195
195
196
196
static inline __m256i mul_sum_us8_pairs_int32x8 (const __m256i ax, const __m256i sy) {
197
- #if defined(__AVXVNNI__) || (defined( __AVX512VNNI__) && defined(__AVX512VL__) )
197
+ #if defined(__AVX512VNNI__) && defined(__AVX512VL__)
198
198
const __m256i zero = _mm256_setzero_si256 ();
199
199
return _mm256_dpbusd_epi32 (zero, ax, sy);
200
+ #elif defined(__AVXVNNI__)
201
+ const __m256i zero = _mm256_setzero_si256 ();
202
+ return _mm256_dpbusd_avx_epi32 (zero, ax, sy);
200
203
#else
201
204
// Perform multiplication and create 16-bit values
202
205
const __m256i dot = _mm256_maddubs_epi16 (ax, sy);
Original file line number Diff line number Diff line change @@ -103,10 +103,14 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
103
103
}
104
104
105
105
static inline __m256 mul_sum_us8_pairs_float (const __m256i ax , const __m256i sy ) {
106
- #if defined(__AVXVNNI__ ) || (defined( __AVX512VNNI__ ) && defined(__AVX512VL__ ) )
106
+ #if defined(__AVX512VNNI__ ) && defined(__AVX512VL__ )
107
107
const __m256i zero = _mm256_setzero_si256 ();
108
108
const __m256i summed_pairs = _mm256_dpbusd_epi32 (zero , ax , sy );
109
109
return _mm256_cvtepi32_ps (summed_pairs );
110
+ #elif defined(__AVXVNNI__ )
111
+ const __m256i zero = _mm256_setzero_si256 ();
112
+ const __m256i summed_pairs = _mm256_dpbusd_avx_epi32 (zero , ax , sy );
113
+ return _mm256_cvtepi32_ps (summed_pairs );
110
114
#else
111
115
// Perform multiplication and create 16-bit values
112
116
const __m256i dot = _mm256_maddubs_epi16 (ax , sy );
Original file line number Diff line number Diff line change @@ -1000,8 +1000,10 @@ class tinyBLAS_Q0_AVX {
1000
1000
1001
1001
inline __m256 updot (__m256i u, __m256i s) {
1002
1002
__m256i res;
1003
- #if defined(__AVXVNNI__) || (defined( __AVX512VNNI__) && defined(__AVX512VL__) )
1003
+ #if defined(__AVX512VNNI__) && defined(__AVX512VL__)
1004
1004
res = _mm256_dpbusd_epi32 (_mm256_setzero_si256 (), u, s);
1005
+ #elif defined(__AVXVNNI__)
1006
+ res = _mm256_dpbusd_avx_epi32 (_mm256_setzero_si256 (), u, s);
1005
1007
#else
1006
1008
res = _mm256_madd_epi16 (_mm256_set1_epi16 (1 ), _mm256_maddubs_epi16 (u, s));
1007
1009
#endif
You can’t perform that action at this time.
0 commit comments