Skip to content

Commit 532802f

Browse files
authored
Implement GGML_CPU_ALL_VARIANTS for ARM (#14080)
* ggml-cpu: Factor out feature detection build from x86

* ggml-cpu: Add ARM feature detection and scoring

  This is analogous to cpu-feats-x86.cpp. However, to detect compile-time activation of features, we rely on GGML_USE_<FEAT>, which need to be set in cmake, instead of GGML_<FEAT>, which users would set for x86. This is because on ARM, users specify features with GGML_CPU_ARM_ARCH rather than with individual flags.

* ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for ARM

  Like x86; however, to pass arch flags around within cmake, we use GGML_INTERNAL_<FEAT>, as we don't have GGML_<FEAT>. Some features are optional, so we may need to build multiple backends per arch version (armv8.2_1, armv8.2_2, ...) and let the scoring function sort out which one can be used.

* ggml-cpu: Limit ARM GGML_CPU_ALL_VARIANTS to Linux for now

  The other platforms will need their own specific variants. This also fixes the bug that the variant-building branch was always being executed as the else-branch of GGML_NATIVE=OFF. The branch is moved to an elseif-branch, which restores the previous behavior.
1 parent d4e0d95 commit 532802f

File tree

3 files changed

+184
-24
lines changed

3 files changed

+184
-24
lines changed

ggml/src/CMakeLists.txt

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -270,17 +270,23 @@ endfunction()
270270
function(ggml_add_cpu_backend_variant tag_name)
271271
set(GGML_CPU_TAG_NAME ${tag_name})
272272
# other: OPENMP LLAMAFILE CPU_HBM
273-
foreach (feat NATIVE
274-
SSE42
275-
AVX AVX2 BMI2 AVX_VNNI FMA F16C
276-
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
277-
AMX_TILE AMX_INT8 AMX_BF16)
278-
set(GGML_${feat} OFF)
279-
endforeach()
280-
281-
foreach (feat ${ARGN})
282-
set(GGML_${feat} ON)
283-
endforeach()
273+
if (GGML_SYSTEM_ARCH STREQUAL "x86")
274+
foreach (feat NATIVE
275+
SSE42
276+
AVX AVX2 BMI2 AVX_VNNI FMA F16C
277+
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
278+
AMX_TILE AMX_INT8 AMX_BF16)
279+
set(GGML_${feat} OFF)
280+
endforeach()
281+
282+
foreach (feat ${ARGN})
283+
set(GGML_${feat} ON)
284+
endforeach()
285+
elseif (GGML_SYSTEM_ARCH STREQUAL "ARM")
286+
foreach (feat ${ARGN})
287+
set(GGML_INTERNAL_${feat} ON)
288+
endforeach()
289+
endif()
284290

285291
ggml_add_cpu_backend_variant_impl(${tag_name})
286292
endfunction()
@@ -290,6 +296,8 @@ ggml_add_backend(CPU)
290296
if (GGML_CPU_ALL_VARIANTS)
291297
if (NOT GGML_BACKEND_DL)
292298
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
299+
elseif (GGML_CPU_ARM_ARCH)
300+
message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS")
293301
endif()
294302
if (GGML_SYSTEM_ARCH STREQUAL "x86")
295303
ggml_add_cpu_backend_variant(x64)
@@ -303,8 +311,20 @@ if (GGML_CPU_ALL_VARIANTS)
303311
# MSVC doesn't support AMX
304312
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
305313
endif()
314+
elseif(GGML_SYSTEM_ARCH STREQUAL "ARM" AND CMAKE_SYSTEM_NAME MATCHES "Linux")
315+
# Many of these features are optional so we build versions with popular
316+
# combinations and name the backends based on the version they were
317+
# first released with
318+
ggml_add_cpu_backend_variant(armv8.0_1)
319+
ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
320+
ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
321+
ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE)
322+
ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
323+
ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
324+
ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
325+
ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
306326
else()
307-
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported on ${GGML_SYSTEM_ARCH}")
327+
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
308328
endif()
309329
elseif (GGML_CPU)
310330
ggml_add_cpu_backend_variant_impl("")

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
# Compile the CPU feature-detection source for one backend variant and link
# the resulting objects into that variant.
#
# Arguments:
#   cpu_name - CPU backend target that receives the feature-detection objects
#   arch     - subdirectory of ggml-cpu/arch/ whose cpu-feats.cpp is compiled
#   ARGN     - compile definitions forwarded to the feature-detection target
#
# The detection code gets its own OBJECT library so that it can be built
# without the architecture flags. Several variants of the CPU backend may be
# part of the same build, so attaching the arch flags per source file with
# set_source_files_properties() is not possible.
function(ggml_add_cpu_backend_features cpu_name arch)
    set(feats_target ${cpu_name}-feats)

    add_library(${feats_target} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
    set_target_properties(${feats_target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_include_directories(${feats_target} PRIVATE . .. ../include)

    # Caller-supplied definitions come first, followed by the fixed backend ones.
    target_compile_definitions(${feats_target}
        PRIVATE
            ${ARGN}
            GGML_BACKEND_DL
            GGML_BACKEND_BUILD
            GGML_BACKEND_SHARED)

    target_link_libraries(${cpu_name} PRIVATE ${feats_target})
endfunction()
14+
115
function(ggml_add_cpu_backend_variant_impl tag_name)
216
if (tag_name)
317
set(GGML_CPU_NAME ggml-cpu-${tag_name})
@@ -143,6 +157,49 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
143157
else()
144158
if (GGML_CPU_ARM_ARCH)
145159
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
160+
elseif(GGML_CPU_ALL_VARIANTS)
161+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
162+
# Begin with the lowest baseline
163+
set(ARM_MCPU "armv8-a")
164+
set(ARCH_TAGS "")
165+
set(ARCH_DEFINITIONS "")
166+
167+
# When a feature is selected, bump the MCPU to the first
168+
# version that supported it
169+
if (GGML_INTERNAL_DOTPROD)
170+
set(ARM_MCPU "armv8.2-a")
171+
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
172+
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
173+
endif()
174+
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
175+
set(ARM_MCPU "armv8.2-a")
176+
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
177+
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
178+
endif()
179+
if (GGML_INTERNAL_SVE)
180+
set(ARM_MCPU "armv8.2-a")
181+
set(ARCH_TAGS "${ARCH_TAGS}+sve")
182+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
183+
endif()
184+
if (GGML_INTERNAL_MATMUL_INT8)
185+
set(ARM_MCPU "armv8.6-a")
186+
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
187+
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
188+
endif()
189+
if (GGML_INTERNAL_SVE2)
190+
set(ARM_MCPU "armv8.6-a")
191+
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
192+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
193+
endif()
194+
if (GGML_INTERNAL_SME)
195+
set(ARM_MCPU "armv9.2-a")
196+
set(ARCH_TAGS "${ARCH_TAGS}+sme")
197+
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
198+
endif()
199+
200+
list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
201+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
202+
endif()
146203
endif()
147204
endif()
148205

@@ -306,18 +363,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
306363
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
307364
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
308365
endif()
309-
310-
# The feature detection code is compiled as a separate target so that
311-
# it can be built without the architecture flags
312-
# Since multiple variants of the CPU backend may be included in the same
313-
# build, using set_source_files_properties() to set the arch flags is not possible
314-
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
315-
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp)
316-
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
317-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
318-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
319-
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
320-
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
366+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
321367
endif()
322368
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
323369
message(STATUS "PowerPC detected")
ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#include "ggml-backend-impl.h"
2+
3+
#if defined(__aarch64__)
4+
5+
#if defined(__linux__)
6+
#include <sys/auxv.h>
7+
#elif defined(__APPLE__)
8+
#include <sys/sysctl.h>
9+
#endif
10+
11+
#if !defined(HWCAP2_I8MM)
12+
#define HWCAP2_I8MM (1 << 13)
13+
#endif
14+
15+
#if !defined(HWCAP2_SME)
16+
#define HWCAP2_SME (1 << 23)
17+
#endif
18+
19+
struct aarch64_features {
20+
// has_neon not needed, aarch64 has NEON guaranteed
21+
bool has_dotprod = false;
22+
bool has_fp16_va = false;
23+
bool has_sve = false;
24+
bool has_sve2 = false;
25+
bool has_i8mm = false;
26+
bool has_sme = false;
27+
28+
aarch64_features() {
29+
#if defined(__linux__)
30+
uint32_t hwcap = getauxval(AT_HWCAP);
31+
uint32_t hwcap2 = getauxval(AT_HWCAP2);
32+
33+
has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
34+
has_fp16_va = !!(hwcap & HWCAP_FPHP);
35+
has_sve = !!(hwcap & HWCAP_SVE);
36+
has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
37+
has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
38+
has_sme = !!(hwcap2 & HWCAP2_SME);
39+
#elif defined(__APPLE__)
40+
int oldp = 0;
41+
size_t size = sizeof(oldp);
42+
43+
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
44+
has_dotprod = static_cast<bool>(oldp);
45+
}
46+
47+
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
48+
has_i8mm = static_cast<bool>(oldp);
49+
}
50+
51+
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
52+
has_sme = static_cast<bool>(oldp);
53+
}
54+
55+
// Apple apparently does not implement SVE yet
56+
#endif
57+
}
58+
};
59+
60+
static int ggml_backend_cpu_aarch64_score() {
61+
int score = 1;
62+
aarch64_features af;
63+
64+
#ifdef GGML_USE_DOTPROD
65+
if (!af.has_dotprod) { return 0; }
66+
score += 1<<1;
67+
#endif
68+
#ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
69+
if (!af.has_fp16_va) { return 0; }
70+
score += 1<<2;
71+
#endif
72+
#ifdef GGML_USE_SVE
73+
if (!af.has_sve) { return 0; }
74+
score += 1<<3;
75+
#endif
76+
#ifdef GGML_USE_MATMUL_INT8
77+
if (!af.has_i8mm) { return 0; }
78+
score += 1<<4;
79+
#endif
80+
#ifdef GGML_USE_SVE2
81+
if (!af.has_sve2) { return 0; }
82+
score += 1<<5;
83+
#endif
84+
#ifdef GGML_USE_SME
85+
if (!af.has_sme) { return 0; }
86+
score += 1<<6;
87+
#endif
88+
89+
return score;
90+
}
91+
92+
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
93+
94+
# endif // defined(__aarch64__)

0 commit comments

Comments
 (0)