Skip to content

Commit c554387

Browse files
committed
ggml-cpu: Add ARM feature detection and scoring
This is analogous to cpu-feats-x86.cpp. However, to detect compile-time activation of features, we rely on the GGML_USE_<FEAT> macros, which need to be set by CMake, instead of the GGML_<FEAT> options that users would set for x86. This is because on ARM, users specify features with GGML_CPU_ARM_ARCH rather than with individual flags.
1 parent a8f0eb8 commit c554387

File tree

1 file changed

+94
-0
lines changed

1 file changed

+94
-0
lines changed
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#include "ggml-backend-impl.h"
2+
3+
#if defined(__aarch64__)
4+
5+
#if defined(__linux__)
6+
#include <sys/auxv.h>
7+
#elif defined(__APPLE__)
8+
#include <sys/sysctl.h>
9+
#endif
10+
11+
// Fallback definitions for the AT_HWCAP2 bits tested in this file, used when
// the system headers do not provide them. HWCAP2_SVE2 gets a fallback as well,
// since it is tested alongside the other two. The values follow the arm64
// Linux uapi <asm/hwcap.h> bit assignments.
#if !defined(HWCAP2_SVE2)
#define HWCAP2_SVE2 (1 << 1)
#endif

#if !defined(HWCAP2_I8MM)
#define HWCAP2_I8MM (1 << 13)
#endif

#if !defined(HWCAP2_SME)
#define HWCAP2_SME (1 << 23)
#endif
18+
19+
struct aarch64_features {
20+
// has_neon not needed, aarch64 has NEON guaranteed
21+
bool has_dotprod = false;
22+
bool has_fp16_va = false;
23+
bool has_sve = false;
24+
bool has_sve2 = false;
25+
bool has_i8mm = false;
26+
bool has_sme = false;
27+
28+
aarch64_features() {
29+
#if defined(__linux__)
30+
uint32_t hwcap = getauxval(AT_HWCAP);
31+
uint32_t hwcap2 = getauxval(AT_HWCAP2);
32+
33+
has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
34+
has_fp16_va = !!(hwcap & HWCAP_FPHP);
35+
has_sve = !!(hwcap & HWCAP_SVE);
36+
has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
37+
has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
38+
has_sme = !!(hwcap2 & HWCAP2_SME);
39+
#elif defined(__APPLE__)
40+
int oldp = 0;
41+
size_t size = sizeof(oldp);
42+
43+
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
44+
has_dotprod = static_cast<bool>(oldp);
45+
}
46+
47+
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
48+
has_i8mm = static_cast<bool>(oldp);
49+
}
50+
51+
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
52+
has_sme = static_cast<bool>(oldp);
53+
}
54+
55+
// Apple apparently does not implement SVE yet
56+
#endif
57+
}
58+
};
59+
60+
static int ggml_backend_cpu_aarch64_score() {
61+
int score = 1;
62+
aarch64_features af;
63+
64+
#ifdef GGML_USE_DOTPROD
65+
if (!af.has_dotprod) { return 0; }
66+
score += 1<<1;
67+
#endif
68+
#ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
69+
if (!af.has_fp16_va) { return 0; }
70+
score += 1<<2;
71+
#endif
72+
#ifdef GGML_USE_SVE
73+
if (!af.has_sve) { return 0; }
74+
score += 1<<3;
75+
#endif
76+
#ifdef GGML_USE_MATMUL_INT8
77+
if (!af.has_i8mm) { return 0; }
78+
score += 1<<4;
79+
#endif
80+
#ifdef GGML_USE_SVE2
81+
if (!af.has_sve2) { return 0; }
82+
score += 1<<5;
83+
#endif
84+
#ifdef GGML_USE_SME
85+
if (!af.has_sme) { return 0; }
86+
score += 1<<6;
87+
#endif
88+
89+
return score;
90+
}
91+
92+
// Hands the scoring function to GGML_BACKEND_DL_SCORE_IMPL. The macro is
// defined outside this file; presumably it exports the function as this
// backend's score entry point for dynamic loading - confirm in ggml-backend-impl.h.
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
93+
94+
# endif // defined(__aarch64__)

0 commit comments

Comments
 (0)