|
37 | 37 | #include <unistd.h>
|
38 | 38 | #endif
|
39 | 39 |
|
40 |
| -#if defined(__aarch64__) |
41 |
| -struct ggml_aarch64_features_type { |
| 40 | +#if defined(__ARM_ARCH) |
| 41 | +struct ggml_arm_arch_features_type { |
42 | 42 | int has_neon;
|
43 | 43 | int has_i8mm;
|
44 | 44 | int has_sve;
|
45 | 45 | int sve_cnt;
|
46 |
| -} ggml_aarch64_features = {-1, -1, -1, 0}; |
| 46 | +} ggml_arm_arch_features = {-1, -1, -1, 0}; |
47 | 47 | #endif
|
48 | 48 |
|
49 | 49 | #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
@@ -3649,60 +3649,61 @@ static inline int ggml_up(int n, int m) {
|
3649 | 3649 |
|
3650 | 3650 | ////////////////////////////////////////////////////////////////////////////////
|
3651 | 3651 |
|
3652 |
| -#if defined(__aarch64__) |
| 3652 | +#if defined(__ARM_ARCH) |
3653 | 3653 |
|
3654 |
| -#if defined(__linux__) |
| 3654 | +#if defined(__linux__) && defined(__aarch64__) |
3655 | 3655 | #include <sys/auxv.h>
|
3656 | 3656 | #elif defined(__APPLE__)
|
3657 | 3657 | #include <sys/sysctl.h>
|
3658 | 3658 | #endif
|
3659 | 3659 |
|
3660 |
| -static void ggml_init_aarch64_features(void) { |
3661 |
| -#if defined(__linux__) |
| 3660 | +static void ggml_init_arm_arch_features(void) { |
| 3661 | +#if defined(__linux__) && defined(__aarch64__) |
3662 | 3662 | uint32_t hwcap = getauxval(AT_HWCAP);
|
3663 | 3663 | uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
3664 | 3664 |
|
3665 |
| - ggml_aarch64_features.has_neon = !!(hwcap & HWCAP_ASIMD); |
3666 |
| - ggml_aarch64_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM); |
3667 |
| - ggml_aarch64_features.has_sve = !!(hwcap & HWCAP_SVE); |
| 3665 | + ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD); |
| 3666 | + ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM); |
| 3667 | + ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE); |
| 3668 | + |
3668 | 3669 | #if defined(__ARM_FEATURE_SVE)
|
3669 |
| - ggml_aarch64_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); |
| 3670 | + ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); |
3670 | 3671 | #endif
|
3671 | 3672 | #elif defined(__APPLE__)
|
3672 | 3673 | int oldp = 0;
|
3673 | 3674 | size_t size = sizeof(oldp);
|
3674 | 3675 | if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
|
3675 | 3676 | oldp = 0;
|
3676 | 3677 | }
|
3677 |
| - ggml_aarch64_features.has_neon = oldp; |
| 3678 | + ggml_arm_arch_features.has_neon = oldp; |
3678 | 3679 |
|
3679 | 3680 | if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
|
3680 | 3681 | oldp = 0;
|
3681 | 3682 | }
|
3682 |
| - ggml_aarch64_features.has_i8mm = oldp; |
| 3683 | + ggml_arm_arch_features.has_i8mm = oldp; |
3683 | 3684 |
|
3684 |
| - ggml_aarch64_features.has_sve = 0; |
3685 |
| - ggml_aarch64_features.sve_cnt = 0; |
| 3685 | + ggml_arm_arch_features.has_sve = 0; |
| 3686 | + ggml_arm_arch_features.sve_cnt = 0; |
3686 | 3687 | #else
|
3687 | 3688 | // Run-time CPU feature detection not implemented for this platform, fallback to compile time
|
3688 | 3689 | #if defined(__ARM_NEON)
|
3689 |
| - ggml_aarch64_features.has_neon = 1; |
| 3690 | + ggml_arm_arch_features.has_neon = 1; |
3690 | 3691 | #else
|
3691 |
| - ggml_aarch64_features.has_neon = 0; |
| 3692 | + ggml_arm_arch_features.has_neon = 0; |
3692 | 3693 | #endif
|
3693 | 3694 |
|
3694 | 3695 | #if defined(__ARM_FEATURE_MATMUL_INT8)
|
3695 |
| - ggml_aarch64_features.has_i8mm = 1; |
| 3696 | + ggml_arm_arch_features.has_i8mm = 1; |
3696 | 3697 | #else
|
3697 |
| - ggml_aarch64_features.has_i8mm = 0; |
| 3698 | + ggml_arm_arch_features.has_i8mm = 0; |
3698 | 3699 | #endif
|
3699 | 3700 |
|
3700 | 3701 | #if defined(__ARM_FEATURE_SVE)
|
3701 |
| - ggml_aarch64_features.has_sve = 1; |
3702 |
| - ggml_aarch64_features.sve_cnt = 16; |
| 3702 | + ggml_arm_arch_features.has_sve = 1; |
| 3703 | + ggml_arm_arch_features.sve_cnt = 16; |
3703 | 3704 | #else
|
3704 |
| - ggml_aarch64_features.has_sve = 0; |
3705 |
| - ggml_aarch64_features.sve_cnt = 0; |
| 3705 | + ggml_arm_arch_features.has_sve = 0; |
| 3706 | + ggml_arm_arch_features.sve_cnt = 0; |
3706 | 3707 | #endif
|
3707 | 3708 | #endif
|
3708 | 3709 | }
|
@@ -3758,8 +3759,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
3758 | 3759 | GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
3759 | 3760 | }
|
3760 | 3761 |
|
3761 |
| -#if defined(__aarch64__) |
3762 |
| - ggml_init_aarch64_features(); |
| 3762 | +#if defined(__ARM_ARCH) |
| 3763 | + ggml_init_arm_arch_features(); |
3763 | 3764 | #endif
|
3764 | 3765 |
|
3765 | 3766 | is_first_call = false;
|
@@ -23328,16 +23329,16 @@ int ggml_cpu_has_fma(void) {
|
23328 | 23329 | }
|
23329 | 23330 |
|
23330 | 23331 | int ggml_cpu_has_neon(void) {
|
23331 |
| -#if defined(__aarch64__) |
23332 |
| - return ggml_aarch64_features.has_neon; |
| 23332 | +#if defined(__ARM_ARCH) |
| 23333 | + return ggml_arm_arch_features.has_neon; |
23333 | 23334 | #else
|
23334 | 23335 | return 0;
|
23335 | 23336 | #endif
|
23336 | 23337 | }
|
23337 | 23338 |
|
23338 | 23339 | int ggml_cpu_has_sve(void) {
|
23339 |
| -#if defined(__aarch64__) |
23340 |
| - return ggml_aarch64_features.has_sve; |
| 23340 | +#if defined(__ARM_ARCH) |
| 23341 | + return ggml_arm_arch_features.has_sve; |
23341 | 23342 | #else
|
23342 | 23343 | return 0;
|
23343 | 23344 | #endif
|
@@ -23484,16 +23485,16 @@ int ggml_cpu_has_vsx(void) {
|
23484 | 23485 | }
|
23485 | 23486 |
|
23486 | 23487 | int ggml_cpu_has_matmul_int8(void) {
|
23487 |
| -#if defined(__aarch64__) |
23488 |
| - return ggml_aarch64_features.has_i8mm; |
| 23488 | +#if defined(__ARM_ARCH) |
| 23489 | + return ggml_arm_arch_features.has_i8mm; |
23489 | 23490 | #else
|
23490 | 23491 | return 0;
|
23491 | 23492 | #endif
|
23492 | 23493 | }
|
23493 | 23494 |
|
23494 | 23495 | int ggml_cpu_get_sve_cnt(void) {
|
23495 |
| -#if defined(__aarch64__) |
23496 |
| - return ggml_aarch64_features.sve_cnt; |
| 23496 | +#if defined(__ARM_ARCH) |
| 23497 | + return ggml_arm_arch_features.sve_cnt; |
23497 | 23498 | #else
|
23498 | 23499 | return 0;
|
23499 | 23500 | #endif
|
|
0 commit comments