
Commit bf21397

Author: AndreasKunar
Improvements for Windows with Snapdragon X
1 parent 5e116e8

3 files changed: 16 additions, 8 deletions

cmake/arm64-windows-llvm.cmake

Lines changed: 2 additions & 1 deletion
@@ -9,7 +9,8 @@ set( CMAKE_CXX_COMPILER clang++ )
 set( CMAKE_C_COMPILER_TARGET ${target} )
 set( CMAKE_CXX_COMPILER_TARGET ${target} )
 
-set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
+# march for Snapdragon X should be 8.7-a, but this currently breaks Q_4_0_4_4 acceleration, 8.5 works
+set( arch_c_flags "-march=armv8.5-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
 set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
 
 set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
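
The armv8.5-a vs. armv8.7-a trade-off above matters because the -march level controls which ARM feature macros the compiler defines, and those macros gate the accelerated kernels in ggml/src/ggml-aarch64.c (see the hunks below). A minimal diagnostic sketch, not part of the commit (the file name and target triple are just examples), that can be built with the arch_c_flags settings to see what a given -march level enables:

```c
/* arch_probe.c -- hypothetical helper, not part of llama.cpp.
 * Build and run it with the arch_c_flags settings, for example:
 *   clang --target=arm64-pc-windows-msvc -march=armv8.5-a arch_probe.c
 * to list the feature macros that -march level turns on. */
#include <stdio.h>

int main(void) {
#if defined(__ARM_NEON)
    puts("__ARM_NEON defined");
#endif
#if defined(__ARM_FEATURE_MATMUL_INT8)
    puts("__ARM_FEATURE_MATMUL_INT8 defined (int8 matrix-multiply extension)");
#endif
#if defined(__ARM_FEATURE_SVE)
    puts("__ARM_FEATURE_SVE defined");
#endif
#if defined(_MSC_VER)
    puts("_MSC_VER defined (MSVC-compatible frontend)");
#endif
    return 0;
}
```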

docs/build.md

Lines changed: 8 additions & 1 deletion
@@ -16,7 +16,7 @@ In order to build llama.cpp you have four different options.
     make
     ```
 
-  - On Windows:
+  - On Windows (x86/x64 only, arm64 requires cmake):
 
     1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
     2. Extract `w64devkit` on your pc.
@@ -45,6 +45,13 @@ In order to build llama.cpp you have four different options.
 - For `Q4_0_4_4` quantization type build, add the `-DGGML_LLAMAFILE=OFF` cmake option. For example, use `cmake -B build -DGGML_LLAMAFILE=OFF`.
 - For faster compilation, add the `-j` argument to run multiple jobs in parallel. For example, `cmake --build build --config Release -j 8` will run 8 jobs in parallel.
 - For faster repeated compilation, install [ccache](https://ccache.dev/).
+- For Windows:
+  - Install cmake, e.g. via `winget install cmake`.
+  - As an alternative to the w64devkit mentioned in "using make" above, install MSVC (e.g. via Visual Studio 2022 Community Edition).
+  - For Windows on ARM you need MSVC installed and _additionally_:
+    - Install [clang via LLVM for woa64](https://releases.llvm.org) to enable better ARM optimizations (clang needs the MSVC backend).
+    - For using clang, the first build step needs to be `cmake --preset arm64-windows-llvm-release` (instead of `cmake -B ...`, which defaults to MSVC).
+    - Note: Building for ARM can also be done with MSVC alone (without installing clang or using the preset), but this does not support Q_4_0_4_4 acceleration, because the MSVC frontend cannot inline ARM assembly code.
 - For debug builds, there are two cases:
 
   1. Single-config generators (e.g. default = `Unix Makefiles`; note that they just ignore the `--config` flag):
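
The note at the end of the new bullet list is the core constraint for Windows on ARM: the MSVC ARM64 frontend does not accept GCC/clang-style inline assembly, which the optimized Q_4_0_4_4 kernels rely on, so those paths have to be fenced off for MSVC builds. A toy sketch, not code from the commit (the function and file name are illustrative only), of that guard-plus-fallback shape:

```c
/* toy_inline_asm.c -- illustrative only, not part of llama.cpp.
 * The inline-assembly branch uses GCC/clang extended asm syntax, which the
 * MSVC ARM64 frontend rejects; builds with an MSVC-compatible frontend
 * therefore take the plain-C fallback instead. */
#include <stdio.h>
#include <stdint.h>

static int32_t add_i32(int32_t a, int32_t b) {
#if defined(__aarch64__) && !defined(_MSC_VER)
    int32_t r;
    /* 32-bit AArch64 add via extended inline assembly */
    __asm__("add %w0, %w1, %w2" : "=r"(r) : "r"(a), "r"(b));
    return r;
#else
    return a + b;  /* portable fallback, e.g. for MSVC builds */
#endif
}

int main(void) {
    printf("add_i32(2, 3) = %d\n", add_i32(2, 3));
    return 0;
}
```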

ggml/src/ggml-aarch64.c

Lines changed: 6 additions & 6 deletions
@@ -392,7 +392,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
                 "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON) && defined(__aarch64__)
+#elif defined(__ARM_NEON) && defined(__aarch64__) && ! defined(_MSC_VER)
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -501,7 +501,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
                     "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
     }
 #endif
-#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(_MSC_VER)
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -613,7 +613,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
     UNUSED(ncols_interleaved);
     UNUSED(blocklen);
 
-#if defined(__ARM_FEATURE_SVE)
+#if defined(__ARM_FEATURE_SVE) && ! defined(_MSC_VER)
     if (svcntw() == 8) {
         const void * b_ptr = vx;
         const void * a_ptr = vy;
@@ -753,7 +753,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
                 "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON) && defined(__aarch64__)
+#elif defined(__ARM_NEON) && defined(__aarch64__) && ! defined(_MSC_VER)
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -1271,7 +1271,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
                     "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
     }
 #endif
-#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(_MSC_VER)
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -1727,7 +1727,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
     UNUSED(ncols_interleaved);
     UNUSED(blocklen);
 
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(_MSC_VER)
     if (svcntw() == 8) {
         const void * b_ptr = vx;
         const void * a_ptr = vy;
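
All six hunks make the same change: each optimized GEMV/GEMM path gains a `! defined(_MSC_VER)` term, so MSVC builds skip the inline-assembly kernels and fall through to the generic C implementation further down in each function. A condensed sketch of that compile-time dispatch (the helper names and bodies are hypothetical stand-ins, not the real kernels):

```c
/* dispatch_sketch.c -- hypothetical illustration of the guard pattern above;
 * the two helpers are stand-ins for the real optimized and generic kernels. */
#include <stdio.h>

void optimized_neon_asm_path(void) { puts("NEON + inline-assembly kernel"); }
void generic_scalar_path(void)     { puts("generic scalar C kernel"); }

void gemv_q4_0_4x4_sketch(void) {
#if defined(__ARM_NEON) && defined(__aarch64__) && !defined(_MSC_VER)
    /* compiled only when NEON is available and the frontend is not MSVC-compatible */
    optimized_neon_asm_path();
#else
    /* MSVC builds (and non-ARM targets) take this branch after the change */
    generic_scalar_path();
#endif
}

int main(void) {
    gemv_q4_0_4x4_sketch();
    return 0;
}
```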
