Skip to content

Commit 21905dd

Browse files
AndreasKunar authored and ggerganov committed
ggml : fix build on Windows with Snapdragon X (ggml-org#8531)
* Improvements for Windows with Snapdragon X * Revert "Improvements for Windows with Snapdragon X" This reverts commit bf21397. * Improvements for Windows with Snapdragon X * WOA build clarifications * WIndows on ARM build clarifications * cmake build for Windows clarifications * Update docs/build.md Co-authored-by: Georgi Gerganov <[email protected]> --------- Co-authored-by: AndreasKunar <andreaskmsn.com> Co-authored-by: Georgi Gerganov <[email protected]>
1 parent fd905e8 commit 21905dd

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

docs/build.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ In order to build llama.cpp you have four different options.
1616
make
1717
```
1818

19-
- On Windows:
19+
- On Windows (x86/x64 only, arm64 requires cmake):
2020

2121
1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
2222
2. Extract `w64devkit` on your pc.
@@ -60,6 +60,17 @@ In order to build llama.cpp you have four different options.
6060
cmake -B build -G "Xcode"
6161
cmake --build build --config Debug
6262
```
63+
- Building for Windows (x86, x64 and arm64) with MSVC or clang as compilers:
64+
- Install Visual Studio 2022, e.g. via the [Community Edition](https://visualstudio.microsoft.com/de/vs/community/). In the installer, select at least the following options (this also automatically installs the required additional tools such as CMake):
65+
- Tab Workload: Desktop development with C++
66+
- Tab Components (select quickly via search): C++ _CMake_ tools for Windows, _Git_ for Windows, C++ _Clang_ Compiler for Windows, MSBuild support for LLVM (clang-cl) toolset
67+
- Please remember to always use a Developer Command Prompt or Developer PowerShell for VS2022 when running git, build, and test commands
68+
- For Windows on ARM (arm64, WoA) build with:
69+
```bash
70+
cmake --preset arm64-windows-llvm-release -D GGML_OPENMP=OFF
71+
cmake --build build-arm64-windows-llvm-release
72+
```
73+
Note: Building for arm64 can also be done with MSVC alone (with the build-arm64-windows-MSVC preset, or the standard CMake build instructions). However, MSVC does not support inline ARM assembly code, which is used e.g. for the accelerated Q4_0_4_8 CPU kernels.
6374

6475
- Using `gmake` (FreeBSD):
6576

ggml/src/ggml-aarch64.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
392392
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
393393
GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
394394
"__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
395-
#elif defined(__ARM_NEON) && defined(__aarch64__)
395+
#elif defined(__ARM_NEON) && defined(__aarch64__) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
396396
const void * b_ptr = vx;
397397
const void * a_ptr = vy;
398398
float * res_ptr = s;
@@ -501,7 +501,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
501501
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
502502
}
503503
#endif
504-
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
504+
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
505505
const void * b_ptr = vx;
506506
const void * a_ptr = vy;
507507
float * res_ptr = s;
@@ -613,7 +613,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
613613
UNUSED(ncols_interleaved);
614614
UNUSED(blocklen);
615615

616-
#if defined(__ARM_FEATURE_SVE)
616+
#if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
617617
if (svcntw() == 8) {
618618
const void * b_ptr = vx;
619619
const void * a_ptr = vy;
@@ -753,7 +753,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
753753
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
754754
GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
755755
"__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
756-
#elif defined(__ARM_NEON) && defined(__aarch64__)
756+
#elif defined(__ARM_NEON) && defined(__aarch64__) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
757757
const void * b_ptr = vx;
758758
const void * a_ptr = vy;
759759
float * res_ptr = s;
@@ -1271,7 +1271,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
12711271
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
12721272
}
12731273
#endif
1274-
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
1274+
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
12751275
const void * b_ptr = vx;
12761276
const void * a_ptr = vy;
12771277
float * res_ptr = s;
@@ -1727,7 +1727,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
17271727
UNUSED(ncols_interleaved);
17281728
UNUSED(blocklen);
17291729

1730-
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
1730+
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
17311731
if (svcntw() == 8) {
17321732
const void * b_ptr = vx;
17331733
const void * a_ptr = vy;

0 commit comments

Comments
 (0)