
Commit 467576b

CMake: default to -arch=native for CUDA build (#10320)
1 parent eda7e1d

2 files changed: 11 additions, 8 deletions

README.md

Lines changed: 2 additions & 2 deletions
@@ -459,14 +459,14 @@ To learn more how to measure perplexity using llama.cpp, [read this documentatio
 - Make sure to read this: [Inference at the edge](https://github.com/ggerganov/llama.cpp/discussions/205)
 - A bit of backstory for those who are interested: [Changelog podcast](https://changelog.com/podcast/532)

-## Other documentations
+## Other documentation

 - [main (cli)](./examples/main/README.md)
 - [server](./examples/server/README.md)
 - [jeopardy](./examples/jeopardy/README.md)
 - [GBNF grammars](./grammars/README.md)

-**Development documentations**
+**Development documentation**

 - [How to build](./docs/build.md)
 - [Running on Docker](./docs/docker.md)

ggml/src/ggml-cuda/CMakeLists.txt

Lines changed: 9 additions & 6 deletions
@@ -6,15 +6,18 @@ if (CUDAToolkit_FOUND)
     message(STATUS "CUDA Toolkit found")

     if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-        # 52 == lowest CUDA 12 standard
-        # 60 == FP16 CUDA intrinsics
-        # 61 == integer CUDA intrinsics
-        # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
-        if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
+        # native == GPUs available at build time
+        # 52     == Maxwell, lowest CUDA 12 standard
+        # 60     == P100, FP16 CUDA intrinsics
+        # 61     == Pascal, __dp4a instruction (per-byte integer dot product)
+        # 70     == V100, FP16 tensor cores
+        # 75     == Turing, int8 tensor cores
+        if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6")
+            set(CMAKE_CUDA_ARCHITECTURES "native")
+        elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
             set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
         else()
             set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
-            #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
         endif()
     endif()
     message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
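
For context, a minimal standalone sketch of the same selection pattern, using a hypothetical cuda_arch_demo project that is not part of this commit: prefer "native" when the toolkit supports it (nvcc gained -arch=native in CUDA 11.6, and CMake accepts the value from 3.24 onward), otherwise fall back to an explicit list of compute capabilities.

    cmake_minimum_required(VERSION 3.24)  # 3.24+ understands CMAKE_CUDA_ARCHITECTURES "native"
    project(cuda_arch_demo LANGUAGES CXX)

    # Locate the CUDA toolkit first, without enabling the CUDA language,
    # so the architecture list can be decided before compiler detection.
    find_package(CUDAToolkit)

    if (CUDAToolkit_FOUND AND NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6")
            # nvcc >= 11.6: build only for the GPUs detected on this machine (-arch=native).
            set(CMAKE_CUDA_ARCHITECTURES "native")
        else()
            # Older toolkits: fall back to an explicit list of compute capabilities.
            set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
        endif()
    endif()

    if (CUDAToolkit_FOUND)
        enable_language(CUDA)
        message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
    endif()

With the new default, a plain configure of llama.cpp with the CUDA backend enabled (cmake -B build -DGGML_CUDA=ON) should compile kernels only for the GPUs present on the build machine, while passing -DCMAKE_CUDA_ARCHITECTURES="61;70;75" (or disabling GGML_NATIVE) keeps the previous behavior of targeting an explicit architecture list.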
