Skip to content

Commit f64d44a

Browse files
CUDA: Fixed OpenLLaMA 3b mmq, reduced compile time (#2590)
1 parent b19edd5 commit f64d44a

File tree

2 files changed

+606
-410
lines changed

2 files changed

+606
-410
lines changed

CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,6 @@ option(LLAMA_BLAS "llama: use BLAS"
69 69
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
70 70
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
71 71
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
72 -
set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA kernels")
73 72
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
74 73
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
75 74
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
@@ -256,7 +255,6 @@ if (LLAMA_CUBLAS)
256 255
# if (LLAMA_CUDA_CUBLAS)
257 256
# add_compile_definitions(GGML_CUDA_CUBLAS)
258 257
# endif()
259 -
add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
260 258
if (LLAMA_CUDA_FORCE_DMMV)
261 259
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
262 260
endif()

0 commit comments

Comments
 (0)