ggml-org
diff --git a/‎Makefile
Lines changed: 69 additions & 80 deletions b/‎Makefile
Lines changed: 69 additions & 80 deletions
diff --git a/‎Package.swift
Lines changed: 11 additions & 2 deletions b/‎Package.swift
Lines changed: 11 additions & 2 deletions
diff --git a/‎README.md
Lines changed: 36 additions & 5 deletions b/‎README.md
Lines changed: 36 additions & 5 deletions
@@ -71,21 +71,21 @@ OPT = -Ofast
 else
 OPT = -O3
 endif
-CFLAGS   = -I.            $(OPT) -std=c11   -fPIC
-CXXFLAGS = -I. -I./common $(OPT) -std=c++11 -fPIC
-LDFLAGS  =
+MK_CPPFLAGS = -I. -Icommon
+MK_CFLAGS   = $(CPPFLAGS) $(OPT) -std=c11   -fPIC
+MK_CXXFLAGS = $(CPPFLAGS) $(OPT) -std=c++11 -fPIC
+MK_LDFLAGS  =
 
 ifdef LLAMA_DEBUG
-	CFLAGS   += -O0 -g
-	CXXFLAGS += -O0 -g
-	LDFLAGS  += -g
+	MK_CFLAGS   += -O0 -g
+	MK_CXXFLAGS += -O0 -g
+	MK_LDFLAGS  += -g
 else
-	CFLAGS   += -DNDEBUG
-	CXXFLAGS += -DNDEBUG
+	MK_CPPFLAGS += -DNDEBUG
 endif
 
 ifdef LLAMA_SERVER_VERBOSE
-	CXXFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
+	MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
 endif
 
 ifdef LLAMA_DISABLE_LOGS
@@ -94,9 +94,9 @@ ifdef LLAMA_DISABLE_LOGS
 endif # LLAMA_DISABLE_LOGS
 
 # warnings
-CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
-			-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
+MK_CFLAGS    += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
+				-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
+MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
 
 ifeq '' '$(findstring clang++,$(CXX))'
 	# g++ only
@@ -105,29 +105,9 @@ endif
 
 # OS specific
 # TODO: support Windows
-ifeq ($(UNAME_S),Linux)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
-endif
-ifeq ($(UNAME_S),Darwin)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
-endif
-ifeq ($(UNAME_S),FreeBSD)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
-endif
-ifeq ($(UNAME_S),NetBSD)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
-endif
-ifeq ($(UNAME_S),OpenBSD)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
-endif
-ifeq ($(UNAME_S),Haiku)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
+ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
+	MK_CFLAGS   += -pthread
+	MK_CXXFLAGS += -pthread
 endif
 
 # detect Windows
@@ -153,12 +133,11 @@ ifeq ($(_WIN32),1)
 endif
 
 ifdef LLAMA_GPROF
-	CFLAGS   += -pg
-	CXXFLAGS += -pg
+	MK_CFLAGS   += -pg
+	MK_CXXFLAGS += -pg
 endif
 ifdef LLAMA_PERF
-	CFLAGS   += -DGGML_PERF
-	CXXFLAGS += -DGGML_PERF
+	MK_CPPFLAGS += -DGGML_PERF
 endif
 
 # Architecture specific
@@ -169,16 +148,16 @@ ifndef RISCV
 
 ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
 	# Use all CPU extensions that are available:
-	CFLAGS   += -march=native -mtune=native
-	CXXFLAGS += -march=native -mtune=native
+	MK_CFLAGS   += -march=native -mtune=native
+	MK_CXXFLAGS += -march=native -mtune=native
 
 	# Usage AVX-only
-	#CFLAGS   += -mfma -mf16c -mavx
-	#CXXFLAGS += -mfma -mf16c -mavx
+	#MK_CFLAGS   += -mfma -mf16c -mavx
+	#MK_CXXFLAGS += -mfma -mf16c -mavx
 
 	# Usage SSSE3-only (Not is SSE3!)
-	#CFLAGS   += -mssse3
-	#CXXFLAGS += -mssse3
+	#MK_CFLAGS   += -mssse3
+	#MK_CXXFLAGS += -mssse3
 endif
 
 # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
@@ -192,34 +171,33 @@ endif
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 	# Apple M1, M2, etc.
 	# Raspberry Pi 3, 4, Zero 2 (64-bit)
-	CFLAGS   += -mcpu=native
-	CXXFLAGS += -mcpu=native
+	MK_CFLAGS   += -mcpu=native
+	MK_CXXFLAGS += -mcpu=native
 endif
 
 ifneq ($(filter armv6%,$(UNAME_M)),)
 	# Raspberry Pi 1, Zero
-	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+	MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
 endif
 
 ifneq ($(filter armv7%,$(UNAME_M)),)
 	# Raspberry Pi 2
-	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+	MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 endif
 
 ifneq ($(filter armv8%,$(UNAME_M)),)
 	# Raspberry Pi 3, 4, Zero 2 (32-bit)
-	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+	MK_CFLAGS   += -mfp16-format=ieee -mno-unaligned-access
+	MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif
 
 ifneq ($(filter ppc64%,$(UNAME_M)),)
 	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
 	ifneq (,$(findstring POWER9,$(POWER9_M)))
-		CFLAGS   += -mcpu=power9
-		CXXFLAGS += -mcpu=power9
-	endif
-	# Require c++23's std::byteswap for big-endian support.
-	ifeq ($(UNAME_M),ppc64)
-		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+		MK_CFLAGS   += -mcpu=power9
+		MK_CXXFLAGS += -mcpu=power9
 	endif
 endif
 
@@ -229,21 +207,19 @@ else
 endif
 
 ifndef LLAMA_NO_K_QUANTS
-	CFLAGS   += -DGGML_USE_K_QUANTS
-	CXXFLAGS += -DGGML_USE_K_QUANTS
+	MK_CPPFLAGS += -DGGML_USE_K_QUANTS
 	OBJS     += k_quants.o
 ifdef LLAMA_QKK_64
-	CFLAGS   += -DGGML_QKK_64
-	CXXFLAGS += -DGGML_QKK_64
+	MK_CPPFLAGS += -DGGML_QKK_64
 endif
 endif
 
 ifndef LLAMA_NO_ACCELERATE
 	# Mac OS - include Accelerate framework.
 	# `-framework Accelerate` works both with Apple Silicon and Mac Intel
 	ifeq ($(UNAME_S),Darwin)
-		CFLAGS  += -DGGML_USE_ACCELERATE
-		LDFLAGS += -framework Accelerate
+		MK_CPPFLAGS += -DGGML_USE_ACCELERATE
+		MK_LDFLAGS  += -framework Accelerate
 	endif
 endif # LLAMA_NO_ACCELERATE
 
@@ -258,25 +234,26 @@ ifndef LLAMA_NO_METAL
 endif # LLAMA_NO_METAL
 
 ifdef LLAMA_MPI
-	CFLAGS += -DGGML_USE_MPI -Wno-cast-qual
-	CXXFLAGS += -DGGML_USE_MPI -Wno-cast-qual
+	MK_CPPFLAGS += -DGGML_USE_MPI
+	MK_CFLAGS   += -Wno-cast-qual
+	MK_CXXFLAGS += -Wno-cast-qual
 	OBJS     += ggml-mpi.o
 endif # LLAMA_MPI
 
 ifdef LLAMA_OPENBLAS
-	CFLAGS  += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags openblas)
-	LDFLAGS += $(shell pkg-config --libs openblas)
+	MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
+	MK_CFLAGS   += $(shell pkg-config --cflags-only-other openblas)
+	MK_LDFLAGS  += $(shell pkg-config --libs openblas)
 endif # LLAMA_OPENBLAS
 
 ifdef LLAMA_BLIS
-	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
-	LDFLAGS += -lblis -L/usr/local/lib
+	MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
+	MK_LDFLAGS  += -lblis -L/usr/local/lib
 endif # LLAMA_BLIS
 
 ifdef LLAMA_CUBLAS
-	CFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-	CXXFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
+	MK_CPPFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
+	MK_LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 	OBJS      += ggml-cuda.o
 	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
 ifdef LLAMA_CUDA_NVCC
@@ -327,14 +304,15 @@ endif # LLAMA_CUBLAS
 
 ifdef LLAMA_CLBLAST
 
-	CFLAGS   += -DGGML_USE_CLBLAST $(shell pkg-config --cflags clblast OpenCL)
-	CXXFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags clblast OpenCL)
+	MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
+	MK_CFLAGS   += $(shell pkg-config --cflags-only-other clblast OpenCL)
+	MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
 
 	# Mac provides OpenCL as a framework
 	ifeq ($(UNAME_S),Darwin)
-		LDFLAGS += -lclblast -framework OpenCL
+		MK_LDFLAGS += -lclblast -framework OpenCL
 	else
-		LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
+		MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
 	endif
 	OBJS    += ggml-opencl.o
 
@@ -349,10 +327,9 @@ ifdef LLAMA_HIPBLAS
 	LLAMA_CUDA_DMMV_X       ?= 32
 	LLAMA_CUDA_MMV_Y        ?= 1
 	LLAMA_CUDA_KQUANTS_ITER ?= 2
-	CFLAGS      += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
-	CXXFLAGS    += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
-	LDFLAGS     += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
-	LDFLAGS		+= -lhipblas -lamdhip64 -lrocblas
+	MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
+	MK_LDFLAGS  += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
+	MK_LDFLAGS	+= -lhipblas -lamdhip64 -lrocblas
 	HIPFLAGS    += $(addprefix --offload-arch=,$(GPU_TARGETS))
 	HIPFLAGS    += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
 	HIPFLAGS    += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
@@ -366,6 +343,12 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 endif # LLAMA_HIPBLAS
 
+ifndef LLAMA_NO_METAL
+	MK_CPPFLAGS += -DGGML_USE_METAL #-DGGML_METAL_NDEBUG
+	MK_LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
+	OBJS		+= ggml-metal.o
+endif # LLAMA_METAL
+
 ifndef LLAMA_NO_METAL
 ggml-metal.o: ggml-metal.m ggml-metal.h
 	$(CC) $(CFLAGS) -c $< -o $@
@@ -376,11 +359,17 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
 	$(CC) $(CFLAGS) -c $< -o $@
 endif # LLAMA_MPI
 
-ifdef LLAMA_NO_K_QUANTS
+ifndef LLAMA_NO_K_QUANTS
 k_quants.o: k_quants.c k_quants.h
 	$(CC) $(CFLAGS) -c $< -o $@
 endif # LLAMA_NO_K_QUANTS
 
+# combine build flags with cmdline overrides
+override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
+override CFLAGS   := $(MK_CFLAGS) $(CFLAGS)
+override CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
+override LDFLAGS  := $(MK_LDFLAGS) $(LDFLAGS)
+
 #
 # Print build information
 #
 
@@ -12,9 +12,18 @@ let package = Package(
             name: "llama",
             path: ".",
             exclude: ["ggml-metal.metal"],
-            sources: ["ggml.c", "llama.cpp"],
+            sources: [
+                "ggml.c",
+                "llama.cpp",
+                "ggml-alloc.c",
+                "k_quants.c"
+            ],
             publicHeadersPath: "spm-headers",
-            cSettings: [.unsafeFlags(["-Wno-shorten-64-to-32"]), .define("GGML_USE_ACCELERATE")],
+            cSettings: [
+                .unsafeFlags(["-Wno-shorten-64-to-32"]),
+                .define("GGML_USE_K_QUANTS"),
+                .define("GGML_USE_ACCELERATE")
+            ],
             linkerSettings: [
                 .linkedFramework("Accelerate")
             ]
 
@@ -120,6 +120,7 @@ as the main playground for developing new features for the [ggml](https://github
 
 - [nat/openplayground](https://github.com/nat/openplayground)
 - [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui)
+- [withcatai/catai](https://github.com/withcatai/catai)
 
 ---
 
@@ -464,6 +465,8 @@ Building the program with BLAS support may lead to some performance improvements
   You will need the [OpenCL SDK](https://github.com/KhronosGroup/OpenCL-SDK).
     - For Ubuntu or Debian, the packages `opencl-headers`, `ocl-icd` may be needed.
 
+    - For Windows, a pre-built SDK is available on the [OpenCL Releases](https://github.com/KhronosGroup/OpenCL-SDK/releases) page.
+
     - <details>
         <summary>Installing the OpenCL SDK from source</summary>
 
@@ -481,10 +484,27 @@ Building the program with BLAS support may lead to some performance improvements
         ```
       </details>
 
-  Installing CLBlast: it may be found in your operating system's packages.
+  ##### Installing CLBlast
+
+  Pre-built CLBlast binaries may be found on the [CLBlast Releases](https://github.com/CNugteren/CLBlast/releases) page. For Unix variants, it may also be found in your operating system's packages.
+
+  Alternatively, they may be built from source.
 
   - <details>
-    <summary>If not, then installing from source:</summary>
+    <summary>Windows:</summary>
+
+      ```cmd
+      set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
+      git clone https://github.com/CNugteren/CLBlast.git
+      mkdir CLBlast\build
+      cd CLBlast\build
+      cmake .. -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
+      cmake --build . --config Release
+      cmake --install . --prefix C:/CLBlast
+      ```
+
+  - <details>
+    <summary>Unix:</summary>
 
       ```sh
       git clone https://github.com/CNugteren/CLBlast.git
@@ -498,21 +518,32 @@ Building the program with BLAS support may lead to some performance improvements
       Where `/some/path` is where the built library will be installed (default is `/usr/local`).
     </details>
 
-  Building:
+  ##### Building Llama with CLBlast
 
   - Build with make:
     ```sh
     make LLAMA_CLBLAST=1
     ```
-  - CMake:
+  - CMake (Unix):
     ```sh
     mkdir build
     cd build
     cmake .. -DLLAMA_CLBLAST=ON -DCLBlast_dir=/some/path
     cmake --build . --config Release
     ```
+  - CMake (Windows):
+    ```cmd
+    set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
+    git clone https://github.com/ggerganov/llama.cpp
+    cd llama.cpp
+    mkdir build
+    cd build
+    cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
+    cmake --build . --config Release
+    cmake --install . --prefix C:/LlamaCPP
+    ```
 
-  Running:
+  ##### Running Llama with CLBlast
 
   The CLBlast build supports `--gpu-layers|-ngl` like the CUDA version does.