
Commit ac0f33c

Merge branch 'master' into compilade/fix-mpt-pretok

2 parents: db2ffd5 + f1948f1

File tree

172 files changed (+16178, -3285 lines)


.devops/nix/package.nix

Lines changed: 7 additions & 10 deletions

@@ -17,19 +17,18 @@
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
+  curl,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
-    useOpenCL
     useRocm
     useVulkan
   ] && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
   useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

@@ -56,7 +55,6 @@ let
   ++ lib.optionals useCuda [ "CUDA" ]
   ++ lib.optionals useMetalKit [ "MetalKit" ]
   ++ lib.optionals useMpi [ "MPI" ]
-  ++ lib.optionals useOpenCL [ "OpenCL" ]
   ++ lib.optionals useRocm [ "ROCm" ]
   ++ lib.optionals useVulkan [ "Vulkan" ];

@@ -198,19 +196,19 @@ effectiveStdenv.mkDerivation (
   optionals effectiveStdenv.isDarwin darwinBuildInputs
   ++ optionals useCuda cudaBuildInputs
   ++ optionals useMpi [ mpi ]
-  ++ optionals useOpenCL [ clblast ]
   ++ optionals useRocm rocmBuildInputs
   ++ optionals useBlas [ blas ]
-  ++ optionals useVulkan vulkanBuildInputs;
+  ++ optionals useVulkan vulkanBuildInputs
+  ++ optionals enableCurl [ curl ];

   cmakeFlags =
     [
       (cmakeBool "LLAMA_BUILD_SERVER" true)
       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_CURL" enableCurl)
       (cmakeBool "GGML_NATIVE" false)
       (cmakeBool "GGML_BLAS" useBlas)
-      (cmakeBool "GGML_CLBLAST" useOpenCL)
       (cmakeBool "GGML_CUDA" useCuda)
       (cmakeBool "GGML_HIPBLAS" useRocm)
       (cmakeBool "GGML_METAL" useMetalKit)

@@ -254,7 +252,6 @@ effectiveStdenv.mkDerivation (
     useCuda
     useMetalKit
     useMpi
-    useOpenCL
     useRocm
     useVulkan
     ;

@@ -281,7 +278,7 @@ effectiveStdenv.mkDerivation (
   # Configurations we don't want even the CI to evaluate. Results in the
   # "unsupported platform" messages. This is mostly a no-op, because
   # cudaPackages would've refused to evaluate anyway.
-  badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+  badPlatforms = optionals useCuda lib.platforms.darwin;

   # Configurations that are known to result in build failures. Can be
   # overridden by importing Nixpkgs with `allowBroken = true`.
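The package.nix change replaces the removed OpenCL/CLBlast path with an `enableCurl` flag (default `true`) that adds `curl` to the build inputs and drives the `LLAMA_CURL` CMake option. A minimal sketch of how a downstream Nix expression might toggle the new flag via `override`; the `llama-cpp` attribute name is an assumption about how the flake exposes this derivation:

    # Hypothetical downstream override of the derivation in .devops/nix/package.nix,
    # assumed to be exposed as `llama-cpp` (e.g. via callPackage in the flake).
    llama-cpp.override {
      enableCurl = false; # drops curl from buildInputs and sets (cmakeBool "LLAMA_CURL" false)
      useVulkan  = true;  # pulls in vulkanBuildInputs in place of the removed OpenCL path
    }

Note that `useOpenCL` no longer exists as an argument, so any downstream override still passing it will fail to evaluate.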

.github/ISSUE_TEMPLATE/config.yml

Lines changed: 0 additions & 2 deletions

@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
-
-

.gitignore

Lines changed: 7 additions & 5 deletions

@@ -47,6 +47,7 @@ build*
 !build-info.cpp.in
 !build-info.sh
 !build.zig
+!docs/build.md
 /libllama.so
 /llama-*
 android-ndk-*

@@ -98,13 +99,14 @@ examples/server/*.mjs.hpp

 # Python

-__pycache__
-.venv
-/Pipfile
-dist
-poetry.lock
+/.venv
+__pycache__/
+*/poetry.lock
 poetry.toml

+# Nix
+/result
+
 # Test binaries
 /tests/test-backend-ops
 /tests/test-double-float

CMakeLists.txt

Lines changed: 5 additions & 1 deletion

@@ -42,6 +42,10 @@ endif()

 option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})

+if (WIN32)
+    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+endif()
+
 #
 # option list
 #

@@ -152,7 +156,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)

 install(
-    FILES convert-hf-to-gguf.py
+    FILES convert_hf_to_gguf.py
     PERMISSIONS
         OWNER_READ
         OWNER_WRITE

CMakePresets.json

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@
       "cacheVariables": {
         "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
         "CMAKE_CXX_COMPILER": "icx",
+        "CMAKE_C_COMPILER": "cl",
         "GGML_SYCL": "ON",
         "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
       }

CONTRIBUTING.md

Lines changed: 20 additions & 10 deletions

@@ -1,14 +1,24 @@
-# Contributing Guidelines
+# Pull requests

-## Checklist
+- Always squash-merge the PR before merging
+- Use the following format for your final commit: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Test your changes:
+  - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
+  - Execute [the full CI locally on your machine](ci/README.md) before publishing
+- If the pull request contains only documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times
+- Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
+  - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your conveience

-* Make sure your PR follows the [coding guidelines](https://github.com/ggerganov/llama.cpp/blob/master/README.md#coding-guidelines)
-* Test your changes using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
-* Execute [the full CI locally on your machine](ci/README.md) before publishing
+# Coding guidelines

-## PR formatting
+- Avoid adding third-party dependencies, extra files, extra headers, etc.
+- Always consider cross-compatibility with other operating systems and architectures
+- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
+- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+- Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
+- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+
+![matmul](media/matmul.png)

-* Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
-  - The PR template has a series of review complexity checkboxes `[ ]` that you can mark as `[X]` for your conveience. Refer to [About task lists](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) for more information.
-* If the pull request only contains documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times.
-* When squashing multiple commits on merge, use the following format for your commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : Fix typo in utils.py (#1234)`
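To make the matrix-multiplication convention above concrete, here is a small worked instance of the shape rule, assuming only the conventions quoted in the diff (row-major storage, dimension 0 = columns; the specific sizes are illustrative):

$A \in \mathbb{R}^{4 \times 2}$ ($ne = [2, 4]$) and $B \in \mathbb{R}^{3 \times 2}$ ($ne = [2, 3]$) give $C = B A^T \in \mathbb{R}^{3 \times 4}$ ($ne = [4, 3]$).

Both operands must agree on dimension 0 (the shared inner length $k = 2$), and the result takes its columns from $A$'s rows and its rows from $B$'s rows, so both inputs are consumed in their natural row-major layout. Checking the transposed form: $C^T = A B^T$ is $(4 \times 2)(2 \times 3) = 4 \times 3$, as expected.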

Makefile

Lines changed: 24 additions & 0 deletions

@@ -14,6 +14,7 @@ BUILD_TARGETS = \
 	llama-finetune \
 	llama-gbnf-validator \
 	llama-gguf \
+	llama-gguf-hash \
 	llama-gguf-split \
 	llama-gritlm \
 	llama-imatrix \

@@ -62,6 +63,11 @@ TEST_TARGETS = \
 	tests/test-tokenizer-1-bpe \
 	tests/test-tokenizer-1-spm

+# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
+LEGACY_TARGETS = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+	simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
+	retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm
+
 # Deprecation aliases
 ifdef LLAMA_CUBLAS
 $(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)

@@ -1086,6 +1092,7 @@ clean:
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o
 	rm -rvf $(BUILD_TARGETS)
 	rm -rvf $(TEST_TARGETS)
+	rm -rvf $(LEGACY_TARGETS)
 	find examples pocs -type f -name "*.o" -delete

 #

@@ -1172,6 +1179,23 @@ llama-gguf: examples/gguf/gguf.cpp \
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+examples/gguf-hash/deps/sha1/sha1.o: \
+	examples/gguf-hash/deps/sha1/sha1.c
+	$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
+
+examples/gguf-hash/deps/xxhash/xxhash.o: \
+	examples/gguf-hash/deps/xxhash/xxhash.c
+	$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
+
+examples/gguf-hash/deps/sha256/sha256.o: \
+	examples/gguf-hash/deps/sha256/sha256.c
+	$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
+
+llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 llama-gguf-split: examples/gguf-split/gguf-split.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
