ggml-org
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 9 additions & 0 deletions b/‎CMakeLists.txt
Lines changed: 9 additions & 0 deletions
diff --git a/‎Makefile
Lines changed: 26 additions & 5 deletions b/‎Makefile
Lines changed: 26 additions & 5 deletions
diff --git a/‎examples/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/‎examples/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/common.cpp
Lines changed: 14 additions & 0 deletions b/‎examples/common.cpp
Lines changed: 14 additions & 0 deletions
diff --git a/‎examples/common.h
Lines changed: 2 additions & 0 deletions b/‎examples/common.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/main/main.cpp
Lines changed: 10 additions & 0 deletions b/‎examples/main/main.cpp
Lines changed: 10 additions & 0 deletions
diff --git a/‎examples/mulmat-tune/CMakeLists.txt
Lines changed: 14 additions & 0 deletions b/‎examples/mulmat-tune/CMakeLists.txt
Lines changed: 14 additions & 0 deletions
@@ -40,6 +40,7 @@ models/*
 /server
 /Pipfile
 /libllama.so
+/mulmat-tune
 
 build-info.h
 arm_neon.h
 
@@ -78,6 +78,7 @@ option(LLAMA_K_QUANTS                        "llama: use k-quants"
 option(LLAMA_BUILD_TESTS                "llama: build tests"    ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES             "llama: build examples" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER               "llama: build server example"                           OFF)
+option(LLAMA_MULMAT_TUNE                "llama: mulmat tune"                                    OFF)
 
 #
 # Build info header
@@ -214,6 +215,7 @@ if (LLAMA_BLAS)
         message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
         add_compile_options(${BLAS_LINKER_FLAGS})
         add_compile_definitions(GGML_USE_OPENBLAS)
+        add_compile_definitions(GGML_BLAS_VENDOR="${LLAMA_BLAS_VENDOR}")
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
         set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
 
@@ -276,6 +278,11 @@ if (LLAMA_METAL)
         )
 endif()
 
+if (LLAMA_MULMAT_TUNE)  # define both mulmat-tune macros in a single call
+    add_compile_definitions(GGML_USE_MULMAT_TUNE
+                            GGML_MULMAT_TUNE_NDEBUG)
+endif()
+
 if (LLAMA_K_QUANTS)
     set(GGML_SOURCES_EXTRA ${GGML_SOURCES_EXTRA} k_quants.c k_quants.h)
     add_compile_definitions(GGML_USE_K_QUANTS)
@@ -450,6 +457,8 @@ endif()
 
 add_library(ggml OBJECT
             ggml.c
+            ggml-threading.c
+            ggml-tune.c
             ggml.h
             ${GGML_SOURCES_CUDA}
             ${GGML_SOURCES_OPENCL}
 
@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple mulmat-tune
 
 ifdef LLAMA_BUILD_SERVER
 	BUILD_TARGETS += server
@@ -47,7 +47,8 @@ endif
 OPT = -O3
 CFLAGS   = -I.              $(OPT) -std=c11   -fPIC
 CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
-LDFLAGS  =
+# -lm fixes a link error seen on Ubuntu 22.04: ggml.o: undefined reference to symbol 'tanhf@@GLIBC_2.2.5'
+LDFLAGS  = -lm
 
 ifdef LLAMA_DEBUG
 	CFLAGS   += -O0 -g
@@ -134,8 +135,7 @@ ifndef LLAMA_NO_K_QUANTS
 endif
 
 ifndef LLAMA_NO_ACCELERATE
-	# Mac M1 - include Accelerate framework.
-	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
+	# Mac Intel & M1 - include Accelerate framework.
 	ifeq ($(UNAME_S),Darwin)
 		CFLAGS  += -DGGML_USE_ACCELERATE
 		LDFLAGS += -framework Accelerate
@@ -145,10 +145,16 @@ endif # LLAMA_NO_ACCELERATE
 ifdef LLAMA_OPENBLAS
 	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas
 	LDFLAGS += -lopenblas
+	ifeq ($(UNAME_S),Darwin)
+		# OpenBLAS installed with Homebrew on macOS.
+		CFLAGS  += -I/usr/local/opt/openblas/include
+		LDFLAGS += -L/usr/local/opt/openblas/lib
+	endif
 endif # LLAMA_OPENBLAS
 
 ifdef LLAMA_BLIS
 	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
+	CFLAGS  += -DGGML_BLAS_VENDOR="\"BLIS\""
 	LDFLAGS += -lblis -L/usr/local/lib
 endif # LLAMA_BLIS
 
@@ -230,6 +236,11 @@ k_quants.o: k_quants.c k_quants.h
 	$(CC) $(CFLAGS) -c $< -o $@
 endif # LLAMA_NO_K_QUANTS
 
+ifdef LLAMA_MULMAT_TUNE
+	CFLAGS   += -DGGML_USE_MULMAT_TUNE -DGGML_MULMAT_TUNE_NDEBUG
+	CXXFLAGS += -DGGML_USE_MULMAT_TUNE
+endif
+
 #
 # Print build information
 #
@@ -245,6 +256,8 @@ $(info I CC:       $(CCV))
 $(info I CXX:      $(CXXV))
 $(info )
 
+OBJS += ggml-tune.o ggml-threading.o
+
 #
 # Build library
 #
@@ -253,7 +266,14 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
 	$(CC)  $(CFLAGS)   -c $< -o $@
 
 llama.o: llama.cpp ggml.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+# mulmat tune / threading objects: plain C, compiled with the same compiler and flags as ggml.o
+ggml-threading.o: ggml-threading.c ggml.h
+	$(CC)  $(CFLAGS) -c $< -o $@
+
+ggml-tune.o: ggml-tune.c ggml.h
+	$(CC)  $(CFLAGS) -c $< -o $@
 
 common.o: examples/common.cpp examples/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -298,6 +318,9 @@ server: examples/server/server.cpp examples/server/httplib.h examples/server/jso
 train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp    build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+mulmat-tune: examples/mulmat-tune/mulmat-tune.cpp build-info.h ggml.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 build-info.h: $(wildcard .git/index) scripts/build-info.sh
 	@sh scripts/build-info.sh > $@.tmp
 	@if ! cmp -s $@.tmp $@; then \
 
@@ -39,6 +39,7 @@ else()
     add_subdirectory(baby-llama)
     add_subdirectory(train-text-from-scratch)
     add_subdirectory(simple)
+    add_subdirectory(mulmat-tune)
     if (LLAMA_METAL)
         add_subdirectory(metal)
     endif()
 
@@ -345,6 +345,16 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.mem_test = true;
         } else if (arg == "--export") {
             params.export_cgraph = true;
+#ifdef GGML_USE_MULMAT_TUNE
+        } else if (arg == "--tune") {
+            params.tune = true;
+        } else if (arg == "--tune-file") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.tune_file = argv[i];
+#endif // GGML_USE_MULMAT_TUNE
         } else if (arg == "--verbose-prompt") {
             params.verbose_prompt = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -498,6 +508,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
 #endif
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
     fprintf(stderr, "  --export              export the computation graph to 'llama.ggml'\n");
+#ifdef GGML_USE_MULMAT_TUNE
+    fprintf(stderr, "  --tune                mulmat tune enable. If tune-file is set then exit after bench\n");
+    fprintf(stderr, "  --tune-file FILE      mulmat tune data file. If tune is true, then write bench result to this file, else load the file and run\n");
+#endif
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
     fprintf(stderr, "  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");
 
@@ -77,6 +77,8 @@ struct gpt_params {
     bool mem_test          = false; // compute maximum memory usage
     bool export_cgraph     = false; // export the computation graph
     bool verbose_prompt    = false; // print prompt tokens before generation
+    bool tune              = false; // mulmat tune: enable
+    std::string tune_file  = "";    // mulmat tune: data file
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
 
@@ -117,6 +117,16 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+#ifdef GGML_USE_MULMAT_TUNE
+    if (params.tune || !params.tune_file.empty()) {
+        bool ok = llama_mulmat_tune(ctx, params.n_threads, params.tune, params.tune_file.c_str());
+        if (!ok || (params.tune && !params.tune_file.empty())) {
+            llama_free(ctx);
+            return ok? 0: 1;
+        }
+    }
+#endif
+
     // print system information
     {
         fprintf(stderr, "\n");
 
@@ -0,0 +1,14 @@
+set(TARGET mulmat-tune)
+add_executable(${TARGET} mulmat-tune.cpp)
+
+# Always link ggml; also link the math library (m) unless generating for Xcode or MSVC.
+set(MULMAT_TUNE_LIBS ggml)
+if (NOT (XCODE OR MSVC))
+  list(APPEND MULMAT_TUNE_LIBS m)
+endif()
+
+target_link_libraries(${TARGET} PRIVATE ${MULMAT_TUNE_LIBS} ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+if(TARGET BUILD_INFO)
+  add_dependencies(${TARGET} BUILD_INFO)
+endif()