Skip to content

Commit 213f133

Browse files
committed
initial
1 parent ce2c7d7 commit 213f133

25 files changed

+3646
-500
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ models/*
4040
/server
4141
/Pipfile
4242
/libllama.so
43+
/mulmat-tune
4344

4445
build-info.h
4546
arm_neon.h

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ option(LLAMA_K_QUANTS "llama: use k-quants"
7878
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7979
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
8080
option(LLAMA_BUILD_SERVER "llama: build server example" OFF)
81+
option(LLAMA_MULMAT_TUNE "llama: mulmat tune" OFF)
8182

8283
#
8384
# Build info header
@@ -214,6 +215,7 @@ if (LLAMA_BLAS)
214215
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
215216
add_compile_options(${BLAS_LINKER_FLAGS})
216217
add_compile_definitions(GGML_USE_OPENBLAS)
218+
add_compile_definitions(GGML_BLAS_VENDOR="${LLAMA_BLAS_VENDOR}")
217219
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
218220
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
219221

@@ -276,6 +278,11 @@ if (LLAMA_METAL)
276278
)
277279
endif()
278280

281+
if (LLAMA_MULMAT_TUNE)
282+
add_compile_definitions(GGML_USE_MULMAT_TUNE)
283+
add_compile_definitions(GGML_MULMAT_TUNE_NDEBUG)
284+
endif()
285+
279286
if (LLAMA_K_QUANTS)
280287
set(GGML_SOURCES_EXTRA ${GGML_SOURCES_EXTRA} k_quants.c k_quants.h)
281288
add_compile_definitions(GGML_USE_K_QUANTS)
@@ -450,6 +457,8 @@ endif()
450457

451458
add_library(ggml OBJECT
452459
ggml.c
460+
ggml-threading.c
461+
ggml-tune.c
453462
ggml.h
454463
${GGML_SOURCES_CUDA}
455464
${GGML_SOURCES_OPENCL}

Makefile

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Define the default target now so that it is always the first target
2-
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple
2+
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple mulmat-tune
33

44
ifdef LLAMA_BUILD_SERVER
55
BUILD_TARGETS += server
@@ -47,7 +47,8 @@ endif
4747
OPT = -O3
4848
CFLAGS = -I. $(OPT) -std=c11 -fPIC
4949
CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
50-
LDFLAGS =
50+
# -lm fixes the link error "ggml.o: undefined reference to symbol 'tanhf@@GLIBC_2.2.5'" seen on Ubuntu 22.04.
51+
LDFLAGS = -lm
5152

5253
ifdef LLAMA_DEBUG
5354
CFLAGS += -O0 -g
@@ -134,8 +135,7 @@ ifndef LLAMA_NO_K_QUANTS
134135
endif
135136

136137
ifndef LLAMA_NO_ACCELERATE
137-
# Mac M1 - include Accelerate framework.
138-
# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
138+
# Mac Intel & M1 - include Accelerate framework.
139139
ifeq ($(UNAME_S),Darwin)
140140
CFLAGS += -DGGML_USE_ACCELERATE
141141
LDFLAGS += -framework Accelerate
@@ -145,10 +145,16 @@ endif # LLAMA_NO_ACCELERATE
145145
ifdef LLAMA_OPENBLAS
146146
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas
147147
LDFLAGS += -lopenblas
148+
ifeq ($(UNAME_S),Darwin)
149+
# openblas installed with Homebrew on macOS.
150+
CFLAGS += -I/usr/local/opt/openblas/include
151+
LDFLAGS += -L/usr/local/opt/openblas/lib
152+
endif
148153
endif # LLAMA_OPENBLAS
149154

150155
ifdef LLAMA_BLIS
151156
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
157+
CFLAGS += -DGGML_BLAS_VENDOR="\"BLIS\""
152158
LDFLAGS += -lblis -L/usr/local/lib
153159
endif # LLAMA_BLIS
154160

@@ -230,6 +236,11 @@ k_quants.o: k_quants.c k_quants.h
230236
$(CC) $(CFLAGS) -c $< -o $@
231237
endif # LLAMA_NO_K_QUANTS
232238

239+
ifdef LLAMA_MULMAT_TUNE
240+
CFLAGS += -DGGML_USE_MULMAT_TUNE -DGGML_MULMAT_TUNE_NDEBUG
241+
CXXFLAGS += -DGGML_USE_MULMAT_TUNE
242+
endif
243+
233244
#
234245
# Print build information
235246
#
@@ -245,6 +256,8 @@ $(info I CC: $(CCV))
245256
$(info I CXX: $(CXXV))
246257
$(info )
247258

259+
OBJS += ggml-tune.o ggml-threading.o
260+
248261
#
249262
# Build library
250263
#
@@ -253,7 +266,12 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
253266
$(CC) $(CFLAGS) -c $< -o $@
254267

255268
llama.o: llama.cpp ggml.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
256-
$(CXX) $(CXXFLAGS) -c $< -o $@
269+
270+
ggml-threading.o: ggml-threading.c ggml.h
271+
$(CC) $(CFLAGS) -c $< -o $@
272+
273+
ggml-tune.o: ggml-tune.c ggml.h
274+
$(CC) $(CFLAGS) -c $< -o $@
257275

258276
common.o: examples/common.cpp examples/common.h
259277
$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -298,6 +316,9 @@ server: examples/server/server.cpp examples/server/httplib.h examples/server/jso
298316
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
299317
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
300318

319+
mulmat-tune: examples/mulmat-tune/mulmat-tune.cpp build-info.h ggml.o $(OBJS)
320+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o mulmat-tune $(LDFLAGS)
321+
301322
build-info.h: $(wildcard .git/index) scripts/build-info.sh
302323
@sh scripts/build-info.sh > $@.tmp
303324
@if ! cmp -s $@.tmp $@; then \

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ else()
3939
add_subdirectory(baby-llama)
4040
add_subdirectory(train-text-from-scratch)
4141
add_subdirectory(simple)
42+
add_subdirectory(mulmat-tune)
4243
if (LLAMA_METAL)
4344
add_subdirectory(metal)
4445
endif()

examples/common.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,16 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
345345
params.mem_test = true;
346346
} else if (arg == "--export") {
347347
params.export_cgraph = true;
348+
#ifdef GGML_USE_MULMAT_TUNE
349+
} else if (arg == "--tune") {
350+
params.tune = true;
351+
} else if (arg == "--tune-file") {
352+
if (++i >= argc) {
353+
invalid_param = true;
354+
break;
355+
}
356+
params.tune_file = argv[i];
357+
#endif // GGML_USE_MULMAT_TUNE
348358
} else if (arg == "--verbose-prompt") {
349359
params.verbose_prompt = true;
350360
} else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -498,6 +508,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
498508
#endif
499509
fprintf(stderr, " --mtest compute maximum memory usage\n");
500510
fprintf(stderr, " --export export the computation graph to 'llama.ggml'\n");
511+
#ifdef GGML_USE_MULMAT_TUNE
512+
fprintf(stderr, " --tune mulmat tune enable. If tune-file is set then exit after bench\n");
513+
fprintf(stderr, " --tune-file FILE mulmat tune data file. If tune is true, then write bench result to this file, else load the file and run\n");
514+
#endif
501515
fprintf(stderr, " --verbose-prompt print prompt before generation\n");
502516
fprintf(stderr, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
503517
fprintf(stderr, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");

examples/common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ struct gpt_params {
7777
bool mem_test = false; // compute maximum memory usage
7878
bool export_cgraph = false; // export the computation graph
7979
bool verbose_prompt = false; // print prompt tokens before generation
80+
bool tune = false; // mulmat tune: enable
81+
std::string tune_file = ""; // mulmat tune: data file
8082
};
8183

8284
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

examples/main/main.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,16 @@ int main(int argc, char ** argv) {
117117
return 1;
118118
}
119119

120+
#ifdef GGML_USE_MULMAT_TUNE
121+
if (params.tune || !params.tune_file.empty()) {
122+
bool ok = llama_mulmat_tune(ctx, params.n_threads, params.tune, params.tune_file.c_str());
123+
if (!ok || (params.tune && !params.tune_file.empty())) {
124+
llama_free(ctx);
125+
return ok? 0: 1;
126+
}
127+
}
128+
#endif
129+
120130
// print system information
121131
{
122132
fprintf(stderr, "\n");

examples/mulmat-tune/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
set(TARGET mulmat-tune)
2+
add_executable(${TARGET} mulmat-tune.cpp)
3+
4+
if (XCODE OR MSVC)
5+
set(MULMAT_TUNE_LIBS ggml)
6+
else()
7+
set(MULMAT_TUNE_LIBS ggml m)
8+
endif()
9+
10+
target_link_libraries(${TARGET} PRIVATE ${MULMAT_TUNE_LIBS} ${CMAKE_THREAD_LIBS_INIT})
11+
target_compile_features(${TARGET} PRIVATE cxx_std_11)
12+
if(TARGET BUILD_INFO)
13+
add_dependencies(${TARGET} BUILD_INFO)
14+
endif()

0 commit comments

Comments
 (0)