Skip to content

Commit 89ceced

Browse files
authored
Merge branch 'ggerganov:master' into llama_native
2 parents 1310dbe + 0ccfc62 commit 89ceced

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

79 files changed

+11074
-3890
lines changed

.github/workflows/build.yml

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ jobs:
3838
- name: Build
3939
id: make_build
4040
run: |
41-
CC=gcc-8 make
41+
CC=gcc-8 make -j $(nproc)
4242
4343
- name: Test
4444
id: make_test
4545
run: |
46-
CC=gcc-8 make tests
47-
make test
46+
CC=gcc-8 make tests -j $(nproc)
47+
make test -j $(nproc)
4848
4949
ubuntu-latest-cmake:
5050
runs-on: ubuntu-latest
@@ -66,7 +66,7 @@ jobs:
6666
mkdir build
6767
cd build
6868
cmake ..
69-
cmake --build . --config Release
69+
cmake --build . --config Release -j $(nproc)
7070
7171
- name: Test
7272
id: cmake_test
@@ -101,7 +101,7 @@ jobs:
101101
mkdir build
102102
cd build
103103
cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
104-
cmake --build . --config ${{ matrix.build_type }}
104+
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
105105
106106
- name: Test
107107
id: cmake_test
@@ -135,7 +135,7 @@ jobs:
135135
mkdir build
136136
cd build
137137
cmake -DLLAMA_MPI=ON ..
138-
cmake --build . --config Release
138+
cmake --build . --config Release -j $(nproc)
139139
140140
- name: Test
141141
id: cmake_test
@@ -160,13 +160,13 @@ jobs:
160160
- name: Build
161161
id: make_build
162162
run: |
163-
make
163+
make -j $(sysctl -n hw.logicalcpu)
164164
165165
- name: Test
166166
id: make_test
167167
run: |
168-
make tests
169-
make test
168+
make tests -j $(sysctl -n hw.logicalcpu)
169+
make test -j $(sysctl -n hw.logicalcpu)
170170
171171
macOS-latest-cmake:
172172
runs-on: macos-latest
@@ -189,7 +189,7 @@ jobs:
189189
mkdir build
190190
cd build
191191
cmake -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF ..
192-
cmake --build . --config Release
192+
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
193193
194194
- name: Test
195195
id: cmake_test
@@ -223,7 +223,7 @@ jobs:
223223
-DLLAMA_BUILD_SERVER=OFF \
224224
-DCMAKE_SYSTEM_NAME=iOS \
225225
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
226-
cmake --build . --config Release
226+
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
227227
228228
macOS-latest-cmake-tvos:
229229
runs-on: macos-latest
@@ -251,7 +251,7 @@ jobs:
251251
-DLLAMA_BUILD_SERVER=OFF \
252252
-DCMAKE_SYSTEM_NAME=tvOS \
253253
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
254-
cmake --build . --config Release
254+
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
255255
256256
windows-latest-cmake:
257257
runs-on: windows-latest
@@ -324,7 +324,7 @@ jobs:
324324
mkdir build
325325
cd build
326326
cmake .. ${{ matrix.defines }}
327-
cmake --build . --config Release
327+
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
328328
329329
- name: Add clblast.dll
330330
id: add_clblast_dll
@@ -415,7 +415,7 @@ jobs:
415415
mkdir build
416416
cd build
417417
cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
418-
cmake --build . --config Release
418+
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
419419
420420
- name: Determine tag name
421421
id: tag
@@ -457,21 +457,22 @@ jobs:
457457
path: |
458458
cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
459459
460-
freeBSD-latest:
461-
runs-on: macos-12
462-
steps:
463-
- name: Clone
464-
uses: actions/checkout@v3
465-
466-
- name: Build
467-
uses: cross-platform-actions/[email protected]
468-
with:
469-
operating_system: freebsd
470-
version: '13.2'
471-
run: |
472-
sudo pkg update
473-
sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas
474-
gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15
460+
# freeBSD-latest:
461+
# runs-on: macos-12
462+
# steps:
463+
# - name: Clone
464+
# uses: actions/checkout@v3
465+
#
466+
# - name: Build
467+
# uses: cross-platform-actions/[email protected]
468+
# with:
469+
# operating_system: freebsd
470+
# version: '13.2'
471+
# hypervisor: 'qemu'
472+
# run: |
473+
# sudo pkg update
474+
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas
475+
# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
475476

476477
release:
477478
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@ models-mnt
5151
/save-load-state
5252
/server
5353
/simple
54+
/batched
55+
/export-lora
56+
/finetune
5457
/speculative
58+
/parallel
5559
/train-text-from-scratch
5660
/vdot
5761
build-info.h

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
124124
add_custom_command(
125125
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h"
126126
COMMENT "Generating build details from Git"
127-
COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake"
127+
COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake"
128128
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
129129
DEPENDS "${GIT_DIR}/index"
130130
VERBATIM
@@ -168,6 +168,8 @@ if (APPLE AND LLAMA_ACCELERATE)
168168
message(STATUS "Accelerate framework found")
169169

170170
add_compile_definitions(GGML_USE_ACCELERATE)
171+
add_compile_definitions(ACCELERATE_NEW_LAPACK)
172+
add_compile_definitions(ACCELERATE_LAPACK_ILP64)
171173
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
172174
else()
173175
message(WARNING "Accelerate framework not found")

Makefile

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Define the default target now so that it is always the first target
2-
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative tests/test-c.o
2+
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative parallel finetune export-lora tests/test-c.o
33

44
# Binaries only useful for tests
55
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
@@ -305,6 +305,8 @@ ifndef LLAMA_NO_ACCELERATE
305305
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
306306
ifeq ($(UNAME_S),Darwin)
307307
MK_CPPFLAGS += -DGGML_USE_ACCELERATE
308+
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
309+
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
308310
MK_LDFLAGS += -framework Accelerate
309311
endif
310312
endif # LLAMA_NO_ACCELERATE
@@ -498,6 +500,9 @@ console.o: common/console.cpp common/console.h
498500
grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
499501
$(CXX) $(CXXFLAGS) -c $< -o $@
500502

503+
train.o: common/train.cpp common/train.h
504+
$(CXX) $(CXXFLAGS) -c $< -o $@
505+
501506
libllama.so: llama.o ggml.o $(OBJS)
502507
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
503508

@@ -517,6 +522,9 @@ main: examples/main/main.cpp build-info.h ggml.
517522
simple: examples/simple/simple.cpp build-info.h ggml.o llama.o common.o $(OBJS)
518523
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
519524

525+
batched: examples/batched/batched.cpp build-info.h ggml.o llama.o common.o $(OBJS)
526+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
527+
520528
quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
521529
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
522530

@@ -545,7 +553,7 @@ embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-te
545553
gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
546554
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
547555

548-
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o $(OBJS)
556+
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
549557
$(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
550558

551559
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
@@ -554,15 +562,24 @@ convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggm
554562
llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
555563
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
556564

557-
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o common.o $(OBJS)
565+
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o common.o train.o $(OBJS)
558566
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
559567

560568
beam-search: examples/beam-search/beam-search.cpp build-info.h ggml.o llama.o common.o $(OBJS)
561569
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
562570

571+
finetune: examples/finetune/finetune.cpp build-info.h ggml.o llama.o common.o train.o $(OBJS)
572+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
573+
574+
export-lora: examples/export-lora/export-lora.cpp build-info.h ggml.o llama.o common.o $(OBJS)
575+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
576+
563577
speculative: examples/speculative/speculative.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
564578
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
565579

580+
parallel: examples/parallel/parallel.cpp build-info.h ggml.o llama.o common.o $(OBJS)
581+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
582+
566583
ifdef LLAMA_METAL
567584
metal: examples/metal/metal.cpp ggml.o $(OBJS)
568585
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

Package.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ let package = Package(
4545
.unsafeFlags(["-Wno-shorten-64-to-32"]),
4646
.define("GGML_USE_K_QUANTS"),
4747
.define("GGML_USE_ACCELERATE")
48+
.define("ACCELERATE_NEW_LAPACK")
49+
.define("ACCELERATE_LAPACK_ILP64")
4850
] + additionalSettings,
4951
linkerSettings: [
5052
.linkedFramework("Accelerate")

README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
1111

1212
### Hot topics
1313

14+
- Parallel decoding + continuous batching support incoming: [#3228](https://github.com/ggerganov/llama.cpp/pull/3228) \
15+
**Devs should become familiar with the new API**
1416
- Local Falcon 180B inference on Mac Studio
1517

1618
https://github.com/ggerganov/llama.cpp/assets/1991296/98abd4e8-7077-464c-ae89-aebabca7757e
@@ -90,6 +92,7 @@ as the main playground for developing new features for the [ggml](https://github
9092
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
9193
- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft))
9294
- [X] [Aquila-7B](https://huggingface.co/BAAI/Aquila-7B) / [AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
95+
- [X] Mistral AI v0.1
9396

9497
**Bindings:**
9598

@@ -499,7 +502,7 @@ Building the program with BLAS support may lead to some performance improvements
499502
```sh
500503
mkdir build
501504
cd build
502-
cmake .. -DLLAMA_CLBLAST=ON -DCLBlast_dir=/some/path
505+
cmake .. -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
503506
cmake --build . --config Release
504507
```
505508
- CMake (Windows):
@@ -555,6 +558,10 @@ python3 convert.py models/7B/
555558
# quantize the model to 4-bits (using q4_0 method)
556559
./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q4_0.gguf q4_0
557560

561+
# update the gguf filetype to current if older version is unsupported by another application
562+
./quantize ./models/7B/ggml-model-q4_0.gguf ./models/7B/ggml-model-q4_0-v2.gguf COPY
563+
564+
558565
# run the inference
559566
./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
560567
```
@@ -591,6 +598,11 @@ Several quantization methods are supported. They differ in the resulting model d
591598
| 13B | ms/tok @ 8th | - | 73 | 82 | 98 | 105 | 128 |
592599
| 13B | bits/weight | 16.0 | 4.5 | 5.0 | 5.5 | 6.0 | 8.5 |
593600

601+
- [k-quants](https://github.com/ggerganov/llama.cpp/pull/1684)
602+
- recent k-quants improvements
603+
- [#2707](https://github.com/ggerganov/llama.cpp/pull/2707)
604+
- [#2807](https://github.com/ggerganov/llama.cpp/pull/2807)
605+
594606
### Perplexity (measuring model quality)
595607

596608
You can use the `perplexity` example to measure perplexity over a given prompt (lower perplexity is better).

build.zig

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,20 @@ const Maker = struct {
3636
}
3737

3838
fn init(builder: *std.build.Builder) !Maker {
39-
const commit_hash = @embedFile(".git/refs/heads/master");
39+
// const commit_hash = @embedFile(".git/refs/heads/master");
40+
const target = builder.standardTargetOptions(.{});
4041
const config_header = builder.addConfigHeader(
4142
.{ .style = .blank, .include_path = "build-info.h" },
4243
.{
4344
.BUILD_NUMBER = 0,
44-
.BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline
45+
.BUILD_COMMIT = "12345", // omit newline
46+
.BUILD_COMPILER = "Zig 0.11.0",
47+
.BUILD_TARGET = try target.allocDescription(builder.allocator),
4548
},
4649
);
4750
var m = Maker{
4851
.builder = builder,
49-
.target = builder.standardTargetOptions(.{}),
52+
.target = target,
5053
.optimize = builder.standardOptimizeOption(.{}),
5154
.config_header = config_header,
5255
.enable_lto = false,
@@ -58,7 +61,7 @@ const Maker = struct {
5861
try m.addCFlag("-std=c11");
5962
try m.addCxxFlag("-std=c++11");
6063
try m.addProjectInclude(&.{});
61-
try m.addProjectInclude(&.{"examples"});
64+
try m.addProjectInclude(&.{"common"});
6265
return m;
6366
}
6467

@@ -71,6 +74,7 @@ const Maker = struct {
7174
o.addCSourceFiles(&.{src}, m.cxxflags.items);
7275
o.linkLibCpp();
7376
}
77+
o.addConfigHeader(m.config_header);
7478
for (m.include_dirs.items) |i| o.addIncludePath(.{ .path = i });
7579
o.want_lto = m.enable_lto;
7680
return o;
@@ -104,15 +108,15 @@ pub fn build(b: *std.build.Builder) !void {
104108
const ggml = make.obj("ggml", "ggml.c");
105109
const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
106110
const llama = make.obj("llama", "llama.cpp");
107-
const common = make.obj("common", "examples/common.cpp");
108-
const console = make.obj("common", "examples/console.cpp");
109-
const grammar_parser = make.obj("grammar-parser", "examples/grammar-parser.cpp");
111+
const common = make.obj("common", "common/common.cpp");
112+
const console = make.obj("common", "common/console.cpp");
113+
const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp");
110114

111115
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, llama, common, console, grammar_parser });
112-
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, llama });
116+
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, llama, common });
113117
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, llama, common });
114118
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, llama, common });
115-
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, llama });
119+
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, llama, common });
116120

117121
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, llama, common, grammar_parser });
118122
if (server.target.isWindows()) {

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ add_library(${TARGET} OBJECT
99
console.cpp
1010
grammar-parser.h
1111
grammar-parser.cpp
12+
train.h
13+
train.cpp
1214
)
1315

1416
if (BUILD_SHARED_LIBS)

0 commit comments

Comments
 (0)