Commit 2c1930d

Merge branch 'master' into HEAD

2 parents: 5cea869 + 3a00764


55 files changed: 4353 additions, 3150 deletions

.devops/lamma-cpp-clblast.srpm.spec renamed to .devops/llama-cpp-clblast.srpm.spec

Lines changed: 35 additions & 9 deletions
@@ -13,12 +13,13 @@
 # It is up to the user to install the correct vendor-specific support.
 
 Name: llama.cpp-clblast
-Version: master
+Version: %( date "+%%Y%%m%%d" )
 Release: 1%{?dist}
-Summary: OpenCL Inference of LLaMA model in pure C/C++
+Summary: OpenCL Inference of LLaMA model in C/C++
 License: MIT
 Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
-BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel
+BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
+Requires: clblast
 URL: https://github.com/ggerganov/llama.cpp
 
 %define debug_package %{nil}

@@ -35,18 +36,43 @@ make -j LLAMA_CLBLAST=1
 
 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llamacppclblast
-cp -p server %{buildroot}%{_bindir}/llamacppclblastserver
-cp -p simple %{buildroot}%{_bindir}/llamacppclblastsimple
+cp -p main %{buildroot}%{_bindir}/llamaclblast
+cp -p server %{buildroot}%{_bindir}/llamaclblastserver
+cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple
+
+mkdir -p %{buildroot}/usr/lib/systemd/system
+%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
+[Unit]
+Description=Llama.cpp server, CPU only (no GPU support in this build).
+After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
+
+[Service]
+Type=simple
+EnvironmentFile=/etc/sysconfig/llama
+ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS
+ExecReload=/bin/kill -s HUP $MAINPID
+Restart=never
+
+[Install]
+WantedBy=default.target
+EOF
+
+mkdir -p %{buildroot}/etc/sysconfig
+%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
+LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
+EOF
 
 %clean
 rm -rf %{buildroot}
 rm -rf %{_builddir}/*
 
 %files
-%{_bindir}/llamacppclblast
-%{_bindir}/llamacppclblastserver
-%{_bindir}/llamacppclblastsimple
+%{_bindir}/llamaclblast
+%{_bindir}/llamaclblastserver
+%{_bindir}/llamaclblastsimple
+/usr/lib/systemd/system/llamaclblast.service
+%config /etc/sysconfig/llama
 
 %pre
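
With the unit file and environment file now installed by the package, the server can be managed as a regular systemd service. A minimal sketch of the workflow, assuming the RPM is installed and a model file exists at the path configured in /etc/sysconfig/llama:

```sh
# Point the server at a local model (the spec's default path; adjust as needed).
echo 'LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"' | sudo tee /etc/sysconfig/llama

# Pick up the newly installed unit, then enable and start it.
sudo systemctl daemon-reload
sudo systemctl enable --now llamaclblast.service

# SIGHUP is wired to ExecReload in the unit.
sudo systemctl reload llamaclblast.service
```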

.devops/lamma-cpp-cublas.srpm.spec renamed to .devops/llama-cpp-cublas.srpm.spec

Lines changed: 25 additions & 1 deletion
@@ -13,7 +13,7 @@
 # It is up to the user to install the correct vendor-specific support.
 
 Name: llama.cpp-cublas
-Version: master
+Version: %( date "+%%Y%%m%%d" )
 Release: 1%{?dist}
 Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
 License: MIT

@@ -40,6 +40,28 @@ cp -p main %{buildroot}%{_bindir}/llamacppcublas
 cp -p server %{buildroot}%{_bindir}/llamacppcublasserver
 cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple
 
+mkdir -p %{buildroot}/usr/lib/systemd/system
+%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacublas.service
+[Unit]
+Description=Llama.cpp server, CPU only (no GPU support in this build).
+After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
+
+[Service]
+Type=simple
+EnvironmentFile=/etc/sysconfig/llama
+ExecStart=/usr/bin/llamacppcublasserver $LLAMA_ARGS
+ExecReload=/bin/kill -s HUP $MAINPID
+Restart=never
+
+[Install]
+WantedBy=default.target
+EOF
+
+mkdir -p %{buildroot}/etc/sysconfig
+%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
+LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
+EOF
+
 %clean
 rm -rf %{buildroot}
 rm -rf %{_builddir}/*

@@ -48,6 +70,8 @@ rm -rf %{_builddir}/*
 %{_bindir}/llamacppcublas
 %{_bindir}/llamacppcublasserver
 %{_bindir}/llamacppcublassimple
+/usr/lib/systemd/system/llamacublas.service
+%config /etc/sysconfig/llama
 
 %pre
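
The renamed specs build with the standard Fedora tooling. A rough sketch, assuming rpmdevtools is installed and the default ~/rpmbuild tree is used:

```sh
# One-time: create the ~/rpmbuild directory layout.
rpmdev-setuptree

# Download Source0 (the master tarball) into ~/rpmbuild/SOURCES.
spectool -g -R .devops/llama-cpp-cublas.srpm.spec

# Build the binary and source RPMs from the spec.
rpmbuild -ba .devops/llama-cpp-cublas.srpm.spec
```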

.devops/llama-cpp.srpm.spec

Lines changed: 36 additions & 9 deletions
@@ -6,47 +6,74 @@
 # Notes for llama.cpp:
 # 1. Tags are currently based on hash - which will not sort asciibetically.
 #    We need to declare standard versioning if people want to sort latest releases.
+#    In the meantime, YYYYMMDD format will be used.
 # 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
 # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
 #    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
 # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
 #    It is up to the user to install the correct vendor-specific support.
 
 Name: llama.cpp
-Version: master
+Version: %( date "+%%Y%%m%%d" )
 Release: 1%{?dist}
 Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
 License: MIT
 Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
-BuildRequires: coreutils make gcc-c++ git
+BuildRequires: coreutils make gcc-c++ git libstdc++-devel
+Requires: libstdc++
 URL: https://github.com/ggerganov/llama.cpp
 
 %define debug_package %{nil}
 %define source_date_epoch_from_changelog 0
 
 %description
 CPU inference for Meta's Lllama2 models using default options.
+Models are not included in this package and must be downloaded separately.
 
 %prep
-%autosetup
+%setup -n llama.cpp-master
 
 %build
 make -j
 
 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llamacpp
-cp -p server %{buildroot}%{_bindir}/llamacppserver
-cp -p simple %{buildroot}%{_bindir}/llamacppsimple
+cp -p main %{buildroot}%{_bindir}/llama
+cp -p server %{buildroot}%{_bindir}/llamaserver
+cp -p simple %{buildroot}%{_bindir}/llamasimple
+
+mkdir -p %{buildroot}/usr/lib/systemd/system
+%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
+[Unit]
+Description=Llama.cpp server, CPU only (no GPU support in this build).
+After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
+
+[Service]
+Type=simple
+EnvironmentFile=/etc/sysconfig/llama
+ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
+ExecReload=/bin/kill -s HUP $MAINPID
+Restart=never
+
+[Install]
+WantedBy=default.target
+EOF
+
+mkdir -p %{buildroot}/etc/sysconfig
+%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
+LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
+EOF
 
 %clean
 rm -rf %{buildroot}
 rm -rf %{_builddir}/*
 
 %files
-%{_bindir}/llamacpp
-%{_bindir}/llamacppserver
-%{_bindir}/llamacppsimple
+%{_bindir}/llama
+%{_bindir}/llamaserver
+%{_bindir}/llamasimple
+/usr/lib/systemd/system/llama.service
+%config /etc/sysconfig/llama
 
 %pre
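
The new Version tag uses RPM's %( ... ) shell-expansion macro; the doubled %% escapes the percent signs so the shell runs date "+%Y%m%d", producing a YYYYMMDD version that sorts chronologically. The expansion can be sanity-checked directly (output shown for a hypothetical build date):

```sh
$ rpm --eval '%( date "+%%Y%%m%%d" )'
20230827
```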

.gitignore

Lines changed: 4 additions & 1 deletion
@@ -64,10 +64,13 @@ poetry.toml
 
 # Test binaries
 tests/test-grammar-parser
+tests/test-llama-grammar
 tests/test-double-float
 tests/test-grad0
 tests/test-opt
 tests/test-quantize-fns
 tests/test-quantize-perf
 tests/test-sampling
-tests/test-tokenizer-0
+tests/test-tokenizer-0-llama
+tests/test-tokenizer-0-falcon
+tests/test-tokenizer-1

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -301,7 +301,7 @@ if (LLAMA_METAL)
     set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
 
     add_compile_definitions(GGML_USE_METAL)
-    add_compile_definitions(GGML_METAL_NDEBUG)
+    #add_compile_definitions(GGML_METAL_NDEBUG)
 
     # get full path to the file
     #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
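
With GGML_METAL_NDEBUG commented out, the Metal backend's debug logging stays enabled in CMake builds. A minimal sketch of a build that would pick this change up (macOS, standard llama.cpp CMake options):

```sh
# Configure and build with the Metal backend enabled.
mkdir build && cd build
cmake -DLLAMA_METAL=ON ..
cmake --build . --config Release
```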

Makefile

Lines changed: 11 additions & 5 deletions
@@ -2,7 +2,7 @@
 BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple server embd-input-test gguf llama-bench
 
 # Binaries only useful for tests
-TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0
+TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1
 
 default: $(BUILD_TARGETS)

@@ -305,7 +305,7 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 endif # LLAMA_HIPBLAS
 
 ifdef LLAMA_METAL
-	CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
+	CFLAGS += -DGGML_USE_METAL #-DGGML_METAL_NDEBUG
 	CXXFLAGS += -DGGML_USE_METAL
 	LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
 	OBJS += ggml-metal.o

@@ -447,10 +447,10 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
 vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)

@@ -471,5 +471,11 @@ tests/test-quantize-perf: tests/test-quantize-perf.cpp build-info.h ggml.o llama
 tests/test-sampling: tests/test-sampling.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-tokenizer-0: tests/test-tokenizer-0.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
+
+tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
+
+tests/test-tokenizer-1: tests/test-tokenizer-1.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
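
The split tokenizer tests build through the updated TEST_TARGETS. A sketch of building and running them; the vocab-file arguments are illustrative and assume GGUF vocab files have been generated under models/:

```sh
# Build just the renamed tokenizer test binaries.
make tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1

# Each test takes a vocab-only model file as its argument (paths assumed here).
./tests/test-tokenizer-0-llama models/ggml-vocab-llama.gguf
./tests/test-tokenizer-0-falcon models/ggml-vocab-falcon.gguf
```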

README.md

Lines changed: 4 additions & 0 deletions
@@ -11,6 +11,10 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
 
 ### Hot topics
 
+- #### IMPORTANT: Tokenizer fixes and API change (developers and projects using `llama.cpp` built-in tokenization must read): https://github.com/ggerganov/llama.cpp/pull/2810
+
+- GGUFv2 adds support for 64-bit sizes + backwards compatible: https://github.com/ggerganov/llama.cpp/pull/2821
+
 - Added support for Falcon models: https://github.com/ggerganov/llama.cpp/pull/2717
 
 - A new file format has been introduced: [GGUF](https://github.com/ggerganov/llama.cpp/pull/2398)
