Commit ed6b909

Merge branch 'master' into dry-sampler
2 parents d1676a1 + c21a896 commit ed6b909

98 files changed: +2452 additions, -1052 deletions


.devops/llama-server.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
-    apt-get install -y build-essential git libcurl4-openssl-dev curl
+    apt-get install -y build-essential git libcurl4-openssl-dev
 
 WORKDIR /app
 
@@ -16,7 +16,7 @@ RUN make -j$(nproc) llama-server
 FROM ubuntu:$UBUNTU_VERSION AS runtime
 
 RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev libgomp1
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
 COPY --from=build /app/llama-server /llama-server
 

.devops/nix/package.nix

Lines changed: 3 additions & 10 deletions
@@ -126,16 +126,9 @@ let
       ++ optionals useMetalKit [ MetalKit ];
 
     cudaBuildInputs = with cudaPackages; [
-      cuda_cccl.dev # <nv/target>
-
-      # A temporary hack for reducing the closure size, remove once cudaPackages
-      # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-      cuda_cudart.dev
-      cuda_cudart.lib
-      cuda_cudart.static
-      libcublas.dev
-      libcublas.lib
-      libcublas.static
+      cuda_cudart
+      cuda_cccl # <nv/target>
+      libcublas
     ];
 
     rocmBuildInputs = with rocmPackages; [

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -139,7 +139,8 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location o
 # determining _precisely_ which defines are necessary for the llama-config
 # package.
 #
-get_directory_property(GGML_DIR_DEFINES DIRECTORY ggml/src COMPILE_DEFINITIONS)
+get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
+get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
 get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
 set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
 get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)

Makefile

Lines changed: 14 additions & 15 deletions
@@ -1605,42 +1605,41 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
 # Mark legacy binary targets as .PHONY so that they are always checked.
 .PHONY: main quantize perplexity embedding server
 
+# Define the object file target
+examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp
+    $(CXX) $(CXXFLAGS) -c $< -o $@
+
 # NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate.
 # Eventually we will want to remove these target from building all the time.
-main: examples/deprecation-warning/deprecation-warning.cpp
-    $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-    $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+main: examples/deprecation-warning/deprecation-warning.o
+    $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
     @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
 
-server: examples/deprecation-warning/deprecation-warning.cpp
-    $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-    $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+server: examples/deprecation-warning/deprecation-warning.o
+    $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
     @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
 
-quantize: examples/deprecation-warning/deprecation-warning.cpp
+quantize: examples/deprecation-warning/deprecation-warning.o
 ifneq (,$(wildcard quantize))
-    $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-    $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+    $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
     @echo "#########"
     @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
    @echo " Remove the 'quantize' binary to remove this warning."
     @echo "#########"
 endif
 
-perplexity: examples/deprecation-warning/deprecation-warning.cpp
+perplexity: examples/deprecation-warning/deprecation-warning.o
 ifneq (,$(wildcard perplexity))
-    $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-    $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+    $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
     @echo "#########"
     @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
     @echo " Remove the 'perplexity' binary to remove this warning."
     @echo "#########"
 endif
 
-embedding: examples/deprecation-warning/deprecation-warning.cpp
+embedding: examples/deprecation-warning/deprecation-warning.o
 ifneq (,$(wildcard embedding))
-    $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-    $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+    $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
     @echo "#########"
     @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
     @echo " Remove the 'embedding' binary to remove this warning."

README.md

Lines changed: 9 additions & 0 deletions
@@ -95,8 +95,16 @@ Typically finetunes of the base models below are supported as well.
 - [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
 - [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
 - [x] [OLMo](https://allenai.org/olmo)
+- [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330)
 - [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia)
+- [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520)
+- [x] [Smaug](https://huggingface.co/models?search=Smaug)
+- [x] [Poro 34B](https://huggingface.co/LumiOpen/Poro-34B)
+- [x] [Bitnet b1.58 models](https://huggingface.co/1bitLLM)
+- [x] [Flan T5](https://huggingface.co/models?search=flan-t5)
+- [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca)
 - [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b)
+- [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
 
 (instructions for supporting more models: [HOWTO-add-model.md](./docs/development/HOWTO-add-model.md))
 
@@ -145,6 +153,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [Faraday](https://faraday.dev/) (proprietary)
 - [LMStudio](https://lmstudio.ai/) (proprietary)
 - [Layla](https://play.google.com/store/apps/details?id=com.laylalite) (proprietary)
+- [ramalama](https://github.com/containers/ramalama) (MIT)
 - [LocalAI](https://github.com/mudler/LocalAI) (MIT)
 - [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL)
 - [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile)

common/common.cpp

Lines changed: 11 additions & 9 deletions
@@ -1659,7 +1659,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() });
     options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port });
     options.push_back({ "server", " --path PATH", "path to serve static files from (default: %s)", params.public_path.c_str() });
-    options.push_back({ "server", " --embedding(s)", "enable embedding endpoint (default: %s)", params.embedding ? "enabled" : "disabled" });
+    options.push_back({ "server", " --embedding(s)", "restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled" });
     options.push_back({ "server", " --api-key KEY", "API key to use for authentication (default: none)" });
     options.push_back({ "server", " --api-key-file FNAME", "path to file containing API keys (default: none)" });
     options.push_back({ "server", " --ssl-key-file FNAME", "path to file a PEM-encoded SSL private key" });
@@ -2064,8 +2064,8 @@ std::string fs_get_cache_file(const std::string & filename) {
 //
 // Model utils
 //
-
-std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
+struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
+    llama_init_result iparams;
     auto mparams = llama_model_params_from_gpt_params(params);
 
     llama_model * model = nullptr;
@@ -2080,7 +2080,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
 
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
-        return std::make_tuple(nullptr, nullptr);
+        return iparams;
     }
 
     auto cparams = llama_context_params_from_gpt_params(params);
@@ -2089,7 +2089,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     if (lctx == NULL) {
         fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
         llama_free_model(model);
-        return std::make_tuple(nullptr, nullptr);
+        return iparams;
     }
 
     if (!params.control_vectors.empty()) {
@@ -2100,7 +2100,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         if (cvec.n_embd == -1) {
             llama_free(lctx);
             llama_free_model(model);
-            return std::make_tuple(nullptr, nullptr);
+            return iparams;
         }
 
         int err = llama_control_vector_apply(lctx,
@@ -2112,7 +2112,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         if (err) {
             llama_free(lctx);
             llama_free_model(model);
-            return std::make_tuple(nullptr, nullptr);
+            return iparams;
         }
     }
 
@@ -2124,7 +2124,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             llama_free(lctx);
             llama_free_model(model);
-            return std::make_tuple(nullptr, nullptr);
+            return iparams;
         }
         llama_lora_adapter_set(lctx, adapter, lora_scale);
     }
@@ -2160,7 +2160,9 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         llama_reset_timings(lctx);
     }
 
-    return std::make_tuple(model, lctx);
+    iparams.model = model;
+    iparams.context = lctx;
+    return iparams;
 }
 
 struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & params) {

common/common.h

Lines changed: 6 additions & 2 deletions
@@ -308,8 +308,12 @@ std::string fs_get_cache_file(const std::string & filename);
 // Model utils
 //
 
-// TODO: avoid tuplue, use struct
-std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params);
+struct llama_init_result {
+    struct llama_model * model = nullptr;
+    struct llama_context * context = nullptr;
+};
+
+struct llama_init_result llama_init_from_gpt_params(gpt_params & params);
 
 struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params);
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
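
For reference, a minimal sketch (not part of this commit) of how a caller consumes the new struct-returning API in place of the old std::tie-on-a-tuple pattern; it assumes `common.h`/`llama.h` are available and that `gpt_params` has been populated, mirroring the updated examples further down:

```cpp
// Hypothetical caller (the function name `run` is illustrative), following the pattern
// the updated examples in this commit use.
#include "common.h"
#include "llama.h"

int run(gpt_params & params) {
    llama_init_result llama_init = llama_init_from_gpt_params(params);

    llama_model   * model = llama_init.model;
    llama_context * ctx   = llama_init.context;

    if (model == nullptr || ctx == nullptr) {
        // on failure both members stay nullptr, mirroring the old {nullptr, nullptr} tuple
        return 1;
    }

    // ... run inference with model/ctx ...

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}
```

The struct keeps the old failure contract (both pointers null on error) while allowing future fields to be added without touching every call site.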

convert_hf_to_gguf.py

Lines changed: 1 addition & 1 deletion
@@ -316,7 +316,7 @@ def prepare_tensors(self):
         if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
             if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
                 data = gguf.quantize_bf16(data)
-                assert data.dtype == np.int16
+                assert data.dtype == np.uint16
                 data_qtype = gguf.GGMLQuantizationType.BF16
 
             elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):
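
The corrected assert reflects how bf16 data is represented: bfloat16 keeps the top 16 bits of an IEEE-754 float and is stored as raw unsigned 16-bit words, so there is no signed-integer interpretation. A standalone C++ sketch of that conversion (illustrative only; the round-to-nearest-even step is an assumption about typical converters, not a quote of `gguf.quantize_bf16`):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Convert an fp32 value to its bf16 bit pattern: round, then keep the high 16 bits.
static uint16_t fp32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    bits += 0x7fff + ((bits >> 16) & 1);   // round to nearest, ties to even
    return static_cast<uint16_t>(bits >> 16);
}

int main() {
    std::printf("bf16(1.0f)  = 0x%04x\n", fp32_to_bf16(1.0f));   // 0x3f80
    std::printf("bf16(-2.5f) = 0x%04x\n", fp32_to_bf16(-2.5f));  // 0xc020
    return 0;
}
```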

docs/build.md

Lines changed: 5 additions & 1 deletion
@@ -178,7 +178,11 @@ For Jetson user, if you have Jetson Orin, you can try this: [Offical Support](ht
   cmake --build build --config Release
   ```
 
-The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. The following compilation options are also available to tweak performance:
+The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used.
+
+The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. In Windows this setting is available in the NVIDIA control panel as `System Memory Fallback`.
+
+The following compilation options are also available to tweak performance:
 
 | Option | Legal values | Default | Description |
 |-------------------------------|------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|

examples/baby-llama/baby-llama.cpp

Lines changed: 0 additions & 1 deletion
@@ -1,7 +1,6 @@
 #include "ggml.h"
 #include "train.h"
 
-#include <vector>
 #include <cassert>
 #include <cstdlib>
 #include <cstring>

examples/batched-bench/batched-bench.cpp

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ int main(int argc, char ** argv) {
     llama_context_params ctx_params = llama_context_params_from_gpt_params(params);
 
     // ensure enough sequences are available
-    ctx_params.n_seq_max = *std::max_element(n_pl.begin(), n_pl.end());
+    ctx_params.n_seq_max = n_pl.empty() ? 1 : *std::max_element(n_pl.begin(), n_pl.end());
 
     llama_context * ctx = llama_new_context_with_model(model, ctx_params);
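
The added guard matters because `std::max_element` on an empty range returns the end iterator, and dereferencing it is undefined behavior; a small standalone sketch of the pattern (illustrative only, not from the commit):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> n_pl;  // e.g. no -npl values were supplied on the command line

    // Without the empty() check, *std::max_element(...) would dereference end() here.
    int n_seq_max = n_pl.empty() ? 1 : *std::max_element(n_pl.begin(), n_pl.end());

    std::printf("n_seq_max = %d\n", n_seq_max);
    return 0;
}
```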

examples/cvector-generator/cvector-generator.cpp

Lines changed: 4 additions & 3 deletions
@@ -414,9 +414,10 @@ int main(int argc, char ** argv) {
     llama_numa_init(params.numa);
 
     // load the model to get hparams
-    llama_model * model;
-    llama_context * ctx;
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
 
     // int n_ctx = llama_n_ctx(ctx);
     int n_layers = llama_n_layer(model);

examples/embedding/embedding.cpp

Lines changed: 4 additions & 4 deletions
@@ -79,11 +79,11 @@ int main(int argc, char ** argv) {
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    llama_model * model;
-    llama_context * ctx;
-
     // load the model
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
     if (model == NULL) {
         fprintf(stderr, "%s: error: unable to load model\n", __func__);
         return 1;

examples/eval-callback/eval-callback.cpp

Lines changed: 4 additions & 3 deletions
@@ -163,9 +163,10 @@ int main(int argc, char ** argv) {
     params.warmup = false;
 
     // init
-    llama_model * model;
-    llama_context * ctx;
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
     if (model == nullptr || ctx == nullptr) {
         fprintf(stderr, "%s : failed to init\n", __func__);
         return 1;

examples/imatrix/imatrix.cpp

Lines changed: 3 additions & 3 deletions
@@ -611,10 +611,10 @@ int main(int argc, char ** argv) {
     params.warmup = false;
 
     // init
-    llama_model * model;
-    llama_context * ctx;
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
 
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
     if (model == nullptr || ctx == nullptr) {
         fprintf(stderr, "%s : failed to init\n", __func__);
         return 1;

examples/infill/infill.cpp

Lines changed: 4 additions & 1 deletion
@@ -179,7 +179,10 @@ int main(int argc, char ** argv) {
 
     // load the model and apply lora adapter, if any
     LOG("%s: load the model and apply lora adapter, if any\n", __func__);
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    model = llama_init.model;
+    ctx = llama_init.context;
 
     if (model == NULL) {
         LOG_TEE("%s: error: unable to load model\n", __func__);

examples/lookahead/lookahead.cpp

Lines changed: 4 additions & 4 deletions
@@ -58,11 +58,11 @@ int main(int argc, char ** argv) {
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    llama_model * model = NULL;
-    llama_context * ctx = NULL;
-
     // load the target model
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
 
     // Tokenize the prompt
     std::vector<llama_token> inp;

examples/lookup/lookup-create.cpp

Lines changed: 4 additions & 4 deletions
@@ -22,11 +22,11 @@ int main(int argc, char ** argv){
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    llama_model * model = NULL;
-    llama_context * ctx = NULL;
-
     // load the model
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
     GGML_ASSERT(model != nullptr);
 
     // tokenize the prompt

examples/lookup/lookup-stats.cpp

Lines changed: 4 additions & 4 deletions
@@ -26,11 +26,11 @@ int main(int argc, char ** argv){
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    llama_model * model = NULL;
-    llama_context * ctx = NULL;
-
     // load the model
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
 
     // tokenize the prompt
     std::vector<llama_token> inp;

examples/lookup/lookup.cpp

Lines changed: 4 additions & 4 deletions
@@ -34,11 +34,11 @@ int main(int argc, char ** argv){
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    llama_model * model = NULL;
-    llama_context * ctx = NULL;
-
     // load the model
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
 
     // tokenize the prompt
     std::vector<llama_token> inp;
