
Commit 02025a7
Merge branch 'master' into pizza
2 parents: cb443e7 + 698f7b5

5 files changed: 18 additions, 20 deletions


Makefile

Lines changed: 2 additions & 0 deletions
@@ -166,6 +166,8 @@ perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
 embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
 
+libllama.so: llama.o ggml.o
+	$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama.so llama.o ggml.o $(LDFLAGS)
 #
 # Tests
 #
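The new `libllama.so` target exposes the core model and inference code as a shared library that other programs can link against. A minimal sketch of such a client, assuming the `llama.h` C API as it stands at this revision (the file name `client.cpp` and the model path are illustrative):

```cpp
// Hypothetical client of libllama.so; build with something like:
//   g++ -I. client.cpp -L. -lllama -o client
#include "llama.h"

#include <cstdio>

int main() {
    llama_context_params params = llama_context_default_params();

    // Illustrative path; point this at a real ggml model file.
    llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ... tokenize a prompt, run llama_eval(), sample tokens ...

    llama_free(ctx);
    return 0;
}
```

At runtime the dynamic loader also has to find the library, e.g. by pointing `LD_LIBRARY_PATH` at the build directory.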

README.md

Lines changed: 5 additions & 3 deletions
@@ -350,20 +350,22 @@ We have two Docker images available for this project:
 
 The easiest way to download the models, convert them to ggml and optimize them is with the --all-in-one command which includes the full docker image.
 
+Replace `/path/to/models` below with the actual path where you downloaded the models.
+
 ```bash
-docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B
 ```
 
 On complete, you are ready to play!
 
 ```bash
-docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
 ```
 
 or with light image:
 
 ```bash
-docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
 ```
 
 ### Contributing
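For context, Docker's `-v` flag bind-mounts the host directory on the left of the colon to `/models` inside the container, so only that host-side path varies per machine; the `/models/...` paths in the commands stay the same everywhere.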

build.zig

Lines changed: 7 additions & 2 deletions
@@ -3,12 +3,14 @@ const std = @import("std");
 pub fn build(b: *std.Build) void {
     const target = b.standardTargetOptions(.{});
     const optimize = b.standardOptimizeOption(.{});
+    const want_lto = b.option(bool, "lto", "Want -fLTO");
 
     const lib = b.addStaticLibrary(.{
         .name = "llama",
         .target = target,
         .optimize = optimize,
     });
+    lib.want_lto = want_lto;
     lib.linkLibCpp();
     lib.addIncludePath(".");
     lib.addIncludePath("examples");
@@ -17,11 +19,11 @@ pub fn build(b: *std.Build) void {
     }, &.{"-std=c11"});
     lib.addCSourceFiles(&.{
         "llama.cpp",
-        "examples/common.cpp",
     }, &.{"-std=c++11"});
     lib.install();
 
-    const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize };
+    const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize, .want_lto = want_lto };
+
     const exe = build_example("main", build_args);
     _ = build_example("quantize", build_args);
     _ = build_example("perplexity", build_args);
@@ -44,16 +46,19 @@ fn build_example(comptime name: []const u8, args: anytype) *std.build.LibExeObjS
     const lib = args.lib;
     const target = args.target;
     const optimize = args.optimize;
+    const want_lto = args.want_lto;
 
     const exe = b.addExecutable(.{
         .name = name,
         .target = target,
         .optimize = optimize,
     });
+    exe.want_lto = want_lto;
    exe.addIncludePath(".");
     exe.addIncludePath("examples");
     exe.addCSourceFiles(&.{
         std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{name, name}),
+        "examples/common.cpp",
     }, &.{"-std=c++11"});
     exe.linkLibrary(lib);
     exe.install();
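Threading `want_lto` through `build_args` means the single option declared by `b.option(bool, "lto", "Want -fLTO")` now reaches both the static library and every example executable; by Zig's build-option convention it should be settable on the command line as `zig build -Dlto=true`. The same hunks also move `examples/common.cpp` out of the library and into each example binary, so the library itself stays free of example-only code.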

examples/main/main.cpp

Lines changed: 1 addition & 1 deletion
@@ -432,7 +432,7 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-        if (embd.back() == llama_token_eos()) {
+        if (!embd.empty() && embd.back() == llama_token_eos()) {
             if (params.instruct) {
                 is_interacting = true;
             } else {
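The extra `!embd.empty()` guard is the whole fix: calling `back()` on an empty `std::vector` is undefined behavior, so the old check could crash or read garbage on any iteration where the token buffer happened to be empty. Short-circuit evaluation ensures `back()` is only reached once the vector is known to be non-empty.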

llama.cpp

Lines changed: 3 additions & 14 deletions
@@ -1454,19 +1454,13 @@ static llama_vocab::id llama_sample_top_p_top_k(
         }
     }
 
-    if (top_k > 0 && top_k < n_logits) {
-        sample_top_k(logits_id, top_k);
-    }
-
-    float maxl = -std::numeric_limits<float>::infinity();
-    for (const auto & kv : logits_id) {
-        maxl = std::max(maxl, kv.first);
-    }
+    sample_top_k(logits_id, top_k > 0 ? std::min(top_k, n_logits) : n_logits);
 
     // compute probs for the top k tokens
     std::vector<float> probs;
     probs.reserve(logits_id.size());
 
+    float maxl = logits_id[0].first;
     double sum = 0.0;
     for (const auto & kv : logits_id) {
         const float p = expf(kv.first - maxl);
@@ -1489,16 +1483,11 @@ static llama_vocab::id llama_sample_top_p_top_k(
                 break;
             }
         }
-
-        cumsum = 1.0/cumsum;
-        for (int i = 0; i < (int) probs.size(); i++) {
-            probs[i] *= cumsum;
-        }
     }
 
     //printf("\n");
     //for (int i = 0; i < (int) 10; i++) {
-    //    printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
+    //    printf("%d: '%s' %f\n", i, lctx.vocab.id_to_token.at(logits_id[i].second).tok.c_str(), probs[i]);
     //}
     //printf("\n\n");
     //exit(0);
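Two observations make the shorter version work. First, `sample_top_k` leaves the candidates sorted in descending logit order, so the maximum logit is simply `logits_id[0].first` and the old max-scan loop is redundant; folding the `top_k > 0 && top_k < n_logits` branch into a single call with `std::min` preserves that invariant in every case. Second, the renormalization after the top-p cutoff can be dropped if, as the surrounding code suggests, the resulting `probs` are fed to something like `std::discrete_distribution`, which normalizes its weights internally. A standalone sketch of the resulting top-k/softmax flow, with a hypothetical `sample_top_k` written to match the behavior the diff relies on:

```cpp
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>

typedef std::pair<float, int> logit_id; // (logit, token id)

// Keep the k highest-logit entries, sorted descending by logit.
static void sample_top_k(std::vector<logit_id> & logits_id, int top_k) {
    std::partial_sort(
            logits_id.begin(), logits_id.begin() + top_k, logits_id.end(),
            [](const logit_id & a, const logit_id & b) { return a.first > b.first; });
    logits_id.resize(top_k);
}

// Softmax over the surviving candidates; assumes logits_id is non-empty.
static std::vector<float> softmax_top_k(std::vector<logit_id> & logits_id, int top_k) {
    const int n_logits = (int) logits_id.size();

    // top_k <= 0 means "keep everything"; std::min guards against top_k > n_logits.
    sample_top_k(logits_id, top_k > 0 ? std::min(top_k, n_logits) : n_logits);

    std::vector<float> probs;
    probs.reserve(logits_id.size());

    // After the sort the first entry holds the maximum logit; subtracting
    // it before expf() avoids overflow without a separate max-scan pass.
    const float maxl = logits_id[0].first;
    double sum = 0.0;
    for (const auto & kv : logits_id) {
        const float p = expf(kv.first - maxl);
        probs.push_back(p);
        sum += p;
    }
    for (auto & p : probs) {
        p = (float) (p / sum);
    }
    return probs;
}
```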
