Skip to content

Commit 6a54973

Browse files
committed
Merge branch 'master' into compilade/convert-hf-refactor
2 parents 13f4cf7 + 60325fa commit 6a54973

File tree

11 files changed

+495
-152
lines changed

11 files changed

+495
-152
lines changed

.github/workflows/close-issue.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
steps:
1313
- uses: actions/stale@v5
1414
with:
15-
exempt-issue-labels: "refactor,help wanted,good first issue,research"
15+
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
1616
days-before-issue-stale: 30
1717
days-before-issue-close: 14
1818
stale-issue-label: "stale"

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ struct gpt_params {
135135
bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt
136136
size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. If 0, all tasks will be computed
137137

138-
bool kl_divergence = false; // compute KL-divergence
138+
bool kl_divergence = false; // compute KL divergence
139139

140140
bool random_prompt = false; // do not randomize prompt if none provided
141141
bool use_color = false; // use color to distinguish generations and inputs

common/log.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std::
234234
// INTERNAL, DO NOT USE
235235
// USE LOG() INSTEAD
236236
//
237-
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
237+
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
238238
#define LOG_IMPL(str, ...) \
239239
do { \
240240
if (LOG_TARGET != nullptr) \
@@ -257,7 +257,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std::
257257
// INTERNAL, DO NOT USE
258258
// USE LOG_TEE() INSTEAD
259259
//
260-
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
260+
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
261261
#define LOG_TEE_IMPL(str, ...) \
262262
do { \
263263
if (LOG_TARGET != nullptr) \

examples/main/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ int main(int argc, char ** argv) {
544544
// if we run out of context:
545545
// - take the n_keep first tokens from the original prompt (via n_past)
546546
// - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
547-
if (n_past + (int) embd.size() + std::max<int>(0, guidance_offset) > n_ctx) {
547+
if (n_past + (int) embd.size() + std::max<int>(0, guidance_offset) >= n_ctx) {
548548
if (params.n_predict == -2) {
549549
LOG_TEE("\n\n%s: context full and n_predict == -%d => stopping\n", __func__, params.n_predict);
550550
break;

examples/perplexity/README.md

Lines changed: 115 additions & 3 deletions
Large diffs are not rendered by default.

examples/perplexity/perplexity.cpp

Lines changed: 175 additions & 55 deletions
Large diffs are not rendered by default.

examples/server/tests/features/results.feature

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,44 +7,16 @@ Feature: Results
77
And a model file tinyllamas/split/stories15M-00001-of-00003.gguf from HF repo ggml-org/models
88
And a model file test-model-00001-of-00003.gguf
99
And 128 as batch size
10-
And 256 KV cache size
10+
And 1024 KV cache size
1111
And 128 max tokens to predict
12+
And continuous batching
1213

13-
Scenario Outline: Multi users completion
14+
Scenario Outline: consistent results with same seed
1415
Given <n_slots> slots
15-
And continuous batching
1616
Then the server is starting
1717
Then the server is healthy
1818

19-
Given 42 as seed
20-
And a prompt:
21-
"""
22-
Write a very long story about AI.
23-
"""
24-
25-
Given 42 as seed
26-
And a prompt:
27-
"""
28-
Write a very long story about AI.
29-
"""
30-
31-
Given 42 as seed
32-
And a prompt:
33-
"""
34-
Write a very long story about AI.
35-
"""
36-
37-
Given 42 as seed
38-
And a prompt:
39-
"""
40-
Write a very long story about AI.
41-
"""
42-
43-
Given 42 as seed
44-
And a prompt:
45-
"""
46-
Write a very long story about AI.
47-
"""
19+
Given 4 prompts "Title: Little Red Riding Hood But In Space\n\nSummary:" with seed 42
4820

4921
Given concurrent completion requests
5022
Then the server is busy
@@ -55,3 +27,55 @@ Feature: Results
5527
| n_slots |
5628
| 1 |
5729
| 2 |
30+
31+
Scenario Outline: different results with different seed
32+
Given <n_slots> slots
33+
Then the server is starting
34+
Then the server is healthy
35+
36+
Given 1 prompts "Title: Little Red Riding Hood But In Space\n\nSummary:" with seed 42
37+
Given 1 prompts "Title: Little Red Riding Hood But In Space\n\nSummary:" with seed 43
38+
Given 1 prompts "Title: Little Red Riding Hood But In Space\n\nSummary:" with seed 44
39+
Given 1 prompts "Title: Little Red Riding Hood But In Space\n\nSummary:" with seed 45
40+
41+
Given concurrent completion requests
42+
Then the server is busy
43+
Then the server is idle
44+
And all slots are idle
45+
Then all predictions are different
46+
Examples:
47+
| n_slots |
48+
| 1 |
49+
| 2 |
50+
51+
Scenario Outline: consistent results with same seed and varying batch size
52+
Given 4 slots
53+
And <temp> temperature
54+
# And 0 as draft
55+
Then the server is starting
56+
Then the server is healthy
57+
58+
Given 1 prompts "Write a very long story about AI." with seed 42
59+
And concurrent completion requests
60+
# Then the server is busy # Not all slots will be utilized.
61+
Then the server is idle
62+
And all slots are idle
63+
64+
Given <n_parallel> prompts "Write a very long story about AI." with seed 42
65+
And concurrent completion requests
66+
# Then the server is busy # Not all slots will be utilized.
67+
Then the server is idle
68+
And all slots are idle
69+
70+
Then all predictions are equal
71+
Examples:
72+
| n_parallel | temp |
73+
| 1 | 0.0 |
74+
| 2 | 0.0 |
75+
| 4 | 0.0 |
76+
| 1 | 1.0 |
77+
# FIXME: These tests fail on master. The problem seems to be the unified KV cache.
78+
# See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227
79+
# and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574 .
80+
# | 2 | 1.0 |
81+
# | 4 | 1.0 |

0 commit comments

Comments
 (0)