Skip to content

Commit dbbaf82

Browse files
committed
pipeline parallelism demo
1 parent f172de0 commit dbbaf82

File tree

4 files changed

+245
-173
lines changed

4 files changed

+245
-173
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1149,7 +1149,8 @@ int main(int argc, char ** argv) {
1149 | 1149 |
1150 | 1150 |   // warmup run
1151 | 1151 |   if (t.n_prompt > 0) {
1152 |      | - test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads);
     | 1152 | + //test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads);
     | 1153 | + test_prompt(ctx, std::min(t.n_prompt, 32), 0, t.n_batch, t.n_threads);
1153 | 1154 |   }
1154 | 1155 |   if (t.n_gen > 0) {
1155 | 1156 |   test_gen(ctx, 1, 0, t.n_threads);

ggml-alloc.c

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -319,6 +319,13 @@ struct ggml_backend_buffer * ggml_tallocr_get_buffer(ggml_tallocr_t alloc) {
319 | 319 |   return alloc->buffer;
320 | 320 |   }
321 | 321 |
    | 322 | + void ggml_tallocr_set_buffer(ggml_tallocr_t talloc, struct ggml_backend_buffer * buffer) {
    | 323 | + talloc->buffer = buffer;
    | 324 | + talloc->base = ggml_backend_buffer_get_base(buffer);
    | 325 | + talloc->alignment = ggml_backend_buffer_get_alignment(buffer);
    | 326 | + ggml_tallocr_reset(talloc);
    | 327 | + }
    | 328 | +
322 | 329 |   void ggml_tallocr_free(ggml_tallocr_t alloc) {
323 | 330 |   if (alloc == NULL) {
324 | 331 |   return;

ggml-alloc.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ GGML_API ggml_tallocr_t ggml_tallocr_new_measure_from_buft(struct ggml_backend_b
59 | 59 |   GGML_API ggml_tallocr_t ggml_tallocr_new_measure_from_backend(struct ggml_backend * backend);
60 | 60 |
61 | 61 |   GGML_API struct ggml_backend_buffer * ggml_tallocr_get_buffer(ggml_tallocr_t talloc);
   | 62 | + GGML_API void ggml_tallocr_set_buffer(ggml_tallocr_t talloc, struct ggml_backend_buffer * buffer);
62 | 63 |
63 | 64 |   GGML_API void ggml_tallocr_free (ggml_tallocr_t talloc);
64 | 65 |   GGML_API bool ggml_tallocr_is_measure (ggml_tallocr_t talloc);

0 commit comments

Comments
 (0)