Skip to content

Commit 2b5c73d

Browse files
committed
move BLAS to a separate backend
1 parent adc9ff3 commit 2b5c73d

File tree

8 files changed

+477
-241
lines changed

8 files changed

+477
-241
lines changed

CMakeLists.txt

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -312,9 +312,9 @@ if (LLAMA_BLAS)
312312
if (LLAMA_STATIC)
313313
set(BLA_STATIC ON)
314314
endif()
315-
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
316-
set(BLA_SIZEOF_INTEGER 8)
317-
endif()
315+
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
316+
# set(BLA_SIZEOF_INTEGER 8)
317+
#endif()
318318

319319
set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
320320
find_package(BLAS)
@@ -381,6 +381,9 @@ if (LLAMA_BLAS)
381381
add_compile_definitions(GGML_BLAS_USE_MKL)
382382
endif()
383383

384+
set(GGML_HEADERS_BLAS ggml-blas.h)
385+
set(GGML_SOURCES_BLAS ggml-blas.c)
386+
384387
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
385388
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
386389
else()
@@ -1273,6 +1276,7 @@ add_library(ggml OBJECT
12731276
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
12741277
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
12751278
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1279+
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
12761280
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
12771281
)
12781282

Makefile

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ ifndef LLAMA_NO_ACCELERATE
408408
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
409409
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
410410
MK_LDFLAGS += -framework Accelerate
411+
OBJS += ggml-blas.o
411412
endif
412413
endif # LLAMA_NO_ACCELERATE
413414

@@ -421,23 +422,35 @@ ifdef LLAMA_OPENBLAS
421422
MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
422423
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
423424
MK_LDFLAGS += $(shell pkg-config --libs openblas)
425+
OBJS += ggml-blas.o
424426
endif # LLAMA_OPENBLAS
425427

426-
ifndef LLAMA_NO_LLAMAFILE
427-
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
428-
OBJS += sgemm.o
429-
endif
428+
ifdef LLAMA_OPENBLAS64
429+
MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas64)
430+
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
431+
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
432+
OBJS += ggml-blas.o
433+
endif # LLAMA_OPENBLAS64
430434

431435
ifdef LLAMA_BLIS
432436
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
433437
MK_LDFLAGS += -lblis -L/usr/local/lib
438+
OBJS += ggml-blas.o
434439
endif # LLAMA_BLIS
435440

441+
ifndef LLAMA_NO_LLAMAFILE
442+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
443+
OBJS += sgemm.o
444+
endif
445+
436446
ifdef LLAMA_RPC
437447
MK_CPPFLAGS += -DGGML_USE_RPC
438448
OBJS += ggml-rpc.o
439449
endif # LLAMA_RPC
440450

451+
ggml-blas.o: ggml-blas.c ggml-blas.h
452+
$(CC) $(CFLAGS) -c $< -o $@
453+
441454
ifdef LLAMA_CUBLAS
442455
# LLAMA_CUBLAS is deprecated and will be removed in the future
443456
LLAMA_CUDA := 1

ggml-alloc.c

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ struct hash_node {
339339
};
340340

341341
struct tensor_alloc {
342+
int buffer_id;
342343
size_t offset;
343344
size_t size_max; // 0 = pre-allocated, unused, or view
344345
};
@@ -349,7 +350,6 @@ struct leaf_alloc {
349350
};
350351

351352
struct node_alloc {
352-
int buffer_id;
353353
struct tensor_alloc dst;
354354
struct tensor_alloc src[GGML_MAX_SRC];
355355
};
@@ -511,17 +511,18 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
511511
}
512512
}
513513

514-
static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
514+
static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
515515
// graph outputs are never freed
516516
if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
517517
AT_PRINTF("not freeing output %s\n", node->name);
518518
return;
519519
}
520520

521-
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
522-
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
523521
struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
524522
size_t offset = hn->offset;
523+
int buffer_id = hn->buffer_id;
524+
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
525+
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
525526
size_t size = ggml_backend_buft_get_alloc_size(buft, node);
526527
ggml_dyn_tallocr_free_tensor(alloc, offset, size, node);
527528
hn->allocated = false;
@@ -626,11 +627,11 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
626627
AT_PRINTF("view_src %s: %d children, %d views\n",
627628
view_src->name, view_src_hn->n_children, view_src_hn->n_views);
628629
if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src_hn->allocated) {
629-
ggml_gallocr_free_node(galloc, view_src, buffer_id);
630+
ggml_gallocr_free_node(galloc, view_src);
630631
}
631632
}
632633
else if (p_hn->allocated) {
633-
ggml_gallocr_free_node(galloc, parent, buffer_id);
634+
ggml_gallocr_free_node(galloc, parent);
634635
}
635636
}
636637
AT_PRINTF("\n");
@@ -674,22 +675,26 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
674675
for (int i = 0; i < graph->n_nodes; i++) {
675676
struct ggml_tensor * node = graph->nodes[i];
676677
struct node_alloc * node_alloc = &galloc->node_allocs[i];
677-
node_alloc->buffer_id = get_node_buffer_id(node_buffer_ids, i);
678+
//node_alloc->buffer_id = get_node_buffer_id(node_buffer_ids, i);
678679
if (node->view_src || node->data) {
680+
node_alloc->dst.buffer_id = -1;
679681
node_alloc->dst.offset = SIZE_MAX;
680682
node_alloc->dst.size_max = 0;
681683
} else {
682684
struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
683-
node_alloc->dst.offset = hn->offset;
684-
node_alloc->dst.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], node);
685+
node_alloc->dst.buffer_id = hn->buffer_id;
686+
node_alloc->dst.offset = hn->offset;
687+
node_alloc->dst.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], node);
685688
}
686689
for (int j = 0; j < GGML_MAX_SRC; j++) {
687690
struct ggml_tensor * src = node->src[j];
688691
if (!src || src->view_src || src->data) {
692+
node_alloc->src[j].buffer_id = -1;
689693
node_alloc->src[j].offset = SIZE_MAX;
690694
node_alloc->src[j].size_max = 0;
691695
} else {
692696
struct hash_node * hn = ggml_gallocr_hash_get(galloc, src);
697+
node_alloc->src[j].buffer_id = hn->buffer_id;
693698
node_alloc->src[j].offset = hn->offset;
694699
node_alloc->src[j].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], src);
695700
}
@@ -706,9 +711,11 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
706711
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
707712
galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
708713
if (leaf->view_src || leaf->data) {
714+
galloc->leaf_allocs[i].leaf.buffer_id = -1;
709715
galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
710716
galloc->leaf_allocs[i].leaf.size_max = 0;
711717
} else {
718+
galloc->leaf_allocs[i].leaf.buffer_id = hn->buffer_id;
712719
galloc->leaf_allocs[i].leaf.offset = hn->offset;
713720
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
714721
}
@@ -740,7 +747,8 @@ bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
740747
return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
741748
}
742749

743-
static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, int buffer_id, struct tensor_alloc * tensor_alloc) {
750+
static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, struct tensor_alloc * tensor_alloc) {
751+
int buffer_id = tensor_alloc->buffer_id;
744752
assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
745753

746754
if (tensor->view_src != NULL) {
@@ -768,8 +776,8 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
768776
}
769777
}
770778

771-
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct node_alloc * nalloc, struct tensor_alloc * talloc) {
772-
ggml_backend_buffer_type_t buft = galloc->bufts[nalloc->buffer_id];
779+
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
780+
ggml_backend_buffer_type_t buft = talloc->buffer_id != -1 ? galloc->bufts[talloc->buffer_id] : NULL;
773781
size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node);
774782
return talloc->size_max >= node_size;
775783
}
@@ -793,7 +801,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
793801
struct ggml_tensor * node = graph->nodes[i];
794802
struct node_alloc * node_alloc = &galloc->node_allocs[i];
795803

796-
if (!ggml_gallocr_node_needs_realloc(galloc, node, node_alloc, &node_alloc->dst)) {
804+
if (!ggml_gallocr_node_needs_realloc(galloc, node, &node_alloc->dst)) {
797805
#ifndef NDEBUG
798806
fprintf(stderr, "%s: node %s is not valid\n", __func__, node->name);
799807
#endif
@@ -805,7 +813,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
805813
if (src == NULL) {
806814
continue;
807815
}
808-
if (!ggml_gallocr_node_needs_realloc(galloc, src, node_alloc, &node_alloc->src[j])) {
816+
if (!ggml_gallocr_node_needs_realloc(galloc, src, &node_alloc->src[j])) {
809817
#ifndef NDEBUG
810818
fprintf(stderr, "%s: src %d (%s) of node %s is not valid\n", __func__, j, src->name, node->name);
811819
#endif
@@ -846,7 +854,7 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
846854
for (int i = 0; i < graph->n_leafs; i++) {
847855
struct ggml_tensor * leaf = graph->leafs[i];
848856
struct leaf_alloc * leaf_alloc = &galloc->leaf_allocs[i];
849-
ggml_gallocr_init_tensor(galloc, leaf, leaf_alloc->buffer_id, &leaf_alloc->leaf);
857+
ggml_gallocr_init_tensor(galloc, leaf, &leaf_alloc->leaf);
850858
}
851859
// nodes
852860
for (int i = 0; i < graph->n_nodes; i++) {
@@ -857,9 +865,9 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
857865
if (src == NULL) {
858866
continue;
859867
}
860-
ggml_gallocr_init_tensor(galloc, src, node_alloc->buffer_id, &node_alloc->src[j]);
868+
ggml_gallocr_init_tensor(galloc, src, &node_alloc->src[j]);
861869
}
862-
ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst);
870+
ggml_gallocr_init_tensor(galloc, node, &node_alloc->dst);
863871
}
864872

865873
return true;

0 commit comments

Comments
 (0)