
Commit 4aa7e80

ggerganov authored and arthw committed

ggml : hide ggml_object, ggml_cgraph, ggml_hash_set (ggml-org#9408)

* ggml : hide ggml_object, ggml_cgraph, ggml_hash_set

ggml-ci

* ggml : add ggml-impl.h to backends

* ggml : fix compiler warnings

ggml-ci

* ggml : add assert upon adding nodes
1 parent 299e70d commit 4aa7e80

File tree

18 files changed: +170 additions, -129 deletions


examples/benchmark/benchmark-matmult.cpp

Lines changed: 3 additions & 3 deletions
@@ -183,7 +183,7 @@ int main(int argc, char ** argv) {

    ggml_graph_compute_helper(work_buffer, gf, benchmark_params.n_threads);

-   TENSOR_DUMP(gf->nodes[0]);
+   TENSOR_DUMP(ggml_graph_node(gf, 0));

    printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype));

@@ -224,7 +224,7 @@ int main(int argc, char ** argv) {


    // Let's use the F32 result from above as a reference for the quantized multiplication
-   float sum_of_F32_reference = tensor_sum_elements(gf->nodes[0]);
+   float sum_of_F32_reference = tensor_sum_elements(ggml_graph_node(gf, 0));

    printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n");
    printf("=====================================================================================\n");

@@ -252,7 +252,7 @@ int main(int argc, char ** argv) {

    // Check that the matrix multiplication result is in the right ballpark
    // We cannot use the exact value from the F32 multiplication because the quantizuation will be slightly different
-   float sum_of_Q4_result = tensor_sum_elements(gf31->nodes[0]);
+   float sum_of_Q4_result = tensor_sum_elements(ggml_graph_node(gf31, 0));
    float delta = std::abs(sum_of_Q4_result - sum_of_F32_reference);
    float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; // Let's accept an epsilon of 10^-6

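This pattern recurs throughout the examples: because `struct ggml_cgraph` is no longer defined in the public header, `gf->nodes[0]` stops compiling and the tensor is fetched through the new `ggml_graph_node()` accessor instead. A minimal sketch of the migration, assuming a graph that has already been built and computed as in this benchmark (the helper function name is made up for illustration):

#include "ggml.h"

// Hypothetical helper showing the accessor-based replacement for gf->nodes[0].
static float first_node_sum(struct ggml_cgraph * graph) {
    // before this commit: struct ggml_tensor * t = graph->nodes[0];
    struct ggml_tensor * t = ggml_graph_node(graph, 0);

    // sum the elements of an F32 tensor (same idea as tensor_sum_elements above)
    const float * data = (const float *) t->data;
    float sum = 0.0f;
    for (int64_t i = 0; i < ggml_nelements(t); ++i) {
        sum += data[i];
    }
    return sum;
}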
examples/cvector-generator/pca.hpp

Lines changed: 2 additions & 2 deletions
@@ -226,8 +226,8 @@ static ggml_status compute_piter(
    result.eigenvectors.resize(params.n_batch);
    result.distances.resize(params.n_batch);
    // get output nodes
-   for (int i = 0; i < gf->n_nodes; ++i) {
-       auto node = gf->nodes[i];
+   for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
+       auto node = ggml_graph_node(gf, i);
        int iter = -1;
        // find b_tensor (without copying data from device)
        if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) {

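The PCA code shows the iteration form of the same change: both the node count and the node pointers now go through accessors. A short sketch of scanning a computed graph for a tensor by name, assuming names were assigned with `ggml_set_name()` (this helper is illustrative, not part of the commit):

#include "ggml.h"
#include <cstring>

// Returns the first graph node whose name matches, or nullptr.
static struct ggml_tensor * find_node_by_name(struct ggml_cgraph * graph, const char * name) {
    for (int i = 0; i < ggml_graph_n_nodes(graph); ++i) {      // was: graph->n_nodes
        struct ggml_tensor * node = ggml_graph_node(graph, i); // was: graph->nodes[i]
        if (strcmp(node->name, name) == 0) {
            return node;
        }
    }
    return nullptr;
}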
examples/export-lora/export-lora.cpp

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ struct lora_merge_ctx {

        // write data to output file
        {
-           auto result = gf->nodes[gf->n_nodes - 1];
+           auto * result = ggml_graph_node(gf, -1);
            size_t len = ggml_nbytes(result);
            if (read_buf.size() < len) {
                read_buf.resize(len);

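Negative indexing is the new idiom for "last node": per the header comment added in this commit, `ggml_graph_node(gf, -1)` returns `nodes[n_nodes - 1]`. A hedged sketch of reading the final result of a computed graph back to host memory, assuming a backend-allocated graph as in export-lora:

#include "ggml.h"
#include "ggml-backend.h"
#include <cstdint>
#include <vector>

// Copy the last node of a computed graph into a host buffer (illustrative sketch).
static std::vector<uint8_t> read_last_node(struct ggml_cgraph * graph) {
    struct ggml_tensor * result = ggml_graph_node(graph, -1); // was: graph->nodes[graph->n_nodes - 1]

    std::vector<uint8_t> buf(ggml_nbytes(result));
    ggml_backend_tensor_get(result, buf.data(), 0, buf.size());
    return buf;
}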
examples/llava/clip.cpp

Lines changed: 1 addition & 1 deletion
@@ -2449,7 +2449,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
    ggml_backend_graph_compute(ctx->backend, gf);

    // the last node is the embedding tensor
-   struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];
+   struct ggml_tensor * embeddings = ggml_graph_node(gf, -1);

    // copy the embeddings to the location passed by the user
    ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));

examples/llava/llava.cpp

Lines changed: 1 addition & 1 deletion
@@ -184,7 +184,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
    // ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
    ggml_build_forward_expand(gf, flatten);
    ggml_graph_compute_with_ctx(model.ctx, gf, 1);
-   struct ggml_tensor* result = gf->nodes[gf->n_nodes - 1];
+   struct ggml_tensor* result = ggml_graph_node(gf, -1);

    memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
    // append without newline tokens (default behavior in llava_arch when not using unpad ):

ggml/include/ggml.h

Lines changed: 24 additions & 63 deletions
@@ -358,6 +358,7 @@ extern "C" {

    struct ggml_object;
    struct ggml_context;
+   struct ggml_cgraph;

    // NOTE: always add types at the end of the enum to keep backward compatibility
    enum ggml_type {

@@ -575,23 +576,9 @@ extern "C" {
        GGML_TENSOR_FLAG_PARAM = 4,
    };

-   // ggml object
-   struct ggml_object {
-       size_t offs;
-       size_t size;
-
-       struct ggml_object * next;
-
-       enum ggml_object_type type;
-
-       char padding[4];
-   };
-
-   static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
-
    // n-dimensional tensor
    struct ggml_tensor {
-       enum ggml_type type;
+       enum ggml_type type;

        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");

@@ -655,7 +642,7 @@ extern "C" {

    struct ggml_threadpool; // forward declaration, see ggml.c

-   typedef struct ggml_threadpool * ggml_threadpool_t;
+   typedef struct ggml_threadpool * ggml_threadpool_t;

    // the compute plan that needs to be prepared for ggml_graph_compute()
    // since https://github.com/ggerganov/ggml/issues/287

@@ -671,35 +658,6 @@ extern "C" {
        void * abort_callback_data;
    };

-   enum ggml_cgraph_eval_order {
-       GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-       GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-       GGML_CGRAPH_EVAL_ORDER_COUNT
-   };
-
-   typedef uint32_t ggml_bitset_t;
-
-   struct ggml_hash_set {
-       size_t size;
-       ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
-       struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
-   };
-
-   // computation graph
-   struct ggml_cgraph {
-       int size;
-       int n_nodes;
-       int n_leafs;
-
-       struct ggml_tensor ** nodes;
-       struct ggml_tensor ** grads;
-       struct ggml_tensor ** leafs;
-
-       struct ggml_hash_set visited_hash_set;
-
-       enum ggml_cgraph_eval_order order;
-   };
-
    // scratch buffer
    struct ggml_scratch {
        size_t offs;

@@ -2017,8 +1975,6 @@ extern "C" {
    typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
    typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

-   #define GGML_N_TASKS_MAX -1
-
    GGML_API struct ggml_tensor * ggml_map_custom1(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,

@@ -2088,30 +2044,35 @@ extern "C" {
            struct ggml_context * ctx,
            struct ggml_tensor  * tensor);

-
    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);

    // graph allocation in a context
-   GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
-   GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
-   GGML_API struct ggml_cgraph * ggml_graph_dup        (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-   GGML_API struct ggml_cgraph   ggml_graph_view       (struct ggml_cgraph * cgraph, int i0, int i1);
-   GGML_API void                 ggml_graph_cpy        (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-   GGML_API void                 ggml_graph_reset      (struct ggml_cgraph * cgraph); // zero grads
-   GGML_API void                 ggml_graph_clear      (struct ggml_cgraph * cgraph);
+   GGML_API struct ggml_cgraph * ggml_new_graph       (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
+   GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
+   GGML_API struct ggml_cgraph * ggml_graph_dup       (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+   GGML_API void                 ggml_graph_cpy       (struct ggml_cgraph * src, struct ggml_cgraph * dst);
+   GGML_API void                 ggml_graph_reset     (struct ggml_cgraph * cgraph); // zero grads
+   GGML_API void                 ggml_graph_clear     (struct ggml_cgraph * cgraph);
+
+   GGML_API int                   ggml_graph_size   (struct ggml_cgraph * cgraph);
+   GGML_API struct ggml_tensor *  ggml_graph_node   (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+   GGML_API struct ggml_tensor ** ggml_graph_nodes  (struct ggml_cgraph * cgraph);
+   GGML_API int                   ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
+
+   GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);

    GGML_API size_t ggml_graph_overhead(void);
    GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);

-   GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
-   GGML_API void                          ggml_threadpool_params_init  (struct ggml_threadpool_params *p, int n_threads);
-   GGML_API bool                          ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
-   GGML_API struct ggml_threadpool*       ggml_threadpool_new          (struct ggml_threadpool_params * params);
-   GGML_API void                          ggml_threadpool_free         (struct ggml_threadpool * threadpool);
-   GGML_API int                           ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
-   GGML_API void                          ggml_threadpool_pause        (struct ggml_threadpool * threadpool);
-   GGML_API void                          ggml_threadpool_resume       (struct ggml_threadpool * threadpool);
+   GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
+   GGML_API void                          ggml_threadpool_params_init  (struct ggml_threadpool_params * p, int n_threads);
+   GGML_API bool                          ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
+   GGML_API struct ggml_threadpool *      ggml_threadpool_new          (struct ggml_threadpool_params * params);
+   GGML_API void                          ggml_threadpool_free         (struct ggml_threadpool * threadpool);
+   GGML_API int                           ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
+   GGML_API void                          ggml_threadpool_pause        (struct ggml_threadpool * threadpool);
+   GGML_API void                          ggml_threadpool_resume       (struct ggml_threadpool * threadpool);

    // ggml_graph_plan() has to be called before ggml_graph_compute()
    // when plan.work_size > 0, caller must allocate memory for plan.work_data

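After this change the public header exposes only an opaque `struct ggml_cgraph` plus accessors (`ggml_graph_size`, `ggml_graph_node`, `ggml_graph_nodes`, `ggml_graph_n_nodes`, `ggml_graph_add_node`); the eval-order enum, bitset, hash set, and the struct layout itself move into ggml-impl.h. A minimal self-contained sketch of building a small graph and inspecting it purely through the new public API (sizes and tensor shapes are arbitrary choices for the example):

#include "ggml.h"
#include <cstdio>

int main() {
    // context sized generously for two small tensors plus graph metadata
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // the struct is opaque now, so inspection goes through the accessors only
    printf("capacity: %d, nodes: %d\n", ggml_graph_size(gf), ggml_graph_n_nodes(gf));
    for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
        printf("node %d: %s\n", i, ggml_op_name(ggml_graph_node(gf, i)->op));
    }

    ggml_free(ctx);
    return 0;
}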
ggml/src/ggml-blas.cpp

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#include "ggml-impl.h"
 #include "ggml-blas.h"
 #include "ggml-backend-impl.h"

ggml/src/ggml-cann.cpp

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@
 #include <cstring>
 #include <mutex>

+#include "ggml-impl.h"
 #include "ggml-backend-impl.h"
 #include "ggml-cann/aclnn_ops.h"
 #include "ggml-cann/common.h"

ggml/src/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 #include "ggml-cuda.h"
-#include "ggml.h"
+#include "ggml-impl.h"
 #include "ggml-backend-impl.h"

 #include "ggml-cuda/common.cuh"

ggml/src/ggml-impl.h

Lines changed: 32 additions & 0 deletions
@@ -629,8 +629,16 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
 #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
 #endif

+enum ggml_cgraph_eval_order {
+    GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
+    GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
+    GGML_CGRAPH_EVAL_ORDER_COUNT
+};
+
 // bitset

+typedef uint32_t ggml_bitset_t;
+
 static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
 #define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
 #define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)

@@ -656,6 +664,12 @@ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
 #define GGML_HASHSET_FULL ((size_t)-1)
 #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)

+struct ggml_hash_set {
+    size_t size;
+    ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
+    struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
+};
+
 struct ggml_hash_set ggml_hash_set_new(size_t size);
 void ggml_hash_set_free(struct ggml_hash_set * hash_set);

@@ -745,6 +759,24 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
     GGML_ABORT("fatal error");
 }

+// computation graph
+
+struct ggml_cgraph {
+    int size;
+    int n_nodes;
+    int n_leafs;
+
+    struct ggml_tensor ** nodes;
+    struct ggml_tensor ** grads;
+    struct ggml_tensor ** leafs;
+
+    struct ggml_hash_set visited_hash_set;
+
+    enum ggml_cgraph_eval_order order;
+};
+
+struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
+
 #ifdef __cplusplus
 }
 #endif

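Code that still needs the concrete layout (`n_nodes`, `nodes[]`, the hash set, and so on) now gets it from ggml-impl.h rather than the public header, which is why every backend translation unit below swaps `ggml.h` for, or adds, that include. A hedged sketch of what stays possible inside a backend once ggml-impl.h is in scope (the function and its dispatch are placeholders, not the actual backend code):

#include "ggml-impl.h"   // brings in the full struct ggml_cgraph definition

// Illustrative backend-style loop; a real backend dispatches each node to its kernels.
static enum ggml_status example_backend_graph_compute(struct ggml_cgraph * cgraph) {
    for (int i = 0; i < cgraph->n_nodes; ++i) {          // direct member access stays legal here
        struct ggml_tensor * node = cgraph->nodes[i];
        if (ggml_is_empty(node) || node->op == GGML_OP_NONE) {
            continue; // nothing to compute for this node
        }
        // ... dispatch `node` to the kernel for node->op ...
    }
    return GGML_STATUS_SUCCESS;
}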
ggml/src/ggml-kompute.cpp

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#include "ggml.h"
+#include "ggml-impl.h"
 #include "ggml-backend.h"
 #include "ggml-backend-impl.h"
 #include "ggml-kompute.h"

ggml/src/ggml-metal.m

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 #import "ggml-metal.h"

+#import "ggml-impl.h"
 #import "ggml-backend-impl.h"
-#import "ggml.h"

 #import <Foundation/Foundation.h>

@@ -882,7 +882,7 @@ static enum ggml_status ggml_metal_graph_compute(
    // create multiple command buffers and enqueue them
    // then, we encode the graph into the command buffers in parallel

-   const int n_nodes  = gf->n_nodes;
+   const int n_nodes  = gf->n_nodes;
    const int n_cb     = ctx->n_cb;
    const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;

ggml/src/ggml-rpc.cpp

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 #include "ggml-rpc.h"
-#include "ggml.h"
+#include "ggml-impl.h"
 #include "ggml-backend-impl.h"

 #include <cinttypes>

ggml/src/ggml-sycl.cpp

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@
 #include <sycl/half_type.hpp>

 #include "ggml-sycl.h"
-#include "ggml.h"
+#include "ggml-impl.h"
 #include "ggml-backend-impl.h"

 #include "ggml-sycl/backend.hpp"

ggml/src/ggml-vulkan.cpp

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
 #include <memory>
 #include <mutex>

-#include "ggml.h"
+#include "ggml-impl.h"
 #include "ggml-backend-impl.h"

 #include "ggml-vulkan-shaders.hpp"
