Skip to content

Commit 06abf8e

Browse files
authored
ggml : add view_src and view_offs to ggml_tensor for views (#2874)
* ggml : add view_src and view_offs
* update ggml-alloc to use view_src
* update ggml_diag_mask to work correctly with automatic inplace
* exclude other ops that set an inplace flag from automatic inplace
1 parent c03a243 commit 06abf8e

File tree

3 files changed

+105
-170
lines changed

3 files changed

+105
-170
lines changed

ggml-alloc.c

Lines changed: 6 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -321,8 +321,7 @@ bool ggml_allocr_is_measure(struct ggml_allocr * alloc) {
321321
//////////// compute graph allocator
322322

323323
static bool ggml_is_view(struct ggml_tensor * t) {
324-
return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
325-
t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
324+
return t->view_src != NULL;
326325
}
327326

328327
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
@@ -340,36 +339,13 @@ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml
340339
return true;
341340
}
342341

343-
static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
344-
switch (t->op) {
345-
case GGML_OP_PERMUTE:
346-
case GGML_OP_RESHAPE:
347-
case GGML_OP_TRANSPOSE:
348-
case GGML_OP_VIEW:
349-
return t->src[0];
350-
case GGML_OP_CPY:
351-
return t->src[1];
352-
default:
353-
return NULL;
354-
}
355-
}
356-
357-
static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
358-
struct ggml_tensor * parent = t;
359-
do {
360-
parent = get_view_parent(parent);
361-
} while (ggml_is_view(parent));
362-
return parent;
363-
}
364-
365342
static bool ggml_op_can_inplace(enum ggml_op op) {
366343
switch (op) {
367344
case GGML_OP_SCALE:
368345
case GGML_OP_DIAG_MASK_ZERO:
369346
case GGML_OP_DIAG_MASK_INF:
370347
case GGML_OP_ADD:
371348
case GGML_OP_ADD1:
372-
case GGML_OP_ACC:
373349
case GGML_OP_SUB:
374350
case GGML_OP_MUL:
375351
case GGML_OP_DIV:
@@ -379,7 +355,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
379355
case GGML_OP_UNARY:
380356
case GGML_OP_ROPE:
381357
case GGML_OP_RMS_NORM:
382-
case GGML_OP_SET:
383358
case GGML_OP_SOFT_MAX:
384359
case GGML_OP_CONT:
385360
return true;
@@ -393,24 +368,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
393368
struct hash_node * ht = alloc->hash_table;
394369
if (node->data == NULL) {
395370
if (ggml_is_view(node)) {
396-
size_t offset;
397-
switch(node->op) {
398-
case GGML_OP_VIEW:
399-
memcpy(&offset, node->op_params, sizeof(size_t));
400-
node->data = (char *) node->src[0]->data + offset;
401-
break;
402-
case GGML_OP_PERMUTE:
403-
case GGML_OP_RESHAPE:
404-
case GGML_OP_TRANSPOSE:
405-
node->data = node->src[0]->data;
406-
break;
407-
case GGML_OP_CPY:
408-
node->data = node->src[1]->data;
409-
break;
410-
default:
411-
GGML_ASSERT(!"unknown view op");
412-
break;
413-
}
371+
assert(node->view_src->data != NULL);
372+
node->data = (char *)node->view_src->data + node->view_offs;
414373
} else {
415374
// see if we can reuse a parent's buffer (inplace)
416375
if (ggml_op_can_inplace(node->op)) {
@@ -430,7 +389,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
430389
struct hash_node * p_hn = hash_get(ht, parent);
431390
if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
432391
if (ggml_is_view(parent)) {
433-
struct ggml_tensor * view_src = get_view_source(parent);
392+
struct ggml_tensor * view_src = parent->view_src;
434393
struct hash_node * view_src_hn = hash_get(ht, view_src);
435394
if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
436395
// TODO: the offset of the view parent must be kept to ensure that the op doesn't overwrite
@@ -472,7 +431,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
472431
struct ggml_tensor * node = gf->nodes[i];
473432

474433
if (ggml_is_view(node)) {
475-
struct ggml_tensor * view_src = get_view_source(node);
434+
struct ggml_tensor * view_src = node->view_src;
476435
hash_get(ht, view_src)->n_views += 1;
477436
}
478437

@@ -557,7 +516,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
557516

558517
if (p_hn->n_children == 0 && p_hn->n_views == 0) {
559518
if (ggml_is_view(parent)) {
560-
struct ggml_tensor * view_src = get_view_source(parent);
519+
struct ggml_tensor * view_src = parent->view_src;
561520
struct hash_node * view_src_hn = hash_get(ht, view_src);
562521
view_src_hn->n_views -= 1;
563522
AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);

0 commit comments

Comments
 (0)